import pandas as pd
# --- Aspect factor: load the raw export and reshape it to ID / X / Y / aspect ---
aspect = pd.read_csv('/work/small_aspect.csv')
aspect.head()  # bare preview expression; its result is not used by the script

# Remove the two columns that are not carried forward.
aspect = aspect.drop(columns=['ASPECT3_TIF', 'Rowid_'])
aspect.head()

# Positional rename: this requires exactly three columns to be left after the drop.
aspect.columns = ['X', 'Y', 'aspect']

# Expose the row index as an explicit ID column (appended as the last column).
aspect = aspect.assign(ID=aspect.index)
aspect.head()

# Put ID first, then write the cleaned table without the pandas index.
aspect = aspect.loc[:, ['ID', 'X', 'Y', 'aspect']]
aspect.to_csv('/work/aspect.csv', index=None)
# --- DEM factor: load the raw export and reshape it to ID / X / Y / DEM ---
dem = pd.read_csv('/work/small_dem.csv')
dem.head()  # bare preview expression; its result is not used by the script

# Remove the two columns that are not carried forward.
dem = dem.drop(columns=['Rowid_', 'DEM'])

# Positional rename: this requires exactly three columns to be left after the drop.
dem.columns = ['X', 'Y', 'DEM']

# Expose the row index as an explicit ID column (appended as the last column).
dem = dem.assign(ID=dem.index)
dem.head()

# Put ID first, then write the cleaned table without the pandas index.
dem = dem.loc[:, ['ID', 'X', 'Y', 'DEM']]
dem.head()
dem.to_csv('/work/dem.csv', index=None)
# --- LULC factor: load the raw export and reshape it to ID / X / Y / LULC ---
lulc = pd.read_csv('/work/small_lulc.csv')
lulc.head()  # bare preview expression; its result is not used by the script

# Remove the two columns that are not carried forward.
lulc = lulc.drop(columns=['Rowid_', 'LULC1'])

# Expose the row index as an explicit ID column.
lulc = lulc.assign(ID=lulc.index)
lulc.head()

# Keep only the four columns of interest, ID first, then give the value
# column ('LULC1_1') its final name.
lulc = lulc.loc[:, ['ID', 'X', 'Y', 'LULC1_1']]
lulc.columns = ['ID', 'X', 'Y', 'LULC']
lulc.head()

# Write the cleaned table without the pandas index.
lulc.to_csv('/work/lulc.csv', index=None)
# --- Combine the cleaned factor tables into a single prediction table ---
from functools import reduce

# Reload every factor from the cleaned CSV files written earlier in this script.
aspect = pd.read_csv('/work/aspect.csv')
dem = pd.read_csv('/work/dem.csv')
lulc = pd.read_csv('/work/lulc.csv')

# Every factor table has its own X/Y columns. Merging them as-is on ID makes
# pandas emit suffixed duplicates (X_x, Y_x, X_y, Y_y) that then have to be
# dropped by hard-coded names, which only works for exactly three tables.
# Instead, strip X/Y from every table except the last one before merging, so
# a single X/Y pair (the last table's, as before) reaches the result and
# adding more factor tables to the list needs no further changes.
factors = [aspect, dem, lulc]
frames = [frame.drop(columns=['X', 'Y']) for frame in factors[:-1]]
frames.append(factors[-1])

# Outer-join all tables on the shared ID column.
df_prediction = reduce(
    lambda left, right: pd.merge(left, right, on='ID', how='outer'),
    frames,
)
df_prediction.head()

# Arrange the columns in the order used by the training data frame (part I).
# Only three factors are used in this example, so the column set is a subset
# of the training frame, which includes all factors.
df_prediction = df_prediction[['ID', 'X', 'Y', 'aspect', 'DEM', 'LULC']]
df_prediction.head()

# Save the table and read it back from the SAME location. The read-back
# previously used the relative name 'df_prediction.csv', which only resolves
# to the file just written when the working directory happens to be /work.
prediction_csv = '/work/df_prediction.csv'
df_prediction.to_csv(prediction_csv, index=False)  # index is documented as a bool
df_prediction = pd.read_csv(prediction_csv)