from sklearn.tree import DecisionTreeRegressor
# Fit a decision-tree regressor that predicts `DataValue` from the race
# stratification columns, the state (LocationAbbr) columns and `YearStart`,
# then store its predictions on both the `train` and `test` frames.
loc_race_tree_model = DecisionTreeRegressor()

# The feature list is defined once and reused for train and test so the two
# design matrices can never drift apart (column set or column order).
# NOTE(review): these look like one-hot indicator columns (e.g. produced by
# pd.get_dummies) -- confirm against the preprocessing step.
_race_columns = [
    'Stratification_American Indian or Alaska Native',
    'Stratification_Asian or Pacific Islander',
    'Stratification_Black, non-Hispanic',
    'Stratification_Hispanic',
    'Stratification_White, non-Hispanic',
]
_location_abbrs = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
    'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN',
    'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH',
    'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA',
    'WI', 'WV', 'WY',
]
# Same columns, in the same order, as the original hand-written lists.
loc_race_tree_features = (
    _race_columns
    + ['LocationAbbr_' + abbr for abbr in _location_abbrs]
    + ['YearStart']
)

y_train = train['DataValue']
# Fix: the test target must come from `test`; it was previously read from
# `train`, which made y_test a duplicate of y_train.
# NOTE(review): assumes `test` carries a 'DataValue' column -- confirm.
y_test = test['DataValue']

X_train = train[loc_race_tree_features]
X_test = test[loc_race_tree_features]

loc_race_tree_model.fit(X_train, y_train)

# Predictions are written back onto the source frames as a new column.
train["loc_race_tree_pred"] = loc_race_tree_model.predict(X_train)
test["loc_race_tree_pred"] = loc_race_tree_model.predict(X_test)