import collections

import pandas as pd
# Load the incident table and take a first look at its size, columns and rows
df = pd.read_csv('stanford_msa.csv')
df.shape
df.columns
df.head(20)
# Extract year from date
df['Year'] = pd.DatetimeIndex(df['Date']).year
# Get a sense of the distribution of the years of the incidents.
# This must run after `Year` has been derived from `Date` just above.
collections.Counter(df['Year'])
# Fields that play no part in the analysis
unused_fields = [
    'CaseID', 'Title', 'Location', 'Date', 'Shooter Sex',
    'Relationship to Incident Location', 'Targeted Victim/s - General',
    'Latitude', 'Longitude', 'City', 'Number of Victim Fatalities',
    'Number of Victims Injured', 'Description', 'Day of Week',
    'Date - Detailed', 'Shooter Name', 'Shooter Age(s)',
    'Type of Gun - Detailed', 'Type of Gun - General',
    'Fate of Shooter at the scene', 'Shooter\'s Cause of Death',
    'Targeted Victim/s - Detailed', 'Possible Motive - Detailed',
    'Possible Motive - General', 'History of Mental Illness - Detailed',
    'Shooter Race', 'Place Type', 'Data Source 1', 'Data Source 2',
    'Data Source 3', 'Data Source 4', 'Data Source 5', 'Data Source 6',
    'Data Source 7', 'Military Experience',
]
df_select = df.drop(columns=unused_fields)
# Fields that remain
df_select.columns
# State and year are the keys used to join the two datasets, so rows
# missing either of them cannot be matched and are discarded
df_select = df_select.dropna(subset=['State', 'Year'])
# Take `Year` out of the frame and put it back at position 1
year = df_select.pop('Year')
df_select.insert(1, 'Year', year)
# Provision data only covers 1991 to 2016 (both inclusive); keep those years
within_law_years = df_select['Year'].between(1991, 2016)
df_select = df_select[within_law_years]
# We are left with 274 rows, which is not too bad
# Preview of the data with 'Unknown' shown as 0 (display only, not assigned)
df_select.replace('Unknown', 0).head(20)
# Mean shooter age is 31.7 years.
# Entries that are not numbers (e.g. 'Unknown') are coerced to NaN so they are
# ignored by the mean instead of breaking the float conversion.
pd.to_numeric(df_select['Average Shooter Age'], errors='coerce').mean()
df_select.sort_values(by=['State','Year']).head(20)
# Data cleaning: map every textual placeholder to a number in one pass.
# All "unknown" spellings become the integer 0 (not the string '0'), so the
# cleaned columns do not end up mixing strings and integers.
df_select = df_select.replace({
    '32\r+ Unknown': 32,
    'No': 0,
    'no': 0,
    'Yes': 1,
    'Unknown': 0,
    'unknown': 0,
})
# 70 cases of shooters with history of mental illness.
# Counted after cleaning, once 'Yes'/'No' have become 1/0 and `> 0` is meaningful.
df_select[df_select['History of Mental Illness - General'] > 0].count()
# Convert age into a binary variable indicating whether shooter is 21 years or younger
# NOTE(review): ages cleaned from 'Unknown' to 0 also satisfy `<= 21` here — confirm
# whether unknown-age incidents should be excluded before this step.
df_select['Shooter Age <= 21'] = (
    df_select['Average Shooter Age'].astype(float) <= 21
).astype(int)
# Convert the gun-count columns to integers. pd.to_numeric parses numeric
# strings left over from cleaning, so no per-row loop over a hard-coded number
# of rows is needed.
for count_col in ['Total Number of Guns', 'Number of Automatic Guns', 'Number of Semi-Automatic Guns']:
    df_select[count_col] = pd.to_numeric(df_select[count_col]).astype(int)
# `School Related` must be derived from its own column (it used to be
# overwritten with `Total Number of Guns`). Labels that are not numbers, such
# as the stray 'Killed' handled later in this file, are treated as 0.
df_select['School Related'] = (
    pd.to_numeric(df_select['School Related'], errors='coerce').fillna(0).astype(int)
)
# Sum the per-incident statistics within every (state, year) pair
group_keys = ['State', 'Year']
summed_statistics = [
    'Total Number of Fatalities',
    'Total Number of Victims',
    'Total Number of Guns',
    'Number of Automatic Guns',
    'Number of Semi-Automatic Guns',
    'School Related',
    'History of Mental Illness - General',
    'Shooter Age <= 21',
]
df_states = df_select.groupby(group_keys)[summed_statistics].sum().reset_index()
# Combined count of semi-automatic and automatic guns
semi_automatic = df_states['Number of Semi-Automatic Guns']
df_states['Number of (Semi-)Automatic Guns'] = semi_automatic.add(df_states['Number of Automatic Guns'])
df_states.columns
# Convert absolute values into a binary indicator of whether the value is
# above the column median or not. The first two columns (State, Year) are
# skipped. The median is computed once per column and compared against the
# whole column at once, rather than being recomputed for every row.
df_states_median = pd.DataFrame()
for col in df_states.columns[2:]:
    col_median = df_states[col].median()
    df_states_median[col] = (df_states[col].astype(float) > col_median).astype(int)
NameError: name 'df_states' is not defined
df_states_median
# Load the law-provisions table and inspect it
laws = pd.read_csv('provisions_data.csv')
print(laws.shape)
# recorded output of the print above: (1350, 136)
laws['year'].unique()
laws.head(7)
laws.sort_values(['state', 'year']).head(7)
laws.year.unique()
# Keep provisions up to and including 2016
up_to_2016 = laws['year'].le(2016)
laws = laws[up_to_2016]
# Keep only incidents whose average shooter age is not the literal string 'Unknown'.
# NOTE(review): earlier cleaning steps in this file already replace 'Unknown',
# so in file order this filter may no longer match anything — confirm the
# intended execution order of these cells.
df_select = df_select[df_select['Average Shooter Age'] != 'Unknown']
# Renumber the rows. drop=True is not passed, so the previous index is kept
# as an extra column.
df_select = df_select.reset_index()
# Remove the row labelled 116, then renumber again (adding a second saved-index column).
# NOTE(review): the row label is hard-coded and the reason is not stated here;
# `index=` selects row labels, so the extra axis=1 presumably has no effect — confirm.
df_select = df_select.drop(index=116, axis=1).reset_index()
# For every incident, collect the law provisions of its state in its year.
# The matches are gathered in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every call.
matched_laws = []
for _, row in df_select.iterrows():
    state = row['State']
    year = row['Year']
    match = laws[(laws['state'] == state) & (laws['year'] == year)]
    if not match.empty:
        matched_laws.append(match)
if matched_laws:
    laws_by_incident = pd.concat(matched_laws)
else:
    # Nothing matched: keep an empty frame that still has the law columns
    laws_by_incident = pd.DataFrame(columns=laws.columns)
# The two row counts should agree; an incident without a matching
# (state, year) law row contributes nothing to laws_by_incident.
print(df_select.shape)
print(laws_by_incident.shape)
(243, 16)
(243, 134)
# The join keys are not features
laws_by_incident = laws_by_incident.drop(columns=['state', 'year'])
# Map the textual placeholders to numbers, then discard the `Class` field
placeholder_codes = {'Unknown': 0, 'Yes': 1, 'No': 0}
df_select = df_select.replace(placeholder_codes)
df_select = df_select.drop(columns=['Class'])
# Incident-level table without the join keys, with the remaining odd labels cleaned
incidents = df_select.drop(columns=['State', 'Year'])
incidents = incidents.replace({'32\r+ Unknown': 32, 'unknown': 0})
incidents
laws_by_incident
# For every (state, year) aggregate, collect the matching law provisions.
# The matches are gathered in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every call.
state_year_laws = []
for _, row in df_states.iterrows():
    state = row['State']
    year = row['Year']
    match = laws[(laws['state'] == state) & (laws['year'] == year)]
    if not match.empty:
        state_year_laws.append(match)
if state_year_laws:
    laws_by_state = pd.concat(state_year_laws)
else:
    # Nothing matched: keep an empty frame that still has the law columns
    laws_by_state = pd.DataFrame(columns=laws.columns)
# The join keys are not features
laws_by_state = laws_by_state.drop(['state','year'],axis=1)
# Targets: the raw sums and their above-median indicators. The two separate
# gun-type counts are left out of both tables.
states = df_states.drop(['State','Year','Number of Automatic Guns','Number of Semi-Automatic Guns'], axis=1)
states_median = df_states_median.drop(['Number of Automatic Guns','Number of Semi-Automatic Guns'], axis=1)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso, LassoCV
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import balanced_accuracy_score
from sklearn import metrics
laws_by_incident.shape
# Incident-level targets: young shooter, mental-illness history, school related
y_age, y_mental, y_school = (
    incidents[target]
    for target in ('Shooter Age <= 21', 'History of Mental Illness - General', 'School Related')
)
def _fit_and_report(model, X, y, binarize_predictions=False):
    """Fit ``model`` on a train split of (X, y), print test metrics, return ``model.coef_``.

    The data are split 75/25 with ``random_state=0``. Two numbers are printed:
    ``model.score`` on the test split, then the balanced accuracy of the test
    predictions.

    When ``binarize_predictions`` is true the model is a regressor, whose
    continuous output ``balanced_accuracy_score`` rejects. If the test labels
    are all 0/1 the predictions are cut at 0.5 first; for any other target the
    balanced accuracy is skipped because it is not defined.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))
    y_pred = model.predict(X_test)
    if binarize_predictions:
        if not set(y_test) <= {0, 1}:
            # Continuous target: only the score printed above is meaningful
            return model.coef_
        y_pred = (y_pred > 0.5).astype(int)
    print(balanced_accuracy_score(y_test, y_pred))
    return model.coef_


def do_lin_regr(X, y):
    """Ordinary least squares on (X, y); prints test metrics and returns the coefficients."""
    return _fit_and_report(LinearRegression(), X, y, binarize_predictions=True)


def do_ridge_regr(X, y, alpha=10):
    """Ridge regression with penalty ``alpha``; prints test metrics and returns the coefficients."""
    return _fit_and_report(Ridge(alpha=alpha), X, y, binarize_predictions=True)


def do_lasso_regr(X, y, alpha=10):
    """Lasso regression with penalty ``alpha``; prints test metrics and returns the coefficients.

    ``LassoCV()`` was tried as an alternative estimator here.
    """
    return _fit_and_report(Lasso(alpha=alpha), X, y, binarize_predictions=True)


def do_log_regr(X, y, max_iter=1000):
    """Logistic regression on (X, y); prints test metrics and returns the coefficients.

    ``max_iter`` defaults to 1000 because the solver did not converge within
    scikit-learn's default iteration limit on this data.
    """
    return _fit_and_report(LogisticRegression(max_iter=max_iter), X, y)
import numpy.ma as ma
# Age: can the full set of laws predict a shooter aged 21 or younger?
coefs = do_log_regr(X=laws_by_incident, y=y_age)
NameError: name 'do_log_regr' is not defined
# Mental: same model with the mental-illness history target
do_log_regr(X=laws_by_incident, y=y_mental)
0.7868852459016393
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
# Two stray labels remain in the school target; both are coded as 0
y_school = y_school.replace({'Killed': 0, 'no': 0})
# School
do_log_regr(X=laws_by_incident, y=y_school)
0.8688524590163934
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
states.shape
# Statistics to model. `states_median` already excludes the two separate
# gun-type counts, so its columns are used directly instead of deleting
# positions 3 and 4 from `df_states_median` by index.
median_cols = states_median.columns
# Do regression for each column, i.e. statistic, in a for loop
for col in median_cols:
    print(col)
    do_log_regr(laws_by_state, states_median[col])
Total Number of Fatalities
0.5909090909090909
Total Number of Victims
0.4772727272727273
Total Number of Guns
0.45454545454545453
School Related
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
0.45454545454545453
History of Mental Illness - General
0.5681818181818182
Shooter Age <= 21
0.5
Number of (Semi-)Automatic Guns
0.6590909090909091
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
# Lasso (alpha=5) on the summed fatalities per state and year
coefs = do_lasso_regr(laws_by_state, states['Total Number of Fatalities'],5)
# Laws with a positive coefficient. A plain boolean array always has one entry
# per column, whereas the mask of a masked array can collapse to a single
# scalar when no coefficient is positive.
mask = coefs > 0
laws_by_state.columns[mask]
0.008496113410434214
def select_features(X, y, k=10):
    """Keep the ``k`` columns of ``X`` that score highest on the chi-squared test against ``y``.

    Prints the names of the kept columns and returns the reduced feature
    matrix produced by the selector.
    """
    selector = SelectKBest(score_func=chi2, k=k)
    selector.fit(X, y)
    kept = selector.get_support()
    print(X.columns[kept])
    return selector.transform(X)
# Ten laws most associated with the school target
select_features(X=laws_by_incident, y=y_school, k=10)
Index(['cap14', 'cap16', 'capaccess', 'capuses', 'dvro', 'dvrodating',
'liability', 'nosyg', 'opencarrypermith', 'lawtotal'],
dtype='object')
# Age target: keep the ten best-scoring laws, then refit the classifier on them
X_age_new = select_features(X=laws_by_incident, y=y_age, k=10)
do_log_regr(X=X_age_new, y=y_age)
Index(['age21longgunpossess', 'capunloaded', 'incidentall', 'inspection',
'liability', 'locked', 'loststolen', 'opencarryh', 'personalized',
'residential'],
dtype='object')
0.7540983606557377
# Mental-illness target: keep the ten best-scoring laws (the default k), then refit
X_mental_new = select_features(X=laws_by_incident, y=y_mental)
do_log_regr(X=X_mental_new, y=y_mental)
Index(['ammrecords', 'cap18', 'dealer', 'immunity', 'opencarryh',
'preemptionnarrow', 'registration', 'reportall', 'training',
'lawtotal'],
dtype='object')
0.7704918032786885
# School target: keep the fifteen best-scoring laws, then refit
X_school_new = select_features(X=laws_by_incident, y=y_school, k=15)
do_log_regr(X=X_school_new, y=y_school)
Index(['cap14', 'cap16', 'capaccess', 'capuses', 'collegeconcealed', 'dvro',
'dvrodating', 'dvrosurrendernoconditions', 'expartedating', 'liability',
'magazinepreowned', 'mcdv', 'nosyg', 'opencarrypermith', 'lawtotal'],
dtype='object')
0.8360655737704918
# For every state-level statistic, select the 15 and then the 10 best-scoring
# laws and fit a classifier on each selection
for col in median_cols:
    print("********" + col)
    y = states_median[col]
    for k in (15, 10):
        X_new = select_features(laws_by_state, y, k)
        print(f'k={k}')
        do_log_regr(X_new, y)
Total Number of Fatalities
Index(['cap14', 'capuses', 'dvrosurrender', 'dvrosurrenderdating',
'dvrosurrendernoconditions', 'expartesurrender',
'expartesurrendernoconditions', 'junkgun', 'lockd', 'lockp',
'lockstandards', 'mcdvsurrender', 'mcdvsurrenderdating',
'mcdvsurrendernoconditions', 'lawtotal'],
dtype='object')
k=15
0.5681818181818182
Index(['cap14', 'capuses', 'dvrosurrender', 'dvrosurrenderdating',
'dvrosurrendernoconditions', 'lockstandards', 'mcdvsurrender',
'mcdvsurrenderdating', 'mcdvsurrendernoconditions', 'lawtotal'],
dtype='object')
k=10
0.6136363636363636
Total Number of Victims
Index(['ammrecords', 'cap14', 'cap16', 'capuses', 'dvro', 'dvrodating',
'dvrosurrender', 'dvrosurrenderdating', 'dvrosurrendernoconditions',
'expartedating', 'gunshow', 'mcdvsurrenderdating', 'microstamp',
'universal', 'lawtotal'],
dtype='object')
k=15
0.5227272727272727
Index(['ammrecords', 'dvro', 'dvrodating', 'dvrosurrenderdating',
'dvrosurrendernoconditions', 'expartedating', 'gunshow',
'mcdvsurrenderdating', 'universal', 'lawtotal'],
dtype='object')
k=10
0.5227272727272727
Total Number of Guns
Index(['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
'expartesurrenderdating', 'gunshow', 'invoutpatient', 'lockd',
'lockstandards', 'mcdvremovalallowed', 'mcdvsurrender',
'mcdvsurrendernoconditions', 'recordsdealerh', 'lawtotal'],
dtype='object')
k=15
0.5454545454545454
Index(['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
'invoutpatient', 'lockstandards', 'mcdvremovalallowed',
'mcdvsurrendernoconditions', 'lawtotal'],
dtype='object')
k=10
0.5909090909090909
School Related
Index(['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
'expartesurrenderdating', 'gunshow', 'invoutpatient', 'lockd',
'lockstandards', 'mcdvremovalallowed', 'mcdvsurrender',
'mcdvsurrendernoconditions', 'recordsdealerh', 'lawtotal'],
dtype='object')
k=15
0.5454545454545454
Index(['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
'invoutpatient', 'lockstandards', 'mcdvremovalallowed',
'mcdvsurrendernoconditions', 'lawtotal'],
dtype='object')
k=10
0.5909090909090909
History of Mental Illness - General
Index(['age21longgunsale', 'age21longgunsaled', 'ammrecords', 'ammrestrict',
'cap18', 'exparte', 'expartesurrender', 'mcdvremovalallowed',
'mcdvremovalrequired', 'mcdvsurrenderdating',
'mcdvsurrendernoconditions', 'opencarryh', 'personalized',
'registration', 'lawtotal'],
dtype='object')
k=15
0.6818181818181818
Index(['ammrecords', 'ammrestrict', 'cap18', 'exparte', 'expartesurrender',
'mcdvremovalallowed', 'mcdvsurrenderdating', 'opencarryh',
'registration', 'lawtotal'],
dtype='object')
k=10
0.6818181818181818
Shooter Age <= 21
Index(['cap14', 'capuses', 'incidentall', 'liability', 'loststolen',
'mcdvdating', 'opencarryh', 'opencarryl', 'personalized',
'preemptionbroad', 'preemptionnarrow', 'showing', 'violenth', 'waiting',
'lawtotal'],
dtype='object')
k=15
0.6136363636363636
Index(['capuses', 'incidentall', 'loststolen', 'mcdvdating', 'opencarryh',
'personalized', 'preemptionnarrow', 'showing', 'waiting', 'lawtotal'],
dtype='object')
k=10
0.6136363636363636
Number of (Semi-)Automatic Guns
Index(['ammrecords', 'assault', 'assaultlist', 'assaultregister',
'assaulttransfer', 'gunshow', 'lockd', 'lockp', 'lockstandards',
'magazine', 'mcdvsurrendernoconditions', 'microstamp', 'onefeature',
'tenroundlimit', 'lawtotal'],
dtype='object')
k=15
0.7727272727272727
Index(['assault', 'assaultlist', 'assaultregister', 'assaulttransfer', 'lockp',
'lockstandards', 'microstamp', 'onefeature', 'tenroundlimit',
'lawtotal'],
dtype='object')
k=10
0.7727272727272727
import collections

# The ten laws kept by the k=10 selection for each state-level statistic,
# one inner list per statistic
top10_by_statistic = [
    # Number of (Semi-)Automatic Guns
    ['assault', 'assaultlist', 'assaultregister', 'assaulttransfer', 'lockp',
     'lockstandards', 'microstamp', 'onefeature', 'tenroundlimit', 'lawtotal'],
    # Shooter Age <= 21
    ['capuses', 'incidentall', 'loststolen', 'mcdvdating', 'opencarryh',
     'personalized', 'preemptionnarrow', 'showing', 'waiting', 'lawtotal'],
    # History of Mental Illness - General
    ['ammrecords', 'ammrestrict', 'cap18', 'exparte', 'expartesurrender',
     'mcdvremovalallowed', 'mcdvsurrenderdating', 'opencarryh',
     'registration', 'lawtotal'],
    # School Related
    ['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
     'invoutpatient', 'lockstandards', 'mcdvremovalallowed',
     'mcdvsurrendernoconditions', 'lawtotal'],
    # Total Number of Guns
    ['ammlicense', 'danger', 'dvrosurrender', 'exparte', 'expartesurrender',
     'invoutpatient', 'lockstandards', 'mcdvremovalallowed',
     'mcdvsurrendernoconditions', 'lawtotal'],
    # Total Number of Victims
    ['ammrecords', 'dvro', 'dvrodating', 'dvrosurrenderdating',
     'dvrosurrendernoconditions', 'expartedating', 'gunshow',
     'mcdvsurrenderdating', 'universal', 'lawtotal'],
    # Total Number of Fatalities
    ['cap14', 'capuses', 'dvrosurrender', 'dvrosurrenderdating',
     'dvrosurrendernoconditions', 'lockstandards', 'mcdvsurrender',
     'mcdvsurrenderdating', 'mcdvsurrendernoconditions', 'lawtotal'],
]
# Flatten in the order given, then count how often each law was selected
top10 = [law for selected in top10_by_statistic for law in selected]
top10_laws_count = collections.Counter(top10)
top10_laws_count
# Handpicked FS: the age-related provisions plus the total law count
age_manual_features = [
    'age21handgunsale',
    'age18longgunsale',
    'age21longgunsaled',
    'age21longgunsale',
    'age21handgunpossess',
    'age18longgunpossess',
    'age21longgunpossess',
    'lawtotal',
]
X_age_manual = laws_by_incident.loc[:, age_manual_features]
do_log_regr(X=X_age_manual, y=y_age)
0.7377049180327869
# Automatic + handpicked FS: handpicked age provisions first, then the
# automatically selected laws that are not already among them
age_handpicked = [
    'age21handgunsale', 'age18longgunsale', 'age21longgunsaled',
    'age21longgunsale', 'age21handgunpossess', 'age18longgunpossess',
    'age21longgunpossess', 'lawtotal',
]
age_automatic = [
    'capunloaded', 'incidentall', 'inspection', 'liability', 'locked',
    'loststolen', 'opencarryh', 'personalized', 'residential',
]
X_age = laws_by_incident.loc[:, age_handpicked + age_automatic]
do_log_regr(X=X_age, y=y_age)
0.7213114754098361
# Handpicked features for the mental-illness target
mental_manual_features = [
    'lawtotal',
    'invcommitment',
    'invoutpatient',
    'danger',
    'drugmisdemeanor',
    'alctreatment',
    'alcoholism',
    'universal',
    'universalh',
]
X_mental_manual = laws_by_incident.loc[:, mental_manual_features]
do_log_regr(X=X_mental_manual, y=y_mental)
0.8032786885245902
# Automatically selected laws followed by the handpicked ones for the
# mental-illness target
mental_automatic = [
    'ammrecords', 'cap18', 'dealer', 'immunity', 'opencarryh',
    'preemptionnarrow', 'registration', 'reportall', 'training', 'lawtotal',
]
mental_handpicked = [
    'invcommitment', 'invoutpatient', 'danger', 'drugmisdemeanor',
    'alctreatment', 'alcoholism', 'universal', 'universalh',
]
X_mental = laws_by_incident.loc[:, mental_automatic + mental_handpicked]
do_log_regr(X=X_mental, y=y_mental)
0.8360655737704918
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
# Handpicked features for the school target
school_manual_features = [
    'college',
    'collegeconcealed',
    'elementary',
    'opencarryh',
    'opencarryl',
    'opencarrypermith',
    'opencarrypermitl',
    'lawtotal',
]
X_school_manual = laws_by_incident.loc[:, school_manual_features]
do_log_regr(X=X_school_manual, y=y_school)
0.9016393442622951
# Handpicked school features followed by the automatically selected laws
school_handpicked = [
    'college', 'collegeconcealed', 'elementary', 'opencarryh', 'opencarryl',
    'opencarrypermith', 'opencarrypermitl',
]
school_automatic = [
    'cap14', 'cap16', 'capaccess', 'capuses', 'collegeconcealed', 'dvro',
    'dvrodating', 'dvrosurrendernoconditions', 'expartedating', 'liability',
    'magazinepreowned', 'mcdv', 'nosyg', 'opencarrypermith', 'lawtotal',
]
# 'collegeconcealed' and 'opencarrypermith' occur in both lists. dict.fromkeys
# keeps the first occurrence of each name in order, so no law enters the
# feature matrix as two identical columns.
school_features = list(dict.fromkeys(school_handpicked + school_automatic))
X_school = laws_by_incident[school_features]
do_log_regr(X_school,y_school)
0.8524590163934426
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
# Re-select the ten best-scoring columns from the combined age features and refit
X_age_new = select_features(X=X_age, y=y_age, k=10)
do_log_regr(X=X_age_new, y=y_age)
Index(['age21longgunpossess', 'capunloaded', 'incidentall', 'inspection',
'liability', 'locked', 'loststolen', 'opencarryh', 'personalized',
'residential'],
dtype='object')
0.7540983606557377
0.5513888888888889
# Re-select the fifteen best-scoring columns from the combined mental-illness features and refit
X_mental_new = select_features(X=X_mental, y=y_mental, k=15)
do_log_regr(X=X_mental_new, y=y_mental)
Index(['ammrecords', 'cap18', 'dealer', 'immunity', 'opencarryh',
'preemptionnarrow', 'registration', 'reportall', 'training', 'lawtotal',
'invoutpatient', 'drugmisdemeanor', 'alctreatment', 'alcoholism',
'universalh'],
dtype='object')
0.8360655737704918
0.6777210884353742
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
# Re-select the fifteen best-scoring columns from the combined school features and refit
X_school_new = select_features(X=X_school, y=y_school, k=15)
do_log_regr(X=X_school_new, y=y_school)
Index(['collegeconcealed', 'opencarrypermith', 'cap14', 'cap16', 'capaccess',
'capuses', 'collegeconcealed', 'dvro', 'dvrodating',
'dvrosurrendernoconditions', 'expartedating', 'liability', 'nosyg',
'opencarrypermith', 'lawtotal'],
dtype='object')
0.819672131147541
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)