import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from pandas import DataFrame, Series
import seaborn as sns
import pandas as pd
# Read the admissions data set from CSV into a DataFrame.
df = pd.read_csv('admit_data.csv')
# Bare expression kept from the notebook cell (presumably there to display the frame).
df
#
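# Displayed output (rows 0-9; each column header shows dtype and value range):
#
#      English (float64, 16.0 - 61.0)    Math (float64, 42.0 - 86.0)
# 0    35                                54.625
# 1    26.5                              68.5
# 2    41                                57
# 3    21.5                              42
# 4    46                                84
# 5    39.375                            78.5
# 6    33                                56
# 7    41.875                            81.125
# 8    30                                67
# 9    31                                55.5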
# Inspect the column labels exactly as loaded (some contain spaces).
df.columns
# Overwrite the labels with space-free names, then check the result.
df.columns = ['English', 'Math', 'Outcome']
df.columns
#
# Add an 'Admit' column holding "yes"/"no", derived from the 0/1 codes in
# 'Outcome'. A dictionary passed to .map is the simplest way to translate them.
outcome_labels = {0: 'no', 1: 'yes'}
df['Admit'] = df['Outcome'].map(outcome_labels)
# Use Seaborn's white grid background, then scatter Math against English,
# colouring each point by the 'Admit' label.
sns.set_style('whitegrid')
sns.relplot(data=df, x='Math', y='English', hue='Admit')
# Create target 1D array and 2D data array dimensioned 100 by 2
#
# create target array
#
# Get NumPy array for english and math scores, stack them and take transpose
# Remember that np.vstack takes 1 argument so put your arrays in ( ) and remember to use .T to transpose
# TODO: target array (no code has been written for this step yet)
# Recorded cell output: Execution error
# SyntaxError: invalid syntax (<ipython-input-9-5aa12b41e490>, line 8)
# split our data into a training set and a test set; we choose 75-25 split
# We are splitting the entire data set and target
#
# first import command from scikit-learn
from sklearn.model_selection import train_test_split
#
# split the data
#
# Print out length of each set
# Now use logistic regression on training set; see how well we do
#
#
# Create model
#
# Fit with training set
#
# Calculate training score using .score(X_train, y_train) & print out percent accuracy
#
#
# Now see how well model does on test set using .score which requires 2 arguments
#
# We want to plot the prediction for each data point so first we add a column to dataframe with prediction
# To do this, predict all data using .predict; print out score
#
# predict all data
#
# Add column to dataframe
# Add column to dataframe for this prediction as we did before with .map
#
# Plot Math vs English with color indicating prediction & compare with scatterplot with actual outcome