import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from pandas import DataFrame, Series
import seaborn as sns
import pandas as pd
def logistic_fn(x, m, b):
    """Evaluate the logistic (sigmoid) curve 1 / (1 + exp(-m * (x - b))).

    Parameters
    ----------
    x : scalar or NumPy array
        Point(s) at which to evaluate the curve.
    m : scalar
        Multiplier applied to (x - b) inside the exponential.
    b : scalar
        Horizontal shift; the result is exactly 0.5 where x == b.

    Returns
    -------
    Value(s) of the curve at x, strictly between 0 and 1 for finite input.
    """
    y = 1 / (1 + np.exp(-m * (x - b)))
    return y
# Two 100-point sample grids on [-1, 1], then dashed red guide lines
# drawn at x = 0.5 (vertical) and y = 0.5 (horizontal) on the current axes.
y = np.linspace(-1, 1, 100)
x = np.linspace(-1, 1, 100)
plt.axvline(0.5, color="red", linestyle="--")
plt.axhline(0.5, color="red", linestyle="--")
# Load data into a dataframe & print
#
# Read the CSV once; `df` and `my_data` are two names for the same DataFrame.
df = my_data = pd.read_csv("admit_data.csv")
print(df)
English Math Outcome
0 35.00 54.625 0.0
1 26.50 68.500 0.0
2 41.00 57.000 0.0
3 21.50 42.000 0.0
4 46.00 84.000 1.0
.. ... ... ...
95 44.50 70.500 1.0
96 40.00 75.000 1.0
97 42.00 75.000 1.0
98 28.75 52.125 0.0
99 33.00 76.500 1.0
[100 rows x 3 columns]
# Print out feature names to get exact names (some have spaces) using .columns
# Show the first three column labels, space-separated, on one line.
print(*(df.columns[i] for i in range(3)))
English Math Outcome
# Get rid of spaces in feature names using df.columns = [ list of new names in quotes]
#
# Overwrite the column labels in place, then echo the resulting Index.
df.columns = ["english", "math", "outcomes"]
print(df.columns[:])
Index(['english', 'math', 'outcomes'], dtype='object')
# Create new feature called 'Admit' using map or apply with values "yes" or "no"
# For this case it's easier to use .map as we did for iris dataset
#
# A DataFrame has no `.target` attribute (the original line raised
# AttributeError), so derive the label from the numeric 'outcomes' column.
# Assumes 1.0 means admitted and 0.0 means not admitted -- TODO confirm.
df['Admit'] = df['outcomes'].map({1.0: 'yes', 0.0: 'no'})
# Set background grid for Seaborn & plot Math vs English with color indicating whether the student was admitted
# or not
#
# `english`, `math` and `outcome` were never defined as variables, so read the
# scores straight from the DataFrame columns. Colour is keyed on the numeric
# 'outcomes' column so this cell does not depend on the 'Admit' column existing.
sns.set_style("whitegrid")
sns.scatterplot(data=df, x="english", y="math", hue="outcomes")
# Create target 1D array and 2D data array dimensioned 100 by 2
#
# create target array
target = df['outcomes'].to_numpy()
# Get NumPy array for english and math scores, stack them and take transpose
# Remember that np.vstack takes 1 argument so put your arrays in ( ) and remember to use .T to transpose
e = df['english'].to_numpy()
m = df['math'].to_numpy()
# np.reshape cannot turn one 100-element array into shape (100, 2) (it raised
# ValueError); stacking the two score arrays gives (2, n) and .T gives (n, 2).
data = np.vstack((e, m)).T
# split our data into a training set and a test set; we choose 75-25 split
# We are splitting the entire data set and target
#
# first import the command from scikit-learn
from sklearn.model_selection import train_test_split
#
# split the data
#
# Print out length of each set
# Now use logistic regression on training set; see how well we do
#
#
# Create model
#
# Fit with training set
#
# Calculate training score using .score(X_train, y_train) & print out percent accuracy
#
#
# Now see how well model does on test set using .score which requires 2 arguments
#
# We want to plot the prediction for each data point so first we add a column to dataframe with prediction
# To do this, predict all data using .predict; print out score
#
# predict all data
#
# Add column to dataframe
# Add column to dataframe for this prediction as we did before with .map
#
# Plot Math vs English with color indicating prediction & compare with scatterplot with actual outcome