# Week 12 Practice Notebook

import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LogisticRegression from pandas import DataFrame, Series import seaborn as sns import pandas as pd

def logistic_fn(x, m, b):
    """Evaluate the logistic curve 1 / (1 + exp(-m * (x - b))) at x.

    Parameters
    ----------
    x : value(s) at which to evaluate the curve; passed through np.exp,
        so scalars and NumPy arrays are both accepted.
    m : multiplier applied to (x - b) inside the exponential.
    b : offset subtracted from x; the function returns 0.5 where x == b.

    Returns
    -------
    The logistic value(s), same shape as the broadcast inputs.
    """
    exponent = -m * (x - b)
    return 1 / (1 + np.exp(exponent))


# 100 evenly spaced sample points on [-1, 1] for each axis.
# NOTE(review): x and y are created here but not plotted in this cell, and
# y is a plain linspace rather than logistic_fn(x, m, b) -- confirm intent.
x = np.linspace(-1, 1, 100)
y = np.linspace(-1, 1, 100)

# Dashed red reference lines at x = 0.5 and y = 0.5.
plt.axvline(x=.5, linestyle="--", color='red')
plt.axhline(y=0.5, linestyle="--", color='red')

# Read admit_data.csv into a dataframe and display it.
# `my_data` and `df` are two names for the same DataFrame object (no copy).
df = my_data = pd.read_csv("admit_data.csv")
print(df)

# Show the first three feature names exactly as stored in df.columns
# (the raw names may contain spaces, so check them before using them).
print(*(df.columns[position] for position in range(3)))

# Replace the feature names with space-free ones by assigning a list of
# new names (one per column, in order) to df.columns, then show the result.
df.columns = [
    'english',
    'math',
    'outcomes',
]
print(df.columns[:])

# Create a new feature called 'Admit' with values "yes" or "no".
# `.map` looks each value of an existing column up in a dict, the same
# approach used for the iris dataset.
#
# The dataframe was read from a CSV and has no `.target` attribute, so the
# label is derived from the 'outcomes' column instead.
#
# NOTE(review): assumes 'outcomes' is coded 1 = admitted, 0 = not admitted;
# confirm against admit_data.csv. Any value missing from the dict maps to NaN.
df['Admit'] = df['outcomes'].map({1: 'yes', 0: 'no'})

# Set the background grid for Seaborn, then plot Math vs English with the
# point colour indicating whether the student was admitted or not.
sns.set_style('whitegrid')

# Colour by the "yes"/"no" 'Admit' column when it exists; otherwise fall
# back to the 'outcomes' column so this cell also runs on its own.
# NOTE(review): presumably 'outcomes' encodes the admission result -- confirm.
hue_column = 'Admit' if 'Admit' in df.columns else 'outcomes'
sns.scatterplot(data=df, x='english', y='math', hue=hue_column)

# Create the 1D target array and the 2D data array (one row per student,
# two columns: english score, math score).
# NOTE(review): the exercise text expects 100 rows, i.e. shape (100, 2);
# the actual row count comes from admit_data.csv -- confirm.

# Target array: one label per student.
y = df['outcomes'].to_numpy()

# Get NumPy arrays for the english and math scores.
english = df['english'].to_numpy()
math = df['math'].to_numpy()

# np.vstack takes ONE argument (a tuple of arrays) and stacks them as rows,
# giving shape (2, n); .T transposes that to (n, 2).
X = np.vstack((english, math)).T

# Split our data into a training set and a test set; we choose a 75-25 split.
# We are splitting the entire data set and the target.
#
# First, the import command from scikit-learn
from sklearn.model_selection import train_test_split
#
# Split the data
#
# Print out the length of each set

# Now use logistic regression on the training set and see how well we do
#
# Create model
#
# Fit with training set
#
# Calculate training score using .score(X_train, y_train) & print out percent accuracy
#

# Now see how well model does on test set using .score which requires 2 arguments #

# We want to plot the prediction for each data point, so first we add a column to the dataframe with the prediction.
# To do this, predict all data using .predict; print out the score
#
# Predict all data
#
# Add column to dataframe

# Add column to dataframe for this prediction as we did before with .map #

# Plot Math vs English with color indicating prediction & compare with scatterplot with actual outcome