# Week 12 Practice Notebook

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame, Series
from sklearn.linear_model import LogisticRegression

# Load the admissions data from 'admit_data.csv' into a DataFrame.
df = pd.read_csv('admit_data.csv')

# Leaving the bare name as the last expression of the cell makes the notebook
# display the DataFrame.
df

# Print out feature names to get the exact names (some have spaces) using
# .columns; the bare expression is displayed as the cell's output.
df.columns

# Get rid of spaces in feature names using
#     df.columns = [list of new names in quotes]
# The list must contain exactly one name per existing column.
df.columns = ['English', 'Math', 'Outcome']

# Display the renamed columns to confirm the change.
df.columns

# Create a new feature called 'Admit' with values "yes" or "no", using map or
# apply. For this case it's easier to use .map, as we did for the iris dataset.
# Outcome 0 becomes 'no' and 1 becomes 'yes'; any other Outcome value has no
# entry in the dict and ends up as NaN.
df['Admit'] = df['Outcome'].map({0: 'no', 1: 'yes'})

# Set the background grid for Seaborn.
sns.set_style('whitegrid')

# Plot Math vs English, with color indicating whether the student was
# admitted or not.
sns.relplot(x='Math', y='English', data=df, hue='Admit')

# Create the target 1D array and the 2D data array dimensioned 100 by 2
#
# create target array
#
# Get NumPy arrays for the English and Math scores, stack them, and take the transpose
# Remember that np.vstack takes 1 argument, so put your arrays in ( ), and remember
# to use .T to transpose the stacked result into the 100 by 2 data array

# Split our data into a training set and a test set; we choose a 75-25 split.
# We are splitting the entire data set and target.
#
# First, the import command from scikit-learn:
from sklearn.model_selection import train_test_split
#
# split the data
#
# Print out length of each set

# Now use logistic regression on the training set; see how well we do
#
# Create model
#
# Fit with training set
#
# Calculate training score using .score(X_train, y_train) & print out percent accuracy
#

# Now see how well the model does on the test set using .score, which requires 2 arguments
#

# We want to plot the prediction for each data point, so first we add a column to the
# dataframe with the prediction
# To do this, predict all data using .predict; print out score
#
# predict all data
#
# Add column to dataframe

# Add a column to the dataframe for this prediction, as we did before with .map
#

# Plot Math vs English with color indicating the prediction & compare with the
# scatterplot of the actual outcome