Week 12 Practice Notebook

import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LogisticRegression from pandas import DataFrame, Series import seaborn as sns import pandas as pd

# Read 'admit_data.csv' into a DataFrame and preview its first rows.
df = pd.read_csv('admit_data.csv')
print(df.head())

# Inspect the column labels via .columns so the exact feature names
# (some of them contain spaces) are known before renaming.
df.columns

# Replace the column labels with space-free names by assigning a new list
# of names to df.columns, then show the labels again to confirm the rename.
df.columns = ['English', 'Math', 'Outcome']
df.columns

# Add a new 'Admit' feature holding "yes"/"no" labels. Series.map is the
# simplest tool here (same approach as with the iris dataset): each numeric
# 'Outcome' code is looked up in the dict (0 -> 'no', 1 -> 'yes').
df['Admit'] = df['Outcome'].map({
    0: 'no',
    1: 'yes',
})

# Use Seaborn's 'whitegrid' background, then plot Math against English with
# the point color (hue) showing whether the student was admitted or not.
sns.set_style('whitegrid')
sns.relplot(data=df, x='Math', y='English', hue='Admit')

# Create the 1-D target array and the 2-D data array of shape
# (number of students, 2).
#
# Target: one label per student, kept 1-D (shape (n,)) rather than reshaped
# into an (n, 1) column vector.
target_array = np.array(df['Outcome'].values)

# Features: take the Math and English scores as 1-D arrays of length n.
math_array = np.array(df['Math'].values)
english_array = np.array(df['English'].values)

# np.vstack takes ONE argument (a tuple of arrays). Stacking two 1-D arrays
# of length n gives shape (2, n); the transpose turns that into (n, 2), with
# column 0 = Math and column 1 = English.
x = np.vstack((math_array, english_array)).T

# NOTE: the earlier reshaping problem came from reshaping each score array to
# (100, 1) *before* stacking: vstack of two (100, 1) arrays is (200, 1), and
# its transpose is (1, 200) instead of the intended (100, 2). Leaving the
# arrays 1-D avoids that and also removes the hard-coded row count of 100.