import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from pandas import DataFrame, Series
import seaborn as sns
import pandas as pd
# Load the admissions data into a DataFrame and preview the first rows.
#
df = pd.read_csv('admit_data.csv')
print(df.head())

# Print the raw column names to see their exact spelling (some have spaces).
# A bare `df.columns` expression produces no output when this file is run as
# a script, so print it explicitly.
print(df.columns)

# Replace the column names with space-free ones.
# NOTE(review): assumes the CSV has exactly three columns, in the order
# English score, Math score, admission outcome - confirm against the file.
#
df.columns = ['English', 'Math', 'Outcome']
print(df.columns)

# Create a new feature 'Admit' holding "yes"/"no" labels derived from the
# numeric 'Outcome' code. Any value other than 0 or 1 becomes NaN under .map.
#
df['Admit'] = df['Outcome'].map({0: 'no', 1: 'yes'})

# Set the Seaborn background grid and plot Math vs English, with colour
# indicating whether the student was admitted or not.
#
sns.set_style('whitegrid')
sns.relplot(x='Math', y='English', data=df, hue='Admit')
# Create the 1D target array and the 2D data array with one row per student
# and one column per score, i.e. shape (n_samples, 2).
#
# Target array: left 1D, shape (n_samples,), one outcome per student.
#
target_array = df['Outcome'].to_numpy()

# Feature array: get 1D NumPy arrays for the math and english scores, stack
# them and take the transpose. np.vstack takes ONE argument (a tuple of
# arrays); stacking two 1D arrays of length n gives shape (2, n), and .T
# turns that into (n, 2) with columns ordered [Math, English].
#
math_array = df['Math'].to_numpy()
english_array = df['English'].to_numpy()
x = np.vstack((math_array, english_array)).T

# NOTE: an earlier version reshaped each score array to (100, 1) before
# stacking. np.vstack then produced shape (200, 1) and the transpose gave
# (1, 200) instead of (100, 2), which blocked further progress. Keeping the
# per-column arrays 1D avoids this and removes the hard-coded row count.