Week 10 Practice Notebooks

# Libraries used by the pandas basics notebook.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series

# Load pokeman.csv into a DataFrame with pandas read_csv() and preview
# the first 6 rows using .head
df = pd.read_csv('pokeman.csv')
first_rows = df.head(6)
print(first_rows)

# Show the data type of every feature; .dtypes is an attribute, so it
# takes no parentheses.
column_types = df.dtypes
print(column_types)

# List the column names, one per line, using .columns
for column_name in df.columns:
    print(column_name)

# Pull the Speed feature out as a pandas Series and confirm its type.
speed = df['Speed']
print(type(speed))

# Pull the Speed feature out as a NumPy array (via .values) and confirm
# its type.
speed_2 = df['Speed'].values
print(type(speed_2))

# Build 1D NumPy arrays from the Attack and Defense features and draw a
# matplotlib scatter plot with Attack on x and Defense on y.
attack = df['Attack'].values
defense = df['Defense'].values
plt.scatter(attack, defense)

# Build "df_mod", a copy of the original DataFrame without the "Type 2"
# feature, and print it to check the result.
df_mod = df.drop('Type 2', axis=1)
print(df_mod)

# Libraries used by the seaborn plotting notebook.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns

# Read the data into a DataFrame
# Print out the first 5 lines of data
df = pd.read_csv('pokeman.csv')
# The exercise asks for five rows, so request five rather than six.
print(df.head(5))

# Give every subsequent Seaborn plot a white-grid background via set_style
sns.set_style(style="whitegrid")

# Scatter plot with Seaborn's relplot: Defense statistics on the y-axis
# against Attack statistics on the x-axis.
sns.relplot(data=df, x='Attack', y='Defense')

# Same Defense-vs-Attack plot as the previous cell, with point colour
# (hue=) indicating Type 1.
type_1 = df['Type 1']
sns.relplot(data=df, x='Attack', y='Defense', hue=type_1)

# Make a category plot of Defense statistics vs Type 1 (non-numerical)
# Rotate the labels on the x-axis for readability using plt.xticks
#
# Columns are referenced by name through data=df, so the cell does not
# depend on arrays created in another notebook. Type 1 goes on the x-axis
# so that the rotated tick labels are the category names.
sns.catplot(x='Type 1', y='Defense', data=df)
plt.xticks(rotation=-45)

# Bar graph of the Defense statistics for each Type 1 category
# (Defense on x, Type 1 on y).
sns.barplot(data=df, x='Defense', y=type_1)

# Violin plot of the Defense data for each Type 1 category
# (Defense on x, Type 1 on y).
sns.violinplot(data=df, x='Defense', y=type_1)

# Repeat the plot in the previous cell but change palette to 'prism' and
# change size
#
# violinplot has no figsize option, so the size is set by creating the
# matplotlib figure first; the plot is then drawn on that figure.
# figsize must be a (width, height) tuple in inches.
plt.figure(figsize=(10, 6))
sns.violinplot(x=df.Defense, y=type_1, palette='prism')

# Libraries used by the linear regression notebook.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns

# Bring in the LinearRegression estimator from scikit-learn.
from sklearn.linear_model import LinearRegression

# Load insurance.csv into a DataFrame and print its first few rows.
df = pd.read_csv('insurance.csv')
preview = df.head()
print(preview)

# Use a white-grid background for the Seaborn plots that follow.
sns.set_style(style="whitegrid")

# Create scatter plot of charges vs BMI with color indicating whether the
# patient is a smoker or not
#
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally. "charges vs BMI" puts BMI on the x-axis and charges on
# the y-axis.
sns.scatterplot(x=df.bmi, y=df.charges, hue=df.smoker)

# Turn bmi and charges into n-by-1 column arrays, where n is the number
# of rows in the data.
n = len(df)
bmi = df['bmi'].values.reshape(n, 1)
charges = df['charges'].values.reshape(n, 1)

# Instantiate the regression model and fit it with bmi as the predictor
# and charges as the target.
lr = LinearRegression()
lr.fit(X=bmi, y=charges)

# Report the fitted line: its intercept and its single slope coefficient.
intercept = lr.intercept_[0]
slope = lr.coef_[0, 0]
print(f'The intercept is {intercept}')
print(f'The coefficient is {slope}')

# Use regplot to plot data and line
#
# BMI is the predictor (x) and charges the response (y), matching the
# model fitted with lr.fit(bmi, charges). Columns are referenced by name
# through data=df rather than through the reshaped n-by-1 arrays.
sns.regplot(x='bmi', y='charges', data=df)

# predict insurance costs for a person with BMI 31.7; round answer to
# nearest cent
e_val = np.array([31.7])
e_val = np.reshape(e_val, (1, 1))  # one sample with one feature
# lr was fitted on an n-by-1 target, so the prediction comes back as a
# 1-by-1 array; take the scalar out and round it to two decimals (cents).
predicted_cost = round(float(lr.predict(e_val)[0, 0]), 2)
print(f'Predicted insurance cost for BMI 31.7: ${predicted_cost:.2f}')
#
# Note that this value agrees with plot above because when x=31.7 y is
# around 14,000