# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Read the data from pokeman.csv into a DataFrame using pandas read_csv()
# and print out the first 6 lines of data using .head
df = pd.read_csv('pokeman.csv')
print(df.head(6))
# Print out the data types of all features using .dtypes (an attribute, so
# no parentheses)
print(df.dtypes)
# Print out the column names using .columns, one name per line.
# The loop body has to be indented; without the indent Python rejects the
# whole file with an IndentationError.
for col in df.columns:
    print(col)
# Pull the Speed feature out as a pandas Series and report its type
speed = df['Speed']
print(type(speed))
# Same feature again, this time as a NumPy array (via .values); report its type
speed_2 = df['Speed'].values
print(type(speed_2))
# Take Attack and Defense as NumPy arrays and draw a matplotlib scatter plot
# with Attack on the x-axis and Defense on the y-axis
attack = df['Attack'].values
defense = df['Defense'].values
plt.scatter(x=attack, y=defense)
# Build "df_mod": the original DataFrame minus the "Type 2" feature; print it
# out to check the column is gone
df_mod = df.drop('Type 2', axis=1)
print(df_mod)
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Load the Pokemon data into a DataFrame and preview its first 6 rows
df = pd.read_csv('pokeman.csv')
print(df.head(6))
# Give Seaborn plots a white grid background
sns.set_style("whitegrid")
# Seaborn relplot (scatter) of Defense statistics on the y-axis against
# Attack statistics on the x-axis
sns.relplot(x=df['Attack'], y=df['Defense'])
# Same plot again, with the point color (hue) indicating Type 1
type_1 = df['Type 1']
sns.relplot(x=df['Attack'], y=df['Defense'], hue=type_1)
# Make a category (strip) plot of Defense statistics vs Type 1 (non-numerical).
# Type 1 goes on the x-axis and Defense on the y-axis; the category labels are
# rotated with plt.xticks(rotation=-45) for readability.
# Each plot below starts its own figure, because Seaborn's axes-level
# functions otherwise draw on top of whatever axes is current.
plt.figure()
sns.stripplot(x='Type 1', y='Defense', data=df)
plt.xticks(rotation=-45)
# Make a Bar graph of Defense statistics for Type 1
plt.figure()
sns.barplot(x=df.Defense, y=type_1)
# Make a violin plot of the Defense data for Type 1
plt.figure()
sns.violinplot(x=df.Defense, y=type_1)
# Repeat the previous plot but change palette to 'prism' and change size.
# violinplot is an axes-level function with no figsize option, so the size is
# set on the figure it draws into; figsize is a (width, height) tuple in inches.
plt.figure(figsize=(10, 6))
sns.violinplot(x=df.Defense, y=type_1, palette='prism')
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression
# Read in data from file insurance.csv and create a DataFrame; print out the
# first few lines
df = pd.read_csv('insurance.csv')
print(df.head())
# Set background grid for Seaborn plots
sns.set_style("whitegrid")
# Create scatter plot of charges (y-axis) vs BMI (x-axis) with color indicating
# whether patient is smoker or not.
# x and y are passed by keyword: recent Seaborn releases accept only `data`
# positionally, so positional x/y vectors are rejected.
sns.scatterplot(x=df.bmi, y=df.charges, hue=df.smoker)
# Turn bmi and charges into n-by-1 column arrays, where n is the number of
# samples
n = len(df['bmi'].values)
bmi = np.reshape(df['bmi'].values, (n, 1))
charges = np.reshape(df['charges'].values, (n, 1))
# Build the linear model and fit charges as a function of bmi
lr = LinearRegression()
lr.fit(bmi, charges)
# Report the intercept and slope of the fitted line
print(f'The intercept is {lr.intercept_[0]}')
print(f'The coefficient is {lr.coef_[0,0]}')
# Use regplot to plot data and line.
# BMI is the predictor (x) and charges the response (y), matching the fitted
# model. The columns are referenced by name through `data=df` because regplot
# only accepts 1-D inputs, not the n-by-1 arrays used for fitting.
sns.regplot(x='bmi', y='charges', data=df)
# Predict insurance costs for a person with BMI 31.7; round answer to nearest cent
e_val = np.array([31.7])
e_val = np.reshape(e_val, (1, 1))
# predict() returns an array; flatten it and take the single value so it can
# be rounded and printed.
predicted_cost = round(float(np.ravel(lr.predict(e_val))[0]), 2)
print(f'Predicted charges for BMI 31.7: {predicted_cost:.2f}')
#
# Note that this value agrees with plot above because when x=31.7 y is around 14,000