# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series 
# Read the data from pokeman.csv into a DataFrame using pandas read_csv().
# NOTE: the bare expressions below (df.head(6), df.dtypes, df.columns, s) only
# show a value when they are the last statement of a notebook cell; run as a
# plain script they produce no output.
df = pd.read_csv('pokeman.csv')
# Show the first 6 rows of data using .head
df.head(6)
# Show the data types of all features using .dtypes (an attribute, so no parentheses)
df.dtypes
# Show the column names using .columns (df.Name would return the Name column instead)
df.columns
# Create a pandas Series for the feature Speed; print out its type
s = df.Speed
print(type(s))
s
# Create a NumPy array for the feature Speed (use .values); print out its type
ss = df.Speed.values
print(type(ss))
print(ss)
# Pull the Attack and Defense features out as 1D NumPy arrays and draw them
# against each other with matplotlib (red circles, no connecting line).
a = df['Attack'].values
d = df['Defense'].values
plt.plot(a, d, 'ro')
# Build "df_mod": a copy of the original DataFrame without the "Type 2" feature.
# The trailing bare name shows the result when run as a notebook cell.
df_mod = df.drop('Type 2', axis='columns')
df_mod
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Load the data into a DataFrame and show the first 5 rows
# (5 is the default row count of .head()).
df = pd.read_csv('pokeman.csv')
df.head()

# Give every Seaborn plot a white grid background.
sns.set_style('whitegrid')

# Scatter plot (Seaborn relplot) of Defense on the y-axis against Attack on the x-axis.
sns.relplot(data=df, x='Attack', y='Defense')

# Same scatter plot, with the point colour (hue) indicating Type 1.
sns.relplot(data=df, x='Attack', y='Defense', hue='Type 1')

# The remaining plots all show Defense against the non-numerical Type 1 feature,
# so the shared keyword arguments are collected once.
defense_by_type = {'x': 'Type 1', 'y': 'Defense', 'data': df}

# Category plot; rotate the x-axis labels by -45 degrees for readability.
sns.catplot(**defense_by_type)
plt.xticks(rotation=-45)
# Bar graph of the Defense statistics for each Type 1.
sns.barplot(**defense_by_type)
# Violin plot of the Defense data for each Type 1.
sns.violinplot(**defense_by_type)
# Repeat the violin plot of Defense for Type 1, but change the palette to 'prism'
# and change the size. The size is changed by opening a new, larger figure first,
# so the plot is not drawn on top of whatever axes were current before.
plt.figure(figsize=(10, 6))
sns.violinplot(x='Type 1', y='Defense', data=df, palette='prism')
# Overlaying plots - overlay the violin plot of Defense with the actual points.
#  To do this (1) increase figure size using plt.figure(figsize=(10, 6));
#  (2) create violin plot and set inner=None to get rid of the bars inside the violin plot;
#  (3) rotate x-axis labels for readability;
#  (4) create swarmplot for points and set color='k' to create the points in black;
#  (5) add title "Defense Data for Type 1"
#
plt.figure(figsize=(10, 6))
sns.violinplot(x='Type 1', y='Defense', data=df, palette='prism', inner=None)
plt.xticks(rotation=-45)
# Both plots target the current axes, so the swarm points land on top of the violins.
sns.swarmplot(x='Type 1', y='Defense', data=df, color='k')
plt.title("Defense Data for Type 1")
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns 
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression  
# Load insurance.csv into a DataFrame and show the first few rows
# (5 is the default row count of .head()).
df = pd.read_csv('insurance.csv')
df.head()
# Use a white grid as the background of the Seaborn plots.
sns.set_style('whitegrid')
# Scatter plot of charges against BMI, with colour indicating whether the
# patient is a smoker or not.
sns.relplot(data=df, x='bmi', y='charges', hue='smoker')
# Extract the data for the linear regression: we want to see whether there is
# a relationship between insurance charges and bmi.
Charges = df['charges'].values
print(" Insurance Charges ", Charges)
BMI = df['bmi'].values
print("BMI", BMI)
# Turn both 1D arrays into n-by-1 column arrays.
n = len(Charges)
Charges = Charges.reshape(n, 1)
print(" Charges reshaped", Charges)
BMI = BMI.reshape(n, 1)
print("BMI", BMI)
# Create the model and fit the data: BMI is the predictor (x) and Charges is
# the response (y).
lr = LinearRegression()
lr.fit(BMI, Charges)
# Write out the equation of the fitted line. intercept_ is indexed as a 1D
# array and coef_ as a 2D array below, so the scalars are pulled out with
# [0] and [0, 0].
print("intercept", lr.intercept_)
print("slope", lr.coef_)
print("The equation which fits the data in a linear regression sense is:")
# The slope multiplies the predictor (bmi), not the response (charges).
print(f"charges = {round(lr.intercept_[0], 4)} + {round(lr.coef_[0, 0], 4)} times bmi")
# Use regplot to plot the data together with the regression line
sns.regplot(x='bmi', y='charges', data=df)
# Predict insurance costs for a person with BMI 31.7; round answer to nearest cent.
# predict() expects a 2D (samples by features) array, hence the nested list.
predicted_charges = float(lr.predict(np.array([[31.7]]))[0, 0])
print("Predicted charges for BMI 31.7:", round(predicted_charges, 2))
#
# Note: this value should agree with the plot above, where y is around 14,000
# when x=31.7.