Week 10 Practice Notebooks

# Import libraries and DataFrame # import numpy as np import matplotlib.pyplot as plt import pandas as pd from pandas import DataFrame, Series

# Read the data from pokeman.csv into a DataFrame using pandas read_csv()
# Print out the first 5 lines of data using .head
# (head(5) returns rows 0-4, which matches the output shown below.)
df = pd.read_csv('pokeman.csv')
print(df.head(5))

   #        Name Type 1  Type 2  Total  HP  Attack  Defense  Sp. Atk  Sp. Def  \
0  1   Bulbasaur  Grass  Poison    318  45      49       49       65       65   
1  2     Ivysaur  Grass  Poison    405  60      62       63       80       80   
2  3    Venusaur  Grass  Poison    525  80      82       83      100      100   
3  4  Charmander   Fire     NaN    309  39      52       43       60       50   
4  5  Charmeleon   Fire     NaN    405  58      64       58       80       65   

   Speed  Stage  Legendary  
0     45      1      False  
1     60      2      False  
2     80      3      False  
3     65      1      False  
4     80      2      False

# Print out the data types of all features using .dtypes
# (.dtypes is an attribute, not a method, so it takes no parentheses)
print(df.dtypes)

#             int64
Name         object
Type 1       object
Type 2       object
Total         int64
HP            int64
Attack        int64
Defense       int64
Sp. Atk       int64
Sp. Def       int64
Speed         int64
Stage         int64
Legendary      bool
dtype: object

# Print out the column names using .columns
# (.columns is an attribute holding the Index of column labels)
print(df.columns)

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Stage', 'Legendary'],
      dtype='object')

# Create a pandas Series for the feature Speed; print out type

# Create a NumPy array for the feature Speed (use .values); print out type

# Make 1D NumPy arrays from the features Attack and Defense and do a scatter plot
# using matplotlib

# Create a new DataFrame "df_mod" which is same as original but we drop "Type 2" feature; print out to check

# Import libraries and DataFrame # import numpy as np import matplotlib.pyplot as plt import pandas as pd from pandas import DataFrame, Series import seaborn as sns

# Read the data into a DataFrame # Print out the first 5 lines of data

# Add a white grid to the background of Seaborn plots using set_style

# Make a scatter plot using Seaborn's relplot of Defense statistics (y-axis)
# vs Attack statistics (x-axis)

# Repeat plot in previous cell but use color to indicate Type 1 (hue = )

# Make a category plot of Defense statistics vs Type 1 (non-numerical)
# Rotate labels on x-axis for readability using plt.xticks(rotation=-45)

# Make a Bar graph of Defense statistics for Type 1

# Make a violin plot of the Defense data for Type 1

# Repeat the plot in the previous cell but change palette to 'prism' and change size

# Overlaying plots - overlay violin plot of Defense with actual points # To do this (1) increase figure size using ```plt.figure(figsize = (10,6) )```; # (2) create violin plot and set inner = None to get rid of the bars inside violin plot; # (3) rotate x-axis labels for readability; # (4) create swarmplot for points and set ```color='k'``` to create the points in black; # (5) add title "Defense Data for Type 1" #

# Import libraries import numpy as np import matplotlib.pyplot as plt import pandas as pd from pandas import DataFrame, Series import seaborn as sns

# Import LinearRegression function from scikit-learn from sklearn.linear_model import LinearRegression

# Read in data from file insurance.csv and create a DataFrame; print out some lines #

# Set background grid for Seaborn plots

# Create scatter plot of charges vs BMI with color indicating whether patient is
# smoker or not

# Get data to use for linear regression # Right now we see if there is a relationship between insurance charges and bmi

# Make bmi an n by 1 array and charges n by 1

# Create model and fit data

# write out equation of line

# Use regplot to plot data and line

# predict insurance costs for a person with BMI 31.7; round answer to nearest cent # # Note that this value agrees with plot above because when x=31.7 y is around 14,000