# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Load pokeman.csv into a pandas DataFrame with read_csv(),
# then preview the first 5 rows of data with .head()
df = pd.read_csv('pokeman.csv')
print(df.head(n=5))
# Output:
# # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \
# 0 1 Bulbasaur Grass Poison 318 45 49 49 65 65
# 1 2 Ivysaur Grass Poison 405 60 62 63 80 80
# 2 3 Venusaur Grass Poison 525 80 82 83 100 100
# 3 4 Charmander Fire NaN 309 39 52 43 60 50
# 4 5 Charmeleon Fire NaN 405 58 64 58 80 65
# Speed Stage Legendary
# 0 45 1 False
# 1 60 2 False
# 2 80 3 False
# 3 65 1 False
# 4 80 2 False
# Show the data type of every feature; .dtypes is an attribute, so no parentheses
print(df.dtypes)
# Output:
# # int64
# Name object
# Type 1 object
# Type 2 object
# Total int64
# HP int64
# Attack int64
# Defense int64
# Sp. Atk int64
# Sp. Def int64
# Speed int64
# Stage int64
# Legendary bool
# dtype: object
# Show the column (feature) names via the .columns attribute
print(df.columns)
# Output:
# Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
# 'Sp. Atk', 'Sp. Def', 'Speed', 'Stage', 'Legendary'],
# dtype='object')
# Create a pandas Series for the feature Speed; print out type
# Create a NumPy array for the feature Speed (use .values); print out type
# Make 1D NumPy arrays from the features Attack and Defense and do a scatter plot
# using matplotlib
#
# Create a new DataFrame "df_mod" which is same as original but we drop "Type 2" feature; print out to check
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Read the data into a DataFrame
# Print out the first 5 lines of data
# Add a white grid to the background of Seaborn plots using set_style
# Make a scatter plot using Seaborn's relplot of Defense statistics (y-axis)
# vs Attack statistics (x-axis)
# Repeat plot in previous cell but use color to indicate Type 1 (hue = )
# Make a category plot of Defense statistics vs Type 1 (non-numerical)
# Rotate labels on x-axis for readability using plt.xticks(rotation=-45)
# Make a Bar graph of Defense statistics for Type 1
# Make a violin plot of the Defense data for Type 1
# Repeat the plot in the previous cell but change palette to 'prism' and change size
# Overlaying plots - overlay violin plot of Defense with actual points
# To do this (1) increase figure size using ```plt.figure(figsize = (10,6) )```;
# (2) create violin plot and set inner = None to get rid of the bars inside violin plot;
# (3) rotate x-axis labels for readability;
# (4) create swarmplot for points and set ```color='k'``` to create the points in black;
# (5) add title "Defense Data for Type 1"
#
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression
# Read in data from file insurance.csv and create a DataFrame; print out some lines
#
# Set background grid for Seaborn plots
# Create scatter plot of charges vs BMI with color indicating whether patient is
# smoker or not
# Get data to use for linear regression
# Right now we see if there is a relationship between insurance charges and bmi
# Make bmi an n by 1 array and charges n by 1
# Create model and fit data
# write out equation of line
# Use regplot to plot data and line
# predict insurance costs for a person with BMI 31.7; round answer to nearest cent
#
# Note that this value agrees with plot above because when x=31.7 y is around 14,000