# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Load pokeman.csv into a pandas DataFrame with read_csv(),
# then show the first 6 rows with .head
df = pd.read_csv('pokeman.csv')
print(df.head(n=6))
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65
1 2 Ivysaur Grass Poison 405 60 62 63 80 80
2 3 Venusaur Grass Poison 525 80 82 83 100 100
3 4 Charmander Fire NaN 309 39 52 43 60 50
4 5 Charmeleon Fire NaN 405 58 64 58 80 65
5 6 Charizard Fire Flying 534 78 84 78 109 85
Speed Stage Legendary
0 45 1 False
1 60 2 False
2 80 3 False
3 65 1 False
4 80 2 False
5 100 3 False
# Show the data type of every feature; .dtypes is an attribute, so no parentheses
column_types = df.dtypes
print(column_types)
# int64
Name object
Type 1 object
Type 2 object
Total int64
HP int64
Attack int64
Defense int64
Sp. Atk int64
Sp. Def int64
Speed int64
Stage int64
Legendary bool
dtype: object
# Print out the column names, one per line, using .columns
for col in df.columns:
    # The loop body must be indented; without it Python raises IndentationError.
    print(col)
#
Name
Type 1
Type 2
Total
HP
Attack
Defense
Sp. Atk
Sp. Def
Speed
Stage
Legendary
# Pull the Speed feature out as a pandas Series and confirm its type
speed = df['Speed']
print(type(speed))
<class 'pandas.core.series.Series'>
# Pull the Speed feature out as a NumPy array (via .values) and confirm its type
speed_2 = df['Speed'].values
print(type(speed_2))
<class 'numpy.ndarray'>
# Extract the Attack and Defense features as 1D NumPy arrays and draw a
# matplotlib scatter plot with Attack on the x-axis and Defense on the y-axis
attack = df['Attack'].values
defense = df['Defense'].values
plt.scatter(x=attack, y=defense)
# Build "df_mod": a copy of the original DataFrame without the "Type 2"
# feature (df itself is left unchanged); print it to check the result
df_mod = df.drop(columns=['Type 2'])
print(df_mod)
# Name Type 1 Total HP Attack Defense Sp. Atk Sp. Def \
0 1 Bulbasaur Grass 318 45 49 49 65 65
1 2 Ivysaur Grass 405 60 62 63 80 80
2 3 Venusaur Grass 525 80 82 83 100 100
3 4 Charmander Fire 309 39 52 43 60 50
4 5 Charmeleon Fire 405 58 64 58 80 65
.. ... ... ... ... ... ... ... ... ...
146 147 Dratini Dragon 300 41 64 45 50 50
147 148 Dragonair Dragon 420 61 84 65 70 70
148 149 Dragonite Dragon 600 91 134 95 100 100
149 150 Mewtwo Psychic 680 106 110 90 154 90
150 151 Mew Psychic 600 100 100 100 100 100
Speed Stage Legendary
0 45 1 False
1 60 2 False
2 80 3 False
3 65 1 False
4 80 2 False
.. ... ... ...
146 50 1 False
147 70 2 False
148 80 3 False
149 130 1 True
150 100 1 False
[151 rows x 12 columns]
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Read the data into a DataFrame
# Print out the first 6 lines of data
df = pd.read_csv('pokeman.csv')
print(df.head(n=6))
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65
1 2 Ivysaur Grass Poison 405 60 62 63 80 80
2 3 Venusaur Grass Poison 525 80 82 83 100 100
3 4 Charmander Fire NaN 309 39 52 43 60 50
4 5 Charmeleon Fire NaN 405 58 64 58 80 65
5 6 Charizard Fire Flying 534 78 84 78 109 85
Speed Stage Legendary
0 45 1 False
1 60 2 False
2 80 3 False
3 65 1 False
4 80 2 False
5 100 3 False
# Give all following Seaborn plots a white background with grid lines
sns.set_style(style="whitegrid")
# Scatter plot with Seaborn's relplot: Attack on the x-axis,
# Defense on the y-axis
sns.relplot(data=df, x='Attack', y='Defense')
# Same scatter plot as the previous cell, with point color (hue) showing Type 1
type_1 = df['Type 1']  # kept as a Series; the plots in later cells reuse it
sns.relplot(data=df, x='Attack', y='Defense', hue='Type 1')
# Make a category plot of Defense statistics vs Type 1 (non-numerical).
# Type 1 goes on the x-axis, so its tick labels are rotated for readability
# with plt.xticks(rotation=-45).
# Both columns are looked up by name in df, so the plot does not depend on
# arrays created in earlier cells.
sns.catplot(x='Type 1', y='Defense', data=df)
plt.xticks(rotation=-45)
# Horizontal bar graph of the Defense statistic for each Type 1 category
sns.barplot(data=df, x='Defense', y='Type 1')
# Violin plot of the Defense data for each Type 1 category
sns.violinplot(data=df, x='Defense', y='Type 1')
# Repeat the violin plot from the previous cell with the 'prism' palette on a
# larger figure.
# violinplot is an axes-level function and has no figsize argument, so the
# size is set by creating the matplotlib figure first; the plot then draws
# onto that figure's axes.
plt.figure(figsize=(10, 6))
sns.violinplot(x='Defense', y='Type 1', data=df, palette='prism')
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression
# Load insurance.csv into a DataFrame and print its first rows
# (.head() with no argument shows 5 rows)
df = pd.read_csv('insurance.csv')
first_rows = df.head()
print(first_rows)
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
# Use a white background with grid lines for the Seaborn plots below
sns.set_style(style="whitegrid")
# Create a scatter plot of charges (y-axis) vs BMI (x-axis), with color
# indicating whether the patient is a smoker or not.
# x and y are passed by keyword: passing them positionally is deprecated and
# becomes an error from seaborn 0.12.
sns.scatterplot(x='bmi', y='charges', hue='smoker', data=df)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
# Turn bmi and charges into n-by-1 column arrays, where n is the number of rows
n = len(df['bmi'].values)
charges = df['charges'].values.reshape((n, 1))
bmi = df['bmi'].values.reshape((n, 1))
# Create the linear model and fit it with bmi as the input and charges as the
# target; fit() returns the estimator itself, so the calls can be chained
lr = LinearRegression().fit(bmi, charges)
# Report the fitted line: intercept and the single slope coefficient
intercept = lr.intercept_[0]
slope = lr.coef_[0][0]
print(f'The intercept is {intercept}')
print(f'The coefficient is {slope}')
The intercept is 1192.9372089611497
The coefficient is 393.87303079739524
# Use regplot to plot the data and the regression line.
# BMI goes on the x-axis and charges on the y-axis so the plot matches the
# fitted model, which predicts charges from BMI. The columns are looked up by
# name in df rather than taken from the reshaped n-by-1 arrays.
sns.regplot(x='bmi', y='charges', data=df)
# Predict insurance costs for a person with BMI 31.7 and report the answer
# rounded to the nearest cent.
# predict() expects a 2D array of shape (n_samples, n_features), so the single
# BMI value is wrapped as a 1-by-1 array.
e_val = np.array([[31.7]])
predicted_charges = lr.predict(e_val)
# The model was fit on an n-by-1 target, so the prediction comes back 2D.
print(f'Predicted charges for BMI 31.7: {predicted_charges[0, 0]:.2f}')
#
# Note that this value agrees with the plot above: at x = 31.7 the regression line gives y of around 14,000