# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Load the Pokemon statistics from pokeman.csv into a pandas DataFrame.
df = pd.read_csv('pokeman.csv')
# Preview the first six rows; in a notebook the value of this final
# expression is what gets rendered as the cell output.
df.head(n=6)
#int64
Nameobject
0
1
Bulbasaur
1
2
Ivysaur
2
3
Venusaur
3
4
Charmander
4
5
Charmeleon
5
6
Charizard
# Re-read the dataset, then list the data type of every column.
# .dtypes is an attribute, so it is accessed without parentheses.
df = pd.read_csv('pokeman.csv')
df.dtypes
# Re-read the dataset, then list its column labels.
# .columns is an attribute as well, so no call parentheses are needed.
df = pd.read_csv('pokeman.csv')
df.columns
# Create a pandas Series for the feature Speed; print out type
# (Previously this cell only evaluated df.Speed: the Series was not stored
# in a variable and its type was never printed.)
speed_series = df['Speed']
print(type(speed_series))
# Keep the Series as the final expression so a notebook still displays it.
speed_series
# Pull the Speed column out as a NumPy array through .values, then show
# first the array's type and then its contents.
speed = df['Speed'].values
print(type(speed))
print(speed)
<class 'numpy.ndarray'>
[ 45 60 80 65 80 100 43 58 78 45 30 70 50 35 75 56 71 101
72 97 70 100 55 80 90 110 40 65 41 56 76 50 65 85 35 60
65 100 20 45 55 90 30 40 50 25 30 45 90 95 120 90 115 55
85 70 95 60 95 90 90 70 90 105 120 35 45 55 40 55 70 70
100 20 35 45 90 105 15 30 45 70 60 75 100 45 70 25 50 40
70 80 95 110 70 42 67 50 75 100 140 40 55 35 45 87 76 30
35 60 25 40 50 60 90 60 85 63 68 85 115 90 105 95 105 93
85 110 80 81 60 48 55 65 130 65 40 35 55 55 80 130 30 85
100 90 50 70 80 130 100]
# Make 1D NumPy arrays from the features Attack and Defense and do a scatter plot
# using matplotlib
#
m = df.Attack.values   # Attack values, plotted on the x-axis
a = df.Defense.values  # Defense values, plotted on the y-axis
plt.plot(m, a, 'ro')   # 'ro' = red circle markers with no connecting line
# The labels must follow the argument order of plt.plot(x, y). They were
# swapped before (x labelled "Defense", y labelled "Attack").
plt.xlabel("Attack")
plt.ylabel("Defense")
# Create a new DataFrame "df_mod" which is same as original but we drop "Type 2" feature; print out to check
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Load pokeman.csv into a DataFrame for the Seaborn plots below.
df = pd.read_csv('pokeman.csv')
# Preview the first five rows of the data.
df.head(n=5)
#int64
Nameobject
0
1
Bulbasaur
1
2
Ivysaur
2
3
Venusaur
3
4
Charmander
4
5
Charmeleon
# Switch the Seaborn style to 'whitegrid' so plots get a white background
# with grid lines.
sns.set_style(style='whitegrid')
# Seaborn relplot scatter: Attack statistics on the x-axis against
# Defense statistics on the y-axis.
sns.relplot(data=df, x='Attack', y='Defense')
plt.title("Defense vs Attack")
# Same scatter plot as the previous cell, but each point is coloured by
# its 'Type 1' category through the hue argument.
sns.relplot(data=df, x='Attack', y='Defense', hue='Type 1')
plt.title("Defense vs Attack")
# Category plot of Defense for each (non-numerical) 'Type 1' value.
sns.catplot(data=df, x='Type 1', y='Defense')
# Tilt the x-axis tick labels by -45 degrees so they stay readable.
plt.xticks(rotation=-45)
# Bar graph of the Defense statistics grouped by 'Type 1'.
sns.barplot(data=df, x='Type 1', y='Defense')
# Tilt the x-axis tick labels by 45 degrees.
plt.xticks(rotation=45)
# Violin plot of the Defense data, one violin per 'Type 1' category.
sns.violinplot(data=df, x='Type 1', y='Defense')
# Same violin plot as the previous cell, drawn on a 10 x 6 figure and
# coloured with the 'prism' palette.
plt.figure(figsize=(10, 6))
sns.violinplot(data=df, x='Type 1', y='Defense', palette='prism')
# Overlaying plots - overlay violin plot of Defense with actual points
# To do this (1) increase figure size using ```plt.figure(figsize = (10,6) )```;
# (2) create violin plot and set inner = None to get rid of the bars inside violin plot;
# (3) rotate x-axis labels for readability;
# (4) create swarmplot for points and set ```color='k'``` to create the points in black;
# (5) add title "Defense Data for Type 1"
#
# (1) larger figure
plt.figure(figsize=(10, 6))
# (2) violins without the inner bars
sns.violinplot(x='Type 1', y='Defense', data=df, palette='prism', inner=None)
# (4) individual observations drawn in black on top of the violins
sns.swarmplot(x='Type 1', y='Defense', data=df, color='k')
# (3) rotate the x-axis labels. This step was listed above but missing from
# the code; it is done after both layers are drawn so that it applies to the
# final tick labels.
plt.xticks(rotation=45)
# (5) title
plt.title('Defense Data for Type 1')
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression
# Load insurance.csv into a DataFrame and print it.
#
df = pd.read_csv('insurance.csv')
# NOTE: the value returned by head() is neither stored nor printed here.
df.head(n=5)
print(df)
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
... ... ... ... ... ... ... ...
1333 50 male 30.970 3 no northwest 10600.54830
1334 18 female 31.920 0 no northeast 2205.98080
1335 18 female 36.850 0 no southeast 1629.83350
1336 21 female 25.800 0 no southwest 2007.94500
1337 61 female 29.070 0 yes northwest 29141.36030
[1338 rows x 7 columns]
# Use the 'whitegrid' Seaborn style for the plots that follow.
sns.set_style(style='whitegrid')
# Scatter plot of charges (y-axis) vs BMI (x-axis), with colour indicating
# whether the patient is a smoker or not.
sns.relplot(data=df, x='bmi', y='charges', hue='smoker')
# Extract the two columns used for the linear regression as NumPy arrays.
# For now we only look for a relationship between insurance charges and bmi.
bmi = df['bmi'].values
print('bmi', bmi)
charges = df['charges'].values
print('charges', charges)
bmi [27.9 33.77 33. ... 36.85 25.8 29.07]
charges [16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Make bmi an n by 1 array and charges n by 1
n = len(bmi)
bmi = np.reshape(bmi, (n, 1))
# A stray comma after "np.reshape" previously turned this line into the tuple
# (np.reshape, (charges, (n, 1))) instead of a call to the function.
charges = np.reshape(charges, (n, 1))
# Create model and fit data
# Predictor: bmi, reshaped to an n by 1 (2D) array before fitting.
bmi = df.bmi.values
print('bmi', bmi)
n = len(bmi)
bmi = np.reshape(bmi, (n, 1))
print('BMI Reshaped', bmi)
# Target: charges, reshaped to n by 1 as well.
charges = df.charges.values
# A stray comma after "np.reshape" previously made charges the tuple
# (np.reshape, (charges, (n, 1))), which lr.fit() rejected with
# "TypeError: float() argument must be a string or a number, not 'function'".
charges = np.reshape(charges, (n, 1))
print('charges', charges)
lr = LinearRegression()
lr.fit(bmi, charges)
bmi [27.9 33.77 33. ... 36.85 25.8 29.07]
BMI Reshaped [[27.9 ]
[33.77]
[33. ]
...
[36.85]
[25.8 ]
[29.07]]
charges (<function reshape at 0x7fd814093e60>, (array([16884.924 , 1725.5523, 4449.462 , ..., 1629.8335, 2007.945 ,
29141.3603]), (1338, 1)))
/shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/utils/validation.py:746: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
array = np.asarray(array, order=order, dtype=dtype)
Execution error
TypeError: float() argument must be a string or a number, not 'function'
# write out equation of line
print("intercept", lr.intercept_)
print("slope", lr.coef_)
print("The equation which fits the data in a linear regression sense is:")
# The model is fitted on bmi, so the slope term is labelled "bmi"
# (the message previously said "estriol").
# The [0] / [0, 0] indexing assumes the target was fitted as an n by 1 array.
print(f"{round(lr.intercept_[0], 4)} + {round(lr.coef_[0, 0], 4)} times bmi")
Execution error
AttributeError: 'LinearRegression' object has no attribute 'intercept_'
# Draw the bmi/charges data together with the fitted regression line
# using Seaborn's regplot.
sns.regplot(data=df, x='bmi', y='charges')
# predict insurance costs for a person with BMI 31.7; round answer to nearest cent
#
# Note that this value agrees with plot above because when x=31.7 y is around 14,000
#
# Fixes relative to the earlier version of this cell:
#   - it referenced an undefined name "grams" (NameError) and printed a
#     message about a baby's birthweight;
#   - it multiplied the prediction by 100;
#   - it overwrote "charges", the array the model is fitted on.
e_eval = np.reshape([31.7], (1, 1))  # predict() requires a 2D array
# ravel() lets the single value be read whether predict() returns an array
# of shape (1,) or (1, 1).
predicted_charges = round(float(np.ravel(lr.predict(e_eval))[0]), 2)
print(f"predicted insurance charges for a BMI of 31.7 are ${predicted_charges:.2f}")
Execution error
NotFittedError: This LinearRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.