# Import libraries and DataFrame
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Read the data from pokeman.csv into a DataFrame using pandas read_csv()
# and print out the first 6 lines of data using .head().
# Results are passed to print() explicitly: a bare expression is only echoed
# when it is the last line of a notebook cell and shows nothing when this
# file is run as a plain script.
df = pd.read_csv("pokeman.csv")
print(df.head(6))
# Print out the data types of all features using .dtypes (an attribute, so no parentheses)
print(df.dtypes)
# Print out the column names using .columns
print(df.columns)
# Create a pandas Series for the feature Speed; print out its type
spd_series = df["Speed"]
print(type(spd_series))
# Create a NumPy array for the feature Speed (use .values); print out the
# values and the type
spd = df.Speed.values
print(spd)
print(type(spd))
# Pull the Attack and Defense features out as 1D NumPy arrays and draw a
# matplotlib scatter plot of Defense (y-axis) against Attack (x-axis)
Att = df["Attack"].values
Def = df["Defense"].values
plt.scatter(x=Att, y=Def)
plt.title("Attack vs. Defense")
plt.xlabel("Attack")
plt.ylabel("Defense")
# Create a new DataFrame "df_mod" that is the same as the original except
# that the "Type 2" feature is dropped; print it out to check.
# print() is used because a bare `df_mod` expression displays nothing unless
# it is the last line of a notebook cell.
df_mod = df.drop(columns=["Type 2"])
print(df_mod)
# Import libraries and DataFrame
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Read the data into a DataFrame and print out the first lines of data.
# This cell asks for 5 lines, but the section instructions (like the first
# section) say 6, so 6 are shown just in case.
# print() is needed for the rows to appear when this runs as a script; a bare
# `df.head(6)` is only echoed as the last line of a notebook cell.
df = pd.read_csv("pokeman.csv")
print(df.head(6))
# Give Seaborn plots a white background with a grid using set_style.
# The style name has to be all lowercase: a capital W is not accepted.
sns.set_style("whitegrid")
# Scatter plot of Defense statistics (y-axis) vs Attack statistics (x-axis)
# using Seaborn's relplot. Unlike the earlier plt.scatter call, the columns
# are given by name and the DataFrame is passed through data=.
sns.relplot(data=df, x="Attack", y="Defense")
# Same plot as above, with color indicating Type 1: only hue= is added
sns.relplot(data=df, x="Attack", y="Defense", hue="Type 1")
# Category plot (catplot) of Defense statistics vs the non-numerical Type 1.
# Which feature goes on which axis was a judgement call: Type 1 on the
# x-axis and Defense on the y-axis seemed to make the most sense.
sns.catplot(data=df, x="Type 1", y="Defense", hue="Type 1")
# Rotate the labels on the x-axis for readability
plt.xticks(rotation=-45)
# Make a bar graph of Defense statistics for Type 1 using barplot.
# barplot and violinplot are axes-level functions that draw on the current
# matplotlib axes, so each plot below starts its own figure; otherwise it
# would be drawn on top of the previous plot when the file runs straight
# through.
plt.figure()
sns.barplot(x="Type 1", y="Defense", data=df)
plt.xticks(rotation=-45)
# Make a violin plot of the Defense data for Type 1
plt.figure()
sns.violinplot(x="Type 1", y="Defense", data=df)
plt.xticks(rotation=-45)
# Repeat the previous plot but change the palette to 'prism' and change the size.
# `height` is an argument of Seaborn's figure-level functions (e.g. catplot),
# not of violinplot, which is why passing it here never changed the size.
# An axes-level plot takes its size from the figure it is drawn on, so the
# size is set through figsize instead.
plt.figure(figsize=(10, 10))
sns.violinplot(x="Type 1", y="Defense", data=df, palette="prism")
plt.xticks(rotation=-45)
# Overlaying plots: show the actual Defense points on top of a violin plot.
# Steps, marked on the matching lines below:
#   (1) increase the figure size with plt.figure(figsize=(10, 6))
#   (2) create the violin plot with inner=None to get rid of the bars
#       inside each violin
#   (3) rotate the x-axis labels for readability
#   (4) create a swarmplot for the points (same arguments as the violin
#       plot) with color="k" so the points are drawn in black
#   (5) add the title "Defense Data for Type 1"
plt.figure(figsize=(10, 6))  # (1)
sns.violinplot(data=df, x="Type 1", y="Defense", inner=None)  # (2)
plt.xticks(rotation=-45)  # (3)
sns.swarmplot(data=df, x="Type 1", y="Defense", color="k")  # (4)
plt.title("Defense Data for Type 1")  # (5)
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
# Import LinearRegression function from scikit-learn
from sklearn.linear_model import LinearRegression
# Read in data from file insurance.csv and create a DataFrame; print out
# the first 6 lines.
# print() is needed for the rows to appear when this runs as a script; a bare
# `df.head(6)` is only echoed as the last line of a notebook cell.
df = pd.read_csv("insurance.csv")
print(df.head(6))
# Turn on the white background grid for Seaborn plots
sns.set_style("whitegrid")
# Scatter plot of charges vs BMI, with color indicating whether the patient
# is a smoker or not.
# The column names must match insurance.csv exactly, so "charges" and
# "smoker" stay lowercase; capitalized versions will not work.
sns.relplot(data=df, x="bmi", y="charges", hue="smoker")
# Linear regression data: check whether there is a relationship between
# insurance charges (target) and bmi (feature).
# Both columns are turned into n by 1 arrays before fitting.
n = len(df)
bmi = df["bmi"].values.reshape(n, 1)
charges = df["charges"].values.reshape(n, 1)
# Create the model and fit it to the data in one step (fit returns the model)
lr = LinearRegression().fit(bmi, charges)
# Write out the equation of the line. The target is n by 1, so the intercept
# is read from a length-1 array and the slope from a 1 by 1 array.
lr_int = lr.intercept_[0]
lr_sl = lr.coef_[0][0]
print("The intercept is", lr_int)
print("The slope is", lr_sl)
# Use regplot to plot the data and the fitted line.
# regplot is an axes-level function that draws on the current axes, so a new
# figure is started to keep it off the previous plot when the file runs
# straight through.
plt.figure()
sns.regplot(x="bmi", y="charges", data=df)
# Predict insurance costs for a person with BMI 31.7, rounded to the nearest cent.
# Note that this value agrees with the plot above because when x=31.7 y is around 14,000
x_eval = np.array([31.7])
x_eval = np.reshape(x_eval, (1, 1))  # predict expects one row per sample
# predict returns an array holding a single value; .item() extracts it as a
# plain float so it can be formatted with two decimals (cents) instead of
# printing the raw, unrounded array.
predicted_cost = lr.predict(x_eval).item()
print("The predicted insurance costs for a person with BMI 31.7 is", f"{predicted_cost:.2f}")