import numpy as np
import pandas as pd
# This magic command makes matplotlib charts render inline in the Jupyter Notebook.
%matplotlib inline
# Load the full claims table and keep only the rows recorded for year 2010.
claims = pd.read_csv('/Users/Kaemyuijang/SCMA248/Data/claimsNewFormat.csv')
claims2010 = claims.loc[claims['Year'] == 2010]

# Peek at the first five 'Freq' values, then draw the column as a line plot
# against the row index.
claims2010['Freq'].head(5)
claims2010['Freq'].plot(kind='line')

# Tabulate how many rows share each 'Freq' value: group by 'Freq' and count
# the non-null 'PolicyNum' entries in every group.
FreqDist = claims2010.groupby('Freq')[['PolicyNum']].count()
FreqDist.head(5)

# Draw the table as a line chart and as a bar chart. The last two calls are
# two spellings of the same bar chart.
FreqDist.plot(kind='line')
FreqDist.plot(kind='bar')
FreqDist.plot.bar()

# Histograms taken directly from the 2010 rows: 'Freq' alone, then 'Freq'
# and 'y' side by side.
claims2010.hist(column=['Freq'])
claims2010.hist(column=['Freq', 'y'])
# Keep only 2010 rows with Freq >= 1, so that the logarithm below is taken
# over that subset only (presumably the rows with a positive 'yAvg').
claims2010 = claims.loc[(claims['Year'] == 2010) & (claims['Freq'] >= 1)]
output = claims2010.loc[:, ['yAvg']]

# Histogram (20 bins) of the natural logarithm of 'yAvg'.
np.log(output).hist(bins=20)

# Alternatives (not run): store the logarithm as a new column instead.
#   claims2010['lnyAvg'] = claims2010['yAvg'].apply(np.log)
#   claims2010['lnyAvg'] = np.log(claims2010['yAvg'])
import matplotlib.pyplot as plt

# Work on the 2010 rows with Freq >= 1 and isolate the 'yAvg' column
# (kept as a one-column DataFrame).
claims2010 = claims[(claims['Year'] == 2010) & (claims['Freq'] >= 1)]
output = claims2010[['yAvg']]

# Start a fresh figure for each histogram. plt.hist draws on the *current*
# axes, so without this the first histogram lands on whatever plot was made
# last, and the second one is overlaid on the first (and overwrites its
# labels) when both run in the same cell or script.
plt.figure()
plt.hist(output, bins=10)
plt.xlabel('Average Claims')
plt.ylabel('Frequency')
plt.title('Distribution of Positive Average Severities')

# Same data on the natural-log scale, in its own figure.
plt.figure()
plt.hist(np.log(output), bins=10)
plt.xlabel('Logarithmic Average Claims')
plt.ylabel('Frequency')
plt.title('Distribution of Positive Average Severities')
# Store the natural log of 'yAvg' as a new column of the full claims table.
# The log is computed only for rows with Freq >= 1; assignment aligns on the
# index, so every other row receives NaN in 'lnyAvg'.
claims['lnyAvg'] = np.log(claims.loc[claims['Freq'] >= 1, 'yAvg'])

# Rebuild the 2010 subset so that it carries the new column.
claims2010 = claims.loc[(claims['Year'] == 2010) & (claims['Freq'] >= 1)]

from plotnine import *

# Histogram of 'lnyAvg' with 20 bins; data and mapping are given to ggplot
# directly and the histogram layer is added on top.
ggplot(data=claims2010, mapping=aes(x='lnyAvg')) + geom_histogram(bins=20)

# The same plot, composed step by step: data, then mapping, then geometry.
ggplot(data=claims2010) + aes(x='lnyAvg') + geom_histogram(bins=20)
# Reload the raw table; this discards any columns added to `claims` earlier.
claims = pd.read_csv('/Users/Kaemyuijang/SCMA248/Data/claimsNewFormat.csv')

# Rescale 'BCcov' (building and contents coverage) to millions of dollars.
claims['BCcov'] = claims['BCcov'].div(1_000_000)

# 2010 rows with Freq >= 1.
claims2010 = claims.loc[(claims['Year'] == 2010) & (claims['Freq'] >= 1)]

# Scatter plot of 'y' against 'BCcov'; the mapping is attached to the point
# layer itself.
ggplot(data=claims2010) + geom_point(mapping=aes(x='BCcov', y='y'))

# Larger points, with each one labelled by its 'Type' value. Every layer
# carries its own mapping, and the text is drawn on top of the points.
(
    ggplot(data=claims2010)
    + geom_point(mapping=aes(x='BCcov', y='y'), size=3)
    + geom_text(mapping=aes(x='BCcov', y='y', label='Type'))
)

# Same plot, with every label offset by 200 along the x axis.
(
    ggplot(data=claims2010)
    + geom_point(mapping=aes(x='BCcov', y='y'), size=3)
    + geom_text(mapping=aes(x='BCcov', y='y', label='Type'), nudge_x=200)
)
# Base plot: data plus one mapping shared by every layer added afterwards.
p = ggplot(data=claims2010) + aes(x='BCcov', y='y', label='Type')

# Points with 'Type' labels offset by 200 along x; no per-layer aes needed.
p + geom_point(size=3) + geom_text(nudge_x=200)

# Re-create the base plot (identical to the one above).
p = ggplot(data=claims2010) + aes(x='BCcov', y='y', label='Type')

# Put the y axis on a log10 scale through the generic continuous scale ...
p + geom_point(size=3) + geom_text(nudge_x=200) + scale_y_continuous(trans="log10")

# ... or through the dedicated log10 scale.
p + geom_point(size=3) + geom_text(nudge_x=200) + scale_y_log10()

# Add axis labels and a title to the log-scale plot.
(
    p + geom_point(size=3) + geom_text(nudge_x=200) + scale_y_log10()
    + xlab("Coverage (Millions)") + ylab("Claims (log scale)")
    + ggtitle("Scatter Plot of (Coverage,Claim) from claims Data")
)

# Keep the fully decorated plot in `p` so further layers can be stacked on it.
p = (
    ggplot(data=claims2010)
    + aes(x='BCcov', y='y', label='Type')
    + geom_point(size=3)
    + geom_text(nudge_x=200)
    + scale_y_log10()
    + xlab("Coverage (Millions)")
    + ylab("Claims (log scale)")
    + ggtitle("Scatter Plot of (Coverage,Claim) from claims Data")
)

# Extra point layer in a single fixed colour, drawn over the existing points.
p + geom_point(size=3, colour="blue")

# Extra point layer coloured by 'Type', with the legend titled "Entity Type".
p + geom_point(aes(colour='Type'), size=3) + scale_color_discrete(name="Entity Type")