import numpy as np # linear algebra
import pandas as pd # data processing
import seaborn as sns
import matplotlib.pyplot as plt
#import numpy.ma as ma # deal with NaN
import scipy.stats
# Load the class dataset into a DataFrame, print its column/dtype summary,
# and draw five random rows for a quick look at the values.
DATA_PATH = '/work/class_data.csv'
df = pd.read_csv(DATA_PATH)
df.info()
# NOTE(review): the sampled rows are only displayed when this is the last
# expression of a notebook cell; in a plain script the result is discarded.
df.sample(n=5)
# Scatter market-to-book ('mb') against total assets ('AT') in raw units and
# overlay the least-squares straight line through the points.
total_assets = df['AT']
market_to_book = df['mb']
plt.plot(
    total_assets,
    market_to_book,
    color='purple',
    linestyle='none',
    marker='.',
)
# A degree-1 polyfit returns the coefficients highest power first:
# slope (m), then intercept (b).
m, b = np.polyfit(total_assets, market_to_book, deg=1)
fitted_line = m * total_assets + b
plt.plot(total_assets, fitted_line)
plt.xlabel('Total Assets')
plt.ylabel('Market-to-Book')
# Log-log view: regress log(market-to-book) on log(total assets) and plot the
# points together with the fitted line.
#
# np.log yields -inf for zeros and NaN for negative or missing values. Those
# entries stay in xAT / yAT, but are excluded from the fit and the plot,
# because non-finite values would make np.polyfit fail or return NaN
# coefficients.
xAT = np.log(df['AT'])
yAT = np.log(df['mb'])
finite = np.isfinite(xAT) & np.isfinite(yAT)
x_fit = xAT[finite]
y_fit = yAT[finite]

# Start a fresh figure so the log-scale points are not drawn onto axes that
# already hold a plot in raw units.
plt.figure()
plt.plot(x_fit, y_fit, marker='.', linestyle='none', color='purple')

# Degree-1 fit: m is the slope (elasticity of mb w.r.t. AT), b the intercept.
m, b = np.polyfit(x_fit, y_fit, 1)

# The fitted line is a function of x (m*x + b), not of y, and it needs a
# visible line style to show up at all. Sorting x keeps the line a single
# left-to-right stroke.
x_line = np.sort(x_fit)
plt.plot(x_line, m * x_line + b, color='blue')

# Both axes show natural logs, so label them accordingly.
plt.xlabel('log(Total Assets)')
plt.ylabel('log(Market-to-Book)')
print(m, b)