import numpy as np # linear algebra
import pandas as pd # data processing
import seaborn as sns
import matplotlib.pyplot as plt
#import numpy.ma as ma # deal with NaN
import scipy.stats
# Read the class dataset from CSV into a DataFrame, then print its
# structural summary (index range, columns, non-null counts, dtypes, memory).
df = pd.read_csv(filepath_or_buffer='/work/class_data.csv')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303052 entries, 0 to 303051
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 AT 303052 non-null float64
1 mb 303052 non-null float64
dtypes: float64(2)
memory usage: 4.6 MB
# Draw five random rows (only displayed when run as the last expression of a
# notebook cell).
df.sample(5)

# Scatter the 'mb' column against the 'AT' column, then overlay the
# degree-1 least-squares line fitted to the same data.
assets = df['AT']
market_to_book = df['mb']
plt.plot(assets, market_to_book, marker='.', linestyle='none', color='purple')
slope, intercept = np.polyfit(assets, market_to_book, 1)
fitted = slope * assets + intercept
plt.plot(assets, fitted)
plt.xlabel('Total Assets')
plt.ylabel('Market-to-Book')
# Log-log view of 'mb' against 'AT', with a degree-1 least-squares line
# fitted in log space. Prints the fitted slope and intercept.
#
# log() is only defined for strictly positive inputs: a zero becomes -inf
# (and a negative value becomes NaN), which makes np.polyfit return nan for
# both coefficients. Rows that are not strictly positive in BOTH columns are
# therefore dropped before the transform.
positive = (df['AT'] > 0) & (df['mb'] > 0)
xAT = np.log(df.loc[positive, 'AT'])
yAT = np.log(df.loc[positive, 'mb'])
plt.plot(xAT, yAT, marker='.', linestyle='none', color='purple')
m, b = np.polyfit(xAT, yAT, 1)
# The fitted line is a function of x (m*x + b). It needs a visible line
# style: with linestyle='none' and no marker nothing would be drawn.
plt.plot(xAT, m * xAT + b, linestyle='-', color='blue')
# Both axes show natural logs of the original columns.
plt.xlabel('log(Total Assets)')
plt.ylabel('log(Market-to-Book)')
print(m, b)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/arraylike.py:358: RuntimeWarning: divide by zero encountered in log
result = getattr(ufunc, method)(*inputs, **kwargs)
nan nan