# Load the dataset from /work/heart.csv into a DataFrame.
df = pd.read_csv('/work/heart.csv')
# Bare expression: in a notebook this is echoed as the cell result (rows, columns).
df.shape

# Scatter of Age against RestingBP, one marker per row.
df.plot(x='Age', y='RestingBP', style='o')
plt.title('Age vs Resting Blood Pressure [mm Hg]')
plt.xlabel('Age')
plt.ylabel('RestingBP')
plt.show()

# Distribution of RestingBP.
# `distplot` is deprecated in seaborn and scheduled for removal; `histplot`
# is the axes-level replacement. `kde=True` with `stat='density'` keeps the
# density-scaled histogram plus smoothed curve that `distplot` drew.
plt.figure(figsize=(15, 10))
sb.histplot(df['RestingBP'], kde=True, stat='density')
# tight_layout only adjusts axes that already exist, so call it after plotting.
plt.tight_layout()
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
# Simple linear regression: RestingBP as a function of Age alone.
# Double-bracket selection yields single-column 2-D arrays of shape (n_rows, 1).
X = df[['Age']].values
y = df[['RestingBP']].values

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit ordinary least squares on the training portion and report the intercept.
regressor = LinearRegression().fit(X_train, y_train)
print(regressor.intercept_)
[104.03159856]
# Slope of the fitted line: change in predicted RestingBP per unit of Age.
print(regressor.coef_)
[[0.53160786]]
# Predict on the held-out rows and tabulate predictions beside the observed values.
y_pred = regressor.predict(X_test)
acpre = pd.DataFrame(
    {
        'Actual': y_test.ravel(),
        'Predicted': y_pred.ravel(),
    }
)
# Bare expression: in a notebook this is echoed as the cell result.
acpre

# Side-by-side bars for the first 25 held-out rows.
first_rows = acpre.head(25)
first_rows.plot(kind='bar', figsize=(16, 10))
plt.grid(
    which='major',
    linestyle='-',
    linewidth='0.5',
    color='yellow',
)
plt.grid(
    which='minor',
    linestyle=':',
    linewidth='0.5',
    color='black',
)
plt.show()

# Held-out observations (gray points) with the fitted regression line (red).
plt.scatter(X_test, y_test, color='gray')
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.show()

# Error metrics on the held-out rows; RMSE is the square root of the MSE.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
Mean Absolute Error: 14.507487682676484
Mean Squared Error: 337.4674207658971
Root Mean Squared Error: 18.370286355032604
# Multiple regression: model HeartDisease from four numeric predictors.
# NOTE(review): HeartDisease looks like a binary 0/1 label; if so, a
# classifier (e.g. logistic regression) would be the more appropriate model.
# TODO confirm against the dataset description.
X = df[['Age', 'RestingBP', 'Cholesterol', 'MaxHR']]
y = df['HeartDisease'].values

# Distribution of the target. `displot` is figure-level and creates its own
# figure, so a preceding plt.figure() would only leave an empty extra figure.
# Size is set through height/aspect instead: 10 high, 10 * 1.5 = 15 wide.
sb.displot(df['HeartDisease'], height=10, aspect=1.5)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# One fitted coefficient per predictor, labelled by column name
# (echoed as the cell result in a notebook).
pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient'])

y_pred = regressor.predict(X_test)
# Keep the comparison table under its own name so the dataset held in `df`
# is not overwritten and earlier steps remain re-runnable.
acpre_multi = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
acpre_multi

# Side-by-side bars for the first 25 held-out rows.
acpre_multi.head(25).plot(kind='bar', figsize=(10, 8))
plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.show()

# Error metrics on the held-out rows; the MSE is computed once and reused for RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
Mean Absolute Error: 0.40018278925501144
Mean Squared Error: 0.1931363148581789
Root Mean Squared Error: 0.43947276918846623