import pandas as pd
# Load the ranking table from disk; the statements further down all read from `df`.
df = pd.read_csv(filepath_or_buffer='University_Rank.csv')
# Bare expression: echoes the table in a notebook/REPL, no effect in a plain script.
df

# Mean of the citations-per-faculty column (again only displayed interactively).
df.loc[:, 'Citations_per_Faculty'].mean()

import numpy as np
from sklearn.linear_model import LinearRegression
# Predictor columns for the regression, in the order the coefficients are reported.
feature_columns = [
    'International_Student_Ratio',
    'International_Faculty_Ratio',
    'Faculty_Student_Ratio',
    'Citations_per_Faculty',
    'Academic_Reputation',
    'Employer_Reputation',
]

# Build the design matrix with one row per DataFrame row and one column per
# feature, i.e. shape (n_rows, 6). Selecting the columns from the DataFrame
# keeps every row aligned with its target value. (Stacking the six Series into
# a (6, n_rows) array and calling reshape((-1, 6)) does NOT transpose: it
# re-chunks the flat buffer, so each "row" would hold six consecutive values
# of a single feature.)
x = df[feature_columns].to_numpy()
y = df['Overall_Score'].to_numpy()

# Ordinary least squares fit of Overall_Score on the six features.
model = LinearRegression().fit(x, y)

# Coefficient of determination (R^2) evaluated on the same data used to fit.
r_sq = model.score(x, y)
r_sq

# Fitted intercept; bare expression, shown only in a notebook/REPL.
model.intercept_

# Fitted coefficients, one per entry of `feature_columns` (same order).
model.coef_

# In-sample predictions for every row of `x`.
y_pred = model.predict(x)
y_pred

# Bare expression: shows the table in a notebook/REPL before columns are dropped.
df

# Remove the columns that are excluded from the correlation matrix
# (presumably the non-numeric / identifier columns -- confirm against the CSV).
# This mutates `df` in place, so running it a second time on the same `df`
# raises KeyError because the columns are already gone.
df.drop(columns=['Rank', 'University', 'Country'], inplace=True)

import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise Pearson correlation coefficients between the remaining columns;
# rowvar=False tells NumPy that each column (not each row) is a variable.
correlation_coefficients = np.corrcoef(df, rowvar=False)

# np.corrcoef returns a bare ndarray, so the heatmap would otherwise label its
# axes 0..k-1. Pass the column names explicitly so each cell can be read.
column_labels = list(df.columns)
sns.heatmap(
    correlation_coefficients,
    annot=True,
    xticklabels=column_labels,
    yticklabels=column_labels,
)
plt.show()