import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
%matplotlib inline
# Load the weather observations from the CSV file in the working directory.
df = pd.read_csv('Summary of Weather.csv')

# Report the size of the raw data and the number of missing values per column.
# The results are printed explicitly: when this file runs as a plain script,
# the value of a bare expression is silently discarded.
print(df.shape)
print(df.isnull().sum())

# Copy of the data without any row that has a missing value in any column.
# NOTE(review): the later statements in this file keep reading from `df`,
# not `df_clean1` -- confirm that is intended.
df_clean1 = df.dropna()
print(df_clean1.isnull().sum())
print(df_clean1.describe())
# Scatter plot of minimum temperature against maximum temperature.
df.plot(x='MinTemp', y='MaxTemp', style='o')
plt.title('MinTemp vs MaxTemp')
plt.xlabel('Minimum Temperature')
plt.ylabel('Maximum Temperature')
plt.show()

# Distribution of the maximum temperature (histogram with a density curve).
# `sns.distplot` is deprecated; `histplot` with `kde=True` and a density
# scale is its axes-level replacement.
plt.figure(figsize=(10,5))
sns.histplot(df['MaxTemp'], kde=True, stat='density')
# tight_layout has to run after the axes are drawn to have anything to adjust.
plt.tight_layout()
plt.show()
# Keep only the rows where both temperatures are present: LinearRegression.fit
# rejects NaN input, so missing readings have to be removed before fitting.
# When neither column has missing values this selects exactly the same rows
# as before.
temps = df[['MinTemp', 'MaxTemp']].dropna()

# Reshape each column into a 2-D single-column array for the estimator.
X = temps['MinTemp'].values.reshape(-1,1)
y = temps['MaxTemp'].values.reshape(-1,1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

# intercept and slope respectively
print(regressor.intercept_)
print(regressor.coef_)

# Predicted maximum temperatures for the held-out minimum temperatures.
y_pred = regressor.predict(X_test)
# Side-by-side table of the held-out targets and the model's predictions.
df2 = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
# Printed explicitly so the preview is visible when run as a plain script.
print(df2.head(30))

# Bar chart comparing the first 20 actual/predicted pairs.
df_plot = df2.head(20)
df_plot.plot(kind='bar', figsize=(10,5))
# Line widths are given as numbers, which is what the matplotlib API expects.
plt.grid(which='major', linestyle='-', linewidth=0.5, color='green')
# NOTE(review): minor grid lines presumably only render when minor ticks are
# enabled on the axes -- confirm whether `plt.minorticks_on()` was intended.
plt.grid(which='minor', linestyle='-', linewidth=0.5, color='black')
plt.show()
# Build a two-column frame holding only the minimum and maximum temperatures.
headers = ['MinTemp','MaxTemp']
data = [df[column] for column in headers]
new_data = pd.concat(data,axis=1,keys=headers)
print(new_data)

# Sanity checks on the extracted columns, printed explicitly so they are
# visible when this file runs as a plain script.
print(new_data.isnull().sum())
print(new_data.describe())

# Write the extracted columns (with the row index) to an Excel workbook in the
# working directory.
file_name = 'MinMax_Temperature.xlsx'
new_data.to_excel(file_name)
# These two steps were written as shell commands, which are not valid Python
# statements; they are expressed here as the equivalent Python calls.
import platform
import subprocess
import sys

# Install TabPy into the environment of the interpreter running this file.
# check=True surfaces a failed installation instead of continuing silently.
subprocess.run([sys.executable, '-m', 'pip', 'install', 'tabpy'], check=True)

# Report the running interpreter's version in the form `python --version` uses.
print('Python ' + platform.python_version())