import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
%matplotlib inline
# Load the weather summary. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk, which is what pandas
# itself recommends for the mixed-type columns (7, 8, 18, 25) it warns about.
df = pd.read_csv('Summary of Weather.csv', low_memory=False)
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3165: DtypeWarning: Columns (7,8,18,25) have mixed types.Specify dtype option on import or set low_memory=False.
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
# --- Size and missing-value overview of the raw frame ---
df.shape
df.isnull().sum()

# dropna() returns a new frame, so `df` keeps its missing values and
# `df_clean1` holds only the rows with no missing value in any column.
df_clean1 = df.dropna()
df_clean1.isnull().sum()
df_clean1.describe()
df.isnull().sum()

# --- Scatter of MinTemp against MaxTemp ---
df.plot(x='MinTemp', y='MaxTemp', style='o')
plt.title('MinTemp vs MaxTemp')
plt.xlabel('Minimum Temperature')
plt.ylabel('Maximum Temperature')
plt.show()

# --- Distribution of MaxTemp ---
# histplot replaces the deprecated distplot; kde=True with stat='density'
# keeps the histogram-plus-density-curve presentation.
plt.figure(figsize=(10, 5))
sns.histplot(df['MaxTemp'], kde=True, stat='density')
# Adjust the layout only after the axes exist, otherwise there is nothing
# for tight_layout() to arrange.
plt.tight_layout()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
# Single-feature linear regression: MinTemp is the predictor, MaxTemp the
# target. Both are shaped as (n_rows, 1) column arrays.
X = df[['MinTemp']].to_numpy()
y = df[['MaxTemp']].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

regressor = LinearRegression().fit(X_train, y_train)

# Report the fitted intercept first, then the slope.
for fitted_param in (regressor.intercept_, regressor.coef_):
    print(fitted_param)
[10.66185201]
[[0.92033997]]
# Predict on the held-out split and put the predictions next to the true
# values, one row per test sample.
y_pred = regressor.predict(X_test)
df2 = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df2.head(30)

# Bar chart comparing the first 20 actual/predicted pairs.
df_plot = df2.head(20)
df_plot.plot(kind='bar', figsize=(10, 5))
# Minor grid lines are drawn at minor tick positions, so minor ticks have to
# be switched on for the which='minor' grid call to have any visible effect.
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='green')
plt.grid(which='minor', linestyle='-', linewidth=0.5, color='black')
plt.show()
# Build a two-column frame holding only the temperature columns.
headers = ['MinTemp', 'MaxTemp']
data = [df[column] for column in headers]
new_data = pd.concat(data, axis=1, keys=headers)
print(new_data)
MinTemp MaxTemp
0 22.222222 25.555556
1 21.666667 28.888889
2 22.222222 26.111111
3 22.222222 26.666667
4 21.666667 26.666667
... ... ...
119035 18.333333 28.333333
119036 18.333333 29.444444
119037 18.333333 28.333333
119038 18.333333 28.333333
119039 17.222222 29.444444
[119040 rows x 2 columns]
# Missing-value counts and summary statistics for the two-column frame.
new_data.isna().sum()
new_data.describe()

# Write the frame to an Excel workbook in the working directory.
file_name = 'MinMax_Temperature.xlsx'
new_data.to_excel(excel_writer=file_name)
pip install tabpy
Note: you may need to restart the kernel to use updated packages.
WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x00000177B4D22580>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/tabpy/
WARNING: Retrying (Retry(total=3, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x00000177B4D227C0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/tabpy/
WARNING: Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x00000177B4D229A0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/tabpy/
WARNING: Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x00000177B4D22B80>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/tabpy/
WARNING: Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x00000177B4D22D60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/tabpy/
ERROR: Could not find a version that satisfies the requirement tabpy
ERROR: No matching distribution found for tabpy
# `python --version` is a shell command; evaluated as Python it raises
# NameError because no name `python` exists. Report the version of the
# interpreter that is actually running this code instead.
import sys

python_version = '.'.join(str(part) for part in sys.version_info[:3])
print('Python', python_version)
Execution Error
NameError: name 'python' is not defined