# Data manipulation
!pip install pandas
import pandas as pd
# Data visualization
!pip install seaborn
import seaborn as sns
# Data visualization (matplotlib plotting API)
!pip install matplotlib
import matplotlib.pyplot as plt
# Load the flotation-plant dataset in a single pass. decimal=',' tells the
# parser that a comma is the decimal mark. An earlier read without that
# option was overwritten immediately, so it only added a second full parse
# of the file.
df = pd.read_csv('MiningProcess_Flotation_Plant_Database.csv', decimal=',')
df.head()
# First look at the frame: its dimensions, one column, and rows 100-104.
df.shape
df.loc[:, '% Iron Concentrate']
df.iloc[100:105]

# Types of the frame, the 'date' column, and its first element before
# conversion.
print(type(df))
print(type(df['date']))
print(type(df['date'].iloc[0]))

# Parse 'date' into pandas datetimes, then print the same types again to
# show what changed.
df['date'] = pd.to_datetime(df['date'])
print(type(df['date']))
print(type(df['date'].iloc[0]))

# Summary statistics of the frame.
df.describe()
# Report the time span covered by the dataset. The f-strings put a space
# between the label and the timestamp, which plain concatenation left out.
max_date = df['date'].max()
print(f'The max date is {max_date}')
min_date = df['date'].min()
print(f'The min date is {min_date}')
# Keep rows whose timestamp is strictly after 2017-05-31 23:59:59 and
# strictly before 2017-06-02, then renumber the index from zero.
# NOTE(review): despite the name, this window keeps only timestamps dated
# 2017-06-01 - confirm whether the whole month was intended.
df_june = df[
    df['date'].gt("2017-05-31 23:59:59") & df['date'].lt("2017-06-02")
].reset_index(drop=True)

# Columns of interest picked out of the filtered frame.
important_cols = [
    'date',
    '% Iron Concentrate',
    '% Silica Concentrate',
    'Ore Pulp pH',
    'Flotation Column 05 Level',
]
df_june_important = df_june.loc[:, important_cols]
df_june_important
# Grid of pairwise plots over the selected columns.
sns.pairplot(data=df_june_important)

# Correlation table for the same columns (Pearson is the pandas default).
df_june_important.corr(method='pearson')

# Iron concentrate percentage over time for the filtered window.
sns.lineplot(data=df_june, x='date', y='% Iron Concentrate')
# Draw a time-series line plot for each selected measurement.
for col in important_cols:
    if col == 'date':
        # 'date' is already the x-axis; plotting it against itself adds
        # no information.
        continue
    sns.lineplot(x='date', y=col, data=df_june)
    # Show the current figure before the next measurement is drawn.
    # matplotlib.pyplot is already imported as plt at the top of the file,
    # so it is not re-imported on every iteration.
    plt.show()
# Distribution of the pulp pH values in the filtered window.
sns.histplot(data=df_june['Ore Pulp pH'])

# Iron feed against column-05 level, with marker size driven by the iron
# concentrate percentage (sizes range from 1 to 100, legend hidden).
sns.scatterplot(
    data=df_june,
    x='% Iron Feed',
    y='Flotation Column 05 Level',
    size='% Iron Concentrate',
    sizes=(1, 100),
    legend=False,
)

# Display the full frame.
df