import pandas as pd
# Load the dataset from merged/olist_dataset_train.csv and peek at it.
df = pd.read_csv('merged/olist_dataset_train.csv')
df.head()
df.shape

# Columns converted to datetime with pd.to_datetime; listed once and reused
# on both sides of the assignment.
date_columns = [
    'order_purchase_timestamp',
    'order_approved_at',
    'order_delivered_carrier_date',
    'order_delivered_customer_date',
    'order_estimated_delivery_date',
    'review_creation_date',
    'review_answer_timestamp',
]
df[date_columns] = df[date_columns].apply(pd.to_datetime)
from pandas_profiling import ProfileReport
# Build the profiling report over df as it is at this point (before the
# drop below); the bare name is presumably there so a notebook cell shows it.
profile = ProfileReport(df, title="Análise de vendas")
profile

# Remove the identifier columns and the review title from df, in place.
columns_to_drop = [
    'customer_id',
    'customer_unique_id',
    'order_id',
    'review_id',
    'review_comment_title',
]
df.drop(columns=columns_to_drop, inplace=True)

# Purchase month formatted as a 'YYYY-MM' string.
purchase_timestamp = df['order_purchase_timestamp']
df['year_month_purchase'] = purchase_timestamp.dt.strftime('%Y-%m')
def embeleze(fig):
    """Apply the shared font and template to a figure's layout.

    Args:
        fig: Any object exposing ``update_layout(**kwargs)`` — presumably a
            Plotly figure, given the ``plotly_white`` template name.

    Returns:
        None. The only effect is the ``fig.update_layout`` call.
    """
    fig.update_layout(
        font_family='Calibri',
        template='plotly_white',
    )
# Purchase day as a 'YYYY-MM-DD' string; used to slice and group the
# November 2017 orders below.
df['2017-11_purchase'] = df['order_purchase_timestamp'].dt.strftime('%Y-%m-%d')

# Keep the whole month. The bounds are inclusive so that orders placed on
# 1 Nov and 30 Nov are not silently dropped (strict > / < excluded both).
# Zero-padded ISO date strings compare in chronological order.
df7 = df[(df['2017-11_purchase'] >= '2017-11-01') & (df['2017-11_purchase'] <= '2017-11-30')]

# Per-state slices are kept as separate names in case later cells use them.
df_SP2 = df7[df7['customer_state'] == 'SP']
df_RJ2 = df7[df7['customer_state'] == 'RJ']
df_MG2 = df7[df7['customer_state'] == 'MG']
df7 = pd.concat([df_SP2, df_RJ2, df_MG2])

# Daily totals per state. numeric_only=True restricts the sum to numeric
# columns: df7 still carries the datetime columns parsed earlier, and newer
# pandas releases no longer drop non-summable columns on their own.
df7 = df7.groupby(['2017-11_purchase', 'customer_state']).sum(numeric_only=True).reset_index()

# Range of review scores, then the per-column maxima of the whole frame.
df['review_score'].min()
df['review_score'].max()
df.max()

# Row(s) holding the largest payment. The maximum is computed rather than
# compared against a hand-copied float literal (originally 13664.08,
# presumably read off the df.max() output above), so the lookup keeps
# working if the data changes or the parsed float differs in its last bit.
df[df['payment_value'] == df['payment_value'].max()]