# Dataframe manipulation
import pandas as pd
# Linear algebra operation
import numpy as np
# Data visualization with matplotlib
import matplotlib.pyplot as plt
# Data visualization with seaborn
import seaborn as sns
# Datetime
import datetime as dt
# Load the raw sales transactions; column dtypes are whatever pandas infers.
df = pd.read_csv('data/sales_trx.txt', sep=',')

# Second load of the same file with TRX_DATE parsed while reading. Values that
# do not match the format become NaT because of errors='coerce'.
# NOTE(review): this uses '%d%m%Y' while the conversion at the end of this
# section uses '%Y%m%d' for the same column -- confirm which layout the file
# really has.
df2 = pd.read_csv(
    'data/sales_trx.txt',
    sep=',',
    converters={
        # The argument is named `raw` so it does not shadow the `dt` alias of
        # the datetime module imported at the top of the file.
        'TRX_DATE': lambda raw: pd.to_datetime(raw, format='%d%m%Y', errors='coerce'),
    },
)

# Notebook-style peek at the last rows; has no effect when run as a script.
df.tail()

# Replace every missing value with 0 so rows with an empty OMSET can be
# dropped by the non-zero filter below.
df.fillna(0, inplace=True)

# Notebook-style check of the remaining null count per column.
df.isnull().sum()

# Keep only the rows whose OMSET is non-zero. The explicit .copy() makes
# df_terisi an independent DataFrame, so assigning a column to it later does
# not raise SettingWithCopyWarning.
df_terisi = df[df['OMSET'] != 0].copy()
df_terisi.tail()

# Persist the filtered rows. This happens before the TRX_DATE conversion, so
# the file keeps the original date values; the index is written as the first
# column (to_csv default).
df_terisi.to_csv('data/sales_trx_clean.csv', sep=',')

# Convert TRX_DATE to datetimes in one vectorized call: cast to str first
# (same as the per-value str(x)), then parse as YYYYMMDD.
df_terisi['TRX_DATE'] = pd.to_datetime(
    df_terisi['TRX_DATE'].astype(str), format='%Y%m%d'
)
# --- Captured notebook output (stderr) from the TRX_DATE assignment; not Python code ---
# /shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#   """Entry point for launching an IPython kernel.