import pandas as pd
import numpy as np
# Load the sample data using pandas' default parsing (no date handling requested).
df = pd.read_csv('sample_data.csv')
# Preview the first rows.
df.head()
This chart is empty
# Summarise the columns: non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 date 26 non-null object
1 sale 26 non-null int64
dtypes: int64(1), object(1)
memory usage: 544.0+ bytes
# Display the whole DataFrame.
df
This chart is empty
# Re-read the file, asking pandas to parse the `date` column while loading.
df = pd.read_csv('sample_data.csv', parse_dates=['date'])
# Check the resulting dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 date 26 non-null datetime64[ns]
1 sale 26 non-null int64
dtypes: datetime64[ns](1), int64(1)
memory usage: 544.0 bytes
df.head()
# Approach 1: load the file as-is, then convert the column explicitly,
# spelling out the day/month/year layout of the strings.
df = pd.read_csv('sample_data.csv')
df.head()
df['date'] = pd.to_datetime(df['date'], format="%d/%m/%Y")
df.head()
# Approach 2: let read_csv do the conversion, telling it the day comes first.
df = pd.read_csv('sample_data.csv', dayfirst=True, parse_dates=['date'])
df.head()
# Use the dates as the index so rows can be selected by date label.
# Sorting keeps label slices well-defined even if the file is not in
# chronological order.
df = df.set_index('date').sort_index()
df.head()
# Label slice between two dates.
df.loc['2019-02-04':'2019-04-04']
# Rows strictly before 5 March 2019. The date is written in ISO form: the
# previous literal '03-05-2019' was resolved month-first by pandas, which is
# easy to misread next to day-first data.
# NOTE(review): if 3 May 2019 was intended, use '2019-05-03' instead.
df.loc[df.index < '2019-03-05']
# Partial-string selection of a whole year, then a per-column sum.
df.loc['2019'].sum()
# The file holds a single `date` column (plus `sale`), so there are no
# year/month/day columns to combine. Parse `date` directly (day first) and
# derive the components from it instead. This also avoids the nested
# `parse_dates` dict and `keep_date_col`, which newer pandas deprecates.
df = pd.read_csv('sample_data.csv', parse_dates=['date'], dayfirst=True)
# Split the parsed date into separate numeric columns.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df.head()
# Convert the string 'today' with to_datetime.
pd.to_datetime('today')
df.head()
df.head()
# Show the frame ordered by its index (the result is displayed, not stored).
df.sort_index()
# Generate dates between a start and an end (default frequency).
pd.date_range(start='1/1/2019', end='2/01/2019')
# Same span, weekly frequency.
pd.date_range(start='1/1/2019', end='2/01/2019', freq='W')
# Date arithmetic: add a two-week offset to a timestamp.
pd.to_datetime('2019-04-01') + pd.DateOffset(weeks=2)
# Work on the first five rows only.
df_temp = df.iloc[:5]
# shift(freq=...) moves the index itself, which only works on a
# datetime-like index; promote the `date` column when it is still a column.
if 'date' in df_temp.columns:
    df_temp = df_temp.set_index('date')
df_temp
# Move every index label forward by two weeks ("W" is the canonical weekly
# alias, matching the other cells).
df_temp.shift(2, freq="W")
# Four weeks of consecutive days, each paired with a random integer in [0, 500).
range = pd.date_range(start="1/1/2019", periods=28, freq="D")
df = pd.Series(
    data=np.random.randint(0, 500, size=len(range)),
    index=range,
)
df.head()
# Weekly totals with the default week anchor ...
df.resample('W').sum()
# ... and with weeks anchored on Monday and on Tuesday.
df.resample('W-MON').sum()
df.resample('W-TUE').sum()
# Reference index holding every weekly date in the period.
# NOTE(review): the name `range` shadows the builtin; it is kept here because
# other cells may refer to it.
range = pd.date_range("1/4/2019", "4/1/2019", freq="W")
df = pd.Series(np.random.randint(0, 500, len(range)), index=range)
df
df = df.to_frame()
df.head()
# Remove two dates so the frame's index has gaps relative to the reference.
df = df.drop(index=[pd.to_datetime('2019-03-03'), pd.to_datetime('2019-03-24')])
# check for missing datetimeindex values based on reference index (with all values)
# The missing dates are those in the reference index that the frame lacks.
# (Filtering df.index against the reference, as before, is always empty.)
missing_dates = range.difference(df.index)
missing_dates
range
# Position of the index entry closest to a date that is not in the index.
# Index.get_loc no longer accepts `method`; get_indexer is the supported way
# to do a nearest-match lookup and returns one position per requested label.
target = pd.to_datetime('2019-03-03')
nearest_pos = df.index.get_indexer([target], method="nearest")[0]
nearest_pos
# The index label at that position.
didx = df.index[nearest_pos]
didx