import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
import csv
import seaborn as sns
# Increase the default figure size used by every plot below.
plt.rcParams['figure.figsize'] = (10, 6)

# Print the run date formatted as day, month abbreviation and year.
today = date.today()
d4 = today.strftime("%d-%b-%Y")
print(f'Date: {d4}')

# Load the registrations export; the file is read as latin-1.
df_dm = pd.read_csv('/work/registrations_tgp_16092021.csv', encoding='latin-1')
# NOTE: bare expressions such as head() only display output when run as the
# last statement of a notebook cell; in a plain script the result is discarded.
df_dm.head()
df_dm.info()

# Parse REGDATE into datetimes so the .dt accessor can be used on it.
df_dm['REGDATE'] = pd.to_datetime(df_dm['REGDATE'])
df_dm.info()
df_dm.REGDATE.dt.year.value_counts().sort_index()

# Recode SEX labels as digits (Unknown -> 2, Male -> 1, Female -> 0), applied
# in that order, then convert the column to a numeric dtype.
sex_codes = (
    df_dm.SEX
    .str.replace('Unknown', '2')
    .str.replace('Male', '1')
    .str.replace('Female', '0')
)
df_dm['SEX'] = pd.to_numeric(sex_codes)
df_dm.info()
# RGBA colour shared by the muted bar charts.
_MUTED = (0.4, 0.4, 0.6, 0.6)


def _plot_counts(values, title, color=None):
    """Draw a bar chart of how often each value occurs, ordered by value.

    Every chart is drawn on the axes of a freshly created figure. Without an
    explicit ``ax`` pandas draws on the current axes, which stacks all charts
    on top of each other when the code runs outside a one-cell-per-plot
    notebook.

    Args:
        values: Series whose distinct values are counted.
        title: Title shown above the chart.
        color: Optional bar colour; the pandas default is used when omitted.

    Returns:
        The matplotlib axes the chart was drawn on.
    """
    _, axes = plt.subplots()
    counts = values.value_counts().sort_index()
    plot_kwargs = {} if color is None else {'color': color}
    return counts.plot(kind='bar', title=title, ax=axes, **plot_kwargs)


def _dates_in_year(dates, year):
    """Return the entries of the datetime Series *dates* that fall in *year*."""
    return dates[dates.dt.year == year]


def _iso_week(dates):
    """Return the ISO week number of each date.

    ``Series.dt.week`` was deprecated in pandas 1.1 and removed in 2.0;
    ``dt.isocalendar().week`` is the supported replacement.
    """
    return dates.dt.isocalendar().week


# Scatter plot of AGE against REGDATE, coloured by the numeric SEX code.
fig, ax = plt.subplots()
df_dm.plot(kind='scatter', x='REGDATE', y='AGE', c="SEX", s=5, cmap='viridis', alpha=0.65, linewidth=1, ax=ax, title='Registrations all time =. Scatter Plot AGE / REGDATE / SEX')

reg_dates = df_dm.REGDATE

# All-time distributions.
_plot_counts(reg_dates.dt.year, 'Registration per Year')
_plot_counts(reg_dates.dt.month, 'Registrations by Month of the Year (Jan - Dec) - All Time', color=_MUTED)
# weekday is 0 for Monday through 6 for Sunday.
_plot_counts(reg_dates.dt.weekday, 'Registrations by Day of the Week (Mon - Sun) - All Time', color=_MUTED)

# Per-year distributions. Titles describe the data actually plotted.
_plot_counts(_dates_in_year(reg_dates, 2019).dt.month, 'Registration by Month of the Year (Jan - Dec) - 2019')
_plot_counts(_dates_in_year(reg_dates, 2021).dt.month, 'Registration by Month of the Year (Jan - Dec) - 2021')
_plot_counts(_dates_in_year(reg_dates, 2021).dt.weekday, 'Registration by Day of the Week (Mon - Sun) - 2021')
_plot_counts(_iso_week(_dates_in_year(reg_dates, 2021)), 'Registration in 2021 - Weeks)', color=_MUTED)
_plot_counts(_iso_week(_dates_in_year(reg_dates, 2021)), 'Weekly Registration 2021')
# NOTE(review): this chart was previously filtered on 2013 but titled 2019.
# The filter is kept and the title now matches it; confirm 2013 is intended.
_plot_counts(_iso_week(_dates_in_year(reg_dates, 2013)), 'Weekly Registration 2013')
_plot_counts(_iso_week(_dates_in_year(reg_dates, 2019)), 'Weekly Registration 2019')
_plot_counts(_dates_in_year(reg_dates, 2020).dt.month, 'Registration by Month of the Year (Jan - Dec) - 2020')
_plot_counts(_dates_in_year(reg_dates, 2018).dt.month, 'Registration by Month of the Year (Jan - Dec) - 2018')