My first notebook on Deepnote
# Shell escape (IPython/Jupyter `!` syntax): use the kaggle CLI to download
# the "2021-olympics-in-tokyo" dataset and unzip it.
# NOTE(review): this line is not valid in a plain .py script; it only runs
# inside a notebook/IPython session with the kaggle CLI available.
!kaggle datasets download -d arjunprasadsarkhel/2021-olympics-in-tokyo --unzip
# Importing libraries
#------Data Pre-Processing----------
import pandas as pd
import numpy as np # data pre-processing
#------Data Visualizations---------
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px # data visualization
#------Additional-----------
import warnings
warnings.filterwarnings("ignore") # additional
Read Data
# Load each Excel workbook into its own DataFrame. The paths are relative,
# so the .xlsx files are looked up in the current working directory.
# NOTE(review): presumably these are the files unpacked by the kaggle
# download step earlier in the notebook — confirm.
df_athletes = pd.read_excel('Athletes.xlsx')
df_coaches = pd.read_excel('Coaches.xlsx')
df_entries_gender = pd.read_excel('EntriesGender.xlsx')
df_medals = pd.read_excel('Medals.xlsx')
df_teams = pd.read_excel('Teams.xlsx')
Exploring our data
First Look
# Peek at the first rows of the athletes table, then report its dimensions.
print(df_athletes.head())
athlete_rows, athlete_cols = df_athletes.shape
print(f'Here we have {athlete_rows} rows and {athlete_cols} columns')
# Registry of every loaded table, keyed by the label used in the printout.
data_source = {
    'Athletes': df_athletes,
    'Coaches': df_coaches,
    'Entries_gender': df_entries_gender,
    'Medals': df_medals,
    'Teams': df_teams,
}

# For each table: a separator line, its label, the first rows, and its size.
separator = '-' * 60
for dataset, frame in data_source.items():
    n_rows, n_cols = frame.shape
    print(separator)
    print(f'This is {dataset} dataset\n')
    print(frame.head())
    print(f'Here we have {n_rows} rows and {n_cols} columns')
def first_look(dataset: pd.DataFrame) -> None:
    """Print a quick structural summary of a DataFrame.

    Writes to stdout, in order: the ``DataFrame.info()`` report, the number
    of unique values per column, the number of duplicated rows, and the
    number of null values per column.

    Args:
        dataset: The pandas DataFrame to summarize.

    Returns:
        None. All output goes to stdout.
    """
    # info() prints its report itself and returns None, so wrapping it in
    # print() would emit a stray "None" line right after the report.
    dataset.info()
    print(f'\nUniques values:\n{dataset.nunique()}')
    print(f'\nDuplicates: {dataset.duplicated().sum()}')
    print(f'\nNull values: \n{dataset.isnull().sum()}')
# Run the summary helper over every loaded table, one after another.
for frame in (df_athletes, df_coaches, df_entries_gender, df_medals, df_teams):
    first_look(frame)