About This project
Extraction
Athlete Events
SELECT *
FROM 'athlete_events.csv'
Countries
SELECT *
FROM 'noc_regions.csv'
# Let's rename columns and delete the first row to set it up properly
noc_regions.rename({'column0':'noc','column1':'country','column2':'notes'}, axis=1, inplace=True)
noc_regions = noc_regions.drop(0,0)
noc_regions.head()
Questions to be answered
How many olympics games have been held?
SELECT COUNT (DISTINCT games)
FROM athlete_events
List down all Olympics games held so far
SELECT
DISTINCT Year, season, city
FROM athlete_events
ORDER BY year
Mention the total no of nations who participated in each olympics games
SELECT
DISTINCT ae.games AS olympic_games,
COUNT(DISTINCT nr.country) AS number_countries
FROM athlete_events ae
LEFT JOIN noc_regions nr ON nr.noc = ae.noc
GROUP BY games
ORDER BY games ASC
Which year saw the highest and lowest no of countries participating in olympics?
WITH games_countries AS (
SELECT
ae.games AS games,
nr.country AS country
FROM
athlete_events ae
JOIN noc_regions nr ON ae.noc = nr.noc
GROUP BY ae.games, nr.country
),
number_countries AS (
SELECT
games,
COUNT(country) AS number_countries
FROM
games_countries
GROUP BY games
)
SELECT DISTINCT
CONCAT(FIRST_VALUE(games) OVER(ORDER BY number_countries), ' - ' ,FIRST_VALUE(number_countries) OVER(ORDER BY number_countries), ' countries') AS least_countries,
CONCAT(FIRST_VALUE(games) OVER(ORDER BY number_countries DESC), ' - ', FIRST_VALUE(number_countries) OVER(ORDER BY number_countries DESC),' countries') AS most_countries
FROM
number_countries
SELECT
DISTINCT ae.games AS olympic_games,
COUNT(DISTINCT nr.country) AS number_countries,
RANK() OVER(ORDER BY COUNT(DISTINCT nr.country)DESC) AS ranking
FROM athlete_events ae
LEFT JOIN noc_regions nr ON nr.noc = ae.noc
GROUP BY games
ORDER BY games DESC
Which nation has participated in all of the olympic games?
SELECT
nr.country AS countries,
COUNT(DISTINCT ae.games) AS olympic_games
FROM athlete_events ae
LEFT JOIN noc_regions nr ON nr.noc = ae.noc
GROUP BY nr.country
HAVING COUNT(DISTINCT ae.games) = 51
WITH total_games AS (
SELECT
COUNT (DISTINCT games) AS total_games
FROM athlete_events
),
games_countries AS (
SELECT
ae.games AS games,
nr.country AS country
FROM athlete_events ae
JOIN noc_regions nr ON nr.noc=ae.noc
GROUP BY games, nr.country
),
countries_participations AS (
SELECT
country AS country,
COUNT(country) AS number_games
FROM games_countries
GROUP BY country
)
SELECT cp.country, cp.number_games
FROM countries_participations cp
JOIN total_games tg ON cp.number_games = tg.total_games
Identify the sports which were played in all summer olympics
WITH number_summer_games AS (
SELECT
COUNT (DISTINCT games) AS games
FROM
athlete_events
WHERE
season ='Summer'
),
games_sports AS (
SELECT
games,
sport
FROM athlete_events
WHERE
season = 'Summer'
GROUP BY
games, sport
),
number_sports_summer_games AS (
SELECT
sport,
COUNT(sport) number_summer_games
FROM
games_sports
GROUP BY
sport
)
SELECT
ns.sport, ns.number_summer_games
FROM
number_sports_summer_games ns
JOIN number_summer_games nsg ON nsg.games=ns.number_summer_games
Which Sports were just played only once in the olympics?
WITH games_sports AS (
SELECT
games,
sport
FROM athlete_events
GROUP BY
games, sport
),
number_sports AS (
SELECT
sport,
COUNT(sport) number_games
FROM
games_sports
GROUP BY
sport
)
SELECT
ns.sport, ns.number_games, gs.games
FROM
number_sports ns
JOIN games_sports gs ON gs.sport = ns.sport
WHERE
ns.number_games = 1
Fetch the total No. of sports played in each olympic games.
SELECT
DISTINCT games,
city,
COUNT (DISTINCT sport) AS number_sports
FROM
athlete_events
GROUP BY
games, city
ORDER BY
COUNT (DISTINCT sport) DESC
Fetch details of the oldest athletes to win a gold medal.
SELECT
name, sex, age, medal, team, games, city, sport, event, medal
FROM
athlete_events
WHERE age IN (SELECT MAX(age) FROM athlete_events WHERE age NOT LIKE 'NA' AND medal = 'Gold')
AND medal = 'Gold'
SELECT
CONCAT('For every woman in the olympic games there have been: ', CAST(ROUND(sub.number_males, 2) AS DECIMAL(18,2)) / CAST(ROUND(sub.number_females, 2) AS DECIMAL(18,2)), ' men') AS Ratio
FROM
(SELECT
SUM(CASE WHEN sex = 'M' THEN 1 ELSE 0 END) AS number_males,
SUM(CASE WHEN sex = 'F' THEN 1 ELSE 0 END) AS number_females
FROM
athlete_events) AS sub
Fetch the top 5 athletes who have won the most gold medals.
SELECT
name,
gold_medals,
team,
sport
FROM
(SELECT
name,
COUNT(medal) AS gold_medals,
team,
sport,
DENSE_RANK() OVER(ORDER BY COUNT(medal) DESC) AS ranking
FROM
athlete_events
WHERE
medal ='Gold'
GROUP BY
name, team, sport
ORDER BY
gold_medals DESC) AS subquery
WHERE
ranking <=5
Fetch the top 5 athletes who have won the most medals (gold/silver/bronze).
SELECT
name,
gold_medals,
silver_medals,
bronze_medals,
total_medals,
team,
sport
FROM
(SELECT
name,
SUM(CASE WHEN medal = 'Gold' THEN 1 ELSE 0 END) AS gold_medals,
SUM(CASE WHEN medal = 'Silver' THEN 1 ELSE 0 END) AS silver_medals,
SUM(CASE WHEN medal = 'Bronze' THEN 1 ELSE 0 END) AS bronze_medals,
SUM(CASE WHEN medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS total_medals,
team,
sport,
DENSE_RANK() OVER(ORDER BY SUM(CASE WHEN medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC) AS ranking
FROM
athlete_events
GROUP BY
name, team, sport
ORDER BY
SUM(CASE WHEN medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC)
WHERE
ranking <= 5
Fetch the top 5 most successful countries in olympics. Success is defined by no of medals won.
SELECT
country,
gold_medals,
silver_medals,
bronze_medals,
total_medals
FROM
(SELECT
nr.country AS country,
SUM(CASE WHEN ae.medal = 'Gold' THEN 1 ELSE 0 END) AS gold_medals,
SUM(CASE WHEN ae.medal = 'Silver' THEN 1 ELSE 0 END) AS silver_medals,
SUM(CASE WHEN ae.medal = 'Bronze' THEN 1 ELSE 0 END) AS bronze_medals,
SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS total_medals,
DENSE_RANK() OVER(ORDER BY SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC) AS ranking
FROM
athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
GROUP BY
nr.country
ORDER BY
SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC)
WHERE
ranking <= 5
List down total gold, silver and broze medals won by each country corresponding to each olympic games.
SELECT
nr.country AS country,
ae.games,
SUM(CASE WHEN ae.medal = 'Gold' THEN 1 ELSE 0 END) AS gold_medals,
SUM(CASE WHEN ae.medal = 'Silver' THEN 1 ELSE 0 END) AS silver_medals,
SUM(CASE WHEN ae.medal = 'Bronze' THEN 1 ELSE 0 END) AS bronze_medals,
SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS total_medals
FROM
athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
GROUP BY
nr.country, ae.games
ORDER BY
SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC
Identify which country won the most gold, most silver and most bronze medals in each olympic games.
WITH country_games_medals AS (
SELECT
nr.country AS country,
ae.games,
SUM(CASE WHEN ae.medal = 'Gold' THEN 1 ELSE 0 END) AS gold_medals,
SUM(CASE WHEN ae.medal = 'Silver' THEN 1 ELSE 0 END) AS silver_medals,
SUM(CASE WHEN ae.medal = 'Bronze' THEN 1 ELSE 0 END) AS bronze_medals,
FROM
athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
GROUP BY
nr.country, ae.games
ORDER BY
SUM(CASE WHEN ae.medal IN ('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC),
games_max_medals AS (
SELECT
games AS games_max,
MAX(gold_medals) AS max_gold,
MAX(silver_medals) AS max_silver,
MAX(bronze_medals) AS max_bronze
FROM country_games_medals
GROUP BY
games
)
SELECT cgm.country, gmm.games_max, MAX(gmm.max_gold), MAX(gmm.max_silver), MAX(max_bronze)
FROM country_games_medals cgm
RIGHT JOIN games_max_medals gmm ON cgm.games = gmm.games_max
AND cgm.gold_medals = gmm.max_gold
GROUP BY cgm.country, gmm.games_max
ORDER BY games_max
-- WORK IN PROGRESS
Which countries have never won gold medal but have won silver/bronze medals?
WITH country_medals AS
(SELECT
nr.country AS country,
SUM(CASE WHEN ae.medal = 'Gold' THEN 1 ELSE 0 END) AS gold_medals,
SUM(CASE WHEN ae.medal = 'Silver' THEN 1 ELSE 0 END) AS silver_medals,
SUM(CASE WHEN ae.medal = 'Bronze' THEN 1 ELSE 0 END) AS bronze_medals
FROM
athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
GROUP BY
nr.country)
SELECT country, gold_medals, silver_medals, bronze_medals
FROM country_medals
WHERE gold_medals = 0
AND (silver_medals > 0
OR bronze_medals > 0)
ORDER BY silver_medals DESC, bronze_medals DESC
In which Sport/event, India has won highest medals.
SELECT
nr.country,
ae.sport,
ae.event,
SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS total_medals
FROM athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
WHERE nr.country = 'India'
GROUP BY nr.country, ae.sport, ae.event
ORDER BY SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC
Break down all olympic games where india won medal for Hockey and how many medals in each olympic games.
SELECT
nr.country,
ae.games,
ae.sport,
ae.event,
SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS total_medals
FROM athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
WHERE nr.country = 'India'
AND sport = 'Hockey'
GROUP BY nr.country, ae.sport, ae.event, ae.games
HAVING SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) > 0
ORDER BY SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) DESC
SELECT
nr.country,
ae.games,
ae.medal,
ae.event,
SUM(CASE WHEN ae.medal IN('Gold','Silver','Bronze') THEN 1 ELSE 0 END) AS number_medals,
ae.name
FROM athlete_events ae
JOIN noc_regions nr ON ae.noc=nr.noc
WHERE nr.country = 'Colombia'
AND ae.medal NOT IN('NA')
GROUP BY ae.games, nr.country, ae.medal, ae.event, ae.name
ORDER BY ae.games DESC
athlete_events