SQL
Saved to variable
df_1-- Select the oldest and newest founding years from the businesses table
SELECT MIN(year_founded) AS oldest_founding_year, MAX(year_founded) AS newest_founding_year
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses";
SQL
Saved to variable
df_2-- Select the count of businesses that has survived before the year 1,000 AD
SELECT COUNT(*)
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses"
WHERE year_founded < 1000;
SQL
Saved to variable
df_10-- Select the count of businesses that has survived before the year 1,000 AD
SELECT bus.business, cnt.country, cat.category
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."categories" as cat
ON bus.category_code = cat.category_code
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."countries" as cnt
ON bus.country_code = cnt.country_code
WHERE year_founded = 1999
SQL
Saved to variable
df_3SELECT *
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses"
WHERE year_founded < 1000
ORDER BY year_founded ASC
SQL
Saved to variable
df_4-- Select business name, founding year, and country code from businesses; and category from categories
-- where the founding year was before 1000, arranged from oldest to newest
SELECT bus.business, bus.year_founded, bus.country_code, cat.category
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."categories" AS cat
ON bus.category_code = cat.category_code
WHERE year_founded < 1000
ORDER BY year_founded;
SQL
Saved to variable
df_5-- Select the category and count of category (as "n")
-- arranged by descending count, limited to 10 most common categories
SELECT cat.category, COUNT(cat.category) AS n
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."categories" AS cat
ON bus.category_code = cat.category_code
GROUP BY cat.category
ORDER BY n DESC
LIMIT 10;
SQL
Saved to variable
df_6-- Select the oldest founding year (as "oldest") from businesses,
-- and continent from countries
-- for each continent, ordered from oldest to newest
SELECT MIN(bus.year_founded) as oldest, cnt.continent
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."countries" as cnt
ON bus.country_code = cnt.country_code
GROUP BY continent
ORDER BY oldest ASC;
SQL
Saved to variable
df_7-- Select the business, founding year, category, country, and continent and join all three tables.
SELECT bus.business, bus.year_founded, cat.category, cnt.country, cnt.continent
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."categories" as cat
ON bus.category_code = cat.category_code
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."countries" as cnt
ON bus.country_code = cnt.country_code;
SQL
Saved to variable
df_8-- Count the number of businesses in each continent and category
SELECT cnt.continent, cat.category, COUNT(*) AS n
FROM "tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."categories" as cat
ON bus.category_code = cat.category_code
INNER JOIN "tiffanydatascience/oldest_businesses_in_the_world"."countries" as cnt
ON bus.country_code = cnt.country_code
GROUP BY cnt.continent, cat.category;
SQL
Saved to variable
df_9-- Repeat that previous query, filtering for results having a count greater than 5
SELECT
cnt.continent,
cat.category,
COUNT(*) AS n
FROM
"tiffanydatascience/oldest_businesses_in_the_world"."businesses" AS bus
INNER JOIN
"tiffanydatascience/oldest_businesses_in_the_world"."categories" AS cat
ON
bus.category_code = cat.category_code
INNER JOIN
"tiffanydatascience/oldest_businesses_in_the_world"."countries" AS cnt
ON
bus.country_code = cnt.country_code
GROUP BY
cnt.continent, cat.category
HAVING COUNT(*) > 5
ORDER BY n DESC;
import pandas as pd
import matplotlib.pyplot as plt
plt.bar(df_9.continent, df_9.category, color='red')
plt.title('Businesses grouped by continent and category with atleast a count of 5')
plt.xlabel('Continent')
plt.ylabel('Business Category')