import pandas as pd
Run to view results
# We will store the orders table csv data in a variable
# Tabs are used as the separating character in this file
link="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/orders.csv"
orders = pd.read_csv(link)
Run to view results
# Use the head method to investigate the first row of data
orders.head()
Run to view results
# Create a dataframe to view the column names and data types
df = pd.DataFrame(orders.dtypes)
Run to view results
print(df)
Run to view results
pd.DataFrame(orders.dtypes, columns=["DataType"])
Run to view results
# Use the shape attribute to determine the amount of rows and columns total
print(orders.shape)
Run to view results
print(f"there are {orders.shape[0]} rows and {orders.shape[1]} columns")
Run to view results
# Use the .columns property to list out the column names
# Use the .index.name property to identify the index
print(orders.columns)
print(orders.index.name)
Run to view results
# Provide a dictionary to the rename method to rename any columns
# The inplace flag determines whether to modify the original dataframe
orders.rename(columns={"order_id": "ID_order", "sales": "Sales_amount"}, inplace=False)
Run to view results
orders.head()
Run to view results
# Explore columns with column attributes and methods
#orders['profit']
#orders['profit'].describe()
orders[['sales', 'quantity', 'discount', 'profit']]
Run to view results
# Import the products.csv dataset and explore the following questions:
link="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/products.csv"
products = pd.read_csv(link)
Run to view results
# A. What are the columns and index: do they suggest any relationship to other tables?
products.columns
Run to view results
# B. How many rows of data are there?
products.shape[0]
products.shape[1]
Run to view results
# C. What are the types of each column?
pd.DataFrame(products.dtypes, columns=["DataType"])
Run to view results
products.head(4)
Run to view results
# Let's return to the orders dataset for the rest of the challenges
# Use boolean filtering and DataFrame/DataSeries methods to solve the following challanges:
Run to view results
# A. What is the mean profit of orders where the ship_mode is "Second Class"
orders[orders["ship_mode"] == "Second Class"]["profit"].describe()
Run to view results
# B. What was the busiest day?
orders["order_date"].value_counts().head(7)
Run to view results
# C. Which products were ordered on the busiest day?
orders[orders["order_date"]=='2019-11-18']["product_id"]
Run to view results
# A. Who is the top customer?
orders["customer_id"].value_counts()
Run to view results
orders["customer_id"].value_counts().head()
Run to view results
top_customer = orders["customer_id"].value_counts().head().index[0]
Run to view results
# B. What are the three top orders purchased by top customer?
orders[orders['customer_id'] == top_customer].sort_values(by="profit", ascending=False).head(7)
Run to view results