import pandas as pd
# Load the orders dataset from the hosted CSV into a DataFrame.
link = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/orders.csv"
orders = pd.read_csv(link)

# Use the head method to investigate the first row of data
orders.head(1)

# Create a dataframe to view the column names and data types
pd.DataFrame(orders.dtypes, columns=['DataType'])

# Use the shape attribute to determine the total number of rows and columns
orders.shape

# Use the .columns property to list out the column names
# Use the .index.name property to identify the index
# (read_csv was called without index_col, so the index is the default
# unnamed one and index.name is expected to print None)
print(orders.columns)
print(orders.index.name)
# Provide a dictionary to the rename method to rename any columns.
# The inplace flag determines whether to modify the original dataframe:
# with inplace=False a renamed copy is returned and `orders` itself keeps
# its 'sales' column, which the describe() call further down still reads.
orders.rename(columns={'sales': 'sales_USD'}, inplace=False)

# Explore columns with column attributes and methods
orders['order_date'].describe()

orders['sales'].describe()
# Import the products.csv dataset and explore the following questions:
link = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/products.csv"
products = pd.read_csv(link)

# A. What are the columns and index: do they suggest any relationship to other tables?
products.head()

products.columns

# B. How many rows of data are there?
# shape is (rows, columns), so element 0 is the row count.
products.shape[0]

# C. What are the types of each column?
pd.DataFrame(products.dtypes, columns=["types"])
# Let's return to the orders dataset for the rest of the challenges
# Use boolean filtering and DataFrame/Series methods to solve the following challenges:
orders.head()

# A. What is the mean profit of orders where the ship_mode is "Second Class"
# Build the boolean mask once, then select rows and column in a single .loc call.
is_second_class = orders['ship_mode'] == "Second Class"
orders.loc[is_second_class, 'profit'].mean()

# B. What was the busiest day?
# mode() returns the most frequent order_date value(s); more than one row
# in the result means several days are tied.
orders['order_date'].mode()

# Cross-check the mode against the raw per-day order counts.
orders['order_date'].value_counts().head()

# C. Which products were ordered on the busiest day?
# Derive the busiest day(s) from the data instead of hard-coding the date, so
# the answer stays correct if the dataset changes; isin() also copes with ties
# and with an empty frame.
busiest_days = orders['order_date'].mode()
orders[orders['order_date'].isin(busiest_days)]['product_id']
# A. Who is the top customer?
# mode() lists the most frequent customer_id value(s); [0] takes the first one.
top_customer = orders['customer_id'].mode()[0]
top_customer

# B. What are the three top orders purchased by top customer?
# Reuse the id computed above rather than a hard-coded literal, then rank that
# customer's rows by profit (highest first) and keep the top three.
top_customer_orders = orders[orders['customer_id'] == top_customer]
top_customer_orders.sort_values(by='profit', ascending=False).head(3)