import pandas as pd
# Load the orders dataset directly from its hosted CSV file
link = (
    "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/"
    "main/pandas_files/orders.csv"
)
orders = pd.read_csv(link)
# Peek at the first row of data with the head method
orders.head(n=1)
# Build a one-column dataframe listing each column's data type
pd.DataFrame(data=orders.dtypes, columns=["DataType"])
# The shape attribute gives the total (rows, columns) counts
orders.shape
# Print the column labels (.columns) followed by the
# name of the index (.index.name), one per line
print(orders.columns, orders.index.name, sep="\n")
# Columns are renamed by passing an {old_name: new_name} mapping to rename().
# With inplace=False the original dataframe is left untouched and a renamed
# copy is returned instead.
column_renames = {'sales': 'sales_USD'}
orders.rename(columns=column_renames, inplace=False)
# Summarize individual columns; 'sales' is still reachable under its original
# name because the rename above was not applied in place
orders.loc[:, 'order_date'].describe()
orders.loc[:, 'sales'].describe()
# Load the products.csv dataset and work through the questions below
link = (
    "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/"
    "main/pandas_files/products.csv"
)
products = pd.read_csv(link)
# A. Inspect the columns and index: do they hint at a relationship to other tables?
products.head(n=5)
products.columns
# B. Count the rows of data
len(products)
# C. List the data type of each column
pd.DataFrame(data=products.dtypes, columns=["types"])
# Let's return to the orders dataset for the rest of the challenges
# Use boolean filtering and DataFrame/Series methods to solve the following challenges:
orders.head()
# A. What is the mean profit of orders where the ship_mode is "Second Class"?
# (.loc with a boolean mask selects rows and column in one step instead of
# chaining two indexing operations)
orders.loc[orders['ship_mode'] == "Second Class", 'profit'].mean()
# B. What was the busiest day?
orders['order_date'].mode()
orders['order_date'].value_counts().head()
# Derive the busiest day from the data instead of hard-coding a date copied
# from the output above, so the answer stays correct if the dataset changes.
# mode() returns every tied value in sorted order; take the first one.
busiest_day = orders['order_date'].mode()[0]
# C. Which products were ordered on the busiest day?
orders.loc[orders['order_date'] == busiest_day, 'product_id']
# A. Who is the top customer (the customer_id that appears on the most rows)?
top_customer = orders['customer_id'].mode()[0]
top_customer
# B. What are the three top orders purchased by the top customer?
# Reuse the computed customer id rather than a hard-coded literal, then rank
# that customer's rows by profit, highest first.
orders[orders['customer_id'] == top_customer].sort_values(by='profit', ascending=False).head(3)