import pandas as pd
# Load the mtcars CSV file into a variable.
# read_csv is called with its default comma separator here; pass sep="\t"
# explicitly if the file turns out to be tab-delimited.
file_path = '/work/mtcars.csv'
file = pd.read_csv(file_path)
# Use the head method to preview the first rows of data
file.head()
# Wrap the loaded data in a DataFrame named df for the rest of the exploration
df = pd.DataFrame(file)
df.head()
# Use the shape attribute to get the (rows, columns) totals
df.shape
# Use the .columns property to list out the column names
# Use the .index.name property to identify the index
print(df.columns)
print(df.index.name)
# Provide a dictionary to the rename method to rename any columns
# The inplace flag determines whether to modify the original dataframe
df.rename(columns={'mpg': "Miles Per Gallon", 'cyl': 'cylinders'}, inplace=True)
# Summarize the columns with describe(); it has to be called, since the bare
# attribute `df.describe` only references the method without running it
df.describe()
# Import the products.csv dataset and explore the following questions:
link = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/products.csv"
products = pd.read_csv(link)
# A. What are the columns and index: do they suggest any relationship to other tables?
print(products.columns)
print(products.index.name)
# B. How many rows of data are there?
# shape is (rows, columns), so the row count is element 0 (element 1 is the column count)
print(f" This dataframe has {products.shape[0]} rows of data")
# C. What are the types of each column?
products.dtypes
# Back to the orders dataset for the remaining challenges.
# Solve them with boolean filtering and DataFrame/Series methods.
link = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/orders.csv"
orders = pd.read_csv(link)
# Average profit over the rows whose ship_mode is "Second Class"
print(f' Mean: {orders.loc[orders["ship_mode"].eq("Second Class"), "profit"].mean()}')
# C. Which products were ordered on the busiest day?
# Count the rows per order_date and look at the five most frequent dates
orders["order_date"].value_counts().head(n=5)
# A. Who is the top customer? (TODO: not answered yet)
# B. What are the three top orders purchased by top customer? (TODO: not answered yet)