import pandas as pd
# Raw GitHub URL of the Production.Product dataset used throughout this notebook.
source_url = (
    "https://raw.githubusercontent.com/austinlasseter/hosting_some_files"
    "/main/pandas_files/Production.Product.csv"
)
Run to view results
# Load the remote file into the DataFrame 'prod_df'.
# The fields are separated by tabs, so the separator is given explicitly.
prod_df = pd.read_csv(filepath_or_buffer=source_url, sep="\t")
Run to view results
# Preview the first four rows of the dataset.
prod_df.iloc[:4]
Run to view results
# Number of rows in the dataset.
len(prod_df)
Run to view results
# Number of columns in the dataset.
len(prod_df.columns)
Run to view results
# Count the distinct ProductLine values; missing entries are left out
# (dropna=True is the default, spelled out here for clarity).
prod_df['ProductLine'].nunique(dropna=True)
Run to view results
# Number of rows whose ProductLine is not missing.
prod_df['ProductLine'].notna().sum()
Run to view results
# Distinct ProductLine values (missing entry included), as a plain list.
[*prod_df['ProductLine'].unique()]
Run to view results
# your answer here
# NaN marks empty cells; it is not counted as a value.
Run to view results
# your answer here
Run to view results
# your answer here
# There is trailing whitespace in the values.
Run to view results
# Inspect the second distinct ProductLine value.
# The value is looked up once and reused instead of calling unique() twice.
second_product_line = prod_df['ProductLine'].unique()[1]
# repr() keeps the surrounding quotes, so trailing whitespace is visible in
# the output; a plain print() would render it invisibly.
print(repr(second_product_line))
# The length counts the whitespace characters as well.
len(second_product_line)
Run to view results
# Boolean mask: ProductLine is 'M ' and Style is 'W '.
# The literals carry the trailing space that is present in the data.
is_women_mountain = prod_df['ProductLine'].eq('M ') & prod_df['Style'].eq('W ')
# How many rows satisfy both conditions.
len(prod_df[is_women_mountain])
Run to view results
# Show the first five rows selected by the women's-mountain mask.
prod_df.loc[is_women_mountain].head(5)
Run to view results
# Row mask for women's ('W ') mountain ('M ') products.
# Both literals include the trailing space found in the data.
is_women_mountain = prod_df['ProductLine'].eq('M ') & prod_df['Style'].eq('W ')
# Peek at the first five mask values.
is_women_mountain.iloc[:5]
Run to view results
# Row mask that is True where Color is exactly 'Silver'.
is_silver = prod_df['Color'].eq('Silver')
# Peek at the first five mask values.
is_silver.iloc[:5]
Run to view results
# Merge the two masks element-wise: a row is kept when it is a women's
# mountain product OR it is silver.
# NOTE(review): `|` selects rows matching either condition; if rows must
# match both, this should be `&` - confirm against the exercise statement.
is_silver_WM = is_women_mountain | is_silver
# Peek at the first five values of the combined mask.
is_silver_WM.iloc[:5]
Run to view results
# Columns to keep in the final display.
columns = [
    'Name',
    'ListPrice',
]
# Mapping of original column label -> label to show instead.
new_columns = {
    'Name': 'Newname',
}
Run to view results
# Filter with the combined mask, order by ListPrice (highest first), keep the
# top seven rows, then show only the chosen columns under their new labels.
(
    prod_df[is_silver_WM]
    .sort_values(by='ListPrice', ascending=False)
    .head(7)
    .loc[:, columns]
    .rename(columns=new_columns)
)
Run to view results
Run to view results