import pandas as pd
# Location of the Production.Product data file that is loaded further down.
source_url = (
    "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/"
    "main/pandas_files/Production.Product.csv"
)
# Run to view results
# Read the dataset into a DataFrame named 'prod_df'. The file is parsed with
# a tab separator even though its name ends in .csv.
prod_df = pd.read_csv(source_url, sep='\t')
# Short alias: the remaining cells refer to the frame as 'df'.
df = prod_df
# Run to view results
# Check what kind of object read_csv returned.
type(df)
# Run to view results
# Report the number of rows (first entry of df.shape).
print(f"The dataframe has {df.shape[0]} rows")
# Run to view results
# Report the number of columns (second entry of df.shape).
print(f"The dataframe has {df.shape[1]} columns")
# Run to view results
# Show the column labels of the DataFrame.
df.columns
# Run to view results
# Count the distinct non-null values in the column at position 15.
# NOTE(review): position 15 is presumably 'ProductLine' (selected by name in
# later cells) — confirm, and consider selecting it by name here as well.
df.iloc[:, 15].nunique()
# Run to view results
# List the distinct values of the column at position 15; unlike nunique(),
# unique() keeps a missing value as one of the entries.
distinct_values = df.iloc[:, 15].unique()
list(distinct_values)
# Run to view results
# Written answer to the question, printed as text. A plain string is used
# because the message contains no placeholders to interpolate.
print(" No there are less unique values than rows. This is because some values are nan which are not counted as unique")
# Run to view results
# Print the second distinct 'ProductLine' value, then take its length so that
# any characters that do not show up in the printed text become apparent.
second_value = df['ProductLine'].unique()[1]
print(second_value)
len(second_value)
# Run to view results
# Answer: the 'ProductLine' values end with a trailing space (for example
# 'R '), so any filter on this column has to include that space.
# Run to view results
# Select the rows whose product line is 'R ' (the trailing space is part of
# the stored value) and count them.
road_mask = df['ProductLine'].eq('R ')
df_roads = df.loc[road_mask]
len(df_roads)
# Run to view results
# Keep the rows that are both product line 'M ' and style 'W '.
in_line_m = df['ProductLine'].eq('M ')
in_style_w = df['Style'].eq('W ')
womens_df = df.loc[in_line_m & in_style_w]
# Run to view results
# Number of rows that matched the product line 'M ' / style 'W ' filter.
womens_df.shape[0]
# Run to view results
# Three highest-priced products that are either (product line 'M ' and
# style 'W ') or have the colour 'Silver'.
is_mountain_womens = (df['ProductLine'] == 'M ') & (df['Style'] == 'W ')
is_silver = df['Color'] == 'Silver'
# nlargest already returns its rows ordered by ListPrice, highest first, and
# resolves ties by keeping the rows that occur first in the frame. A preceding
# sort_values would be redundant and, with its default non-stable sort, would
# leave the choice among equally priced rows unspecified.
most_expensive_df = df[is_mountain_womens | is_silver].nlargest(3, 'ListPrice')
most_expensive_df
# Run to view results
# Per-row flags marking where the colour equals 'Silver', materialised as a
# plain Python list; preview the first ten entries.
silver_list = list(df['Color'].eq('Silver'))
silver_list[:10]
# Run to view results
# Use the ProductID column as the row index of the top-three frame.
most_expensive_df = most_expensive_df.set_index('ProductID')
# Run to view results
# Keep only the price and name columns in a separate frame.
final_df = most_expensive_df[['ListPrice', 'Name']].copy()
# Run to view results
# Preview the first rows of the reduced frame.
final_df.head()
# Run to view results
# Rename the 'Name' column to 'NewName'.
final_df = final_df.rename(columns={'Name': 'NewName'})
# Run to view results
# Display the final frame with the renamed column.
final_df
# Run to view results
# Run to view results