Lesson 12 - Data Cleaning

import pandas as pd

# Location of the Production.Product dataset used throughout this lesson.
source_url = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/Production.Product.csv"

Run to view results

# Read the dataset as a DataFrame into a variable named 'prod_df'.
# The file is parsed with a tab separator even though its name ends in .csv.
prod_df = pd.read_csv(source_url, sep='\t')

Run to view results

# Answer: display the first four rows of the DataFrame.
prod_df.head(4)

Run to view results

# Answer: number of rows (first element of the shape tuple).
prod_df.shape[0]

Run to view results

# Answer: number of columns (second element of the shape tuple).
prod_df.shape[1]

Run to view results

# Answer: number of distinct ProductLine values.
# nunique() leaves NaN out of the tally by default.
prod_df['ProductLine'].nunique()

Run to view results

# Number of non-null entries in the ProductLine column (count() skips NaN).
prod_df.loc[:, 'ProductLine'].count()

Run to view results

# Answer: list every distinct ProductLine value.
# Unlike nunique(), unique() keeps NaN in its result.
list(prod_df['ProductLine'].unique())

Run to view results

# Answer: NaN marks an empty cell and is not considered a value, so
# nunique() and count() skip it while unique() still lists it.

Run to view results

# your answer here

Run to view results

# Answer: the values carry a trailing whitespace character (e.g. 'M ' rather than 'M').

Run to view results

# Answer: print the second distinct ProductLine value, then show its length.
# The length exposes whitespace that the printed output does not make visible.
second_value = prod_df['ProductLine'].unique()[1]
print(second_value)
len(second_value)

Run to view results

# Answer: count the rows whose ProductLine is 'M ' and whose Style is 'W '.
# The literals keep the trailing space because the stored values include it.
is_women_mountain = (prod_df['ProductLine'] == 'M ') & (prod_df['Style'] == 'W ')
prod_df[is_women_mountain].shape[0]

Run to view results

# Preview the first rows selected by the women's-mountain boolean mask.
prod_df.loc[is_women_mountain].head()

Run to view results

# Answer: create a boolean index for women's and mountain products.
# Both comparisons are element-wise; `&` keeps rows where both are True.
is_women_mountain = (prod_df['ProductLine'] == 'M ') & (prod_df['Style'] == 'W ')
is_women_mountain.head()

Run to view results

# Create a boolean index for silver products.
is_silver = prod_df['Color'] == 'Silver'
is_silver.head()

Run to view results

# Combine those indices: a row is selected when it is women's-mountain OR silver.
# NOTE(review): `|` is a union; if the intent is silver women's-mountain
# products only, this should be `&` -- confirm against the exercise prompt.
is_silver_WM = is_women_mountain | is_silver
is_silver_WM.head()

Run to view results

# Create placeholders for the column-name list and the rename mapping.
columns = ['Name', 'ListPrice']      # columns to keep in the final display
new_columns = {'Name': 'Newname'}    # old column name -> new column name

Run to view results

# Sort the selected rows by ListPrice (highest first), keep the top seven,
# narrow to the chosen columns and rename them for display.
(
    prod_df[is_silver_WM]
    .sort_values(by='ListPrice', ascending=False)
    .head(7)[columns]
    .rename(columns=new_columns)
)

Run to view results