import pandas as pd
# Load the bestsellers dataset into a DataFrame and preview its first rows.
books = pd.read_csv(filepath_or_buffer="/work/bestsellers with categories.csv")
books.head()
# A: Altering row names
# Show the data re-indexed by author. The result is not stored, so `books`
# itself keeps its original index.
books.set_index(keys="Author")
# A (continued): altering rows or columns
# Use the book name as the index and keep only the Year and Author columns.
books_transpose = books.set_index(keys="Name").loc[:, ["Year", "Author"]]
books_transpose.head()
# A (continued): switching rows and columns using transpose
# After this, Year/Author are the rows and each book name is a column.
books_transpose = books_transpose.T
# Print the transposed table
print(books_transpose)
# B: using pivot table or groupby
# Average user rating for each year (rows) and author (columns).
# `values` restricts the aggregation to 'User Rating', which is what this
# exercise asks for. Without it, pivot_table applies the mean to every
# remaining column: that adds unrelated Reviews/Price averages and also
# reaches any non-numeric columns (presumably Name and Genre hold text),
# which newer pandas versions reject instead of silently dropping.
books_pivot_table = books.pivot_table(
    values='User Rating',
    index='Year',
    columns='Author',
    aggfunc='mean',
)
books_pivot_table.head()
# B, example 2: groupby
# Count the non-null Price entries per author; this equals the number of
# books per author as long as no price is missing.
books.groupby(by="Author")["Price"].count()
# C: applying functions to the entire dataset or to columns
# Show the author and price of the row(s) whose price equals the maximum
# price in the dataset (ties all appear).
books.loc[
    books["Price"].eq(books["Price"].max()),
    ["Author", "Price"],
]
# C, example 2: applying the mean function to get the average price, with
# author as the index and genre as the columns.
# `values='Price'` limits the aggregation to the price column named in the
# goal above. Without it, pivot_table tries to average every remaining
# column, including any non-numeric ones (presumably Name holds text), which
# newer pandas versions reject instead of silently dropping.
books_pivot_table = books.pivot_table(
    values='Price',
    index='Author',
    columns='Genre',
    aggfunc='mean',
)
print(books_pivot_table)
# D: reshape from wide to long format.
# Name and Author identify each row; the 'User Rating' and 'Reviews' columns
# are stacked so that 'feature' holds the original column name and the new
# 'rate_number' column holds its value.
books_rating = pd.melt(
    books,
    id_vars=["Name", "Author"],
    value_vars=["User Rating", "Reviews"],
    var_name="feature",
    value_name="rate_number",
)
books_rating.head()
# D, example 2: derive a new column from existing ones.
# 'yearlyrate' is Year divided element-wise by User Rating; it is stored on
# `books`, so later steps see the extra column.
books["yearlyrate"] = books["Year"].div(books["User Rating"])
books.loc[:, ["yearlyrate", "Year", "User Rating"]].head()
# E: descriptive statistics
# Default summary (count, mean, std, min, quartiles, max) of the numeric
# columns.
books.describe()
# E (continued): summary restricted to the object-dtype columns.
books.describe(include="object")