pip install pandas
# To download sample data
! wget http://data.insideairbnb.com/the-netherlands/north-holland/amsterdam/2021-04-09/visualisations/listings.csv -O listings.csv
# import pandas package
import pandas as pd
# Load the downloaded Inside Airbnb listings (Amsterdam snapshot) into a DataFrame
airbnb = pd.read_csv(filepath_or_buffer="listings.csv")
# Render the whole DataFrame (display is supplied by the IPython/Jupyter session)
display(airbnb)
# Peek at the first five rows
airbnb.head(n=5)
# Peek at the last five rows
airbnb.tail(n=5)
# Select a single column by label; the result is a Series
airbnb.loc[:, "name"]
# Select several columns at once with a list of labels; the result is a DataFrame
hosts = airbnb.loc[:, ["host_id", "host_name"]]
hosts.head(n=5)
# Inspect the dtype pandas inferred for every column
airbnb.dtypes
# Parse last_review into datetime values so the .dt accessor can be used on it,
# then look at the dtypes again to see the change
airbnb["last_review"] = pd.to_datetime(arg=airbnb["last_review"])
airbnb.dtypes
# Derive the calendar year of each listing's last review from the datetime column
airbnb["year"] = airbnb["last_review"].dt.year
airbnb["year"].head(n=5)
# Remove leading/trailing whitespace from every listing name and store it back
stripped_names = airbnb["name"].str.strip()
airbnb["name"] = stripped_names
airbnb["name"].head(n=5)
# Add an upper-cased copy of the cleaned names as a new column
airbnb["name_upper"] = stripped_names.str.upper()
airbnb["name_upper"].head(n=5)
# Add a lower-cased copy of the cleaned names as a new column
airbnb["name_lower"] = stripped_names.str.lower()
airbnb["name_lower"].head(n=5)
# Element-wise product of two columns: minimum_nights x price
# (presumably the cost of the shortest allowed stay, if price is per night - confirm)
airbnb["min_revenue"] = airbnb["minimum_nights"].mul(airbnb["price"])
airbnb.loc[:, ["minimum_nights", "price", "min_revenue"]].head(n=5)
# Mean of the price column over all listings
airbnb.loc[:, "price"].mean()
# Median of the price column over all listings
airbnb.loc[:, "price"].median()
# Mean price per room type; as_index=False keeps room_type as a regular column
airbnb.groupby("room_type", as_index=False)[["price"]].mean()
# Median price per room type, same layout as above
airbnb.groupby("room_type", as_index=False)[["price"]].median()
# Boolean-mask filtering: keep only the rows whose price is below 1000
airbnb_under_1000 = airbnb[airbnb['price'] < 1000]
airbnb_under_1000.head()
# Combine conditions with & (each one parenthesised):
# price below 1000 AND last review made in 2020.
# The variable is named after the year actually used in the filter (2020).
airbnb_2020_under_1000 = airbnb[(airbnb['price'] < 1000) & (airbnb['year'] == 2020)]
airbnb_2020_under_1000.head()
# Histogram (40 bins) of the prices below 1000; the returned Axes is kept in ax
# so the figure can be customised further
ax = airbnb_under_1000["price"].plot(kind="hist", bins=40)