import pandas as pd
# Download a sample file from http://insideairbnb.com/
# (the URL points at the Broward County, FL listings snapshot dated 2022-06-17).
# The leading "!" is IPython/Jupyter shell-escape syntax, so this line only runs
# inside a notebook; wget's -O option saves the download as listings.csv.
! wget http://data.insideairbnb.com/united-states/fl/broward-county/2022-06-17/visualisations/listings.csv -O listings.csv
# Load the downloaded CSV into a DataFrame that the rest of the file works on.
airbnb = pd.read_csv("listings.csv")
#display(airbnb)
# Displays the first n rows of the table (n = 10 here; the default is n = 5).
airbnb.head(10)
# Displays the last n rows of the table (n = 10 here; the default is n = 5).
airbnb.tail(10)
# Indexing with a single column name selects just that one column.
single_column = 'host_name'
hosts = airbnb[single_column]
# To view several columns at once, index with a list of column names instead of
# a single name (written inline, this is the double-bracket form airbnb[[...]]).
host_columns = ['host_id', 'host_name']
hosts = airbnb[host_columns]
# Preview the first five rows of the two-column selection.
hosts.head()
# Inspect the column dtypes before any conversion.
airbnb.dtypes
# Parse last_review into pandas datetimes and write the result back to the same column.
parsed_last_review = pd.to_datetime(airbnb['last_review'])
airbnb['last_review'] = parsed_last_review
# Inspect the dtypes again to see the effect of the conversion.
airbnb.dtypes
# Derive a year column from the converted dates through the .dt accessor.
review_dates = airbnb['last_review']
airbnb['year'] = review_dates.dt.year
# Remove leading and trailing whitespace from every listing name.
stripped_names = airbnb['name'].str.strip()
airbnb['name'] = stripped_names
airbnb['name'].head()
# Store an upper-cased copy of the cleaned names in a new column.
airbnb['name_upper'] = stripped_names.str.upper()
airbnb['name_upper'].head()
# min_revenue is the row-wise product of minimum_nights and price.
airbnb['min_revenue'] = airbnb['minimum_nights'].mul(airbnb['price'])
airbnb.head()
# Spread of the price column: standard deviation, then variance
# (pandas uses the sample, N-1, normalisation by default for both).
price_column = airbnb['price']
price_column.std()
price_column.var()
# Mean price per room type; as_index=False keeps room_type as an ordinary
# column of the result instead of turning it into the index.
room_type_prices = airbnb[['room_type', 'price']]
room_type_prices.groupby('room_type', as_index=False).mean()
# Boolean mask: True for rows whose price is strictly below 1000.
price_below_1000 = airbnb['price'] < 1000
airbnb_under_1000 = airbnb[price_below_1000]
airbnb_under_1000
# Combine two conditions with & (element-wise AND): price below 1000 and year equal to 2020.
year_is_2020 = airbnb['year'] == 2020
airbnb_2020_under1000 = airbnb[price_below_1000 & year_is_2020]
airbnb_2020_under1000.head()
# Build a small 2x2 DataFrame from a nested list, naming rows and columns explicitly.
d = [
    [1, 2],
    [3, 4],
]
df = pd.DataFrame(data=d, index=['r1', 'r2'], columns=['a', 'b'])
df
import numpy as np
# The integers 0..23 laid out as 6 rows by 4 columns.
d = np.arange(24).reshape((6, 4))
# Wrap the array in a DataFrame with rows labelled 1..6 and columns 'A'..'D'.
df = pd.DataFrame(data=d, index=np.arange(1, 7), columns=['A', 'B', 'C', 'D'])
df
# Build a DataFrame from a dict: each key names a column and each list holds
# that column's values; index supplies the row labels.
pd.DataFrame(
    data={'name': ['Ally', 'Jane', 'Belinda'], 'height': [160, 155, 163]},
    # columns = ['name','height'],  (optional: select/order the columns explicitly)
    index=['A1', 'A2', 'A3'],
)
from pandas import DataFrame as DF
# A 4x4 frame of standard-normal draws rounded to two decimals,
# with rows labelled r1..r4 and columns labelled c1..c4.
row_labels = [f'r{i}' for i in range(1, 5)]
col_labels = [f'c{i}' for i in range(1, 5)]
random_values = np.random.randn(16).round(2).reshape(4, 4)
my_df = DF(data=random_values, index=row_labels, columns=col_labels)
my_df
# Transposed view: rows become columns and columns become rows.
my_df.T
# The next two selections generate the same output (rows r1 and r4, columns c3 and c4):
# .loc addresses the cells by label, .iloc by integer position.
my_df.loc[['r1', 'r4'], ['c3', 'c4']]
my_df.iloc[[0, 3], [2, 3]]
!head -5 listings.csv
import os
[x for x in os.listdir(os.getcwd()) if 'csv' in x]
# Split the listings into one group per distinct room_type value.
airbnb_grouped = airbnb.groupby('room_type')
# Number of groups.
len(airbnb_grouped)
# First five rows of every group.
airbnb_grouped.head()
# The three highest-priced rows of each room type.
# The columns are selected on the GroupBy object before apply(), so the callback
# never touches the grouping column: operating on grouping columns inside
# apply() is deprecated since pandas 2.2, and once they are excluded from the
# chunk, indexing it by 'room_type' fails. The room type is still shown as the
# outer level of the result's index.
airbnb_grouped[['host_name', 'price']].apply(
    lambda group: group.sort_values(by='price', ascending=False).iloc[:3, :]
)
# Mean price for every (room_type, neighbourhood) pair, as a Series with a two-level index.
mean_price_by_room_and_neighbourhood = airbnb.groupby(['room_type', 'neighbourhood'])['price'].mean()
mean_price_by_room_and_neighbourhood
# Pivot the inner index level (neighbourhood) into columns, one row per room_type.
mean_price_by_room_and_neighbourhood.unstack()