# C02_Pandas_Jordan

import pandas as pd

# Show which pandas version this notebook is running against.
pd.__version__

# Download a sample file from http://insideairbnb.com/ ! wget http://data.insideairbnb.com/united-states/fl/broward-county/2022-06-17/visualisations/listings.csv -O listings.csv

# read the airbnb NYC listings csv file airbnb = pd.read_csv("listings.csv")

# display the pandas DataFrame display(airbnb)

# View first few entries airbnb.head(10)

# View last few entries airbnb.tail()

# Results for a single column.
airbnb['name']

# Results for multiple columns: index with a list of column labels.
hosts = airbnb[['host_id', 'host_name']]
hosts.head()

# Show the data type of each column.
airbnb.dtypes

# Change the type of a column to datetime, then re-check the dtypes.
airbnb['last_review'] = pd.to_datetime(airbnb['last_review'])
airbnb.dtypes

# Extract the year from a datetime series.
airbnb['year'] = airbnb['last_review'].dt.year
airbnb['year'].head()

# Strip leading and trailing whitespace from a string series.
airbnb['name'] = airbnb['name'].str.strip()
airbnb['name'].tail()

# Uppercase all strings in a series.
airbnb['name_upper'] = airbnb['name'].str.upper()
airbnb['name_upper'].tail()

# Lowercase all strings in a series.
airbnb['name_lower'] = airbnb['name'].str.lower()
airbnb['name_lower'].tail()

# Calculate a new column from two existing columns.
airbnb['min_revenue'] = airbnb['minimum_nights'] * airbnb['price']
airbnb[['minimum_nights', 'price', 'min_revenue']].head()

# Get the mean price.
airbnb['price'].mean()

# Get the median price.
airbnb['price'].median()

# Get the standard deviation of the price.
airbnb['price'].std()

# Get the variance of the price.
airbnb['price'].var()

# Get the mean price grouped by type of room, rounded to 2 decimals.
airbnb[['room_type', 'price']].groupby('room_type', as_index=False).mean().round(2)

# Get the median price grouped by type of room, rounded to 2 decimals.
airbnb[['room_type', 'price']].groupby('room_type', as_index=False).median().round(2)

# Get all rows with price < 1000.
airbnb_under_1000 = airbnb[airbnb['price'] < 1000]
airbnb_under_1000.head()

# Get all rows with price < 1000 and year equal to 2020.
# Each condition needs its own parentheses because `&` binds tighter
# than the comparison operators.
airbnb_2020_under_1000 = airbnb[(airbnb['price'] < 1000) & (airbnb['year'] == 2020)]
# Earlier name (it said 2019 although the filter is 2020); kept as an alias
# so any cell still using it keeps working.
airbnb_2019_under_1000 = airbnb_2020_under_1000
airbnb_2020_under_1000.head()

# Distribution of prices under $1000, as a 40-bin histogram.
ax = airbnb_under_1000['price'].plot.hist(bins=40)

import numpy as np

# A Series built from a plain Python list.
pd.Series([1, 3, 5, 6])

# A DataFrame from a nested list, with explicit row and column labels.
d = [[1, 2], [3, 4]]
df = pd.DataFrame(d, index=['r1', 'r2'], columns=['a', 'b'])
df

# A 6x4 NumPy array holding the integers 0..23.
d = np.arange(24).reshape(6, 4)
d

# Wrap the array in a DataFrame: rows labelled 1..6, columns A..D.
df = pd.DataFrame(d, index=np.arange(1, 7), columns=list("ABCD"))
df

# A DataFrame from a dict of columns, with explicit column order and row labels.
pd.DataFrame(
    {
        'name': ['Ally', 'Jane', 'Belinda'],
        'height': [160, 155, 163],
    },
    columns=['name', 'height'],
    index=['A1', 'A2', 'A3'],
)

# A 4x4 DataFrame of random normal values rounded to 2 decimals,
# with rows labelled r1..r4 and columns labelled c1..c4.
my_df = pd.DataFrame(
    data=np.random.randn(16).round(2).reshape(4, 4),
    index=[f'r{i}' for i in range(1, 5)],
    columns=[f'c{i}' for i in range(1, 5)],
)

my_df

# Transpose: rows become columns and vice versa.
my_df.T

# Label-based selection: rows r1 and r4, columns c3 and c4.
# (The label must match exactly; a trailing space such as 'c4 ' raises KeyError.)
my_df.loc[['r1', 'r4'], ['c3', 'c4']]

# Position-based selection of the same cells: rows 0 and 3, columns 2 and 3.
# (.iloc only accepts integer positions; the float 2.3 is rejected.)
my_df.iloc[[0, 3], [2, 3]]

!head -5 listings.csv

%ls

import os

[x for x in os.listdir(os.getcwd()) if 'csv' in x]

!mkdir data

%ls

airbnb.to_csv('./data/listings.csv')

%cd data

# Group the listings by room type and count the groups.
airbnb_grouped = airbnb.groupby('room_type')
len(airbnb_grouped)

# Shape of each group. The room types are taken from the grouping itself, so
# a category missing from this dataset cannot trigger a KeyError in get_group.
for room_type in airbnb_grouped.groups:
    print(airbnb_grouped.get_group(room_type).shape)

# The three most expensive listings per room type.
# Columns are selected before apply so the grouping column is not passed to
# the function; room_type still appears as the outer level of the result index.
airbnb_grouped[['host_name', 'price']].apply(
    lambda group: group.sort_values(by='price', ascending=False).head(3)
)

# Summary statistics of price per room type.
airbnb.groupby('room_type')['price'].describe()

# Mean price per room type, expressed as a pivot table.
pd.pivot_table(data=airbnb, index='room_type', values='price', aggfunc='mean')

# Time the mean price per room type computed with groupby ...
%timeit airbnb.groupby('room_type')['price'].mean()

# ... and the same aggregation expressed as a pivot table.
%timeit pd.pivot_table(data = airbnb, index= 'room_type', values='price', aggfunc='mean')

# Time the mean price per (room type, neighbourhood) pair: groupby on both
# keys, then unstack to move neighbourhood into the columns ...
%timeit airbnb.groupby(['room_type', 'neighbourhood'])['price'].mean().unstack()

# ... and the pivot_table call with neighbourhood passed as `columns`.
%timeit pd.pivot_table(data=airbnb, index='room_type', columns='neighbourhood', values='price', aggfunc='mean')