import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import io
import requests
from PIL import Image
# Open a 15x8 inch figure and make it the current matplotlib figure.
plt.figure(figsize=(15, 8))

# Load the listings dataset; the CSV is expected in the working directory.
df = pd.read_csv('AB_NYC_2019.csv')

# Structural overview of the data. Apart from df.info(), which prints its
# report itself, these bare expressions only show a result in an interactive
# session (notebook / REPL); in a plain script the value is discarded.
df.columns
df.shape
df.dtypes
df.info()
df.head()
df.tail()
df.describe()
df.isnull().sum()  # number of missing values per column

# Columns removed from the frame before the analysis below.
columnsToDrop = ['id', 'host_name', 'last_review']

# inplace=True mutates df itself and returns None; no new DataFrame is made.
df.drop(columnsToDrop, axis="columns", inplace=True)

# The dict maps column name -> fill value: missing reviews_per_month become 0.
df.fillna({'reviews_per_month': 0}, inplace=True)
# --- Column / row selection -------------------------------------------------
# A single column label yields a pandas Series.
df.loc[:, 'name']
# A list of column labels yields a pandas DataFrame.
df.loc[:, ['name', 'price']]
# An integer slice selects rows by position (here the first 111 rows).
df.iloc[0:111]
# Column selection and row slicing combined.
df.loc[:, ['name', 'price']].iloc[0:111]

# --- Boolean indexing -------------------------------------------------------
# The comparison produces a boolean Series with one entry per row ...
df['price'].lt(100)
booleanMask = df['price'].lt(100)
# ... which then keeps only the rows where the entry is True.
df.loc[booleanMask]

# --- Summaries --------------------------------------------------------------
# The ten rows with the highest price.
df.nlargest(n=10, columns='price')
# Distinct neighbourhood groups and the number of rows in each.
df['neighbourhood_group'].unique()
df['neighbourhood_group'].value_counts()
# The ten neighbourhoods with the most rows.
df['neighbourhood'].value_counts().head(10)
# Bar chart of the ten neighbourhoods with the most rows. No figure is opened
# here; the call is left to use whichever figure is already current.
df['neighbourhood'].value_counts().head(10).plot(kind="bar")

# Every chart below opens its own figure first. seaborn's axes-level functions
# draw on the current axes, so without a fresh figure each chart would be
# painted on top of the previous one when this file runs as a single script.

# Rows per neighbourhood group, in the order the groups appear in the data.
plt.figure(figsize=(15, 8))
sns.countplot(data=df, x="neighbourhood_group")

# Same chart with the bars sorted from most to fewest rows
# (value_counts() returns its index in descending count order).
order = df['neighbourhood_group'].value_counts().index
plt.figure(figsize=(15, 8))
sns.countplot(data=df, x='neighbourhood_group', order=order)

# Sorted bars, split by room type.
plt.figure(figsize=(15, 8))
sns.countplot(data=df, x='neighbourhood_group', order=order, hue="room_type")

# Price distribution over all rows.
plt.figure(figsize=(15, 8))
sns.histplot(df['price'])

# Restrict to rows priced at 500 or less. The filtered frame is built once and
# reused instead of re-evaluating df[mask] for every chart.
mask = df['price'] <= 500
affordableDf = df[mask]
affordableDf

# Price distribution of the restricted rows, without and with a KDE curve.
plt.figure(figsize=(15, 8))
sns.histplot(affordableDf['price'])

plt.figure(figsize=(15, 8))
sns.histplot(affordableDf['price'], kde=True)

# Mean price of the restricted rows.
affordableDf['price'].mean()

# Price distribution per neighbourhood group.
plt.figure(figsize=(15, 8))
sns.violinplot(data=affordableDf, x="neighbourhood_group", y="price")
# Scatter of row coordinates coloured by price. No `ax` is passed, so pandas
# creates the 12x8 inch figure for this chart itself.
affordableDf.plot(
    kind='scatter',
    x='longitude',
    y='latitude',
    c='price',
    cmap='inferno',
    colorbar=True,
    alpha=0.8,
    figsize=(12, 8),
)

# Background map, read from a local copy in the working directory. An earlier,
# disabled variant of this code downloaded the image from:
# https://upload.wikimedia.org/wikipedia/commons/e/ec/Neighbourhoods_New_York_City_Map.PNG
im = Image.open('Neighbourhoods_New_York_City_Map.png')

# Give the overlay its own figure and axes. Relying on plt.imshow()/plt.gca()
# would target the axes of the scatter above, stacking the map, a second
# scatter and a second colorbar onto that first chart.
fig, ax = plt.subplots(figsize=(12, 8))

# extent = [left, right, bottom, top] in data coordinates: the x/y box
# (longitude/latitude here) that the image is stretched over.
# zorder=0 keeps the image underneath the points drawn with zorder=1.
ax.imshow(im, zorder=0, extent=[-74.258, -73.7, 40.49, 40.92])

# Same scatter as above, drawn onto the map axes. figsize is not repeated
# because the figure size was already set when the axes was created.
affordableDf.plot(
    ax=ax,
    zorder=1,
    kind='scatter',
    x='longitude',
    y='latitude',
    c='price',
    cmap='inferno',
    colorbar=True,
    alpha=0.8,
)