import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Cities iterated over by the per-city plots later in the notebook.
cities = ['Kiev', 'Lvov', 'Odessa', 'Kharkov', 'Dnepr']

# Load the rent offers table. An alternative file, 'Data/rent_offers.csv',
# was used here previously; presumably it is the uncleaned version.
data = pd.read_csv('Data/rent_offers_clean.csv')
data.tail()

# Count of missing values per column.
data.isna().sum()

# Discard the 'posted_by' column, then every row that still contains a
# missing value in any remaining column.
data = data.drop(columns='posted_by').dropna()

# With the missing values gone, the count-like columns can be stored as
# 32-bit integers.
integer_columns = ('floor', 'building_height_floors', 'rooms')
data = data.astype({column: 'int32' for column in integer_columns})

# Summary statistics overall, then for price within each (rooms, city) group,
# using the same set of percentiles.
summary_percentiles = [.01, .25, .5, .75, .95, .99]
data.describe(percentiles=summary_percentiles)
data.groupby(['rooms', 'city'])['price'].describe(percentiles=summary_percentiles)
# Trim price outliers: keep only rows whose price lies strictly between the
# 1st and the 95th percentile of the price column of `data`.
price_floor = data.price.quantile(.01)
price_cap = data.price.quantile(.95)
data_cut = data[(data.price > price_floor) & (data.price < price_cap)]
%matplotlib inline
# Price distribution per city, one stacked panel for each room count 1..5.
fig, axes = plt.subplots(5, 1, figsize=(20, 30))
for room_count, ax in enumerate(axes, start=1):
    same_rooms = data_cut[data_cut.rooms == room_count]
    ax.set_title(f'{room_count} rooms apartment')
    sns.boxplot(data=same_rooms, x="city", y='price', ax=ax)
plt.show()
%matplotlib inline
# Price against apartment area for the outlier-trimmed offers; points are
# semi-transparent so that dense regions stand out.
fig, ax = plt.subplots(figsize=(20, 6))
sns.scatterplot(
    data=data_cut,
    x="apt_area_sqm",
    y='price',
    alpha=0.4,
    ax=ax,
)
plt.show()
%matplotlib inline
# Apartment area by number of rooms, restricted to offers with an area
# below 300.
fig, ax = plt.subplots(figsize=(20, 6))
below_300 = data_cut[data_cut.apt_area_sqm < 300]
sns.boxplot(data=below_300, x="rooms", y='apt_area_sqm', ax=ax)
plt.show()

# Inspect the offers left out of the plot because their area exceeds 300.
# (Rows with an area of exactly 300 appear in neither selection.)
data_cut.loc[data_cut.apt_area_sqm > 300]
%matplotlib inline
# Price against area again, now coloured by room count and limited to offers
# with an area below 300.
fig, ax = plt.subplots(figsize=(20, 6))
below_300 = data_cut[data_cut.apt_area_sqm < 300]
sns.scatterplot(
    data=below_300,
    x="apt_area_sqm",
    y='price',
    hue='rooms',
    palette='bright',
    alpha=0.33,
    ax=ax,
)
plt.show()
%matplotlib inline
# Histogram of building heights (in floors). Note that this uses the full
# `data` frame, not the price-trimmed `data_cut`.
fig, ax = plt.subplots(figsize=(20, 4))
sns.histplot(data=data, x='building_height_floors', ax=ax)
plt.show()
%matplotlib inline
# Building-height histogram per city, one stacked panel for each entry in
# `cities`.
fig = plt.figure(figsize=(20, 30))

# Limit to buildings under 50 floors; per the original author's note, that is
# the maximum building height in the country.
under_50_floors = data_cut[data_cut.building_height_floors < 50]

for i, city in enumerate(cities):
    # Size the grid from the list instead of a hard-coded 5 so that editing
    # `cities` keeps the layout in sync.
    plt.subplot(len(cities), 1, i + 1)
    plt.title(f'{city}: building height (floors)')
    # Each panel must show only its own city; without this filter every panel
    # drew the same histogram of all cities combined.
    # NOTE(review): assumes the 'city' column uses the same spellings as
    # `cities` -- confirm against the data.
    sns.histplot(
        data=under_50_floors[under_50_floors.city == city],
        x="building_height_floors",
    )
plt.show()