!wget https://analytics.urbanpulse.de/files/hackathon2019/Darmstadt_20181229-20190111.csv
--2023-03-06 16:11:25-- https://analytics.urbanpulse.de/files/hackathon2019/Darmstadt_20181229-20190111.csv
Resolving analytics.urbanpulse.de (analytics.urbanpulse.de)... 212.227.192.60
Connecting to analytics.urbanpulse.de (analytics.urbanpulse.de)|212.227.192.60|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 97082064 (93M) [text/plain]
Saving to: ‘Darmstadt_20181229-20190111.csv.1’
Darmstadt_20181229- 100%[===================>] 92.58M 23.8MB/s in 4.1s
2023-03-06 16:11:30 (22.6 MB/s) - ‘Darmstadt_20181229-20190111.csv.1’ saved [97082064/97082064]
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
def compute_percentile_speed_bins(df, bin_size=30.0, percentile=0.85):
    """Aggregate point speed samples into square metric bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``lon``, ``lat`` (WGS84 degrees) and ``speed`` columns.
    bin_size : float
        Side length of each square bin in meters (default 30 m, inside the
        intended 20-50 m range).
    percentile : float
        Quantile of ``speed`` computed per bin (default 0.85).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per non-empty bin: ``percentile_speed``, the bin-center
        geometry (EPSG:3857), and ``long``/``lat`` of the center in degrees.
    """
    # Build point geometries from the raw lon/lat pairs.
    geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
    # BUG FIX: raw lon/lat is geographic (EPSG:4326); the original declared
    # EPSG:3857, so "bin_size" was silently interpreted in degrees
    # (0.02 deg ~ 2.2 km, not the commented 20-50 m). Declare 4326, then
    # reproject to the metric Web-Mercator CRS so bin_size is in meters.
    pts = gpd.GeoDataFrame(df, crs='EPSG:4326', geometry=geometry).to_crs('EPSG:3857')

    x_min = pts.geometry.x.min()
    y_min = pts.geometry.y.min()
    # BUG FIX: the original divided by bin_size without flooring, producing a
    # continuous value per point — groupby then made (almost) one group per
    # distinct coordinate and no real binning occurred. Floor-divide to get
    # integer bin indices.
    pts['x'] = ((pts.geometry.x - x_min) // bin_size).astype(int)
    pts['y'] = ((pts.geometry.y - y_min) // bin_size).astype(int)

    # Per-bin quantile of speed; index is the (x, y) bin pair.
    grouped = pts.groupby(['x', 'y'])['speed'].quantile(percentile)

    # Bin center = (index + 0.5) * bin_size, shifted back by the origin.
    centers = [
        Point((bx + 0.5) * bin_size + x_min, (by + 0.5) * bin_size + y_min)
        for bx, by in grouped.index
    ]
    result = gpd.GeoDataFrame(
        {'percentile_speed': grouped.to_numpy()},
        geometry=centers,
        crs='EPSG:3857',
    )
    # Express the centers back in degrees for the long/lat columns — the
    # projected x/y are meters and would be meaningless as "lat"/"long".
    centers_ll = result.geometry.to_crs('EPSG:4326')
    result['long'] = centers_ll.x
    result['lat'] = centers_ll.y
    return result


if __name__ == '__main__':
    # Read the raw samples, bin them, and persist the result as GeoJSON.
    df = pd.read_csv('Darmstadt_20181229-20190111.csv')
    result = compute_percentile_speed_bins(df)
    # Name the driver explicitly: fiona/pyogrio infer GeoJSON from
    # ".geojson" but a plain ".json" extension is ambiguous.
    result.to_file('processed_result.json', driver='GeoJSON')