import altair as alt
import pandas as pd
import geopandas as gpd
districts = gpd.read_file("https://opendata.potsdam.de/explore/dataset/statistische-bezirke-in-potsdam/download/?format=geojson")
schools = gpd.read_file("https://opendata.potsdam.de/explore/dataset/schulen/download/?format=geojson")
from OSMPythonTools.nominatim import Nominatim
nominatim = Nominatim()
city = nominatim.query('Potsdam, Germany')
city.areaId()
from OSMPythonTools.overpass import overpassQueryBuilder
library_query = overpassQueryBuilder(
area=city.areaId(), # the query can be constrained by the area of an OSM item
elementType='node', # nodes are points (OSM also has ways and relations)
# the selector in the next line is really the heart of the query:
selector='"amenity"="library"', # we're looking for libraries
out='body', # body indicates that we want the data, not just the count
includeGeometry=True # and we want the geometric information, too
)
library_query
from OSMPythonTools.overpass import Overpass
overpass = Overpass()
lib_data = overpass.query(library_query)
lib_data.elements()[0].tags()
lib_data.elements()[0].geometry()
libraries = [ (lib.tag("name"), lib.geometry() ) for lib in lib_data.elements()]
libraries = gpd.GeoDataFrame(libraries, columns = ['name', 'geometry'])
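# a hedged aside: some OSM library nodes may carry no "name" tag, in which case
# .tag("name") should return None; such unnamed entries could be dropped like this
libraries = libraries[libraries["name"].notnull()]
libraries.head()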
# 1. prepare query (and directly include the location lookup)
tree_query = overpassQueryBuilder(
area=nominatim.query('Berlin, Germany').areaId(),
elementType='node',
selector='"natural"="tree"',
out='body',
includeGeometry=True
)
# 2. execute query (and give it a bit more time to finish)
tree_data = overpass.query(tree_query, timeout=60)
# 3. get ids and coordinates of trees
tree_locs = [ (tree.id(), tree.geometry()) for tree in tree_data.elements()]
# 4. create GeoDataFrame
trees = gpd.GeoDataFrame(tree_locs, columns=["id", "geometry"])
trees.head()
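# quick sanity check: how many tree nodes did the query return?
# (the exact count depends on the current state of the OSM data)
len(trees)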
# import Nominatim as geopy geocoder
from geopy.geocoders import Nominatim
# register a custom user agent (commercial services may also require an API key)
geocoder = Nominatim(user_agent="Data Visualization Tutorial 2023 · FH Potsdam")
loc = geocoder.geocode("Kiepenheuerallee 5, 14469 Potsdam")
print(loc)
print((loc.latitude, loc.longitude))
loc = geocoder.geocode("Fachhochschule Potsdam")
print((loc.latitude, loc.longitude))
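# besides the formatted address and the coordinates, geopy's Location object
# also exposes the raw, unparsed Nominatim response (e.g. bounding box, OSM ids)
loc.raw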
kitas = pd.read_csv("https://opendata.potsdam.de/explore/dataset/kitaboerse-20161108/download/", sep=";")
# the parameter of extract is a regular expression that matches the first set of digits
kitas.hausnummer = kitas.hausnummer.str.extract(r'(\d+)', expand=False)
# where no house number could be extracted, the value is NaN, which we replace with zeros
kitas.hausnummer = kitas.hausnummer.fillna(0)
kitas = kitas.sample(20)
kitas
# names of the childcare places
names = kitas["name_der_kindertagesbetreuungseinrichtung"]
# the capacity of the kitas, which we turn into integers
capac = kitas["platze_unbefristet"].fillna(0).astype(int)
# columns containing address information
addr = ['strasse', 'hausnummer', 'postleitzahl']
# we join the values in the three address-related columns for each row
addresses = kitas[addr].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
# the dataframe we will use
kitas = pd.DataFrame({'name': names, 'capacity': capac, 'address': addresses})
kitas
from geopy.extra.rate_limiter import RateLimiter
# add a delay of one second between each geocoding request
geocode = RateLimiter(geocoder.geocode, min_delay_seconds=1)
# apply geocoding to address column; store responses in location column
kitas['location'] = kitas['address'].apply(geocode)
kitas = kitas[kitas['location'].notnull()]
kitas
# to avoid the SettingWithCopyWarning, we first make a clean copy
kitas = kitas.copy()
# create empty columns for coordinates
kitas["lat"] = None
kitas["lon"] = None
# extract lat and lon from the location column
kitas.loc[:, ['lat', 'lon']] = [ (loc.latitude, loc.longitude) for loc in kitas['location'] ]
# create GeoDataFrame, pointing explicitly to lon and lat columns
kitas = gpd.GeoDataFrame(kitas, geometry=gpd.points_from_xy(kitas.lon, kitas.lat))
# remove superfluous columns that are not needed anymore
kitas = kitas.drop(columns=['location', 'lat', 'lon'])
kitas
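# note: the GeoDataFrame created above carries no coordinate reference system yet;
# since Nominatim returns WGS84 longitude/latitude, we can record that explicitly
# (a sketch, assuming the usual EPSG:4326 convention for lon/lat data)
kitas = kitas.set_crs(epsg=4326)
kitas.crs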
schools.head(5)
# 1. mark_geoshape transparently uses the geometry column
basemap = alt.Chart(districts).mark_geoshape(
# add some styling to reduce the salience of the basemap
fill="lightgray", stroke="darkgray"
).properties(width=600, height=600)
# 2. we use mark_circle to have more control over the visual variables
markers = alt.Chart(schools).mark_circle(opacity=1).encode(
# point latitude & longitude to coordinates in the geometry column
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q',
tooltip=['schulname', 'strasse', 'trager'],
)
# combine the two layers
basemap + markers
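# a usage note (the file name is just an example): a layered chart can be
# assigned to a variable and saved as a self-contained HTML file
schools_map = basemap + markers
schools_map.save("schools_map.html")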
basemap = alt.Chart(districts).mark_geoshape(
fill="lightgray", stroke="darkgray"
).properties(width=600, height=600)
markers = alt.Chart(kitas).mark_circle(opacity=1).encode(
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q',
tooltip=['name:N', 'address:N', 'capacity:Q'],
size="capacity:Q"
)
basemap + markers
# by default, Altair only handles up to 5,000 rows
# the following line disables this limitation; read more here:
# https://altair-viz.github.io/user_guide/large_datasets.html
alt.data_transformers.disable_max_rows()
# to not overburden Altair, we pass a sample of 10,000 trees
treemap = alt.Chart(trees.sample(n=10000)).mark_circle(
# reduce the visual presence of each element
size=5,
# with a low dot opacity we can use overplotting to indicate densities
opacity=.25,
# a natural choice
color="green"
).encode(
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q'
).properties(width=600, height=600)
treemap
# load country data from geonames
geonames = pd.read_csv("https://www.geonames.org/countryInfoCSV", sep='\t')
# select four columns
geonames = geonames[['name', 'iso alpha3', 'areaInSqKm', 'population']]
# set index to country code
geonames = geonames.set_index("iso alpha3")
geonames.head()
# load country polygons from Datahub: https://github.com/datasets/geo-countries
polygons = gpd.read_file("countries.geojson")
# remove country names, as we have them already
polygons = polygons.drop(columns=["ADMIN"])
# set index to country code
polygons = polygons.set_index("ISO_A3")
# reduce the complexity of the shapes
polygons.geometry = polygons.geometry.simplify(.1)
polygons.info()
# inner means that we keep only those countries
# for which we have geometric and attribute data
countries = polygons.join(geonames, how='inner')
countries.info()
countries["density"] = countries["population"] / countries["areaInSqKm"]
countries = countries[countries['density'].notna()]
countries.density = countries.density.round(0).astype(int)
# drop Antarctica (ISO code ATA), which would otherwise dominate the map
countries = countries.drop("ATA")
from shapely.ops import orient
# Vega-Lite interprets polygon winding order differently from the GeoJSON default;
# re-orienting the rings (sign -1, i.e. clockwise exterior rings) prevents inverted shapes
countries.geometry = countries.geometry.apply(orient, args=(-1,))
alt.Chart(countries).mark_geoshape().encode(
color=alt.Color('density', scale=alt.Scale(type="log", domain=[1,1000] )),
tooltip=['name', 'areaInSqKm', 'population', 'density']
).project(
# enter different projection here
).properties(
width=750,
height=500
)
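# a hedged example of how the empty project() call above could be filled in:
# the Equal Earth projection preserves relative areas, which suits a density map
alt.Chart(countries).mark_geoshape().encode(
color=alt.Color('density', scale=alt.Scale(type="log", domain=[1, 1000])),
tooltip=['name', 'areaInSqKm', 'population', 'density']
).project(
type='equalEarth'
).properties(
width=750,
height=500
)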