import altair as alt
import pandas as pd
import geopandas as gpd
districts = gpd.read_file("https://opendata.potsdam.de/explore/dataset/statistische-bezirke-in-potsdam/download/?format=geojson")
schools = gpd.read_file("https://opendata.potsdam.de/explore/dataset/schulen/download/?format=geojson")
from OSMPythonTools.nominatim import Nominatim
nominatim = Nominatim()
city = nominatim.query('Potsdam, Germany')
city.areaId()
from OSMPythonTools.overpass import overpassQueryBuilder
library_query = overpassQueryBuilder(
area=city.areaId(), # the query can be constrained by the area of an OSM item
elementType='node', # nodes are points (OSM also has ways and relations)
# the selector in the next line is really the heart of the query:
selector='"amenity"="library"', # we're looking for libraries
out='body', # body indicates that we want the data, not just the count
includeGeometry=True # and we want the geometric information, too
)
library_query
from OSMPythonTools.overpass import Overpass
overpass = Overpass()
lib_data = overpass.query(library_query)
lib_data.elements()[0].tags()
lib_data.elements()[0].geometry()
libraries = [ (lib.tag("name"), lib.geometry() ) for lib in lib_data.elements()]
libraries = gpd.GeoDataFrame(libraries, columns = ['name', 'geometry'])
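# a hedged aside: some OSM library nodes may carry no "name" tag, in which case
# .tag("name") should return None; such unnamed entries could be dropped like this
libraries = libraries[libraries["name"].notnull()]
libraries.head()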
# 1. prepare query (and directly include the location lookup)
tree_query = overpassQueryBuilder(
area=nominatim.query('Berlin, Germany').areaId(),
elementType='node',
selector='"natural"="tree"',
out='body',
includeGeometry=True
)
# 2. execute query (and give it a bit more time to finish)
tree_data = overpass.query(tree_query, timeout=60)
# 3. get ids and coordinates of trees
tree_locs = [ (tree.id(), tree.geometry()) for tree in tree_data.elements()]
# 4. create GeoDataFrame
trees = gpd.GeoDataFrame(tree_locs, columns=["id", "geometry"])
trees.head()
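# quick sanity check: how many tree nodes did the query return?
# (the exact count depends on the current state of the OSM data)
len(trees)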
# import Nominatim as geopy geocoder
from geopy.geocoders import Nominatim
# register a custom user agent (commercial services may also require an API key)
geocoder = Nominatim(user_agent="Data Visualization Tutorial 2023 · FH Potsdam")
loc = geocoder.geocode("Kiepenheuerallee 5, 14469 Potsdam")
print(loc)
print((loc.latitude, loc.longitude))
loc = geocoder.geocode("Fachhochschule Potsdam")
print((loc.latitude, loc.longitude))
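# besides the formatted address and the coordinates, geopy's Location object
# also exposes the raw, unparsed Nominatim response (e.g. bounding box, OSM ids)
loc.raw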
kitas = pd.read_csv("https://opendata.potsdam.de/explore/dataset/kitaboerse-20161108/download/", sep=";")
# the parameter of extract is a regular expression that matches the first set of digits
kitas.hausnummer = kitas.hausnummer.str.extract(r'(\d+)', expand=False)
# where no house number could be extracted, the value is NaN, which we replace with zeros
kitas.hausnummer = kitas.hausnummer.fillna(0)
kitas = kitas.sample(20)
kitas
# names of the childcare places
names = kitas["name_der_kindertagesbetreuungseinrichtung"]
# the capacity of the kitas, which we turn into integers
capac = kitas["platze_unbefristet"].fillna(0).astype(int)
# columns containing address information
addr = ['strasse', 'hausnummer', 'postleitzahl']
# we join the values in the three address-related columns for each row
addresses = kitas[addr].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
# the dataframe we will use
kitas = pd.DataFrame({'name': names, 'capacity': capac, 'address': addresses})
kitas
from geopy.extra.rate_limiter import RateLimiter
# add a delay of one second between each geocoding request
geocode = RateLimiter(geocoder.geocode, min_delay_seconds=1)
# apply geocoding to address column; store responses in location column
kitas['location'] = kitas['address'].apply(geocode)
kitas = kitas[kitas['location'].notnull()]
kitas
# to avoid the SettingWithCopyWarning, we first make a clean copy
kitas = kitas.copy()
# create empty columns for coordinates
kitas["lat"] = None
kitas["lon"] = None
# extract lat and lon from the location column
kitas.loc[:, ['lat', 'lon']] = [ (loc.latitude, loc.longitude) for loc in kitas['location'] ]
# create GeoDataFrame, pointing explicitly to lon and lat columns
kitas = gpd.GeoDataFrame(kitas, geometry=gpd.points_from_xy(kitas.lon, kitas.lat))
# remove superfluous columns that are not needed anymore
kitas = kitas.drop(columns=['location', 'lat', 'lon'])
kitas
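# note: the GeoDataFrame created above carries no coordinate reference system yet;
# since Nominatim returns WGS84 longitude/latitude, we can record that explicitly
# (a sketch, assuming the usual EPSG:4326 convention for lon/lat data)
kitas = kitas.set_crs(epsg=4326)
kitas.crs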
schools.head(5)
# 1. mark_geoshape transparently uses the geometry column
basemap = alt.Chart(districts).mark_geoshape(
# add some styling to reduce the salience of the basemap
fill="lightgray", stroke="darkgray"
).properties(width=600, height=600)
# 2. we use mark_circle to have more control over the visual variables
markers = alt.Chart(schools).mark_circle(opacity=1).encode(
# point latitude & longitude to coordinates in the geometry column
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q',
tooltip=['schulname', 'strasse', 'trager'],
)
# combine the two layers
basemap + markers
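# a usage note (the file name is just an example): a layered chart can be
# assigned to a variable and saved as a self-contained HTML file
schools_map = basemap + markers
schools_map.save("schools_map.html")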
basemap = alt.Chart(districts).mark_geoshape(
fill="lightgray", stroke="darkgray"
).properties(width=600, height=600)
markers = alt.Chart(kitas).mark_circle(opacity=1).encode(
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q',
tooltip=['name:N', 'address:N', 'capacity:Q'],
size="capacity:Q"
)
basemap + markers
# by default, Altair only handles up to 5,000 rows
# the following line disables this limitation; read more here:
# https://altair-viz.github.io/user_guide/large_datasets.html
alt.data_transformers.disable_max_rows()
# to not overburden Altair, we pass a sample of 10,000 trees
treemap = alt.Chart(trees.sample(n=10000)).mark_circle(
# reduce the visual presence of each element
size=5,
# with a low dot opacity we can use overplotting to indicate densities
opacity=.25,
# a natural choice
color="green"
).encode(
longitude='geometry.coordinates[0]:Q',
latitude='geometry.coordinates[1]:Q'
).properties(width=600, height=600)
treemap
# load country data from geonames
geonames = pd.read_csv("https://www.geonames.org/countryInfoCSV", sep='\t')
# select four columns
geonames = geonames[['name', 'iso alpha3', 'areaInSqKm', 'population']]
# set index to country code
geonames = geonames.set_index("iso alpha3")
geonames.head()
# load country polygons from Datahub: https://github.com/datasets/geo-countries
polygons = gpd.read_file("countries.geojson")
# remove country names, as we have them already
polygons = polygons.drop(columns=["ADMIN"])
# set index to country code
polygons = polygons.set_index("ISO_A3")
# reduce the complexity of the shapes
polygons.geometry = polygons.geometry.simplify(.1)
polygons.info()
# inner means that we keep only those countries
# for which we have geometric and attribute data
countries = polygons.join(geonames, how='inner')
countries.info()
countries["density"] = countries["population"] / countries["areaInSqKm"]
countries = countries[countries['density'].notna()]
countries.density = countries.density.round(0).astype(int)
# drop Antarctica (ISO code ATA), which would otherwise dominate the map
countries = countries.drop("ATA")
from shapely.ops import orient
# Vega-Lite interprets polygon winding order differently from the GeoJSON default;
# re-orienting the rings (sign -1, i.e. clockwise exterior rings) prevents inverted shapes
countries.geometry = countries.geometry.apply(orient, args=(-1,))
alt.Chart(countries).mark_geoshape().encode(
color=alt.Color('density', scale=alt.Scale(type="log", domain=[1,1000] )),
tooltip=['name', 'areaInSqKm', 'population', 'density']
).project(
# enter different projection here
).properties(
width=750,
height=500
)
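# a hedged example of how the empty project() call above could be filled in:
# the Equal Earth projection preserves relative areas, which suits a density map
alt.Chart(countries).mark_geoshape().encode(
color=alt.Color('density', scale=alt.Scale(type="log", domain=[1, 1000])),
tooltip=['name', 'areaInSqKm', 'population', 'density']
).project(
type='equalEarth'
).properties(
width=750,
height=500
)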