0 Initialization: Modules + Options
# LOADING PACKAGES:
import os
import pandas as pd
import csv
import copy
import requests
import json
import ast
import time
import numpy as np
import statistics
import matplotlib.pyplot as plt
# from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from pandas.plotting import autocorrelation_plot
!pip install statsmodels==0.12.2
from statsmodels.tsa.arima.model import ARIMA
# SETTING PRINT-OPTIONS:
np.set_printoptions(linewidth = 1000, suppress = True)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 15000)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.width', 1200)
# SETTING UP PLOT DESIGN:
!pip install palettable
# from palettable.cmocean.diverging import Curl_19
from palettable.colorbrewer.sequential import YlOrBr_9
import seaborn as sns
plt.style.use(['seaborn-colorblind'])
1 Obtaining empirical Data
1.1 Google Maps Scraper: Retrieving raw Station Data
# BASELINK: extracted from an analysis of the request that is sent by the browser when searching for a location in Google Maps
baselink = "https://www.google.com/search?tbm=map&authuser=0&hl=de&gl=bg&pb=!4m12!1m3!1d1918817.4443173876!2d25.47055767187499!3d42.72523200000002!2m3!1f0!2f0!3f0!3m2!1i1699!2i491!4f13.1!7i20!10b1!12m8!1m1!18b1!2m3!5m1!6e2!20e3!10b1!16b1!19m4!2m3!1i360!2i120!4i8!20m65!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m50!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!22m6!1suSWpYOGWE5OB9u8P_4ufwA4:2!2s1i:0,t:11886,p:uSWpYOGWE5OB9u8P_4ufwA4:2!7e81!12e5!17suSWpYOGWE5OB9u8P_4ufwA4:43!18e15!24m54!1m16!13m7!2b1!3b1!4b1!6i1!8b1!9b1!20b1!18m7!3b1!4b1!5b1!6b1!9b1!13b1!14b0!2b1!5m5!2b1!3b1!5b1!6b1!7b1!10m1!8e3!14m1!3b1!17b1!20m2!1e3!1e6!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!89b1!26m4!2m3!1i80!2i92!4i8!30m28!1m6!1m2!1i0!2i0!2m2!1i458!2i491!1m6!1m2!1i1649!2i0!2m2!1i1699!2i491!1m6!1m2!1i0!2i0!2m2!1i1699!2i20!1m6!1m2!1i0!2i471!2m2!1i1699!2i491!34m17!2b1!3b1!4b1!6b1!8m5!1b1!3b1!4b1!5b1!6b1!9b1!12b1!14b1!20b1!23b1!25b1!26b1!37m1!1e81!42b1!47m0!49m1!3b1!50m4!2e2!3m2!1b1!3b1!65m1!1b1!67m2!7b1!10b1!69i557&q="
# GETTING THE LIST OF STATION NAMES FOR LINE U2: proper encoding is important here because the station names contain special German characters (ä, ö, ü, ß, ...)
sourceFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", encoding = "latin1", sep = ";", names = ["ID", "StationNames", "StationDistancesMin"])
stationNameList = sourceFile["StationNames"].tolist()
# RETRIEVING STATION INFORMATION FROM GOOGLE MAPS:
for i in range(len(stationNameList)):
    print("Getting data for station: " + stationNameList[i])
    name = stationNameList[i].replace(" ", "&&") # replacing spaces with "&&" because only then does Google Maps return the correct station information
    name = requests.utils.quote(name) # making station names compatible with the requests package
    response = requests.get(baselink + "U-Bahn-Haltestelle%20" + name)
    if response.status_code != 200:
        print("Failed to get data for station: " + name + " status = " + str(response.status_code))
    else:
        # Export:
        output = open("GoogleMaps_Data/U2-Station" + str(i + 1) + ".txt", mode = "w")
        output.write(response.text)
        output.close()
    time.sleep(2) # Google Maps blocks the scraper if the request frequency is too high
1.2 Extracting Google Maps Crowdedness Information from raw Station Data
namesFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationDistancesMin"], delimiter=";", encoding="utf-8")
stationNameList = namesFile["StationNames"].tolist()
stationIDList = namesFile["ID"].tolist()
# DEFINING MULTIDIMENSIONAL RECURSIVE LIST SEARCH:
def nDListValueSearch(data, indexList, output):
# checking if "04 Uhr" is present in current list, if yes: set output to indexlist of that string
if '04 Uhr' in data and len(output) == 0: # crowdedness data information always contains "04 Uhr" --> reliable index to first occurrence of crowdedness data
for index in indexList:
output.append(index)
index = -1
for element in data: # checking every element in the list and calling this function with every element that is a list
index += 1
if isinstance(element, list):
newIndexList = indexList.copy() # saving one indexList per element representing its position in the json
newIndexList.append(index)
nDListValueSearch(element, newIndexList, output)
def getcrowdednessData(data):
output = []
nDListValueSearch(data, [], output)
crowdednessData = data.copy()
for i in range(len(output) - 3): # len() - 3 to get to the higher level list with all crowdedness data (returned indexList directly leads to "04 Uhr")
crowdednessData = crowdednessData[output[i]]
return crowdednessData
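A minimal sanity check of the recursive search on a made-up nested list (the real Google Maps arrays are nested far deeper, but the mechanics are identical; all values here are invented):
# TOY SANITY CHECK (made-up nesting, not real Google Maps data):
toyData = ["Station X", [[[["04 Uhr", 5], ["05 Uhr", 12]]]]]
toyOutput = []
nDListValueSearch(toyData, [], toyOutput)
print(toyOutput)                    # index path to the innermost list containing "04 Uhr": [1, 0, 0, 0]
print(getcrowdednessData(toyData))  # ascends three levels from that list --> the whole nested crowdedness block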
# ASSIGNMENT OF RETRIEVED DATA TO RESPECTIVE STATIONS:
crowdednessDataList = [""] * len(stationNameList)
with os.scandir("GoogleMaps_Data") as file_directory:
    for entry in file_directory:
        with open(entry.path, mode="r", encoding="utf-8") as rawData:
            rawData.readline() # skipping the first line since Google prepends something that does not belong to the json array
            data = json.load(rawData)
            # replacing characters to make sure that the station title from the Google Maps array is a substring of the station name from our station list
            rawDataStationName = data[0][0].replace("&&", " ").replace("U-Bahn-Haltestelle ", "")
            if rawDataStationName in stationNameList: # matching search results from the raw Google Maps data with entries from the station list
                crowdednessDataList[stationNameList.index(rawDataStationName)] = getcrowdednessData(data)
            else:
                print("ERROR: " + str(rawDataStationName))
# EXPORT:
crowdednessDataFrame = pd.DataFrame({
    "ID" : stationIDList,
    "Crowdedness Information" : crowdednessDataList
})
crowdednessDataFrame.style.set_properties(subset=["ID", "Crowdedness Information"], **{'text-align': 'left'}) # styling only affects the notebook display
crowdednessDataFrame.to_csv("Station_Info/U2-Station_CrowdednessInfo.csv", sep=";", encoding="utf-8", index = False, header = False)
1.3 Adjusting empirical Boarding and Alighting Data to match OD Estimation Method Constraints
# DEFINING ADJUSTMENT FUNCTIONS:
def differenceSum(i, j):
    return sum(i) - sum(j)

# The column (boarding or alighting) with fewer passengers is adjusted to match the sum of the column with more passengers:
def adjustmentProcess(boardingList, alightingList):
    print("Old difference: " + str(differenceSum(alightingList, boardingList)))
    if differenceSum(alightingList, boardingList) > 0:
        return adjustArray(array1 = boardingList, array2 = alightingList)
    elif differenceSum(alightingList, boardingList) < 0:
        return adjustArray(array1 = alightingList, array2 = boardingList)
    else:
        print("Raw numbers match.")

# The adjustment is carried out so that the proportions between the boardings (or alightings) at each station remain the same:
def adjustArray(array1, array2):
    summe = sum(array1)
    difference = differenceSum(array2, array1)
    for i in range(len(array1)):
        array1[i] += (array1[i] / summe) * difference
    print("New difference: " + str(differenceSum(array2, array1)))
# CARRYING OUT ADJUSTMENT OPERATION FOR EACH YEAR IN WHICH DATA IS AVAILABLE:
yearList = [2009, 2013, 2016, 2017, 2018, 2019]
for year in yearList:
    sourceFile = pd.read_csv("Boarding_Alighting_Data/U2-" + str(year) + "_original.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t")
    boardingsA = sourceFile["EinstiegA"].tolist()
    alightsA = sourceFile["AusstiegA"].tolist()
    boardingsB = sourceFile["EinstiegB"].tolist()
    alightsB = sourceFile["AusstiegB"].tolist()
    stationsNrsA = sourceFile["StationNrsA"].tolist()
    stationsNrsB = sourceFile["StationNrsB"].tolist()
    # constraints: no passenger may alight at the first station of a direction, and no passenger may board at its last station
    boardingsA[len(boardingsA) - 1] = 0
    alightsA[0] = 0
    boardingsB[len(boardingsB) - 1] = 0
    alightsB[0] = 0
    # Calculation for Direction A:
    print("Year: " + str(year))
    print("Direction A")
    adjustmentProcess(boardingsA, alightsA)
    print()
    # Calculation for Direction B:
    print("Direction B")
    adjustmentProcess(boardingsB, alightsB)
    print()
    # EXPORT:
    data = {"StationsNrsA": stationsNrsA,
            "EinstiegA": boardingsA,
            "AusstiegA": alightsA,
            "StationsNrsB": stationsNrsB,
            "EinstiegB": boardingsB,
            "AusstiegB": alightsB
            }
    df = pd.DataFrame(data)
    print(df)
    print("=" * 80 + "\n")
    df.to_csv("Boarding_Alighting_Data/U2-" + str(year) + "_adjusted.csv", index = False, header = False, sep = "\t")
2 OD Estimation
# OD MATRIX HEATMAP PLOTTER:
def odHeatmap(diffMatrix, title, mask):
    tickList = np.arange(1, len(diffMatrix) + 1, 1)
    figure, ax = plt.subplots(figsize=(15, 15))
    cmap = YlOrBr_9.mpl_colormap
    cbar_kws = {'shrink': 0.8, 'ticks' : np.arange(0, 5500, 500)}
    annot_kws = {"fontsize": 8}
    plot = sns.heatmap(diffMatrix, square = True, vmin = 0, vmax = 4000, xticklabels = tickList, yticklabels = tickList, annot = True, mask = mask, annot_kws = annot_kws, cmap = cmap, cbar_kws = cbar_kws, fmt = ".0f", linewidths = .5, ax = ax)
    plt.title(label = title, pad = 10)
    ax.xaxis.tick_top()
    ax.tick_params(left=False, top=False)
    plot.set_yticklabels(plot.get_yticklabels(), rotation = 0, horizontalalignment='right')
    plt.show()
    return figure
2.1 Fluid Analogy Method
# FLUID ANALOGY METHOD DEFINITION:
# FA here for the special case m = 0 --> passengers are allowed to alight at every station after their boarding station
# created from the exemplary calculation in the appendix of the Simon and Furth (1985) paper --> matrix names and meanings can be looked up there
# this is not a general FA implementation but one with some shortcuts made possible by the special case m = 0
def faMethod(boardingList, alightingList):
    eMatrix = np.zeros((len(boardingList), len(alightingList))) # eligible-for-alighting matrix
    vMatrix = np.zeros((len(boardingList), len(alightingList))) # real OD matrix
    fraction = 0
    for i in range(len(boardingList) - 1):
        eMatrix[i, i + 1] = boardingList[i] # step 6
    for j in range(len(boardingList)): # step 7
        if eMatrix[:, j].sum() > 0:
            fraction = alightingList[j] / eMatrix[:, j].sum() # step 7a+b
            vMatrix[:, j] = eMatrix[:, j] * fraction # step 7c
        if j + 1 < len(boardingList):
            eMatrix[:, j + 1] = eMatrix[:, j] + eMatrix[:, j + 1] - vMatrix[:, j] # step 7d
    return [vMatrix, fraction]
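A worked toy example with three stations and made-up, balanced counts: of the 100 passengers boarding at station 1, 60 alight at station 2; the remaining 40, together with the 50 passengers boarding at station 2, alight at station 3:
# TOY FA RUN (made-up counts):
toyBoardings = [100, 50, 0]
toyAlights = [0, 60, 90]
toyV, toyFraction = faMethod(toyBoardings, toyAlights)
print(toyV.round())   # [[0. 60. 40.] [0. 0. 50.] [0. 0. 0.]]
print(toyFraction)    # 1.0 when boardings and alightings balance exactly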
# RUNNING FLUID ANALOGY METHOD WITH YEARLY BOARDING-ALIGHTING DATA:
stationNames = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationDistancesMin"], delimiter = ";")["ID"].tolist()
yearList = [2009, 2013, 2016, 2017, 2018, 2019]
for year in yearList:
    sourceFile = pd.read_csv("Boarding_Alighting_Data/U2-" + str(year) + "_adjusted.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t")
    boardingsA = sourceFile["EinstiegA"].tolist()
    alightsA = sourceFile["AusstiegA"].tolist()
    boardingsB = sourceFile["EinstiegB"].tolist()
    alightsB = sourceFile["AusstiegB"].tolist()
    resultA = faMethod(boardingsA, alightsA)
    resultB = faMethod(boardingsB, alightsB)
    vMatrixA = resultA[0]
    vMatrixB = resultB[0]
    gMatrixFramed = pd.DataFrame(np.add(vMatrixA, np.flip(vMatrixB)), columns = stationNames, index = stationNames)
    # Output:
    print("Year " + str(year) + ":")
    print(str(gMatrixFramed.round()))
    print("f_{A}: " + str(resultA[1].round(3)))
    print("f_{B}: " + str(resultB[1].round(3)))
    print(200 * "=" + "\n")
    gMatrixFramed.to_csv("Estimation/FluidAnalogy_Matrizen/U2-" + str(year) + "-OD-Matrix-FluidAnalogyMethod.csv", index = False, header = False, sep = "\t")
# FA 2019 MATRIX PLOT:
stationNames = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationDistancesMin"], delimiter = ";")["ID"].tolist()
sourceFile = pd.read_csv("Estimation/FluidAnalogy_Matrizen/U2-2019-OD-Matrix-FluidAnalogyMethod.csv", delimiter = "\t", names = stationNames)
mask = np.zeros((len(stationNames), len(stationNames)))
np.fill_diagonal(mask, 1)
plot = odHeatmap(sourceFile, "Estimated Fluid Analogy OD Matrix from 2019 Boarding-Alighting Data", mask)
plot.savefig("Estimation/FluidAnalogy_Matrizen/FA-2019-Heatmap.pdf", bbox_inches = 'tight')
2.2 IPF Method
# IPF METHOD DEFINITION:
# function returning the time difference (distance) between stations
def stationTimeDistance(i, j, timestamps):
    relativeDistance = abs(int(timestamps[j]) - int(timestamps[i])) / max(timestamps)
    return relativeDistance

# creation of a seed matrix with the time-distance-based propensity function introduced by Navick and Furth (1994)
def createSeedMatrix(matrix, stationTimestamps, alpha = 1, beta = 0):
    for i in range(len(matrix[0])):
        for j in range(len(matrix[0])):
            if j >= i:
                matrix[i, j] = stationTimeDistance(i, j, stationTimestamps)**alpha * np.exp(-stationTimeDistance(i, j, stationTimestamps) * beta)
            else:
                matrix[i, j] = 0
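The propensity function assigns each feasible OD pair the seed value d_ij^alpha * exp(-beta * d_ij), where d_ij is the travel time between stations i and j relative to the longest travel time on the line. A minimal sketch with three made-up cumulative timestamps (the defaults alpha = 1, beta = 0 reduce the seed to the relative time distance):
# TOY SEED MATRIX (made-up cumulative travel times in minutes):
toySeed = np.zeros((3, 3))
createSeedMatrix(toySeed, [0, 5, 15])
print(toySeed.round(2))
# [[0.   0.33 1.  ]
#  [0.   0.   0.67]
#  [0.   0.   0.  ]]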
# recursive function distributing the counts according to the pattern laid out in the seed matrix
def ipf(seedMatrix, boardingList, alightingList, convError = 0.1):
    # adjusting the rows of the seed matrix
    for i in range(len(seedMatrix[0])):
        # if-condition to make sure there is no division by zero
        if boardingList[i] != seedMatrix[i, :].sum():
            a = boardingList[i] / seedMatrix[i, :].sum() # a = row balancing factor
        else:
            a = 1
        seedMatrix[i, :] = seedMatrix[i, :] * a
    # adjusting the columns of the seed matrix
    for j in range(len(seedMatrix[:, 0])):
        if alightingList[j] != seedMatrix[:, j].sum():
            b = alightingList[j] / seedMatrix[:, j].sum() # b = column balancing factor
        else:
            b = 1
        seedMatrix[:, j] = seedMatrix[:, j] * b
    # due to the column adjustment, the row sums may be off again --> the largest difference is calculated
    maxError = 0
    for i in range(len(seedMatrix)):
        if maxError < abs(seedMatrix[i, :].sum() - boardingList[i]):
            maxError = abs(seedMatrix[i, :].sum() - boardingList[i])
    # if the difference is larger than the specified convergence error, the function is called again with the current matrix as the new seed matrix
    if maxError <= convError:
        return
    else:
        ipf(seedMatrix, boardingList, alightingList, convError)
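A quick balancing check on the same made-up three-station example as in the fluid analogy section: after fitting, the row sums reproduce the boardings and the column sums the alightings (up to convError); since these marginals determine the matrix uniquely, the result coincides with the fluid analogy solution:
# TOY BALANCING CHECK (made-up, balanced marginals):
toySeed = np.zeros((3, 3))
createSeedMatrix(toySeed, [0, 5, 15])
toyBoardings = [100, 50, 0]
toyAlights = [0, 60, 90]
ipf(toySeed, toyBoardings, toyAlights)
print(toySeed.round(1))     # approximately [[0. 60. 40.] [0. 0. 50.] [0. 0. 0.]]
print(toySeed.sum(axis=1))  # row sums match the boardings within convError
print(toySeed.sum(axis=0))  # column sums match the alightings within convError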
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
yearList = [2009, 2013, 2016, 2017, 2018, 2019]
for year in yearList:
    sourceFile = pd.read_csv("Boarding_Alighting_Data/U2-" + str(year) + "_adjusted.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t", header = None)
    boardingsA = sourceFile["EinstiegA"].tolist()
    alightsA = sourceFile["AusstiegA"].tolist()
    boardingsB = sourceFile["EinstiegB"].tolist()
    alightsB = sourceFile["AusstiegB"].tolist()
    matrixA = np.zeros((len(stationNames), len(stationNames)))
    matrixB = np.zeros((len(stationNames), len(stationNames)))
    createSeedMatrix(matrixA, stationTimestamps)
    createSeedMatrix(matrixB, stationTimestamps)
    ipf(matrixA, boardingsA, alightsA)
    ipf(matrixB, boardingsB, alightsB)
    gMatrixFramed = pd.DataFrame(np.add(matrixA, np.flip(matrixB)), columns = stationNames, index = stationNames)
    # Output:
    print("Year " + str(year) + ":")
    print(gMatrixFramed.round())
    print(200 * "=" + "\n")
    gMatrixFramed.to_csv("Estimation/IPF_Matrizen/U2-" + str(year) + "-OD-Matrix-IPFMethod.csv", index = False, header = False, sep = "\t")
# FLUID ANALOGY TEST:
# The FA method is a special case of the IPF approach with a seed matrix in which every feasible OD pair (j > i, i.e. above the diagonal) is set to 1
matrixTest = np.zeros((len(stationNames), len(stationNames)))
for i in range(len(matrixTest[0])):
    for j in range(len(matrixTest[0])):
        if i < j:
            matrixTest[i, j] = 1
ipf(matrixTest, boardingsA, alightsA)
print(matrixTest.round())
matrixfa = faMethod(boardingsA, alightsA)[0] # direction-A FA matrix for the same (last processed) year
print(matrixfa.round())
print(matrixfa.round() - matrixTest.round())
# IPF 2019 MATRIX PLOT:
stationNames = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationDistancesMin"], delimiter = ";")["ID"].tolist()
sourceFile = pd.read_csv("Estimation/IPF_Matrizen/U2-2019-OD-Matrix-IPFMethod.csv", delimiter = "\t", names = stationNames)
mask = np.zeros((len(stationNames), len(stationNames)))
np.fill_diagonal(mask, 1)
plot = odHeatmap(sourceFile, "Estimated IPF OD Matrix from 2019 Boarding-Alighting Data", mask)
plot.savefig("Estimation/IPF_Matrizen/IPF-2019-Heatmap.pdf", bbox_inches = 'tight')
3 OD Prediction
3.1 Defining Error Measurements
# DEFINING ERROR MEASURES:
def RMSE(matrixRef, matrixPre):
    rmse = 0
    for i in range(len(matrixRef[0])):
        for j in range(len(matrixRef[0])):
            rmse += (matrixRef[i][j] - matrixPre[i][j]) ** 2
    return np.sqrt(rmse / (len(matrixRef[0]) ** 2))

def MAPE(matrixRef, matrixPre):
    mape = 0
    for i in range(len(matrixRef[0])):
        for j in range(len(matrixRef[0])):
            if i != j: # the diagonal is skipped since its reference values are zero (division by zero)
                mape += abs(matrixRef[i][j] - matrixPre[i][j]) / matrixRef[i][j]
    return mape / (len(matrixRef[0]) ** 2)
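A micro-example with made-up 2×2 matrices; note that, as defined above, both measures divide by all n² matrix entries, including the diagonal that MAPE skips:
# MICRO-EXAMPLE (made-up 2x2 matrices):
ref = np.array([[0., 100.], [200., 0.]])
pre = np.array([[0., 110.], [180., 0.]])
print(RMSE(ref, pre))  # sqrt((10^2 + 20^2) / 4) = 11.18...
print(MAPE(ref, pre))  # (10/100 + 20/200) / 4 = 0.05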
3.2 Historical Average
# DEFINING HISTORICAL AVERAGE:
def historicalAverage(tensor):
    dimension = len(list(tensor.values())[0][0])
    sumMatrix = np.zeros((dimension, dimension))
    timestamps = 0
    for matrix in tensor.values():
        timestamps += 1
        sumMatrix = np.add(sumMatrix, matrix)
    return sumMatrix / timestamps
3.2.1 ... using Fluid Analogy OD Matrices
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
fluidAnalogyTensor = {}
with os.scandir("Estimation/FluidAnalogy_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"): # skipping the plot pdf
            fluidAnalogyTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refFluidAnalogyMatrix = fluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
historicalFluidAnalogyTensor = fluidAnalogyTensor.copy()
del historicalFluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
# del historicalFluidAnalogyTensor[2009]
# del historicalFluidAnalogyTensor[2013]
# CALCULATION AND OUTPUT:
output = historicalAverage(historicalFluidAnalogyTensor)
outputdf = pd.DataFrame(output, columns = stationNames, index = stationNames)
outputdf.to_csv("Prediction/HA-FA-" + str(max(list(fluidAnalogyTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(outputdf.round())
print("RMSE: " + str(RMSE(refFluidAnalogyMatrix,historicalAverage(historicalFluidAnalogyTensor)))) # == 88.56000620529849
print("MAE: " + str(MAE(refFluidAnalogyMatrix,historicalAverage(historicalFluidAnalogyTensor)))) # == 0.1296692283052319
3.2.2 ... using IPF OD Matrices
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
ipfTensor = {}
with os.scandir("Estimation/IPF_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"):
            ipfTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refIPFMatrix = ipfTensor[max(list(ipfTensor.keys()))]
historicalIPFTensor = ipfTensor.copy()
del historicalIPFTensor[max(list(ipfTensor.keys()))]
# del historicalIPFTensor[2009]
# del historicalIPFTensor[2013]
# CALCULATION AND OUTPUT:
output = historicalAverage(historicalIPFTensor)
outputdf = pd.DataFrame(output, columns = stationNames, index = stationNames)
outputdf.to_csv("Prediction/HA-IPF-" + str(max(list(ipfTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(outputdf.round())
print("RMSE: " + str(RMSE(refIPFMatrix, historicalAverage(historicalIPFTensor)))) # == 73.01379357820703
print("MAPE: " + str(MAPE(refIPFMatrix, historicalAverage(historicalIPFTensor)))) # == 0.11087428013805561
3.3 Linear Regression
# DEFINING LINEAR REGRESSION METHOD:
def linearRegression(tensor, to_predict):
    model = LinearRegression() # using the linear fit model from scikit-learn
    # r = []
    yearList = np.array(list(tensor.keys()))
    prediction = np.zeros((len(tensor[yearList[0]][0]), len(tensor[yearList[0]][0])))
    for i in range(len(tensor[yearList[0]][0])):
        for j in range(len(tensor[yearList[0]][0])):
            if i != j:
                y = []
                for year in yearList:
                    y.append(tensor[year][i][j])
                model.fit(yearList.reshape((-1, 1)), np.array(y))
                y_pred = model.predict(np.array([to_predict]).reshape((-1, 1)))
                prediction[i, j] = y_pred
                # r_sq = model.score(yearList.reshape((-1, 1)), np.array(y))
                # r.append(r_sq)
    # print(np.median(r))
    return prediction
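The function fits an independent straight line per OD pair over the available years and extrapolates it to the target year. A toy check with a made-up 2×2 tensor whose series are perfectly linear:
# TOY EXTRAPOLATION (made-up tensor over three years):
toyTensor = {2016: np.array([[0., 10.], [20., 0.]]),
             2017: np.array([[0., 12.], [22., 0.]]),
             2018: np.array([[0., 14.], [24., 0.]])}
print(linearRegression(toyTensor, 2019))  # perfectly linear trends --> approximately [[0. 16.] [26. 0.]]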
3.3.1 ... using Fluid Analogy OD Matrices
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
fluidAnalogyTensor = {}
with os.scandir("Estimation/FluidAnalogy_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"):
            fluidAnalogyTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refFluidAnalogyMatrix = fluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
historicalFluidAnalogyTensor = fluidAnalogyTensor.copy()
del historicalFluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
prediction = linearRegression(historicalFluidAnalogyTensor, max(list(fluidAnalogyTensor.keys())))
predictiondf = pd.DataFrame(prediction, columns = stationNames, index = stationNames)
predictiondf.to_csv("Prediction/LinRegr-FA-" + str(max(list(fluidAnalogyTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(predictiondf.round())
print("RMSE: " + str(RMSE(refFluidAnalogyMatrix,prediction))) # == 57.264198790246326
print("MAE: " + str(MAE(refFluidAnalogyMatrix,prediction))) # == 0.08585216082499736
3.3.2 ... using IPF OD Matrices
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
ipfTensor = {}
with os.scandir("Estimation/IPF_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"):
            ipfTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refIPFMatrix = ipfTensor[max(list(ipfTensor.keys()))]
historicalIPFTensor = ipfTensor.copy()
del historicalIPFTensor[max(list(ipfTensor.keys()))]
prediction = linearRegression(historicalIPFTensor, max(list(ipfTensor.keys())))
predictiondf = pd.DataFrame(prediction, columns = stationNames, index = stationNames)
predictiondf.to_csv("Prediction/LinRegr-IPF-" + str(max(list(ipfTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(predictiondf.round())
print("RMSE: " + str(RMSE(refIPFMatrix,prediction))) # == 57.00729639069595
print("MAE: " + str(MAE(refIPFMatrix,prediction))) # == 0.08939740732051964
3.4 ARIMA
# DEFINING ARIMA MODEL:
def arimaModel(tensor):
    yearList = np.array(list(tensor.keys()))
    prediction = np.zeros((len(tensor[yearList[0]][0]), len(tensor[yearList[0]][0])))
    for i in range(len(tensor[yearList[0]][0])):
        for j in range(len(tensor[yearList[0]][0])):
            if j != i:
                y = []
                for year in yearList:
                    y.append(tensor[year][i][j])
                series = np.array([yearList, y])
                # using the ARIMA model from statsmodels
                # model = ARIMA(series[1], order = (0,1,1))
                model = ARIMA(series[1], order = (0,1,2)) # using this order since it performed best
                # model = ARIMA(series[1], order = (1,1,3))
                model_fit = model.fit()
                prediction[i][j] = model_fit.forecast()
    return prediction
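ARIMA(0,1,2) first-differences each OD time series and models the differenced values as a moving average of the last two forecast errors. A single-series sketch with made-up counts (statsmodels may emit convergence warnings on series this short):
# SINGLE-SERIES SKETCH (made-up passenger counts for one OD pair):
toySeries = np.array([100., 105., 112., 118., 125.])
toyModel = ARIMA(toySeries, order = (0,1,2))  # difference once, then fit an MA(2)
toyFit = toyModel.fit()
print(toyFit.forecast())  # one-step-ahead forecast for the next year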
3.4.1 ... using Fluid Analogy OD Matrices
start_time = time.time() # measuring runtime
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
fluidAnalogyTensor = {}
with os.scandir("Estimation/FluidAnalogy_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"):
            fluidAnalogyTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refFluidAnalogyMatrix = fluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
historicalFluidAnalogyTensor = fluidAnalogyTensor.copy()
del historicalFluidAnalogyTensor[max(list(fluidAnalogyTensor.keys()))]
prediction = arimaModel(historicalFluidAnalogyTensor)
predictiondf = pd.DataFrame(prediction, columns = stationNames, index = stationNames)
predictiondf.to_csv("Prediction/ARIMA-FA-" + str(max(list(fluidAnalogyTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(predictiondf.round())
print("RMSE: " + str(RMSE(refFluidAnalogyMatrix,prediction))) # == 71.99057510109759
print("MAE: " + str(MAE(refFluidAnalogyMatrix,prediction))) # == 0.11415591376817115
print("Runtime: %s seconds (%s minutes)" % (time.time() - start_time, (time.time() - start_time) / 60))
3.4.2 ... using IPF OD Matrices
start_time = time.time() # measuring runtime
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
ipfTensor = {}
with os.scandir("Estimation/IPF_Matrizen") as file_directory:
    for entry in file_directory:
        if entry.name.endswith(".csv"):
            ipfTensor[int(entry.name[3:7])] = np.loadtxt(entry.path)
refIPFMatrix = ipfTensor[max(list(ipfTensor.keys()))]
historicalipfTensor = ipfTensor.copy()
del historicalipfTensor[max(list(ipfTensor.keys()))]
prediction = arimaModel(historicalipfTensor)
predictiondf = pd.DataFrame(prediction, columns = stationNames, index = stationNames)
predictiondf.to_csv("Prediction/ARIMA-IPF-" + str(max(list(ipfTensor.keys()))) + "Matrix.csv", index = False, header = False, sep = "\t")
print(predictiondf.round())
print("RMSE: " + str(RMSE(refIPFMatrix,prediction))) # == 81.34581626724004
print("MAE: " + str(MAE(refIPFMatrix,prediction))) # == 0.1207366483006969
print("Runtime: %s seconds (%s minutes)" % (time.time() - start_time, (time.time() - start_time) / 60))
3.5 Comparison with OD Matrices from empirical 2019 Data
# DIFFERENCE MATRIX:
def absPredictionDifferenceMatrix(empirical2019matrix, predicted2019matrix):
    differenceMatrix = np.zeros((len(empirical2019matrix[0]), len(empirical2019matrix[0])))
    for i in range(len(empirical2019matrix[0])):
        for j in range(len(empirical2019matrix[0])):
            differenceMatrix[i, j] = predicted2019matrix[i, j] - empirical2019matrix[i, j]
    return differenceMatrix

def relPredictionDifferenceMatrix(empirical2019matrix, predicted2019matrix):
    differenceMatrix = np.zeros((len(empirical2019matrix[0]), len(empirical2019matrix[0])))
    for i in range(len(empirical2019matrix[0])):
        for j in range(len(empirical2019matrix[0])):
            if empirical2019matrix[i, j] != 0:
                differenceMatrix[i, j] = ((predicted2019matrix[i, j] - empirical2019matrix[i, j]) / empirical2019matrix[i, j]) * 100
            else:
                differenceMatrix[i, j] = 0
    return differenceMatrix
# HEATMAP PLOT:
def odDiffHeatmap(diffMatrix, title, mask):
    tickList = np.arange(1, 26, 1)
    figure, ax = plt.subplots(figsize=(15, 15))
    cmap = sns.diverging_palette(220, 20, as_cmap = True)
    cbar_kws = {'shrink': 0.8, 'ticks' : np.arange(-60, 70, 10)}
    annot_kws = {"fontsize": 8}
    plot = sns.heatmap(diffMatrix, square=True, vmin = -60, vmax = 60, xticklabels = tickList, yticklabels = tickList, annot=True, annot_kws = annot_kws, cmap = cmap, cbar_kws = cbar_kws, mask = mask, fmt=".1f", linewidths = .5, ax = ax)
    plt.title(label = title, pad = 10)
    ax.xaxis.tick_top()
    ax.tick_params(left=False, top=False)
    plot.set_yticklabels(plot.get_yticklabels(), rotation = 0, horizontalalignment='right')
    plt.show()
    return figure
3.5.1 Comparison with Matrix predicted using Historical Average
# HA:
# FA:
empirical2019MatrixFA = np.loadtxt("Estimation/FluidAnalogy_Matrizen/U2-2019-OD-Matrix-FluidAnalogyMethod.csv", delimiter = "\t")
predicted2019MatrixHA_FA = np.loadtxt("Prediction/HA-FA-2019Matrix.csv", delimiter = "\t")
absdiffFA = absPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixHA_FA)
reldiffFA = relPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixHA_FA)
# print(absdiffFA)
# print()
# print(reldiffFA)
print("FA min reldiff " + str(np.amin(reldiffFA)))
print("FA max reldiff " + str(np.amax(reldiffFA)))
mask = np.zeros((len(empirical2019MatrixFA), len(empirical2019MatrixFA)))
np.fill_diagonal(mask, 100)
plot = odDiffHeatmap(reldiffFA, "Relative Difference between empirical and predicted 2019 Fluid Analogy Matrices using Historical Average Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-HA-FA.pdf', bbox_inches='tight')
# IPF:
empirical2019MatrixIPF = np.loadtxt("Estimation/IPF_Matrizen/U2-2019-OD-Matrix-IPFMethod.csv", delimiter = "\t")
predicted2019MatrixHA_IPF = np.loadtxt("Prediction/HA-IPF-2019Matrix.csv", delimiter = "\t")
absdiffIPF = absPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixHA_IPF)
reldiffIPF = relPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixHA_IPF)
# print(absdiffIPF)
# print()
# print(reldiffIPF)
print("IPF min reldiff " + str(np.amin(reldiffIPF)))
print("IPF max reldiff " + str(np.amax(reldiffIPF)))
plot = odDiffHeatmap(reldiffIPF, "Relative Difference between empirical and predicted 2019 IPF Matrices using Historical Average Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-HA-IPF.pdf', bbox_inches='tight')
print()
3.5.2 Comparison with Matrix predicted using Linear Regression
# LINEAR REGRESSION:
# FA:
empirical2019MatrixFA = np.loadtxt("Estimation/FluidAnalogy_Matrizen/U2-2019-OD-Matrix-FluidAnalogyMethod.csv", delimiter = "\t")
predicted2019MatrixLinRegr_FA = np.loadtxt("Prediction/LinRegr-FA-2019Matrix.csv", delimiter = "\t")
absdiffFA = absPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixLinRegr_FA)
reldiffFA = relPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixLinRegr_FA)
# print(absdiffFA)
# print()
# print(reldiffFA)
mask = np.zeros((len(empirical2019MatrixFA), len(empirical2019MatrixFA)))
np.fill_diagonal(mask, 100)
print("FA min reldiffFA " + str(np.amin(reldiffFA)))
print("FA max reldiffFA " + str(np.amax(reldiffFA)))
plot = odDiffHeatmap(reldiffFA, "Relative Difference between empirical and predicted 2019 Fluid Analogy Matrices using Linear Regression Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-LinRegr-FA.pdf', bbox_inches='tight')
# IPF:
empirical2019MatrixIPF = np.loadtxt("Estimation/IPF_Matrizen/U2-2019-OD-Matrix-IPFMethod.csv", delimiter = "\t")
predicted2019MatrixLinRegr_IPF = np.loadtxt("Prediction/LinRegr-IPF-2019Matrix.csv", delimiter = "\t")
absdiffIPF = absPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixLinRegr_IPF)
reldiffIPF = relPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixLinRegr_IPF)
# print(absdiffIPF)
# print()
# print(reldiffIPF)
print("IPF min reldiffIPF " + str(np.amin(reldiffIPF)))
print("IPF max reldiffIPF " + str(np.amax(reldiffIPF)))
plot = odDiffHeatmap(reldiffIPF, "Relative Difference between empirical and predicted 2019 IPF Matrices using Linear Regression Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-LinRegr-IPF.pdf', bbox_inches='tight')
print()
3.5.3 Comparison with Matrix predicted using ARIMA
# ARIMA:
empirical2019MatrixFA = np.loadtxt("Estimation/FluidAnalogy_Matrizen/U2-2019-OD-Matrix-FluidAnalogyMethod.csv", delimiter = "\t")
predicted2019MatrixARIMA_FA = np.loadtxt("Prediction/ARIMA-FA-2019Matrix.csv", delimiter = "\t")
absdiffFA = absPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixARIMA_FA)
reldiffFA = relPredictionDifferenceMatrix(empirical2019MatrixFA, predicted2019MatrixARIMA_FA)
# print(absdiffFA)
# print()
# print(reldiffFA)
mask = np.zeros((len(empirical2019MatrixFA), len(empirical2019MatrixFA)))
np.fill_diagonal(mask, 100)
print("FA min reldiffFA " + str(np.amin(reldiffFA)))
print("FA max reldiffFA " + str(np.amax(reldiffFA)))
plot = odDiffHeatmap(reldiffFA, "Relative Difference between empirical and predicted 2019 Fluid Analogy Matrices using ARIMA Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-ARIMA-FA.pdf', bbox_inches='tight')
# IPF:
empirical2019MatrixIPF = np.loadtxt("Estimation/IPF_Matrizen/U2-2019-OD-Matrix-IPFMethod.csv", delimiter = "\t")
predicted2019MatrixARIMA_IPF = np.loadtxt("Prediction/ARIMA-IPF-2019Matrix.csv", delimiter = "\t")
absdiffIPF = absPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixARIMA_IPF)
reldiffIPF = relPredictionDifferenceMatrix(empirical2019MatrixIPF, predicted2019MatrixARIMA_IPF)
# print(absdiffIPF)
# print()
# print(reldiffIPF)
print("IPF min reldiffIPF " + str(np.amin(reldiffIPF)))
print("IPF max reldiffIPF " + str(np.amax(reldiffIPF)))
plot = odDiffHeatmap(reldiffIPF, "Relative Difference between empirical and predicted 2019 IPF Matrices using ARIMA Prediction", mask = mask)
plot.savefig('Prediction/Heatmap-ARIMA-IPF.pdf', bbox_inches='tight')
print()
4 Congestion Calculation
4.1 Defining Congestion Calculation + Congestion Plots
All definitions are collected here to make working with the notebook easier; otherwise, many separate definition cells would have to be updated independently to refresh the results of each method.
# DEFINITION OF TRAIN:
def getPassengerCountFromiToj(matrix, i, j):
    count = 0
    for z in range(len(matrix[0])):
        for k in range(len(matrix[0])):
            if z <= i and k >= j and i < j:
                count += matrix[z, k]
            elif z >= i and k <= j and i > j:
                count += matrix[z, k]
    return count
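In direction A, the passenger count on the section between neighboring stations i and j = i + 1 sums all OD pairs whose trip covers that section, i.e. origin <= i and destination >= j (mirrored for direction B). A toy check with the three-station matrix from the estimation examples:
# TOY LINK LOAD (made-up 3x3 OD matrix, direction A):
toyMatrix = np.array([[0., 60., 40.], [0., 0., 50.], [0., 0., 0.]])
print(getPassengerCountFromiToj(toyMatrix, 0, 1))  # 100: everyone boarding at station 1 passes this section
print(getPassengerCountFromiToj(toyMatrix, 1, 2))  # 90: the 40 through-riders plus the 50 boarding at station 2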
class Wagon:
    def __init__(self, seats, stand):
        self.seats = seats
        self.stand = stand

class Train:
    def __init__(self, wagons=None):
        if wagons is None:
            wagons = []
        self.wagons = wagons

    def addWagon(self, wagon):
        self.wagons.append(wagon)
# SETTING UP SPECIFIC TRAIN-PARAMETERS FOR HAMBURG'S SUBWAY:
DT4 = Wagon(seats=182, stand=223)
train = Train(wagons=[DT4] * 8)
averageDailyfrequency = 24 * 9 # daily average connection frequency on the subway line: 24 = train service 24 h/day; 9 = average of nine trains per hour
# hour-specific frequencies are given in chapter 5 ("scenario generation")
# TODO
# CONGESTION CALCULATOR FROM OD MATRICES:
def seatCongestionFromiToj(matrix, i, j, train, frequency):
    passengers = getPassengerCountFromiToj(matrix, i, j)
    seatCount = 0
    for wagon in train.wagons:
        seatCount += wagon.seats
    return passengers / (seatCount * frequency)

def totalCongestionFromiToj(matrix, i, j, train, frequency):
    passengers = getPassengerCountFromiToj(matrix, i, j)
    seatCount = 0
    for wagon in train.wagons:
        seatCount += wagon.seats
    standCount = 0
    for wagon in train.wagons:
        standCount += wagon.stand
    return passengers / ((seatCount + standCount) * frequency)
# calculating and returning the congestions for all neighboring station pairs in both directions
def getAllCongestions(matrix, train, frequency):
    stationsDirectionA = []
    stationsDirectionB = []
    seatCongestionDirectionA = []
    seatCongestionDirectionB = []
    totalCongestionDirectionA = []
    totalCongestionDirectionB = []
    for i in range(len(matrix[0]) - 1):
        # for a better printing layout, station indices do not start at 0 but at 1
        stationsDirectionA.append(i + 1)
        iDirectionB = len(matrix[0]) - (i + 1)
        stationsDirectionB.append(iDirectionB + 1)
        seatCongestionDirectionA.append(seatCongestionFromiToj(matrix, i, i + 1, train, frequency) * 100)
        totalCongestionDirectionA.append(totalCongestionFromiToj(matrix, i, i + 1, train, frequency) * 100)
        seatCongestionDirectionB.append(seatCongestionFromiToj(matrix, iDirectionB, iDirectionB - 1, train, frequency) * 100)
        totalCongestionDirectionB.append(totalCongestionFromiToj(matrix, iDirectionB, iDirectionB - 1, train, frequency) * 100)
    return [stationsDirectionA, seatCongestionDirectionA, totalCongestionDirectionA, stationsDirectionB, seatCongestionDirectionB, totalCongestionDirectionB]
# CONGESTION CALCULATOR FROM AGGREGATED BOARDING/ALIGHTING LISTS:
def congestionCalculatorFromAggregatedData(boardings, alights, frequency, spacePerWagon, numberOfWagons):
    congestionList = []
    for linesection in range(1, len(boardings)):
        congestion = ((sum(boardings[0:linesection]) - sum(alights[0:linesection])) / (frequency * spacePerWagon * numberOfWagons)) * 100
        congestionList.append(congestion)
    return congestionList
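The load on the section after station k is the cumulative number of boardings minus the cumulative number of alightings up to and including station k, divided by the capacity offered (frequency × space per wagon × number of wagons). A toy check with made-up numbers:
# TOY CONGESTION (made-up counts, one wagon with 10 seats, 10 trains):
toyBoardings = [100, 50, 0]
toyAlights = [0, 60, 90]
print(congestionCalculatorFromAggregatedData(toyBoardings, toyAlights, frequency = 10, spacePerWagon = 10, numberOfWagons = 1))
# section 1->2: 100 / (10 * 10 * 1) * 100 = 100 %; section 2->3: (150 - 60) / 100 * 100 = 90 %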
# METHOD-INDEPENDENT CONGESTION-DIFFERENCE CALCULATOR:
# absolute difference:
def absCongestionDifference(empiricalCongestion, calculatedCongestion):
    differenceList = []
    zipper = zip(empiricalCongestion, calculatedCongestion)
    for empiricalCongestion_i, calculatedCongestion_i in zipper:
        differenceList.append(empiricalCongestion_i - calculatedCongestion_i)
    return differenceList

# relative difference:
def relCongestionDifference(empiricalCongestion, calculatedCongestion):
    differenceList = []
    zipper = zip(empiricalCongestion, calculatedCongestion)
    for empiricalCongestion_i, calculatedCongestion_i in zipper:
        differenceList.append(((calculatedCongestion_i - empiricalCongestion_i) / empiricalCongestion_i) * 100)
    return differenceList

# note: this function relies on the globals columns, empiricalCongestion_A/B and the diff-output DataFrames, which are defined in section 4.5.2
def differenceCalculator(direction, filedirectory, name):
    if direction == "A":
        calculatedCongestion = pd.read_csv(filedirectory, names = columns, delimiter="\t", header = None)["seatCongestionDirectionA"].tolist()
        absDifference = absCongestionDifference(empiricalCongestion_A, calculatedCongestion)
        relDifference = relCongestionDifference(empiricalCongestion_A, calculatedCongestion)
        absDiffOutputA.insert(len(absDiffOutputA.columns), "absDiff-" + str(name), absDifference)
        relDiffOutputA.insert(len(relDiffOutputA.columns), "relDiff-" + str(name), relDifference)
    elif direction == "B":
        calculatedCongestion = pd.read_csv(filedirectory, names = columns, delimiter="\t", header = None)["seatCongestionDirectionB"].tolist()
        absDifference = absCongestionDifference(empiricalCongestion_B, calculatedCongestion)
        relDifference = relCongestionDifference(empiricalCongestion_B, calculatedCongestion)
        absDiffOutputB.insert(len(absDiffOutputB.columns), "absDiff-" + str(name), absDifference)
        relDiffOutputB.insert(len(relDiffOutputB.columns), "relDiff-" + str(name), relDifference)
# CONGESTION BAR PLOTS:
def barplotCongestionA(congestions, predictionMethod):
    figure = plt.figure(figsize=(10.0, 6.0))
    plt.bar(congestions[0], congestions[1], width = 0.85, label="Seat")
    for index, value in enumerate(congestions[1]):
        plt.text(index + 1, value + 0.25, float(value.round(1)), size = "8", fontfamily = "monospace", horizontalalignment = "center", color = "grey")
    plt.bar(congestions[0], congestions[2], width = 0.85, label="Total")
    for index, value in enumerate(congestions[2]):
        plt.text(index + 1, value + 0.25, float(value.round(1)), size = "8", fontfamily = "monospace", horizontalalignment = "center", color = "white")
    plt.title("Predicted Congestion for 2019 (Direction A: " + str(predictionMethod) + ")")
    plt.xlim(min(congestions[0]) - 0.5, max(congestions[0]) + 0.5) # limits follow the station numbers (congestions[0]), not the congestion values
    # xticks are assigned a numeric label to shift the station numbers in a way that the bars can be recognized as the line-section congestions between a station i and a station i+1
    xlabelListA = [int(x) for x in np.arange(min(congestions[0]), max(congestions[0]) + 2, 1)]
    plt.xticks(ticks = np.arange(min(congestions[0]) - 0.5, max(congestions[0]) + 1.5, 1), labels = xlabelListA)
    plt.xlabel("station number")
    plt.ylim(0, 20)
    plt.ylabel("Congestion on line section / %")
    plt.legend()
    plt.show()
    return figure

def barplotCongestionB(congestions, predictionMethod):
    figure = plt.figure(figsize=(10.0, 6.0))
    plt.bar(congestions[3], congestions[4], width = 0.85, label="Seat")
    for index, value in enumerate(congestions[4]):
        plt.text(len(congestions[3]) - index + 1, value + 0.25, float(value.round(1)), size = "8", fontfamily = "monospace", horizontalalignment = "center", color = "grey")
    plt.bar(congestions[3], congestions[5], width = 0.85, label="Total")
    for index, value in enumerate(congestions[5]):
        plt.text(len(congestions[3]) - index + 1, value + 0.25, float(value.round(1)), size = "8", fontfamily = "monospace", horizontalalignment = "center", color = "white")
    plt.title("Predicted Congestion for 2019 (Direction B: " + str(predictionMethod) + ")")
    plt.xlim(max(congestions[3]) + 0.5, min(congestions[3]) + 0.5)
    # xticks are assigned a numeric label to shift the station numbers in a way that the bars can be recognized as the line-section congestions between a station i and a station i-1
    xlabelListB = [int(x) for x in np.arange(min(congestions[3]) - 1, max(congestions[3]) + 1, 1)]
    plt.xticks(ticks = np.arange(1.5, 26.5, 1), labels = xlabelListB)
    plt.xlabel("station number")
    plt.ylim(0, 20)
    plt.ylabel("Congestion on line section / %")
    plt.legend()
    plt.show()
    return figure
# CONGESTION HEATMAPS:
def combinedCongestionHeatmap(direction, congestionMatrix, yticklabels, title, colours, minvalue, maxvalue, mask, shrinkvalue, figsize):
    figure, ax = plt.subplots(figsize = figsize)
    if direction == "A":
        stationlabel1 = [str(element) for element in list(np.arange(1, len(congestionMatrix[0]) + 1, 1))]
        stationlabel2 = [str(element) for element in list(np.arange(2, len(congestionMatrix[0]) + 2, 1))]
        xticklabels = list(map(" → ".join, zip(stationlabel1, stationlabel2)))
    else:
        stationlabel1 = [str(element) for element in list(np.arange(len(congestionMatrix[0]) + 1, 1, -1))]
        stationlabel2 = [str(element) for element in list(np.arange(len(congestionMatrix[0]) + 0, 0, -1))]
        xticklabels = list(map(" → ".join, zip(stationlabel1, stationlabel2)))
        ax.invert_xaxis()
    cbar_kws = {'shrink': shrinkvalue, 'ticks' : np.arange(minvalue, maxvalue + 10, 2.5), 'orientation': 'horizontal', 'pad' : 0.02, 'aspect': 30}
    annot_kws = {"fontsize": 8}
    plot = sns.heatmap(congestionMatrix, square=True, vmin = minvalue, vmax = maxvalue, xticklabels = xticklabels, yticklabels = yticklabels, annot=True, annot_kws = annot_kws, mask = mask, cmap = colours, cbar_kws = cbar_kws, fmt = ".1f", linewidths = .5, ax = ax)
    plt.title(label = title, pad = 15)
    ax.xaxis.tick_top()
    plot.set_xticklabels(plot.get_xticklabels(), rotation = 90, horizontalalignment = 'center', verticalalignment = 'baseline')
    plot.set_yticklabels(plot.get_yticklabels(), rotation = 0, horizontalalignment = 'right', verticalalignment = 'center')
    ax.tick_params(left = False, top = False)
    plt.show()
    return figure
4.2 Congestion comparison with OD Matrix predicted using Historical Average
4.2.1 Prediction from Fluid Analogy Matrices
# METHOD SPECIFICATION:
predictionMethod = "Historical Average from Fluid Analogy Matrices"
# example for fluid analogy OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/HA-FA-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/HA-FA-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/HA-FA-A.pdf')
plotB.savefig('Congestion/HA-FA-B.pdf')
4.2.2 Prediction from IPF Matrices
# METHOD SPECIFICATION:
predictionMethod = "Historical Average from IPF Matrices"
# example for the IPF OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/HA-IPF-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/HA-IPF-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/HA-IPF-A.pdf')
plotB.savefig('Congestion/HA-IPF-B.pdf')
4.3 Congestion comparison with OD Matrix predicted using Linear Regression
4.3.1 Prediction from Fluid Analogy Matrices
# METHOD SPECIFICATION:
predictionMethod = "Linear Regression from Fluid Analogy Matrices"
# example for fluid analogy OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/LinRegr-FA-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/LinRegr-FA-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/LinRegr-FA-A.pdf')
plotB.savefig('Congestion/LinRegr-FA-B.pdf')
4.3.2 Prediction from IPF Matrices
# METHOD SPECIFICATION:
predictionMethod = "Linear Regression from IPF Matrices"
# example for fluid analogy OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/LinRegr-IPF-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/LinRegr-IPF-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/LinRegr-IPF-A.pdf')
plotB.savefig('Congestion/LinRegr-IPF-B.pdf')
4.4 Congestion comparison with OD Matrix predicted using ARIMA
4.4.1 Prediction from Fluid Analogy Matrices
# METHOD SPECIFICATION:
predictionMethod = "ARIMA from Fluid Analogy Matrices"
# example for fluid analogy OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/ARIMA-FA-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/ARIMA-FA-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/ARIMA-FA-A.pdf')
plotB.savefig('Congestion/ARIMA-FA-B.pdf')
4.4.2 Prediction from IPF Matrices
# METHOD SPECIFICATION:
predictionMethod = "ARIMA from IPF Matrices"
# example for fluid analogy OD matrix of 2019
matrix2019 = np.loadtxt("Prediction/ARIMA-IPF-2019Matrix.csv")
congestions = getAllCongestions(matrix2019, train, averageDailyfrequency)
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/ARIMA-IPF-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/ARIMA-IPF-A.pdf')
plotB.savefig('Congestion/ARIMA-IPF-B.pdf')
4.5 Difference calculation with empirical 2019 Boarding-Alighting Data
4.5.1 2019 Congestion Plots from empirical 2019 Boarding-Alighting Data
# REFERENCE CONGESTION FROM AGGREGATED BOARDING/ALIGHTING DATA:
counts = pd.read_csv("Boarding_Alighting_Data/U2-2019_adjusted.csv", delimiter="\t", names = ["StationNrsA", "EinstiegA", "AusstiegA","StationNrsB", "EinstiegB", "AusstiegB"])
boardingsA = counts["EinstiegA"].to_list()
alightsA = counts["AusstiegA"].to_list()
boardingsB = counts["EinstiegB"].to_list()
alightsB = counts["AusstiegB"].to_list()
stationsDirectionA = np.arange(1,25,1)
stationsDirectionB = np.arange(25,1,-1)
# CALCULATION:
seatCongestionAList = congestionCalculatorFromAggregatedData(boardingsA, alightsA, averageDailyfrequency, 182, 8)
seatCongestionBList = congestionCalculatorFromAggregatedData(boardingsB, alightsB, averageDailyfrequency, 182, 8)
totalCongestionAList = congestionCalculatorFromAggregatedData(boardingsA, alightsA, averageDailyfrequency, (223 + 182), 8)
totalCongestionBList = congestionCalculatorFromAggregatedData(boardingsB, alightsB, averageDailyfrequency, (223 + 182), 8)
# PLOT:
predictionMethod = "with empirical Boarding-Alighting Data"
congestions = np.vstack((stationsDirectionA, seatCongestionAList, totalCongestionAList, stationsDirectionB, seatCongestionBList, totalCongestionBList))
congestionsDataFrame = pd.DataFrame(np.array(congestions).transpose(), columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"])
congestionsDataFrame.to_csv("Congestion/Empirical-CongestionInfo.csv", index = False, header = False, sep = "\t")
# print(congestionsDataFrame)
plotA = barplotCongestionA(congestions, predictionMethod)
plotB = barplotCongestionB(congestions, predictionMethod)
plotA.savefig('Congestion/Empirical-A.pdf')
plotB.savefig('Congestion/Empirical-B.pdf')
# CONSISTENCY TEST:
# matrix = np.loadtxt("Estimation/IPF_Matrizen/U2-2019-OD-Matrix-IPFMethod.csv", delimiter = "\t")
# for i in range(0, 25):
#     col = matrix[i, :]
#     print(sum(col[0:i + 1]).round(10))
4.5.2 Comparison Calculations
overview of the seat and total congestions (condensed view of the data obtained in sections 4.2 - 4.4)
# INPUT:
columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"]
empiricalCongestion = pd.read_csv("Congestion/Empirical-CongestionInfo.csv", names = columns, delimiter = "\t")
HA_FA_Congestion = pd.read_csv("Congestion/HA-FA-CongestionInfo.csv", names = columns, delimiter = "\t")
HA_IPF_Congestion = pd.read_csv("Congestion/HA-IPF-CongestionInfo.csv", names = columns, delimiter = "\t")
linRegr_FA_Congestion = pd.read_csv("Congestion/LinRegr-FA-CongestionInfo.csv", names = columns, delimiter = "\t")
linRegr_IPF_Congestion = pd.read_csv("Congestion/LinRegr-IPF-CongestionInfo.csv", names = columns, delimiter = "\t")
ARIMA_FA_Congestion = pd.read_csv("Congestion/ARIMA-FA-CongestionInfo.csv", names = columns, delimiter = "\t")
ARIMA_IPF_Congestion = pd.read_csv("Congestion/ARIMA-IPF-CongestionInfo.csv", names = columns, delimiter = "\t")
empiricalSeatCongestion_A = empiricalCongestion["seatCongestionDirectionA"].tolist()
HA_FA_SeatCongestionA = HA_FA_Congestion["seatCongestionDirectionA"].tolist()
HA_IPF_SeatCongestionA = HA_IPF_Congestion["seatCongestionDirectionA"].tolist()
linRegr_FA_SeatCongestionA = linRegr_FA_Congestion["seatCongestionDirectionA"].tolist()
linRegr_IPF_SeatCongestionA = linRegr_IPF_Congestion["seatCongestionDirectionA"].tolist()
ARIMA_FA_SeatCongestionA = ARIMA_FA_Congestion["seatCongestionDirectionA"].tolist()
ARIMA_IPF_SeatCongestionA = ARIMA_IPF_Congestion["seatCongestionDirectionA"].tolist()
empiricalSeatCongestion_B = empiricalCongestion["seatCongestionDirectionB"].tolist()
HA_FA_SeatCongestionB = HA_FA_Congestion["seatCongestionDirectionB"].tolist()
HA_IPF_SeatCongestionB = HA_IPF_Congestion["seatCongestionDirectionB"].tolist()
linRegr_FA_SeatCongestionB = linRegr_FA_Congestion["seatCongestionDirectionB"].tolist()
linRegr_IPF_SeatCongestionB = linRegr_IPF_Congestion["seatCongestionDirectionB"].tolist()
ARIMA_FA_SeatCongestionB = ARIMA_FA_Congestion["seatCongestionDirectionB"].tolist()
ARIMA_IPF_SeatCongestionB = ARIMA_IPF_Congestion["seatCongestionDirectionB"].tolist()
empiricalTotalCongestion_A = empiricalCongestion["totalCongestionDirectionA"].tolist()
HA_FA_TotalCongestionA = HA_FA_Congestion["totalCongestionDirectionA"].tolist()
HA_IPF_TotalCongestionA = HA_IPF_Congestion["totalCongestionDirectionA"].tolist()
linRegr_FA_TotalCongestionA = linRegr_FA_Congestion["totalCongestionDirectionA"].tolist()
linRegr_IPF_TotalCongestionA = linRegr_IPF_Congestion["totalCongestionDirectionA"].tolist()
ARIMA_FA_TotalCongestionA = ARIMA_FA_Congestion["totalCongestionDirectionA"].tolist()
ARIMA_IPF_TotalCongestionA = ARIMA_IPF_Congestion["totalCongestionDirectionA"].tolist()
empiricalTotalCongestion_B = empiricalCongestion["totalCongestionDirectionB"].tolist()
HA_FA_TotalCongestionB = HA_FA_Congestion["totalCongestionDirectionB"].tolist()
HA_IPF_TotalCongestionB = HA_IPF_Congestion["totalCongestionDirectionB"].tolist()
linRegr_FA_TotalCongestionB = linRegr_FA_Congestion["totalCongestionDirectionB"].tolist()
linRegr_IPF_TotalCongestionB = linRegr_IPF_Congestion["totalCongestionDirectionB"].tolist()
ARIMA_FA_TotalCongestionB = ARIMA_FA_Congestion["totalCongestionDirectionB"].tolist()
ARIMA_IPF_TotalCongestionB = ARIMA_IPF_Congestion["totalCongestionDirectionB"].tolist()
empty = [0]*24
# OUTPUT:
seatCongestionMatrixA = np.row_stack((HA_FA_SeatCongestionA, HA_IPF_SeatCongestionA, linRegr_FA_SeatCongestionA, linRegr_IPF_SeatCongestionA, ARIMA_FA_SeatCongestionA, ARIMA_IPF_SeatCongestionA, empty, empiricalSeatCongestion_A))
seatCongestionMatrixB = np.row_stack((HA_FA_SeatCongestionB, HA_IPF_SeatCongestionB, linRegr_FA_SeatCongestionB, linRegr_IPF_SeatCongestionB, ARIMA_FA_SeatCongestionB, ARIMA_IPF_SeatCongestionB, empty, empiricalSeatCongestion_B))
totalCongestionMatrixA = np.row_stack((HA_FA_TotalCongestionA, HA_IPF_TotalCongestionA, linRegr_FA_TotalCongestionA, linRegr_IPF_TotalCongestionA, ARIMA_FA_TotalCongestionA, ARIMA_IPF_TotalCongestionA, empty, empiricalTotalCongestion_A))
totalCongestionMatrixB = np.row_stack((HA_FA_TotalCongestionB, HA_IPF_TotalCongestionB, linRegr_FA_TotalCongestionB, linRegr_IPF_TotalCongestionB, ARIMA_FA_TotalCongestionB, ARIMA_IPF_TotalCongestionB, empty, empiricalTotalCongestion_B))
methods = ["Historical Average from Fluid Analogy Matrices", "Historical Average from IPF Matrices", "Linear Regression from Fluid Analogy Matrices", "Linear Regression from IPF Matrices", "ARIMA from Fluid Analogy Matrices", "ARIMA from IPF Matrices", "", "$\it{Empirical}$ $\it{Values}$"]
seatMatrixA = pd.DataFrame(seatCongestionMatrixA, index = methods)
seatMatrixB = pd.DataFrame(seatCongestionMatrixB, index = methods)
# print(seatMatrixA)
# print()
# print(seatMatrixB)
seatMatrixA.to_csv("Congestion/SeatCongestion-Matrix-A.csv", sep = "\t", index = False, header = False)
seatMatrixB.to_csv("Congestion/SeatCongestion-Matrix-B.csv", sep = "\t", index = False, header = False)
totalMatrixA = pd.DataFrame(totalCongestionMatrixA, index = methods)
totalMatrixB = pd.DataFrame(totalCongestionMatrixB, index = methods)
# print(totalMatrixA)
# print()
# print(totalMatrixB)
totalMatrixA.to_csv("Congestion/TotalCongestion-Matrix-A.csv", sep = "\t", index = False, header = False)
totalMatrixB.to_csv("Congestion/TotalCongestion-Matrix-B.csv", sep = "\t", index = False, header = False)
yminSeat = 0
ymaxSeat = 15
yminTotal = 0
ymaxTotal = 15
maskA = (seatCongestionMatrixA == 0)
maskB = (seatCongestionMatrixB == 0)
plot1 = combinedCongestionHeatmap("A", seatCongestionMatrixA, methods, "Seat Congestion Overview in Direction A", YlOrBr_9.mpl_colormap, yminSeat, ymaxSeat, maskA, 1, (12.5, 15))
plot2 = combinedCongestionHeatmap("B", seatCongestionMatrixB, methods, "Seat Congestion Overview in Direction B", YlOrBr_9.mpl_colormap, yminSeat, ymaxSeat, maskB, 1, (12.5, 15))
# In our final evaluation, only the seat congestions are compared since seat and total congestions always have the same proportions to one another (total congestion ≈ 0.45 * seat congestion, because 182 of the 405 places per wagon are seats).
plot3 = combinedCongestionHeatmap("A", totalCongestionMatrixA, methods, "Total Congestion Overview in Direction A", YlOrBr_9.mpl_colormap, yminSeat, ymaxTotal, maskA, 1, (12.5, 15))
plot4 = combinedCongestionHeatmap("B", totalCongestionMatrixB, methods, "Total Congestion Overview in Direction B", YlOrBr_9.mpl_colormap, yminSeat, ymaxTotal, maskB, 1, (12.5, 15))
plot1.savefig("Congestion/SeatCongestion-absVal-Overview_A.pdf", bbox_inches='tight')
plot2.savefig("Congestion/SeatCongestion-absVal-Overview_B.pdf", bbox_inches='tight')
plot3.savefig("Congestion/TotalCongestion-absVal-Overview_A.pdf", bbox_inches='tight')
plot4.savefig("Congestion/TotalCongestion-absVal-Overview_B.pdf", bbox_inches='tight')
print()
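As a quick numerical check of the proportionality claim above (a minimal sketch, assuming the matrices from the cell above are still in memory; the blank separator row is excluded via the nonzero filter):
ratio = np.divide(seatCongestionMatrixA, totalCongestionMatrixA, out = np.zeros(totalCongestionMatrixA.shape), where = totalCongestionMatrixA != 0)
print(np.allclose(ratio[totalCongestionMatrixA != 0], 0.45)) # True if seat congestion = 0.45 * total congestion holds everywhere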
Differences between empirical and predicted Congestion Values (absolute and relative Differences)
# INPUT:
columns = ["stationsDirectionA", "seatCongestionDirectionA", "totalCongestionDirectionA", "stationsDirectionB", "seatCongestionDirectionB", "totalCongestionDirectionB"]
# the empirical congestion info is read once and reused for all four columns
empiricalInfo = pd.read_csv("Congestion/Empirical-CongestionInfo.csv", names = columns, delimiter = "\t", header = None)
linesectionA = [int(x) for x in empiricalInfo["stationsDirectionA"].tolist()]
linesectionB = [int(x) for x in empiricalInfo["stationsDirectionB"].tolist()]
empiricalCongestion_A = empiricalInfo["seatCongestionDirectionA"].tolist()
empiricalCongestion_B = empiricalInfo["seatCongestionDirectionB"].tolist()
# print(linesectionA)
# print(linesectionB)
# print(empiricalCongestion_A)
# print(empiricalCongestion_B)
# CALCULATION:
absDiffOutputA = pd.DataFrame({"LinesectionA" : linesectionA})
relDiffOutputA = pd.DataFrame({"LinesectionA" : linesectionA})
absDiffOutputB = pd.DataFrame({"LinesectionB" : linesectionB})
relDiffOutputB = pd.DataFrame({"LinesectionB" : linesectionB})
# HA:
differenceCalculator("A", "Congestion/HA-FA-CongestionInfo.csv", "HA_FA_A")
differenceCalculator("B", "Congestion/HA-FA-CongestionInfo.csv", "HA_FA_B")
differenceCalculator("A", "Congestion/HA-IPF-CongestionInfo.csv", "HA_IPF_A")
differenceCalculator("B", "Congestion/HA-IPF-CongestionInfo.csv", "HA_IPF_B")
# LinRegr:
differenceCalculator("A", "Congestion/LinRegr-FA-CongestionInfo.csv", "LinRegr_FA_A")
differenceCalculator("B", "Congestion/LinRegr-FA-CongestionInfo.csv", "LinRegr_FA_B")
differenceCalculator("A", "Congestion/LinRegr-IPF-CongestionInfo.csv", "LinRegr_IPF_A")
differenceCalculator("B", "Congestion/LinRegr-IPF-CongestionInfo.csv", "LinRegr_IPF_B")
# ARIMA:
differenceCalculator("A", "Congestion/ARIMA-FA-CongestionInfo.csv", "arima_FA_A")
differenceCalculator("B", "Congestion/ARIMA-FA-CongestionInfo.csv", "arima_FA_B")
differenceCalculator("A", "Congestion/ARIMA-IPF-CongestionInfo.csv", "arima_IPF_A")
differenceCalculator("B", "Congestion/ARIMA-IPF-CongestionInfo.csv", "arima_IPF_B")
# SAVING:
# print(relDiffOutputA)
# print()
# print(relDiffOutputB)
# print()
# print(absDiffOutputA)
# print()
# print(absDiffOutputB)
relDiffOutputA.to_csv("Congestion/Diff-Rel-A.csv", sep = "\t", header = False, index = False)
relDiffOutputB.to_csv("Congestion/Diff-Rel-B.csv", sep = "\t", header = False, index = False)
absDiffOutputA.to_csv("Congestion/Diff-Abs-A.csv", sep = "\t", header = False, index = False)
absDiffOutputB.to_csv("Congestion/Diff-Abs-B.csv", sep = "\t", header = False, index = False)
# PLOT:
methodListsA = relDiffOutputA.values[:, 1:].T
methodListsB = relDiffOutputB.values[:, 1:].T
ymin = -17.5
ymax = 17.5
mask = (methodListsA == -100) # -100 = dummy value
methodNames = ["Historical Average from Fluid Analogy Matrices", "Historical Average from IPF Matrices", "Linear Regression from Fluid Analogy Matrices", "Linear Regression from IPF Matrices", "ARIMA from Fluid Analogy Matrices", "ARIMA from IPF Matrices"]
plotA = combinedCongestionHeatmap("A", methodListsA, methodNames, "Differences between predicted and estimated Linesection Congestions in Direction A", sns.diverging_palette(220, 20, as_cmap = True), ymin, ymax, mask, 1, (12.5,15))
plotB = combinedCongestionHeatmap("B", methodListsB, methodNames, "Differences between predicted and estimated Linesection Congestions in Direction B", sns.diverging_palette(220, 20, as_cmap = True), ymin, ymax, mask, 1, (12.5,15))
plotA.savefig("Congestion/Congestion-relDiff-Overview_A.pdf", bbox_inches='tight')
plotB.savefig("Congestion/Congestion-relDiff-Overview_B.pdf", bbox_inches='tight')
print()
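differenceCalculator is defined earlier in the notebook, so only its output is used here. For reference, a signed relative difference in percent between a predicted and an empirical value could be computed as follows (relDiffPercent is a hypothetical helper for illustration, not the function used above):
def relDiffPercent(predicted, empirical):
    # signed deviation of the prediction from the empirical value, in percent
    return (predicted - empirical) / empirical * 100
print(relDiffPercent(11.5, 10.0)) # 15.0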
5 Scenario Generation
5.1 Scenario Creation
# ADJUSTMENT DEFINITION: recursively rescales and shifts the counts until total boardings equal total alights and the cumulative alights never exceed the cumulative boardings of the preceding stations
def boardingAlightAdjuster(boardings, alights, output, convError = 0.00001):
changed = False
#print(boardings)
#print(alights)
if abs(np.sum(boardings) - np.sum(alights)) > convError:
changed = True
# rescale both vectors so that they share the same total
totalSum = np.sum(boardings) + np.sum(alights)
boardings = boardings*((totalSum/2)/np.sum(boardings))
alights = alights*((totalSum/2)/np.sum(alights))
for i in range(1,len(alights)):
# surplus of the cumulative alights over the boardings of all preceding stations
diff = np.sum(alights[0:i]) - np.sum(boardings[0:(i-1)])
if diff > 0 and alights[i] >= diff and i <= 23:
changed = True
# move the surplus from the alights to the boardings of the current station
alights[i] = alights[i] - diff
boardings[i] = boardings[i] + diff
if changed:
boardingAlightAdjuster(boardings, alights, output, convError)
else:
output.append(boardings)
output.append(alights)
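A small toy run with made-up counts (hypothetical numbers, purely to illustrate the invariants the adjuster enforces):
toyBoardings = np.array([10.0, 5.0, 3.0, 0.0])
toyAlights = np.array([0.0, 4.0, 6.0, 9.0])
toyOutput = []
boardingAlightAdjuster(toyBoardings, toyAlights, toyOutput)
print(round(np.sum(toyOutput[0]) - np.sum(toyOutput[1]), 6)) # ~0: totals are balanced after the adjustment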
# Dividing Boarding and Alighting Counts from 2019 based on Google Maps Crowdedness Data into hourly Intervals according to the OD Estimation Method Constraints
# MATRIX WRAPPER TO LINK CUSTOM COLUMN AND ROW NAMES/INDICES:
class timeMatrix():
# / hour1 hour2 hour3 ... hourN
# station1 X X X X
# station2 X X X X
# station3 X X X X
# ...
# stationN X X X X
def __init__(self, stationIDs, hours):
self.matrix = np.zeros((len(stationIDs), len(hours)))
self.hours = hours
self.stationIDs = stationIDs
def __str__(self):
return np.array2string(self.matrix)
def setValue(self, stationID, hour, value):
self.matrix[self.stationIDs.index(stationID), self.hours.index(hour)] = value
def getValue(self, stationID, hour):
return self.matrix[self.stationIDs.index(stationID), self.hours.index(hour)]
def getValueListByHour(self, hour):
return self.matrix[:, self.hours.index(hour)]
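A minimal usage sketch of the wrapper (toy station IDs and hours, not taken from the data):
tm = timeMatrix([101, 102, 103], [6, 7, 8])
tm.setValue(102, 7, 42.0)
print(tm.getValue(102, 7)) # 42.0
print(tm.getValueListByHour(7)) # [ 0. 42.  0.]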
# PUTTING CROWDEDNESS DATA INTO A DICTIONARY-FORMAT:
stationData = pd.read_csv("Station_Info/U2-Station_CrowdednessInfo.csv", delimiter=";", names = ["ID", "Crowdedness"])
stationIDs = stationData["ID"].to_list()
crowdednessData = stationData["Crowdedness"].to_list()
dayDic = {7: "Sunday", 1: "Monday", 2:"Tuesday", 3:"Wednesday", 4:"Thursday", 5:"Friday", 6:"Saturday"}
subwayCrowdednessDic = {}
for k in range(len(crowdednessData)):
parsedStationData = ast.literal_eval(crowdednessData[k]) # a separate name keeps the stationData DataFrame above intact
stationCrowdednessDic = {}
for j in range(len(parsedStationData)):
dayIndex = parsedStationData[j][0] # Google Maps weekday index
dayData = parsedStationData[j][1] # Google Maps crowdedness data content [[4 o'clock, ...], [5 o'clock, ...], ...]
crowdednessDic = {}
for i in range(len(dayData)):
hourData = dayData[i]
hour = hourData[0]
crowdedness = hourData[1]
crowdednessDic[int(hour)] = crowdedness
if dayDic[dayIndex] != "Saturday" and dayDic[dayIndex] != "Sunday": # only workday information is relevant
stationCrowdednessDic[dayDic[dayIndex]] = crowdednessDic
subwayCrowdednessDic[stationIDs[k]] = stationCrowdednessDic
# print(subwayCrowdednessDic)
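# For orientation, the resulting structure looks like this (illustrative values):
# subwayCrowdednessDic = {1: {"Monday": {4: 12, 5: 31, ...}, ..., "Friday": {...}}, 2: {"Monday": {...}, ...}, ...}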
# GETTING TOTAL CROWDEDNESS FOR THE WHOLE WEEK AND FOR HOURS FOR ALL STATIONS:
subwayTotalCrowdedness = {}
for stationID, station in subwayCrowdednessDic.items():
totalCrowdedness = 0
for day in station.values():
for crowdedness in day.values():
totalCrowdedness += crowdedness
# print(totalCrowdedness)
subwayTotalCrowdedness[stationID] = totalCrowdedness
# print(subwayTotalCrowdedness)
# SPLITTING COUNTS TO DAYS AND HOURS ACCORDING TO GOOGLE MAPS CROWDEDNESS INFORMATION
# the result is a dictionary of weekdays, each containing a dictionary with the alights/boardings for directions A/B as keys
# the values are the corresponding timeMatrix objects
counts = pd.read_csv("Boarding_Alighting_Data/U2-2019_adjusted.csv", delimiter="\t", names = ["StationNrsA", "EinstiegA", "AusstiegA","StationNrsB", "EinstiegB", "AusstiegB"])
boardingsADic = dict(zip(counts["StationNrsA"].to_list(), counts["EinstiegA"].to_list()))
alightsADic = dict(zip(counts["StationNrsA"].to_list(), counts["AusstiegA"].to_list()))
boardingsBDic = dict(zip(counts["StationNrsB"].to_list(), counts["EinstiegB"].to_list()))
alightsBDic = dict(zip(counts["StationNrsB"].to_list(), counts["AusstiegB"].to_list()))
# the keys for hours and weekdays are always the same - even if some hour lists are ordered differently, the data stays correct because the timeMatrix object looks up values by key
# all other matrices are sorted by this initial hour list
weekdays = list(subwayCrowdednessDic[1].keys())
hourList = list(subwayCrowdednessDic[1]['Monday'].keys())
weekDayDic = {}
for weekday in weekdays:
tMatrixBoardingsA = timeMatrix(list(boardingsADic.keys()), hourList)
tMatrixAlightsA = timeMatrix(list(boardingsADic.keys()), hourList)
tMatrixBoardingsB = timeMatrix(list(boardingsBDic.keys()), hourList)
tMatrixAlightsB = timeMatrix(list(boardingsBDic.keys()), hourList)
for stationID in stationIDs:
weekBoardingCountA = boardingsADic[stationID] * 5 # 5 = workdays per week
weekAlightCountA = alightsADic[stationID] * 5
weekBoardingCountB = boardingsBDic[stationID] * 5
weekAlightCountB = alightsBDic[stationID] * 5
for hour in hourList:
valueBoardingA = (subwayCrowdednessDic[stationID][weekday][hour]/subwayTotalCrowdedness[stationID])*weekBoardingCountA
tMatrixBoardingsA.setValue(stationID, hour, valueBoardingA)
valueAlightsA = (subwayCrowdednessDic[stationID][weekday][hour]/subwayTotalCrowdedness[stationID])*weekAlightCountA
tMatrixAlightsA.setValue(stationID, hour, valueAlightsA)
valueBoardingB = (subwayCrowdednessDic[stationID][weekday][hour]/subwayTotalCrowdedness[stationID])*weekBoardingCountB
tMatrixBoardingsB.setValue(stationID, hour, valueBoardingB)
valueAlightsB = (subwayCrowdednessDic[stationID][weekday][hour]/subwayTotalCrowdedness[stationID])*weekAlightCountB
tMatrixAlightsB.setValue(stationID, hour, valueAlightsB)
weekDayDic[weekday] = {}
weekDayDic[weekday]["BoardingsA"] = tMatrixBoardingsA
weekDayDic[weekday]["AlightsA"] = tMatrixAlightsA
weekDayDic[weekday]["BoardingsB"] = tMatrixBoardingsB
weekDayDic[weekday]["AlightsB"] = tMatrixAlightsB
# print(weekDayDic["Monday"]["AlightsB"].matrix.round())
# print(np.sum(weekDayDic["Monday"]["AlightsB"].matrix[:,10]-weekDayDic["Monday"]["BoardingsB"].matrix[:,10])/np.sum(weekDayDic["Monday"]["AlightsB"].matrix[:,10]))
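To make the split explicit with toy numbers (hypothetical values, not taken from the data): a station whose crowdedness weights sum to 100 over the five workdays, with weight 5 in a given hour, and 400 boardings per workday (i.e. 2000 per week), is assigned 5/100 of those 2000 boardings in that hour:
print((5 / 100) * (400 * 5)) # 100.0 boardings in this hour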
# objective: keep the adjusted vectors as close as possible to the original boardings/alights (read from the enclosing scope)
def objectiveFunction(x):
xs = np.split(x, 2)
length1 = np.linalg.norm(boardings-xs[0])
length2 = np.linalg.norm(alights-xs[1])
return length1 + length2
# constraint 1: equal boardings and alights
def constraint1(x):
xs = np.split(x, 2)
return np.sum(np.abs(xs[0] - xs[1]))
# constraint 2: adjust values if, counting from the beginning of the journey, more alights than boardings have occurred at a station
def constraint2(x):
xs = np.split(x, 2)
total = 0
for i in range(len(xs[1])):
if np.sum(xs[1][0:i + 2]) > np.sum(xs[0][0:i + 1]):
total += (np.sum(xs[1][0:i + 2]) - np.sum(xs[0][0:i + 1]))
return total
# print(weekDayDic["Monday"]["AlightsB"].matrix[:,6])
con1 = {"type" : "eq", "fun" : constraint1}
con2 = {"type" : "eq", "fun" : constraint2}
cons = [con1, con2]
# constraint: non-negative values everywhere
bnds = [(0,None)]*len(stationIDs)*2
# constraint: nobody boards at the last station and nobody alights at the first station
bnds[len(stationIDs)-1] = (0,0)
bnds[len(stationIDs)] = (0,0)
bnds = tuple(bnds)
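For reference, a minimal sketch of how the commented-out scipy route would tie these pieces together (kept as comments because the scipy import is disabled in the setup cell; the variable names mirror the commented fragments in the loop below):
# from scipy.optimize import minimize
# x0 = np.concatenate((boardings * 1.2, alights * 1.2))
# res = minimize(objectiveFunction, x0, method = 'trust-constr', constraints = cons, bounds = bnds)
# boardingsAdj, alightsAdj = np.split(res.x, 2)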
for day in weekdays:
for hour in hourList:
boardings = weekDayDic[day]["BoardingsA"].getValueListByHour(hour)
alights = weekDayDic[day]["AlightsA"].getValueListByHour(hour)
# x0 = np.concatenate((boardings*1.2, alights*1.2))
# resA = minimize(objectiveFunction, x0, method='trust-constr', options={'disp': True}, constraints=cons, bounds=bnds)
# print(objectiveFunction(resA.x))
# print(resA.x)
# print(boardings)
# print(alights)
outputA = []
boardingAlightAdjuster(boardings, alights, outputA)
boardings = weekDayDic[day]["BoardingsB"].getValueListByHour(hour)
alights = weekDayDic[day]["AlightsB"].getValueListByHour(hour)
# x0 = np.concatenate((boardings*1.2, alights*1.2))
# resB = minimize(objectiveFunction, x0, method='trust-constr', options={'disp': True}, constraints=cons, bounds=bnds)
# print(resB.x)
outputB = []
boardingAlightAdjuster(boardings, alights, outputB)
# xsA = np.split(resA.x,2)
# boardingsA = xsA[0]
# alightsA = xsA[1]
boardingsA = outputA[0]
alightsA = outputA[1]
idsA = weekDayDic[day]["BoardingsA"].stationIDs
# xsB = np.split(resB.x,2)
# boardingsB = xsB[0]
# alightsB = xsB[1]
boardingsB = outputB[0]
alightsB = outputB[1]
idsB = weekDayDic[day]["BoardingsB"].stationIDs
# the absolute value is taken because some zero values are slightly below zero (e.g. -5E-24), so a "-0" would remain after rounding (see the micro-demonstration after this cell)
output = np.absolute(np.around(np.column_stack((idsA, boardingsA, alightsA, idsB, boardingsB, alightsB)), 9))
output = pd.DataFrame(output)
print(str(day) + " " + str(hour))
print(output)
output.to_csv("Scenario/Hourly_Boarding_Alighting_Data/" + day + "/" + str(hour) + "Uhr.csv", index = False, header = False, sep = "\t")
5.2 Calculation of hourly Congestions
# CONGESTION CALCULATION:
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["StationNames"].tolist()
stationIDs = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
dayList = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
# hourly frequency: how many trains run within each hour, e.g. 0:3 --> from midnight to 1 AM, three trains are available
hourlyFrequencyDic ={0:3, 1:3, 2:3, 3:3, 4:4, 5:6, 6:12, 7:12, 8:12, 9:12, 10:12, 11:12, 12:12, 13:12, 14:12, 15:12, 16:12, 17:12, 18:12, 19:12, 20:10, 21:6, 22:6, 23:5}
# con = np.array(list(hourlyFrequencyDic.values()))
# print(np.mean(con)) # ≈ 9: average hourly train frequency over the day
dailyCongestionlistDirectionA = []
dailyCongestionlistDirectionB = []
for day in dayList:
hourlyCongestionlistDirectionA = []
hourlyCongestionlistDirectionB = []
for hour in range(0,24):
sourceFile = pd.read_csv("Scenario/Hourly_Boarding_Alighting_Data/" + str(day) + "/" + str(hour) + "Uhr.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t", header = None)
boardingsA = np.array(sourceFile["EinstiegA"].tolist())
alightsA = np.array(sourceFile["AusstiegA"].tolist())
boardingsB = np.array(sourceFile["EinstiegB"].tolist())
alightsB = np.array(sourceFile["AusstiegB"].tolist())
hourlyCongestionlistDirectionA.append(congestionCalculatorFromAggregatedData(boardingsA, alightsA, hourlyFrequencyDic[hour], 182, 8))
hourlyCongestionlistDirectionB.append(congestionCalculatorFromAggregatedData(boardingsB, alightsB, hourlyFrequencyDic[hour], 182, 8))
dailyCongestionlistDirectionA.append(hourlyCongestionlistDirectionA)
dailyCongestionlistDirectionB.append(hourlyCongestionlistDirectionB)
completeArray_DirectionA = np.asarray(dailyCongestionlistDirectionA) # all 5 workdays, 24 hours each
completeArray_DirectionB = np.asarray(dailyCongestionlistDirectionB)
# print(completeArray_DirectionA)
# print(completeArray_DirectionB)
# print(completeArray_DirectionA.size)
# print(completeArray_DirectionB.size)
# print(24*24*5) # 24 entries per hour, 24 hours per day, 5 days --> arrays have the correct size
# building 12-hour time-range labels; strftime gets the AM/PM switch at noon and midnight right
import datetime
yticklabels = []
for hour in range(0, 24):
yticklabels.append(datetime.time(hour).strftime("%I:%M %p") + " - " + datetime.time((hour + 1) % 24).strftime("%I:%M %p"))
ymin = 0
ymax = 20
dayNameDic = {0 : "Monday", 1 : "Tuesday", 2 : "Wednesday", 3 : "Thursday", 4 : "Friday"}
mask = (np.abs(dailyCongestionlistDirectionA[0]) == -100) # -100 = dummy value; the comparison is always False here, so no cells are masked
for index in list(dayNameDic.keys()):
plotA = combinedCongestionHeatmap("A", np.abs(dailyCongestionlistDirectionA[index]), yticklabels, "Hourly Line Section Congestion in Direction A on " + str(dayNameDic[index]), YlOrBr_9.mpl_colormap, ymin, ymax, mask, 0.79, (15,15))
plotB = combinedCongestionHeatmap("B", np.abs(dailyCongestionlistDirectionB[index]), yticklabels, "Hourly Line Section Congestion in Direction B on " + str(dayNameDic[index]), YlOrBr_9.mpl_colormap, ymin, ymax, mask, 0.79, (15,15))
plotA.savefig("Scenario/Hourly_Congestion/Heatmap-" + str(dayNameDic[index]) + "-A.pdf", bbox_inches='tight')
plotB.savefig("Scenario/Hourly_Congestion/Heatmap-" + str(dayNameDic[index]) + "-B.pdf", bbox_inches='tight')
print()
5.3 Calculation of hourly OD Matrices
5.3.1 Estimations using the Fluid Analogy Method
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["StationNames"].tolist()
stationIDs = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
difference = []
dayList = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
for day in dayList:
for hour in range(0,24):
try:
sourceFile = pd.read_csv("Scenario/Hourly_Boarding_Alighting_Data/" + str(day) + "/" + str(hour) + "Uhr.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t", header = None)
boardingsA = np.array(sourceFile["EinstiegA"].tolist())
alightsA = np.array(sourceFile["AusstiegA"].tolist())
boardingsB = np.array(sourceFile["EinstiegB"].tolist())
alightsB = np.array(sourceFile["AusstiegB"].tolist())
# print(boardingsA)
# print(alightsA)
# print(boardingsB)
# print(alightsB)
resultA = faMethod(boardingsA, alightsA) # Fluid Analogy method has been defined earlier and is used here again
resultB = faMethod(boardingsB, alightsB)
vMatrixA = resultA[0]
vMatrixB = resultB[0]
gMatrix = np.add(vMatrixA,np.flip(vMatrixB))
gMatrixFramed = pd.DataFrame(gMatrix, columns = stationIDs, index = stationIDs)
# Output:
if not np.isnan(gMatrix).any():
print("CALCULATED: " +str(day) + " " + str(hour) + " o'clock.")
print(gMatrixFramed)
print("f_{A}: " + str(resultA[1].round(3)))
print("f_{B}: " + str(resultB[1].round(3)))
print(150 * "=")
print()
else:
print("ERROR: " + str(day) + " " + str(hour) + " o'clock. Exception: NaN")
print(150 * "=")
print()
gMatrixFramed.to_csv("Scenario/Hourly_Estimation/FluidAnalogy_Matrizen/" + str(day) + "/U2-2019" + "-" + str(hour) + "Uhr_OD-Matrix-FluidAnalogyMethod.csv", index = False, header = False, sep = "\t")
except Exception as e:
print("ERROR: " + str(day) + " " + str(hour) + " o'clock. Exception: " + str(e))
print(150 * "=")
print()
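To illustrate why np.flip is used when the two direction matrices are merged (a toy 3x3 sketch; the assumption, consistent with the separate StationNrsA/StationNrsB columns, is that direction B indexes the stations in reverse order):
a = np.array([[0, 2, 1], [0, 0, 3], [0, 0, 0]]) # direction A, stations in forward order
b = np.array([[0, 4, 5], [0, 0, 6], [0, 0, 0]]) # direction B, stations in reverse order
print(np.add(a, np.flip(b))) # flipping along both axes realigns B with A before adding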
5.3.2 Estimations using the IPF Method
stationInfoFile = pd.read_csv("Station_Info/U2-Station_Names-Times_encoded.csv", names = ["ID", "StationNames", "StationTimestampsMin"], delimiter=";", header = None)
stationNames = stationInfoFile["StationNames"].tolist()
stationIDs = stationInfoFile["ID"].tolist()
stationTimestamps = stationInfoFile["StationTimestampsMin"].tolist()
difference = []
dayList = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
for day in dayList:
for hour in range(0,24):
try:
sourceFile = pd.read_csv("Scenario/Hourly_Boarding_Alighting_Data/" + str(day) + "/" + str(hour) + "Uhr.csv", names = ["StationNrsA", "EinstiegA", "AusstiegA", "StationNrsB", "EinstiegB", "AusstiegB"], delimiter = "\t", header = None)
boardingsA = np.array(sourceFile["EinstiegA"].tolist())
alightsA = np.array(sourceFile["AusstiegA"].tolist())
boardingsB = np.array(sourceFile["EinstiegB"].tolist())
alightsB = np.array(sourceFile["AusstiegB"].tolist())
difference.append(np.sum(boardingsA)-np.sum(alightsA))
matrixA = np.zeros((len(stationNames), len(stationNames)))
matrixB = np.zeros((len(stationNames), len(stationNames)))
createSeedMatrix(matrixA, stationTimestamps)
createSeedMatrix(matrixB, stationTimestamps)
ipf(matrixA, boardingsA, alightsA) # IPF method has been defined earlier and is used here again
ipf(matrixB, boardingsB, alightsB)
gMatrix = np.add(matrixA,np.flip(matrixB))
gMatrixFramed = pd.DataFrame(gMatrix, columns = stationNames, index = stationNames)
# Output:
if not np.isnan(gMatrix).any():
print("CALCULATED: " + str(day) + " " + str(hour) + " o'clock.")
print(gMatrixFramed)
print(150 * "=")
print()
else:
print("ERROR: " + str(day) + " " + str(hour) + " o'clock. Exception: NaN")
print(150 * "=")
print()
gMatrixFramed.to_csv("Scenario/Hourly_Estimation/IPF_Matrizen/" + str(day) + "/U2-2019" + "-" + str(hour) + "Uhr_OD-Matrix-IPFMethod.csv", index = False, header = False, sep = "\t")
except Exception as e:
print("ERROR: " + str(day) + " " + str(hour) + " o'clock. Exception: " + str(e))
print(150 * "=")
print()
# print(max(difference))
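To see the balancing idea in isolation, here is a self-contained toy version of iterative proportional fitting (a sketch of the principle only, not the ipf() defined earlier in this notebook):
seed = np.array([[0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0]]) # toy seed matrix
rowTargets = np.array([6.0, 3.0, 0.0]) # boardings per station
colTargets = np.array([0.0, 4.0, 5.0]) # alights per station
m = seed.copy()
for _ in range(50):
    # alternately scale rows and columns towards their targets
    rowSums = m.sum(axis = 1)
    m *= np.divide(rowTargets, rowSums, out = np.zeros_like(rowSums), where = rowSums != 0)[:, None]
    colSums = m.sum(axis = 0)
    m *= np.divide(colTargets, colSums, out = np.zeros_like(colSums), where = colSums != 0)[None, :]
print(m.round(3)) # row sums match the boardings, column sums match the alights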