Installing & Importing
# First step was to install openpyxl to make it possible to read excel files
!pip install openpyxl==3.1.2
# Import important libraries
import numpy as np
import pandas as pd
import openpyxl
df = pd.read_excel("UPDATED_BMMS_overview.xlsx")
df
# Unieke waarden uit de kolom halen
unieke_waarden = df['road'].unique()
# Initialiseren van tellers
count_N = 0
count_R = 0
count_Z = 0
# Loop over de unieke waarden en tel het aantal waarden dat begint met 'N', 'R' of 'Z'
for waarde in unieke_waarden:
if waarde.startswith('N'):
count_N += 1
elif waarde.startswith('R'):
count_R += 1
elif waarde.startswith('Z'):
count_Z += 1
# Resultaten afdrukken
print("Aantal waarden die met 'N' beginnen:", count_N)
print("Aantal waarden die met 'R' beginnen:", count_R)
print("Aantal waarden die met 'Z' beginnen:", count_Z)
Find roads that intersect with N1 & N2
#Manualy give the reference list of the intersections with road N1 and N2. These will be added in intersect_list.
intersect_list = []
reference_list_N2 = [ 'N1', 'N2', 'R201', 'Z1090', 'R202', 'N105', 'R203', 'Z2038', 'R301', 'Z2047', 'R210', 'R310', 'R211', 'Z2033', 'Z2042', 'Z2041', 'Z2032', 'R360', 'Z2031', 'N102', 'R220', 'N204', 'R221', 'R240', 'N204', 'N207', 'R240', 'R241', 'N207', 'Z2022', 'Z2016', 'R283', 'Z2013', 'N209', 'N208', 'N206', 'N210', 'Z2011', 'Z2012' ]
reference_list_N1 = [ 'Z1101', 'R110', 'Z1029', 'Z1052', 'R141', 'Z1031', 'N104', 'Z1070', 'Z1126', 'R111', 'N2', 'R113', 'N105', 'Z1061', 'Z1089', 'Z1066', 'Z1063', 'Z1062', 'Z1402', 'Z1053', 'Z1044', 'Z1042', 'Z1017', 'Z1008', 'Z1022', 'Z1219', 'Z1028', 'N102', 'Z1046', 'Z1045', 'Z1048', 'Z1031', 'Z1030', 'Z1034', 'Z1034', 'Z1081', 'Z1031', 'R151', 'Z1021', 'Z1086', 'Z1087', 'Z1016', 'N111', 'Z1804', 'Z1804', 'Z1065', 'Z1018', 'Z1071', 'N107', 'Z1059', 'Z1057', 'R170', 'Z1039', 'Z1026', 'Z1036', 'N108', 'Z1019', 'Z1013', 'R171', 'Z1007', 'Z1124', 'Z1125', 'Z1002', 'Z1127', 'R172', 'Z1005', 'Z1130', 'Z1131', 'Z1001', 'N109', 'N110', 'Z1009', 'Z1503', 'Z1503', 'Z1504', 'Z1133']
reference_list = reference_list_N1 + reference_list_N2
# Filtering DataFrame based on our reference list and finding the maximum value in column chainage.
max_values = df[df['road'].isin(reference_list)].groupby('road')['chainage'].max()
# Loop through the max_values and append the references to intersect_list if the maximum value is greater than 25
for reference, max_value in max_values.items():
if max_value > 25:
intersect_list.append(reference)
print("References added to intersect_list if the maximum value is greater than 25:")
print(intersect_list)
#Remove the duplicates from the list.
def remove_duplicates(input_list):
"""
Removes duplicates from a list.
Parameters:
input_list (list): The list from which duplicates will be removed.
Returns:
list: A list with duplicates removed.
"""
# Convert the list to a set to automatically remove duplicates
unique_list = list(set(input_list))
return unique_list
unique_list = remove_duplicates(intersect_list)
print(unique_list)
Create df for bridges
# Filtering DataFrame based on our unique_list
df_filtered_bridges = df[df['road'].isin(unique_list)]
df_filtered_bridges
# Sorting DataFrame based on road (primary) and chainage (secondary)
df_sorted_bridges = df_filtered_bridges.sort_values(by=['road', 'chainage'])
df_sorted_bridges
# Add a new column 'model_type' to the DataFrame
df_sorted_bridges['model_type'] = 'bridge'
df_sorted_bridges
Create df for roads
roads = pd.read_csv('_roads3.csv')
roads
# Filtering DataFrame based on unique_list
filtered_roads = roads[roads['road'].isin(unique_list)]
filtered_roads
# Add a new column 'model_type' to the DataFrame
filtered_roads['model_type'] = 'link'
df_sorted_roads = filtered_roads.copy()
df_sorted_roads
Merge dataframes
#!!!!!
#Check if all the roads are in df_sorted_roads
# List of values to check
check_list = unique_list
# Check if all values in the list are present in the column 'road' of the DataFrame
all_present = df_sorted_roads['road'].isin(check_list).all()
if all_present:
print("All values are present in the column.")
else:
print("Not all values are present in the column.")
#!!!!!
#Check if all the roads are in df_sorted_bridges
# List of values to check
check_list = unique_list
# Check if all values in the list are present in the column 'road' of the DataFrame
all_present = df_sorted_bridges['road'].isin(check_list).all()
if all_present:
print("All values are present in the column.")
else:
print("Not all values are present in the column.")
# Concatenate the two DataFrames vertically
concatenated_df = pd.concat([df_sorted_roads, df_sorted_bridges])
# Sort the concatenated DataFrame based on road (primary) and chainage (secondary)
sorted_df = concatenated_df.sort_values(by=['road', 'chainage'])
# Reset the index of the DataFrame
sorted_df.reset_index(drop=True, inplace=True)
sorted_df
Place sourcesinks and intersections
#!!!!
#First the sourcesinks have to be placed so that the intersections in a next step will overwrite certain sourcesinks.
df1 = sorted_df.copy()
# Initialize a variable to keep track of the previous road value
prev_road = None
# Iterate over each row in the DataFrame
for index, row in df1.iterrows():
# Check if the current road value is different from the previous one
if row['road'] != prev_road:
# Update the 'model_type' column for the previous row (if it exists)
if prev_road is not None:
df1.at[prev_index, 'model_type'] = 'sourcesink'
# Update the 'model_type' column for the current row
df1.at[index, 'model_type'] = 'sourcesink'
# Update the previous road value and index for the next iteration
prev_road = row['road']
prev_index = index
# Update the 'model_type' column for the last row (if the DataFrame is not empty)
if prev_road is not None:
df1.at[prev_index, 'model_type'] = 'sourcesink'
df1
from math import radians, sin, cos, sqrt, atan2
# Function to calculate the distance between two points (given their lat and lon) using the haversine formula
def haversine(lat1, lon1, lat2, lon2):
# Convert coordinates from degrees to radians
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
# Haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
c = 2 * atan2(sqrt(a), sqrt(1 - a))
distance = 6371 * c # Radius of the Earth in kilometers
return distance
# Function to check if two points are within a certain distance of each other
def within_distance(lat1, lon1, lat2, lon2, distance_threshold):
distance = haversine(lat1, lon1, lat2, lon2)
return distance <= distance_threshold
#Test
haversine(24.0515, 90.8731104, 24.0514722, 90.874166)
#!!!!!
#This cell will not be used in this notebook.
#This cell is to only find intersections with road N1.
#import itertools
#intersect_df = sorted_df
# Set the distance threshold
#distance_threshold = 0.01
# Filter the DataFrame to include only points on road "N1"
#road_N1_df = intersect_df[intersect_df['road'] == 'N1']
# Loop through each pair of points on road "N1" and other roads
#for (index1, row1), (index2, row2) in itertools.product(road_N1_df.iterrows(), intersect_df.iterrows()):
# Check if the points are from different roads (excluding comparing with itself)
# if row1['road'] != row2['road']:
# Check if the distance between these two points is within the threshold
# if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold):
# Update the 'model_type' column for both points to 'intersection'
# intersect_df.at[index1, 'model_type'] = 'intersection'
# intersect_df.at[index2, 'model_type'] = 'intersection'
# Display the DataFrame with the updated values in the 'model_type' column
#intersect_df
#This cell is to find intersections with road N1 and road N2.
df2 = df1.copy()
import itertools
# Set the distance threshold
distance_threshold = 1
# Filter the DataFrame to include only points on road "N1"
road_N1_df = df2[df2['road'] == 'N1']
# Filter the DataFrame to include only points on road "N2"
road_N2_df = df2[df2['road'] == 'N2']
# Loop through each pair of points on road "N1" and other roads
for (index1, row1), (index2, row2) in itertools.product(road_N1_df.iterrows(), df2.iterrows()):
# Check if the points are from different roads (excluding comparing with itself)
if row1['road'] != row2['road']:
# Check if the distance between these two points is within the threshold
if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold):
# Update the 'model_type' column for both points to 'intersection'
df2.at[index1, 'model_type'] = 'intersection'
df2.at[index2, 'model_type'] = 'intersection'
# Loop through each pair of points on road "N2" and other roads
for (index1, row1), (index2, row2) in itertools.product(road_N2_df.iterrows(), df2.iterrows()):
# Check if the points are from different roads (excluding comparing with itself)
if row1['road'] != row2['road']:
# Check if the distance between these two points is within the threshold
if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold):
# Update the 'model_type' column for both points to 'intersection'
df2.at[index1, 'model_type'] = 'intersection'
df2.at[index2, 'model_type'] = 'intersection'
df2
intersect_df = df2.copy()
intersect_df
# Check for consecutive 'intersection' values in the 'model_type' column to ensure intersections are not doubled due to the threshold.
consecutive_intersections = (intersect_df['model_type'] == 'intersection') & (intersect_df['model_type'].shift(-1) == 'intersection')
# Check if there are any True values in the resulting boolean series
if consecutive_intersections.any():
print("There are consecutive rows with 'intersection' in the 'model_type' column.")
else:
print("There are no consecutive rows with 'intersection' in the 'model_type' column.")
# Check if the current row is the same as the previous row in the 'model_type' column
is_duplicate = intersect_df['model_type'] == intersect_df['model_type'].shift()
# Change the 'model_type' to 'link' if the current row is the same as the previous row
intersect_df.loc[is_duplicate, 'model_type'] = 'link'
# Check for consecutive 'intersection' values in the 'model_type' column to ensure intersections are not doubled due to the threshold.
consecutive_intersections = (intersect_df['model_type'] == 'intersection') & (intersect_df['model_type'].shift(-1) == 'intersection')
# Check if there are any True values in the resulting boolean series
if consecutive_intersections.any():
print("There are consecutive rows with 'intersection' in the 'model_type' column.")
else:
print("There are no consecutive rows with 'intersection' in the 'model_type' column.")
#Check1 intersections
# Filter rows where 'model_type' is 'intersection'
intersection_rows = intersect_df[intersect_df['model_type'] == 'intersection']
# Sort the filtered rows based on the values in the 'lat' column
sorted_intersection_rows = intersection_rows.sort_values(by='lat')
sorted_intersection_rows
#Set coordinates of intersections as exactly the same.
mod2_intersect_df = intersect_df.copy()
# Loop through each row in the DataFrame where model_type is 'intersection'
for index1, row1 in mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'].iterrows():
for index2, row2 in mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'].iterrows():
# Check if the distance between these two points is within the threshold
if index1 != index2 and within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], 3*distance_threshold):
# If an intersection is detected, copy lon and lat from one road to the other
mod2_intersect_df.at[index1, 'lat'] = row2['lat']
mod2_intersect_df.at[index1, 'lon'] = row2['lon']
mod2_intersect_df.at[index2, 'lat'] = row1['lat']
mod2_intersect_df.at[index2, 'lon'] = row1['lon']
# Display the DataFrame with the updated values in the 'lat' and 'lon' columns
mod2_intersect_df
#Check2 intersections
# Filter rows where 'model_type' is 'intersection'
intersection_rows = mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection']
# Sort the filtered rows based on the values in the 'lat' column
sorted_intersection_rows2 = intersection_rows.sort_values(by='lat')
sorted_intersection_rows2
#Check3 intersections
# Filter rows where 'model_type' is 'intersection'
intersection_rows = mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection']
# Sort the filtered rows based on the values in the 'lat' column
sorted_intersection_rows3 = intersection_rows.sort_values(by='lat')
sorted_intersection_rows3
mod3_intersect_df = mod2_intersect_df.copy()
# Determine the starting number for the ids
start_id = 1000000
# Add the 'id' column to the DataFrame
mod3_intersect_df['id'] = range(start_id, start_id + len(mod3_intersect_df))
mod3_intersect_df
# Reset the indexes of the DataFrame
mod3_intersect_df.reset_index(drop=True, inplace=True)
mod3_intersect_df
#!!!!
#Make the id of the intersections the same.
mod4_intersect_df = mod3_intersect_df.copy() # Make a copy of the DataFrame to avoid modifying the original one
# Determine the starting number for the ids
start_id = 1000000
# Initialize a dictionary to store mappings of (lat, lon) to id
id_map = {}
# Loop through each row in the DataFrame where model_type is 'intersection'
for index, row in mod4_intersect_df[mod4_intersect_df['model_type'] == 'intersection'].iterrows():
# Get the current latitude and longitude
lat = row['lat']
lon = row['lon']
# Check if this (lat, lon) pair already exists in the id_map
if (lat, lon) in id_map:
# If it exists, assign the same id to this intersection
mod4_intersect_df.at[index, 'id'] = id_map[(lat, lon)]
else:
# If it doesn't exist, assign the current index + 1000000 as the ID and store the mapping
mod4_intersect_df.at[index, 'id'] = index + start_id
id_map[(lat, lon)] = index + start_id
mod4_intersect_df
# Sort the DataFrame based on the 'road' column and then the 'chainage' column
mod4_intersect_df = mod4_intersect_df.sort_values(by=['road', 'chainage'])
Calculate length
#Calculate length
# Make a copy of the DataFrame to avoid modifying the original DataFrame
mod5_intersect_df = mod4_intersect_df.copy()
# Filter rows where 'model_type' is 'link'
link_rows = mod5_intersect_df[mod5_intersect_df['model_type'] == 'link']
# Calculate the length only for the filtered rows
link_rows['length'] = link_rows.groupby('road')['chainage'].diff()
# Update the 'length' column in the original DataFrame with the calculated lengths for link rows
mod5_intersect_df.update(link_rows)
# Display the DataFrame with the updated 'length' column
mod5_intersect_df
#Calculate chainage in km to length in meters.
mod6_intersect_df = mod5_intersect_df
# Multiply values in the 'length' column by 1000 for rows where 'model_type' is 'link'
mod6_intersect_df.loc[mod6_intersect_df['model_type'] == 'link', 'length'] *= 1000
# Replace NaN values in a specific column with 0
mod6_intersect_df['length'] = mod6_intersect_df['length'].fillna(0)
Pour into the correct format
# Reset the index to consecutive integers
mod6_intersect_df = mod6_intersect_df.reset_index(drop=True)
mod6_intersect_df
mod7_intersect_df = mod6_intersect_df.copy()
# Select the desired columns in the desired order
selected_columns = ['road', 'id', 'model_type', 'condition', 'name', 'lat', 'lon', 'length']
# Create a new DataFrame with only the selected columns
mod7_intersect_df = mod7_intersect_df[selected_columns]
# Sort the DataFrame based on the index to preserve the original order
mod7_intersect_df = mod7_intersect_df.sort_index()
# Display the resulting DataFrame
mod7_intersect_df
#Check4 intersections
# Filter rows where 'model_type' is 'intersection'
intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection']
# Sort the filtered rows based on the values in the 'lat' column
sorted_intersection_rows4 = intersection_rows.sort_values(by='lat')
# Display the sorted intersection rows
sorted_intersection_rows4
#Remove intersections that occur once so are actually links.
# Count the occurrences of each value in the 'lat' column
value_counts = sorted_intersection_rows4['lat'].value_counts()
# Filter the index of values that occur only once
indices_to_change = value_counts[value_counts == 1].index
# Change the value of the 'model_type' to 'link' for rows where the 'lat' value occurs only once
sorted_intersection_rows4.loc[sorted_intersection_rows4['lat'].isin(indices_to_change), 'model_type'] = 'link'
sorted_intersection_rows4
# Step 1: Filter rows with 'model_type' == 'intersection'
intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection']
# Step 2: Count occurrences of each value in 'lat' column
lat_counts = intersection_rows['lat'].value_counts()
# Step 3: Identify rows where 'lat' occurs only once
unique_lat_rows = intersection_rows[intersection_rows['lat'].map(lat_counts) == 1]
# Step 4: Update 'model_type' for identified rows to 'link'
mod7_intersect_df.loc[unique_lat_rows.index, 'model_type'] = 'link'
mod7_intersect_df
#Check5 intersections
# Filter rows where 'model_type' is 'intersection'
intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection']
# Sort the filtered rows based on the values in the 'lat' column
sorted_intersection_rows5 = intersection_rows.sort_values(by='lat')
# Display the sorted intersection rows
sorted_intersection_rows5
Remove isolated sourcesinks
These sourcesinks are removed because the model indicates that they are not connected with other sourcesinks. Both are listed below, and manual inspection determined which ones should be removed. The first two are extensively discussed, while the last three are removed in abbreviated form.
# Get all rows where the value in the column 'id' is x
rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1004230
]
rows_with_value_x
#N2
# Get all rows where the value in the column 'id' is x
rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1006578
]
rows_with_value_x
#R241
# Define the specific value in the 'road' column
specific_value = 'R241'
# Filter the DataFrame based on the specific value in the 'road' column and 'sourcesink' in 'model_type'
mod7_intersect_df.loc[(mod7_intersect_df['road'] == specific_value) & (mod7_intersect_df['model_type'] == 'sourcesink'), 'model_type'] = 'link'
mod7_intersect_df
# Get all rows where the value in the column 'id' is x
rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1007266
]
rows_with_value_x
#Z1005
# Get all rows where the value in the column 'id' is x
rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1004605
]
rows_with_value_x
#N208
# Define the specific value in the 'road' column
specific_value = 'Z1005'
# Filter the DataFrame based on the specific value in the 'road' column and 'sourcesink' in 'model_type'
mod7_intersect_df.loc[(mod7_intersect_df['road'] == specific_value) & (mod7_intersect_df['model_type'] == 'sourcesink'), 'model_type'] = 'link'
mod7_intersect_df
# Filter rows where 'road' column has value 'x' and 'id' column has value 'y'
filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'Z1402') & (mod7_intersect_df['id'] == '1008009')]
# Update the 'model_type' column for the filtered rows
filtered_rows['model_type'] = 'link'
# Update the original DataFrame with the modified rows
df.update(filtered_rows)
# Filter rows where 'road' column has value 'x' and 'id' column has value 'y'
filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'R203') & (mod7_intersect_df['id'] == '1005910')]
# Update the 'model_type' column for the filtered rows
filtered_rows['model_type'] = 'link'
# Update the original DataFrame with the modified rows
df.update(filtered_rows)
# Filter rows where 'road' column has value 'x' and 'id' column has value 'y'
filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'R220') & (mod7_intersect_df['id'] == '1006224')]
# Update the 'model_type' column for the filtered rows
filtered_rows['model_type'] = 'link'
# Update the original DataFrame with the modified rows
df.update(filtered_rows)
Save the file
# Save the DataFrame to a CSV file
mod7_intersect_df.to_csv('N1_N2_V4.csv', index=False)