Installing & Importing

# First step was to install openpyxl to make it possible to read excel files !pip install openpyxl==3.1.2

# Import important libraries import numpy as np import pandas as pd import openpyxl

df = pd.read_excel("UPDATED_BMMS_overview.xlsx") df

# Unieke waarden uit de kolom halen unieke_waarden = df['road'].unique() # Initialiseren van tellers count_N = 0 count_R = 0 count_Z = 0 # Loop over de unieke waarden en tel het aantal waarden dat begint met 'N', 'R' of 'Z' for waarde in unieke_waarden: if waarde.startswith('N'): count_N += 1 elif waarde.startswith('R'): count_R += 1 elif waarde.startswith('Z'): count_Z += 1 # Resultaten afdrukken print("Aantal waarden die met 'N' beginnen:", count_N) print("Aantal waarden die met 'R' beginnen:", count_R) print("Aantal waarden die met 'Z' beginnen:", count_Z)

Find roads that intersect with N1 & N2

#Manualy give the reference list of the intersections with road N1 and N2. These will be added in intersect_list. intersect_list = [] reference_list_N2 = [ 'N1', 'N2', 'R201', 'Z1090', 'R202', 'N105', 'R203', 'Z2038', 'R301', 'Z2047', 'R210', 'R310', 'R211', 'Z2033', 'Z2042', 'Z2041', 'Z2032', 'R360', 'Z2031', 'N102', 'R220', 'N204', 'R221', 'R240', 'N204', 'N207', 'R240', 'R241', 'N207', 'Z2022', 'Z2016', 'R283', 'Z2013', 'N209', 'N208', 'N206', 'N210', 'Z2011', 'Z2012' ] reference_list_N1 = [ 'Z1101', 'R110', 'Z1029', 'Z1052', 'R141', 'Z1031', 'N104', 'Z1070', 'Z1126', 'R111', 'N2', 'R113', 'N105', 'Z1061', 'Z1089', 'Z1066', 'Z1063', 'Z1062', 'Z1402', 'Z1053', 'Z1044', 'Z1042', 'Z1017', 'Z1008', 'Z1022', 'Z1219', 'Z1028', 'N102', 'Z1046', 'Z1045', 'Z1048', 'Z1031', 'Z1030', 'Z1034', 'Z1034', 'Z1081', 'Z1031', 'R151', 'Z1021', 'Z1086', 'Z1087', 'Z1016', 'N111', 'Z1804', 'Z1804', 'Z1065', 'Z1018', 'Z1071', 'N107', 'Z1059', 'Z1057', 'R170', 'Z1039', 'Z1026', 'Z1036', 'N108', 'Z1019', 'Z1013', 'R171', 'Z1007', 'Z1124', 'Z1125', 'Z1002', 'Z1127', 'R172', 'Z1005', 'Z1130', 'Z1131', 'Z1001', 'N109', 'N110', 'Z1009', 'Z1503', 'Z1503', 'Z1504', 'Z1133'] reference_list = reference_list_N1 + reference_list_N2 # Filtering DataFrame based on our reference list and finding the maximum value in column chainage. max_values = df[df['road'].isin(reference_list)].groupby('road')['chainage'].max() # Loop through the max_values and append the references to intersect_list if the maximum value is greater than 25 for reference, max_value in max_values.items(): if max_value > 25: intersect_list.append(reference) print("References added to intersect_list if the maximum value is greater than 25:") print(intersect_list)

#Remove the duplicates from the list. def remove_duplicates(input_list): """ Removes duplicates from a list. Parameters: input_list (list): The list from which duplicates will be removed. Returns: list: A list with duplicates removed. """ # Convert the list to a set to automatically remove duplicates unique_list = list(set(input_list)) return unique_list unique_list = remove_duplicates(intersect_list) print(unique_list)

Create df for bridges

# Filtering DataFrame based on our unique_list df_filtered_bridges = df[df['road'].isin(unique_list)] df_filtered_bridges

# Sorting DataFrame based on road (primary) and chainage (secondary) df_sorted_bridges = df_filtered_bridges.sort_values(by=['road', 'chainage']) df_sorted_bridges

# Add a new column 'model_type' to the DataFrame df_sorted_bridges['model_type'] = 'bridge' df_sorted_bridges

Create df for roads

roads = pd.read_csv('_roads3.csv') roads

# Filtering DataFrame based on unique_list filtered_roads = roads[roads['road'].isin(unique_list)] filtered_roads

# Add a new column 'model_type' to the DataFrame filtered_roads['model_type'] = 'link' df_sorted_roads = filtered_roads.copy() df_sorted_roads

Merge dataframes

#!!!!! #Check if all the roads are in df_sorted_roads # List of values to check check_list = unique_list # Check if all values in the list are present in the column 'road' of the DataFrame all_present = df_sorted_roads['road'].isin(check_list).all() if all_present: print("All values are present in the column.") else: print("Not all values are present in the column.")

#!!!!! #Check if all the roads are in df_sorted_bridges # List of values to check check_list = unique_list # Check if all values in the list are present in the column 'road' of the DataFrame all_present = df_sorted_bridges['road'].isin(check_list).all() if all_present: print("All values are present in the column.") else: print("Not all values are present in the column.")

# Concatenate the two DataFrames vertically concatenated_df = pd.concat([df_sorted_roads, df_sorted_bridges]) # Sort the concatenated DataFrame based on road (primary) and chainage (secondary) sorted_df = concatenated_df.sort_values(by=['road', 'chainage']) # Reset the index of the DataFrame sorted_df.reset_index(drop=True, inplace=True) sorted_df

Place sourcesinks and intersections

#!!!! #First the sourcesinks have to be placed so that the intersections in a next step will overwrite certain sourcesinks. df1 = sorted_df.copy() # Initialize a variable to keep track of the previous road value prev_road = None # Iterate over each row in the DataFrame for index, row in df1.iterrows(): # Check if the current road value is different from the previous one if row['road'] != prev_road: # Update the 'model_type' column for the previous row (if it exists) if prev_road is not None: df1.at[prev_index, 'model_type'] = 'sourcesink' # Update the 'model_type' column for the current row df1.at[index, 'model_type'] = 'sourcesink' # Update the previous road value and index for the next iteration prev_road = row['road'] prev_index = index # Update the 'model_type' column for the last row (if the DataFrame is not empty) if prev_road is not None: df1.at[prev_index, 'model_type'] = 'sourcesink' df1

from math import radians, sin, cos, sqrt, atan2 # Function to calculate the distance between two points (given their lat and lon) using the haversine formula def haversine(lat1, lon1, lat2, lon2): # Convert coordinates from degrees to radians lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) # Haversine formula dlon = lon2 - lon1 dlat = lat2 - lat1 a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2 c = 2 * atan2(sqrt(a), sqrt(1 - a)) distance = 6371 * c # Radius of the Earth in kilometers return distance # Function to check if two points are within a certain distance of each other def within_distance(lat1, lon1, lat2, lon2, distance_threshold): distance = haversine(lat1, lon1, lat2, lon2) return distance <= distance_threshold

#Test haversine(24.0515, 90.8731104, 24.0514722, 90.874166)

#!!!!! #This cell will not be used in this notebook. #This cell is to only find intersections with road N1. #import itertools #intersect_df = sorted_df # Set the distance threshold #distance_threshold = 0.01 # Filter the DataFrame to include only points on road "N1" #road_N1_df = intersect_df[intersect_df['road'] == 'N1'] # Loop through each pair of points on road "N1" and other roads #for (index1, row1), (index2, row2) in itertools.product(road_N1_df.iterrows(), intersect_df.iterrows()): # Check if the points are from different roads (excluding comparing with itself) # if row1['road'] != row2['road']: # Check if the distance between these two points is within the threshold # if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold): # Update the 'model_type' column for both points to 'intersection' # intersect_df.at[index1, 'model_type'] = 'intersection' # intersect_df.at[index2, 'model_type'] = 'intersection' # Display the DataFrame with the updated values in the 'model_type' column #intersect_df

#This cell is to find intersections with road N1 and road N2. df2 = df1.copy() import itertools # Set the distance threshold distance_threshold = 1 # Filter the DataFrame to include only points on road "N1" road_N1_df = df2[df2['road'] == 'N1'] # Filter the DataFrame to include only points on road "N2" road_N2_df = df2[df2['road'] == 'N2'] # Loop through each pair of points on road "N1" and other roads for (index1, row1), (index2, row2) in itertools.product(road_N1_df.iterrows(), df2.iterrows()): # Check if the points are from different roads (excluding comparing with itself) if row1['road'] != row2['road']: # Check if the distance between these two points is within the threshold if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold): # Update the 'model_type' column for both points to 'intersection' df2.at[index1, 'model_type'] = 'intersection' df2.at[index2, 'model_type'] = 'intersection' # Loop through each pair of points on road "N2" and other roads for (index1, row1), (index2, row2) in itertools.product(road_N2_df.iterrows(), df2.iterrows()): # Check if the points are from different roads (excluding comparing with itself) if row1['road'] != row2['road']: # Check if the distance between these two points is within the threshold if within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], distance_threshold): # Update the 'model_type' column for both points to 'intersection' df2.at[index1, 'model_type'] = 'intersection' df2.at[index2, 'model_type'] = 'intersection' df2 intersect_df = df2.copy()

intersect_df

# Check for consecutive 'intersection' values in the 'model_type' column to ensure intersections are not doubled due to the threshold. consecutive_intersections = (intersect_df['model_type'] == 'intersection') & (intersect_df['model_type'].shift(-1) == 'intersection') # Check if there are any True values in the resulting boolean series if consecutive_intersections.any(): print("There are consecutive rows with 'intersection' in the 'model_type' column.") else: print("There are no consecutive rows with 'intersection' in the 'model_type' column.")

# Check if the current row is the same as the previous row in the 'model_type' column is_duplicate = intersect_df['model_type'] == intersect_df['model_type'].shift() # Change the 'model_type' to 'link' if the current row is the same as the previous row intersect_df.loc[is_duplicate, 'model_type'] = 'link'

#Check1 intersections # Filter rows where 'model_type' is 'intersection' intersection_rows = intersect_df[intersect_df['model_type'] == 'intersection'] # Sort the filtered rows based on the values in the 'lat' column sorted_intersection_rows = intersection_rows.sort_values(by='lat') sorted_intersection_rows

#Set coordinates of intersections as exactly the same. mod2_intersect_df = intersect_df.copy() # Loop through each row in the DataFrame where model_type is 'intersection' for index1, row1 in mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'].iterrows(): for index2, row2 in mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'].iterrows(): # Check if the distance between these two points is within the threshold if index1 != index2 and within_distance(row1['lat'], row1['lon'], row2['lat'], row2['lon'], 3*distance_threshold): # If an intersection is detected, copy lon and lat from one road to the other mod2_intersect_df.at[index1, 'lat'] = row2['lat'] mod2_intersect_df.at[index1, 'lon'] = row2['lon'] mod2_intersect_df.at[index2, 'lat'] = row1['lat'] mod2_intersect_df.at[index2, 'lon'] = row1['lon'] # Display the DataFrame with the updated values in the 'lat' and 'lon' columns mod2_intersect_df

#Check2 intersections # Filter rows where 'model_type' is 'intersection' intersection_rows = mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'] # Sort the filtered rows based on the values in the 'lat' column sorted_intersection_rows2 = intersection_rows.sort_values(by='lat') sorted_intersection_rows2

#Check3 intersections # Filter rows where 'model_type' is 'intersection' intersection_rows = mod2_intersect_df[mod2_intersect_df['model_type'] == 'intersection'] # Sort the filtered rows based on the values in the 'lat' column sorted_intersection_rows3 = intersection_rows.sort_values(by='lat') sorted_intersection_rows3

mod3_intersect_df = mod2_intersect_df.copy() # Determine the starting number for the ids start_id = 1000000 # Add the 'id' column to the DataFrame mod3_intersect_df['id'] = range(start_id, start_id + len(mod3_intersect_df)) mod3_intersect_df

# Reset the indexes of the DataFrame mod3_intersect_df.reset_index(drop=True, inplace=True) mod3_intersect_df

#!!!! #Make the id of the intersections the same. mod4_intersect_df = mod3_intersect_df.copy() # Make a copy of the DataFrame to avoid modifying the original one # Determine the starting number for the ids start_id = 1000000 # Initialize a dictionary to store mappings of (lat, lon) to id id_map = {} # Loop through each row in the DataFrame where model_type is 'intersection' for index, row in mod4_intersect_df[mod4_intersect_df['model_type'] == 'intersection'].iterrows(): # Get the current latitude and longitude lat = row['lat'] lon = row['lon'] # Check if this (lat, lon) pair already exists in the id_map if (lat, lon) in id_map: # If it exists, assign the same id to this intersection mod4_intersect_df.at[index, 'id'] = id_map[(lat, lon)] else: # If it doesn't exist, assign the current index + 1000000 as the ID and store the mapping mod4_intersect_df.at[index, 'id'] = index + start_id id_map[(lat, lon)] = index + start_id mod4_intersect_df

# Sort the DataFrame based on the 'road' column and then the 'chainage' column mod4_intersect_df = mod4_intersect_df.sort_values(by=['road', 'chainage'])

Calculate length

#Calculate length # Make a copy of the DataFrame to avoid modifying the original DataFrame mod5_intersect_df = mod4_intersect_df.copy() # Filter rows where 'model_type' is 'link' link_rows = mod5_intersect_df[mod5_intersect_df['model_type'] == 'link'] # Calculate the length only for the filtered rows link_rows['length'] = link_rows.groupby('road')['chainage'].diff() # Update the 'length' column in the original DataFrame with the calculated lengths for link rows mod5_intersect_df.update(link_rows) # Display the DataFrame with the updated 'length' column mod5_intersect_df

#Calculate chainage in km to length in meters. mod6_intersect_df = mod5_intersect_df # Multiply values in the 'length' column by 1000 for rows where 'model_type' is 'link' mod6_intersect_df.loc[mod6_intersect_df['model_type'] == 'link', 'length'] *= 1000

# Replace NaN values in a specific column with 0 mod6_intersect_df['length'] = mod6_intersect_df['length'].fillna(0)

Pour into the correct format

# Reset the index to consecutive integers mod6_intersect_df = mod6_intersect_df.reset_index(drop=True) mod6_intersect_df

mod7_intersect_df = mod6_intersect_df.copy() # Select the desired columns in the desired order selected_columns = ['road', 'id', 'model_type', 'condition', 'name', 'lat', 'lon', 'length'] # Create a new DataFrame with only the selected columns mod7_intersect_df = mod7_intersect_df[selected_columns] # Sort the DataFrame based on the index to preserve the original order mod7_intersect_df = mod7_intersect_df.sort_index() # Display the resulting DataFrame mod7_intersect_df

#Check4 intersections # Filter rows where 'model_type' is 'intersection' intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection'] # Sort the filtered rows based on the values in the 'lat' column sorted_intersection_rows4 = intersection_rows.sort_values(by='lat') # Display the sorted intersection rows sorted_intersection_rows4

#Remove intersections that occur once so are actually links. # Count the occurrences of each value in the 'lat' column value_counts = sorted_intersection_rows4['lat'].value_counts() # Filter the index of values that occur only once indices_to_change = value_counts[value_counts == 1].index # Change the value of the 'model_type' to 'link' for rows where the 'lat' value occurs only once sorted_intersection_rows4.loc[sorted_intersection_rows4['lat'].isin(indices_to_change), 'model_type'] = 'link' sorted_intersection_rows4

# Step 1: Filter rows with 'model_type' == 'intersection' intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection'] # Step 2: Count occurrences of each value in 'lat' column lat_counts = intersection_rows['lat'].value_counts() # Step 3: Identify rows where 'lat' occurs only once unique_lat_rows = intersection_rows[intersection_rows['lat'].map(lat_counts) == 1] # Step 4: Update 'model_type' for identified rows to 'link' mod7_intersect_df.loc[unique_lat_rows.index, 'model_type'] = 'link' mod7_intersect_df

#Check5 intersections # Filter rows where 'model_type' is 'intersection' intersection_rows = mod7_intersect_df[mod7_intersect_df['model_type'] == 'intersection'] # Sort the filtered rows based on the values in the 'lat' column sorted_intersection_rows5 = intersection_rows.sort_values(by='lat') # Display the sorted intersection rows sorted_intersection_rows5

Remove isolated sourcesinks

These sourcesinks are removed because the model indicates that they are not connected with other sourcesinks. Both are listed below, and manual inspection determined which ones should be removed. The first two are extensively discussed, while the last three are removed in abbreviated form.

# Get all rows where the value in the column 'id' is x rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1004230 ] rows_with_value_x #N2 # Get all rows where the value in the column 'id' is x rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1006578 ] rows_with_value_x #R241

# Define the specific value in the 'road' column specific_value = 'R241' # Filter the DataFrame based on the specific value in the 'road' column and 'sourcesink' in 'model_type' mod7_intersect_df.loc[(mod7_intersect_df['road'] == specific_value) & (mod7_intersect_df['model_type'] == 'sourcesink'), 'model_type'] = 'link' mod7_intersect_df

# Get all rows where the value in the column 'id' is x rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1007266 ] rows_with_value_x #Z1005 # Get all rows where the value in the column 'id' is x rows_with_value_x = mod7_intersect_df[mod7_intersect_df['id'] == 1004605 ] rows_with_value_x #N208

# Define the specific value in the 'road' column specific_value = 'Z1005' # Filter the DataFrame based on the specific value in the 'road' column and 'sourcesink' in 'model_type' mod7_intersect_df.loc[(mod7_intersect_df['road'] == specific_value) & (mod7_intersect_df['model_type'] == 'sourcesink'), 'model_type'] = 'link' mod7_intersect_df

# Filter rows where 'road' column has value 'x' and 'id' column has value 'y' filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'Z1402') & (mod7_intersect_df['id'] == '1008009')] # Update the 'model_type' column for the filtered rows filtered_rows['model_type'] = 'link' # Update the original DataFrame with the modified rows df.update(filtered_rows)

# Filter rows where 'road' column has value 'x' and 'id' column has value 'y' filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'R203') & (mod7_intersect_df['id'] == '1005910')] # Update the 'model_type' column for the filtered rows filtered_rows['model_type'] = 'link' # Update the original DataFrame with the modified rows df.update(filtered_rows)

# Filter rows where 'road' column has value 'x' and 'id' column has value 'y' filtered_rows = mod7_intersect_df[(mod7_intersect_df['road'] == 'R220') & (mod7_intersect_df['id'] == '1006224')] # Update the 'model_type' column for the filtered rows filtered_rows['model_type'] = 'link' # Update the original DataFrame with the modified rows df.update(filtered_rows)

Save the file

# Save the DataFrame to a CSV file mod7_intersect_df.to_csv('N1_N2_V4.csv', index=False)

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Installing & Importing