Gas Reserve Explorer

# Import necessasry libraries import warnings import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import numpy as np import plotly.graph_objects as go

# Suppress warnings warnings.filterwarnings("ignore", category=FutureWarning) # Set the default Seaborn style sns.set_theme(style="whitegrid", font="serif")

# Load the dataset data = pd.read_csv("/kaggle/input/natural-gas-storage-inventory-in-canada/natural_gas_storage_inventory_2016_2024.csv") # Preview the dataset, first 5 rows data.head()

data.dtypes

data.shape

data.describe().T

data.isnull().sum()

data = data.dropna(axis=1) # Preview data after dropping NULL values data.isnull().sum()

# Rename all columns to be in lowercase data.columns = [col.lower() for col in data.columns] # Preview the renamed columns data.columns.tolist()

# Drop the specified columns data = data.drop(columns=["geo", "dguid"]) # Preview the remaining columns data.columns.tolist()

# Transform the 'ref_date' column into datetime format data["ref_date"] = pd.to_datetime(data["ref_date"]) # Preview the data types to confirm the transformation data.head()

# Filter data opening = data[data["storage"] == "Opening inventory"] closing = data[data["storage"] == "Closing inventory"] # Extract the value for cubic meters and gigajoules opening_m3 = opening[opening["statistics"] == "Cubic metres"]["value"].values[0] / 1000 opening_ggj = opening[opening["statistics"] == "Gigajoules"]["value"].values[0] / 1000 closing_m3 = closing[closing["statistics"] == "Cubic metres"]["value"].values[0] / 1000 closing_ggj = closing[closing["statistics"] == "Gigajoules"]["value"].values[0] / 1000 # Categories and values cat = ["Opening Inventory", "Closing Inventory"] m3_values = [opening_m3, closing_m3] ggj_values = [opening_ggj, closing_ggj] # Bars width bar_width = 0.35 # Position of the bars on the x-axis r1 = np.arange(len(cat)) r2 = [x + bar_width for x in r1] # Create grouped bar chart plt.figure(figsize=(10, 6)) bars1 = plt.bar(r1, m3_values, color="skyblue", width=bar_width, edgecolor="grey", label="Cubic Metres") bars2 = plt.bar(r2, ggj_values, color="salmon", width=bar_width, edgecolor="grey", label="Gigajoules") # Add labels plt.xlabel("Inventory Type") plt.ylabel("Values in Thousands") # Add xticks plt.xticks([r + bar_width/2 for r in range(len(cat))], cat) # Add title plt.title("Opening and Closing Inventory Levels") # Add legend plt.legend() # Add grid for better readability plt.grid(linestyle='--', alpha=0.7) # Add text annotation at the top and center of the bars for bar in bars1: yval = bar.get_height() plt.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), ha='center', va='bottom') for bar in bars2: yval = bar.get_height() plt.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), ha='center', va='bottom') # Show the plot plt.show()

# Filter the data inventory_change = data[data["storage"] == "Inventory change"] # Extract the values m3_change = inventory_change[inventory_change["statistics"] == "Cubic metres"] ggj_change = inventory_change[inventory_change["statistics"] == "Gigajoules"] # Create line chart plt.figure(figsize=(20, 8)) plt.plot(m3_change["ref_date"], m3_change["value"] / 1000, marker="o", linestyle="-", color="skyblue", label="Cubic Metres") plt.plot(ggj_change["ref_date"], ggj_change["value"] / 1000, marker="o", linestyle="-", color="salmon", label="Gigajoules") # Add horizontal lines for the lowest and highest values min_value = min(m3_change["value"].min(), ggj_change["value"].min()) / 1000 max_value = max(m3_change["value"].max(), ggj_change["value"].max()) / 1000 plt.axhline(y=min_value, color='skyblue', linestyle='--', label=f'Lowest Value: {min_value:.2f}') plt.axhline(y=max_value, color='skyblue', linestyle='-', label=f'Highest Value: {max_value:.2f}') # Add labels plt.xlabel("Year") plt.ylabel("Inventory Change Values in Thousands") # Add title plt.title("Inventory Change Over Time") # Add legend plt.legend() # Add grid for better readability plt.grid(linestyle='--', alpha=0.7) # Rotate x-axis labels for better readability if there are many time points plt.xticks(rotation=45) # Show the plot plt.tight_layout() plt.show()

# Filter the data for inventory change inventory_change = data[data["storage"] == "Inventory change"] # Extract the values for m3 and ggj m3_change = inventory_change[inventory_change["statistics"] == "Cubic metres"]["value"].values[0] / 1000 ggj_change = inventory_change[inventory_change["statistics"] == "Gigajoules"]["value"].values[0] / 1000 # Time (e.g., January 2016) time = data["ref_date"].dt.strftime('%Y-%m').unique() # Create the area chart plt.figure(figsize=(30, 6)) # Plot the areas plt.fill_between(time, [0], [m3_change], color="skyblue", alpha=0.5, label="Cubic metres") plt.fill_between(time, [m3_change], [m3_change + ggj_change], color="salmon", alpha=0.5, label="Gigajoules") # Add labels plt.xlabel("Date") plt.ylabel("Cumulative Inventory Change Values in Thousands") # Add xticks plt.xticks(rotation=45) # Add a title plt.title("Cumulative Inventory Change Over Time") # Add a legend plt.legend() # Add grid for better readability plt.grid(linestyle="--", alpha=0.7) # Show the plot plt.tight_layout() plt.show()

# Filter the data injections = data[data["storage"] == "Injections to storage"] withdrawals = data[data["storage"] == "Withdrawals from storage"] # Extract the values injections_m3 = injections[injections['statistics'] == 'Cubic metres']['value'].values[0] / 1000 injections_ggj = injections[injections['statistics'] == 'Gigajoules']['value'].values[0] / 1000 withdrawals_m3 = withdrawals[withdrawals['statistics'] == 'Cubic metres']['value'].values[0] / 1000 withdrawals_ggj = withdrawals[withdrawals['statistics'] == 'Gigajoules']['value'].values[0] / 1000 # Categories and values cat = ["Injections", "Withdrawals"] m3_values = [injections_m3, withdrawals_m3] ggj_values = [injections_ggj, withdrawals_ggj] # Create stacked bar chart fig, ax = plt.subplots(figsize=(10, 6)) # Plot the bars ax.bar(cat, m3_values, color='skyblue', label='Cubic Metres') ax.bar(cat, ggj_values, bottom=m3_values, color='salmon', label='Gigajoules') # Add labels ax.set_xlabel('Categories') ax.set_ylabel('Values in Thousands') # Add title ax.set_title('Injections vs. Withdrawals') # Add legend ax.legend() # Add grid for better readability plt.grid(linestyle='--', alpha=0.7) # Show the plot plt.tight_layout() plt.show()

# Calculate the absolute values of inventory change total_injections_m3 = injections[injections["statistics"] == "Cubic metres"]["value"].sum() total_withdrawals_m3 = withdrawals[withdrawals["statistics"] == "Cubic metres"]["value"].sum() total_injections_ggj = injections[injections["statistics"] == "Gigajoules"]["value"].sum() total_withdrawals_ggj = withdrawals[withdrawals["statistics"] == "Gigajoules"]["value"].sum() # Calculate the net change net_change_m3 = abs(total_injections_m3 - total_withdrawals_m3) net_change_ggj = abs(total_injections_ggj - total_withdrawals_ggj) # Data for the pie chart labels = ["Cubic metres", "Gigajoules"] sizes = [net_change_m3, net_change_ggj] colors = ["skyblue", "salmon"] # Create the pie chart fig, ax = plt.subplots(figsize=(8, 8)) ax.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=140) # Add title ax.set_title("Proportion of Inventory Change") # Equal aspect ratio ensures that pie is drawn as a circle. ax.axis("equal") # Show the plot plt.tight_layout() plt.show()

# Filter the data for opening inventory, injections, withdrawals, and closing inventory opening_inventory = data[data["storage"] == "Opening inventory"] closing_inventory = data[data["storage"] == "Closing inventory"] injections = data[data["storage"] == "Injections to storage"] withdrawals = data[data["storage"] == "Withdrawals from storage"] # Extract the values for m3 and ggj opening_m3 = opening_inventory[opening_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000 opening_ggj = opening_inventory[opening_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000 closing_m3 = closing_inventory[closing_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000 closing_ggj = closing_inventory[closing_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000 injections_m3 = injections[injections["statistics"] == "Cubic metres"]["value"].values[0] / 1000 injections_ggj = injections[injections["statistics"] == "Gigajoules"]["value"].values[0] / 1000 withdrawals_m3 = withdrawals[withdrawals["statistics"] == "Cubic metres"]["value"].values[0] / 1000 withdrawals_ggj = withdrawals[withdrawals["statistics"] == "Gigajoules"]["value"].values[0] / 1000 # Define the nodes nodes = ["Opening Inventory (Cubic metres)", "Opening Inventory (Gigajoules)", "Injections (Cubic metres)", "Injections (Gigajoules)", "Withdrawals (Cubic metres)", "Withdrawals (Gigajoules)", "Closing Inventory (Cubic metres)", "Closing Inventory (Gigajoules)"] # Define the links links = [ {"source": nodes.index("Opening Inventory (Cubic metres)"), "target": nodes.index("Closing Inventory (Cubic metres)"), "value": opening_m3 - withdrawals_m3}, {"source": nodes.index("Opening Inventory (Gigajoules)"), "target": nodes.index("Closing Inventory (Gigajoules)"), "value": opening_ggj - withdrawals_ggj}, {"source": nodes.index("Opening Inventory (Cubic metres)"), "target": nodes.index("Withdrawals (Cubic metres)"), "value": withdrawals_m3}, {"source": nodes.index("Opening Inventory (Gigajoules)"), "target": nodes.index("Withdrawals (Gigajoules)"), "value": withdrawals_ggj}, {"source": nodes.index("Injections (Cubic metres)"), "target": nodes.index("Closing Inventory (Cubic metres)"), "value": injections_m3}, {"source": nodes.index("Injections (Gigajoules)"), "target": nodes.index("Closing Inventory (Gigajoules)"), "value": injections_ggj}, ] # Create the Sankey diagram fig = go.Figure(data=[go.Sankey( node=dict( pad=15, thickness=20, line=dict(color="black", width=0.5), label=nodes, color="blue" ), link=dict( source=[link["source"] for link in links], target=[link["target"] for link in links], value=[link["value"] for link in links], ))]) # Add a title fig.update_layout(title_text="Flow of Natural Gas from Opening Inventory to Closing Inventory", font_size=10) # Show the plot fig.show()

# Filter the data for opening inventory, closing inventory, and inventory change opening_inventory = data[data["storage"] == "Opening inventory"] closing_inventory = data[data["storage"] == "Closing inventory"] inventory_change = data[data["storage"] == "Inventory change"] # Extract the values for m3 and ggj opening_m3 = opening_inventory[opening_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000 opening_ggj = opening_inventory[opening_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000 closing_m3 = closing_inventory[closing_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000 closing_ggj = closing_inventory[closing_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000 change_m3 = inventory_change[inventory_change["statistics"] == "Cubic metres"]["value"].values[0] / 1000 change_ggj = inventory_change[inventory_change["statistics"] == "Gigajoules"]["value"].values[0] / 1000 # Create a DataFrame for the heatmap heatmap_data = pd.DataFrame({ "Opening Inventory": [opening_m3, opening_ggj], "Closing Inventory": [closing_m3, closing_ggj], "Inventory Change": [change_m3, change_ggj] }, index=["Cubic metres", "Gigajoules"]) # Create the heatmap plt.figure(figsize=(10, 6)) sns.heatmap(heatmap_data, annot=True, fmt=".2f", cmap="YlGnBu", cbar_kws={'label': 'Values in Thousands'}) # Add x-axis label plt.xlabel("Storage Activities") # Add y-axis label plt.ylabel("Units of Measurement") # Add a title plt.title("Storage Dynamics by Unit of Measurement") # Show the plot plt.tight_layout() plt.show()