# Import necessasry libraries
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
# Suppress warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Set the default Seaborn style
sns.set_theme(style="whitegrid", font="serif")
# Load the dataset
data = pd.read_csv("/kaggle/input/natural-gas-storage-inventory-in-canada/natural_gas_storage_inventory_2016_2024.csv")
# Preview the dataset, first 5 rows
data.head()
data.dtypes
data.shape
data.describe().T
data.isnull().sum()
data = data.dropna(axis=1)
# Preview data after dropping NULL values
data.isnull().sum()
# Rename all columns to be in lowercase
data.columns = [col.lower() for col in data.columns]
# Preview the renamed columns
data.columns.tolist()
# Drop the specified columns
data = data.drop(columns=["geo", "dguid"])
# Preview the remaining columns
data.columns.tolist()
# Transform the 'ref_date' column into datetime format
data["ref_date"] = pd.to_datetime(data["ref_date"])
# Preview the data types to confirm the transformation
data.head()
# Filter data
opening = data[data["storage"] == "Opening inventory"]
closing = data[data["storage"] == "Closing inventory"]
# Extract the value for cubic meters and gigajoules
opening_m3 = opening[opening["statistics"] == "Cubic metres"]["value"].values[0] / 1000
opening_ggj = opening[opening["statistics"] == "Gigajoules"]["value"].values[0] / 1000
closing_m3 = closing[closing["statistics"] == "Cubic metres"]["value"].values[0] / 1000
closing_ggj = closing[closing["statistics"] == "Gigajoules"]["value"].values[0] / 1000
# Categories and values
cat = ["Opening Inventory", "Closing Inventory"]
m3_values = [opening_m3, closing_m3]
ggj_values = [opening_ggj, closing_ggj]
# Bars width
bar_width = 0.35
# Position of the bars on the x-axis
r1 = np.arange(len(cat))
r2 = [x + bar_width for x in r1]
# Create grouped bar chart
plt.figure(figsize=(10, 6))
bars1 = plt.bar(r1, m3_values, color="skyblue", width=bar_width, edgecolor="grey", label="Cubic Metres")
bars2 = plt.bar(r2, ggj_values, color="salmon", width=bar_width, edgecolor="grey", label="Gigajoules")
# Add labels
plt.xlabel("Inventory Type")
plt.ylabel("Values in Thousands")
# Add xticks
plt.xticks([r + bar_width/2 for r in range(len(cat))], cat)
# Add title
plt.title("Opening and Closing Inventory Levels")
# Add legend
plt.legend()
# Add grid for better readability
plt.grid(linestyle='--', alpha=0.7)
# Add text annotation at the top and center of the bars
for bar in bars1:
yval = bar.get_height()
plt.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), ha='center', va='bottom')
for bar in bars2:
yval = bar.get_height()
plt.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), ha='center', va='bottom')
# Show the plot
plt.show()
# Filter the data
inventory_change = data[data["storage"] == "Inventory change"]
# Extract the values
m3_change = inventory_change[inventory_change["statistics"] == "Cubic metres"]
ggj_change = inventory_change[inventory_change["statistics"] == "Gigajoules"]
# Create line chart
plt.figure(figsize=(20, 8))
plt.plot(m3_change["ref_date"], m3_change["value"] / 1000, marker="o", linestyle="-", color="skyblue", label="Cubic Metres")
plt.plot(ggj_change["ref_date"], ggj_change["value"] / 1000, marker="o", linestyle="-", color="salmon", label="Gigajoules")
# Add horizontal lines for the lowest and highest values
min_value = min(m3_change["value"].min(), ggj_change["value"].min()) / 1000
max_value = max(m3_change["value"].max(), ggj_change["value"].max()) / 1000
plt.axhline(y=min_value, color='skyblue', linestyle='--', label=f'Lowest Value: {min_value:.2f}')
plt.axhline(y=max_value, color='skyblue', linestyle='-', label=f'Highest Value: {max_value:.2f}')
# Add labels
plt.xlabel("Year")
plt.ylabel("Inventory Change Values in Thousands")
# Add title
plt.title("Inventory Change Over Time")
# Add legend
plt.legend()
# Add grid for better readability
plt.grid(linestyle='--', alpha=0.7)
# Rotate x-axis labels for better readability if there are many time points
plt.xticks(rotation=45)
# Show the plot
plt.tight_layout()
plt.show()
# Filter the data for inventory change
inventory_change = data[data["storage"] == "Inventory change"]
# Extract the values for m3 and ggj
m3_change = inventory_change[inventory_change["statistics"] == "Cubic metres"]["value"].values[0] / 1000
ggj_change = inventory_change[inventory_change["statistics"] == "Gigajoules"]["value"].values[0] / 1000
# Time (e.g., January 2016)
time = data["ref_date"].dt.strftime('%Y-%m').unique()
# Create the area chart
plt.figure(figsize=(30, 6))
# Plot the areas
plt.fill_between(time, [0], [m3_change], color="skyblue", alpha=0.5, label="Cubic metres")
plt.fill_between(time, [m3_change], [m3_change + ggj_change], color="salmon", alpha=0.5, label="Gigajoules")
# Add labels
plt.xlabel("Date")
plt.ylabel("Cumulative Inventory Change Values in Thousands")
# Add xticks
plt.xticks(rotation=45)
# Add a title
plt.title("Cumulative Inventory Change Over Time")
# Add a legend
plt.legend()
# Add grid for better readability
plt.grid(linestyle="--", alpha=0.7)
# Show the plot
plt.tight_layout()
plt.show()
# Filter the data
injections = data[data["storage"] == "Injections to storage"]
withdrawals = data[data["storage"] == "Withdrawals from storage"]
# Extract the values
injections_m3 = injections[injections['statistics'] == 'Cubic metres']['value'].values[0] / 1000
injections_ggj = injections[injections['statistics'] == 'Gigajoules']['value'].values[0] / 1000
withdrawals_m3 = withdrawals[withdrawals['statistics'] == 'Cubic metres']['value'].values[0] / 1000
withdrawals_ggj = withdrawals[withdrawals['statistics'] == 'Gigajoules']['value'].values[0] / 1000
# Categories and values
cat = ["Injections", "Withdrawals"]
m3_values = [injections_m3, withdrawals_m3]
ggj_values = [injections_ggj, withdrawals_ggj]
# Create stacked bar chart
fig, ax = plt.subplots(figsize=(10, 6))
# Plot the bars
ax.bar(cat, m3_values, color='skyblue', label='Cubic Metres')
ax.bar(cat, ggj_values, bottom=m3_values, color='salmon', label='Gigajoules')
# Add labels
ax.set_xlabel('Categories')
ax.set_ylabel('Values in Thousands')
# Add title
ax.set_title('Injections vs. Withdrawals')
# Add legend
ax.legend()
# Add grid for better readability
plt.grid(linestyle='--', alpha=0.7)
# Show the plot
plt.tight_layout()
plt.show()
# Calculate the absolute values of inventory change
total_injections_m3 = injections[injections["statistics"] == "Cubic metres"]["value"].sum()
total_withdrawals_m3 = withdrawals[withdrawals["statistics"] == "Cubic metres"]["value"].sum()
total_injections_ggj = injections[injections["statistics"] == "Gigajoules"]["value"].sum()
total_withdrawals_ggj = withdrawals[withdrawals["statistics"] == "Gigajoules"]["value"].sum()
# Calculate the net change
net_change_m3 = abs(total_injections_m3 - total_withdrawals_m3)
net_change_ggj = abs(total_injections_ggj - total_withdrawals_ggj)
# Data for the pie chart
labels = ["Cubic metres", "Gigajoules"]
sizes = [net_change_m3, net_change_ggj]
colors = ["skyblue", "salmon"]
# Create the pie chart
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=140)
# Add title
ax.set_title("Proportion of Inventory Change")
# Equal aspect ratio ensures that pie is drawn as a circle.
ax.axis("equal")
# Show the plot
plt.tight_layout()
plt.show()
# Filter the data for opening inventory, injections, withdrawals, and closing inventory
opening_inventory = data[data["storage"] == "Opening inventory"]
closing_inventory = data[data["storage"] == "Closing inventory"]
injections = data[data["storage"] == "Injections to storage"]
withdrawals = data[data["storage"] == "Withdrawals from storage"]
# Extract the values for m3 and ggj
opening_m3 = opening_inventory[opening_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000
opening_ggj = opening_inventory[opening_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000
closing_m3 = closing_inventory[closing_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000
closing_ggj = closing_inventory[closing_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000
injections_m3 = injections[injections["statistics"] == "Cubic metres"]["value"].values[0] / 1000
injections_ggj = injections[injections["statistics"] == "Gigajoules"]["value"].values[0] / 1000
withdrawals_m3 = withdrawals[withdrawals["statistics"] == "Cubic metres"]["value"].values[0] / 1000
withdrawals_ggj = withdrawals[withdrawals["statistics"] == "Gigajoules"]["value"].values[0] / 1000
# Define the nodes
nodes = ["Opening Inventory (Cubic metres)", "Opening Inventory (Gigajoules)", "Injections (Cubic metres)", "Injections (Gigajoules)", "Withdrawals (Cubic metres)", "Withdrawals (Gigajoules)", "Closing Inventory (Cubic metres)", "Closing Inventory (Gigajoules)"]
# Define the links
links = [
{"source": nodes.index("Opening Inventory (Cubic metres)"), "target": nodes.index("Closing Inventory (Cubic metres)"), "value": opening_m3 - withdrawals_m3},
{"source": nodes.index("Opening Inventory (Gigajoules)"), "target": nodes.index("Closing Inventory (Gigajoules)"), "value": opening_ggj - withdrawals_ggj},
{"source": nodes.index("Opening Inventory (Cubic metres)"), "target": nodes.index("Withdrawals (Cubic metres)"), "value": withdrawals_m3},
{"source": nodes.index("Opening Inventory (Gigajoules)"), "target": nodes.index("Withdrawals (Gigajoules)"), "value": withdrawals_ggj},
{"source": nodes.index("Injections (Cubic metres)"), "target": nodes.index("Closing Inventory (Cubic metres)"), "value": injections_m3},
{"source": nodes.index("Injections (Gigajoules)"), "target": nodes.index("Closing Inventory (Gigajoules)"), "value": injections_ggj},
]
# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
node=dict(
pad=15,
thickness=20,
line=dict(color="black", width=0.5),
label=nodes,
color="blue"
),
link=dict(
source=[link["source"] for link in links],
target=[link["target"] for link in links],
value=[link["value"] for link in links],
))])
# Add a title
fig.update_layout(title_text="Flow of Natural Gas from Opening Inventory to Closing Inventory", font_size=10)
# Show the plot
fig.show()
# Filter the data for opening inventory, closing inventory, and inventory change
opening_inventory = data[data["storage"] == "Opening inventory"]
closing_inventory = data[data["storage"] == "Closing inventory"]
inventory_change = data[data["storage"] == "Inventory change"]
# Extract the values for m3 and ggj
opening_m3 = opening_inventory[opening_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000
opening_ggj = opening_inventory[opening_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000
closing_m3 = closing_inventory[closing_inventory["statistics"] == "Cubic metres"]["value"].values[0] / 1000
closing_ggj = closing_inventory[closing_inventory["statistics"] == "Gigajoules"]["value"].values[0] / 1000
change_m3 = inventory_change[inventory_change["statistics"] == "Cubic metres"]["value"].values[0] / 1000
change_ggj = inventory_change[inventory_change["statistics"] == "Gigajoules"]["value"].values[0] / 1000
# Create a DataFrame for the heatmap
heatmap_data = pd.DataFrame({
"Opening Inventory": [opening_m3, opening_ggj],
"Closing Inventory": [closing_m3, closing_ggj],
"Inventory Change": [change_m3, change_ggj]
}, index=["Cubic metres", "Gigajoules"])
# Create the heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(heatmap_data, annot=True, fmt=".2f", cmap="YlGnBu", cbar_kws={'label': 'Values in Thousands'})
# Add x-axis label
plt.xlabel("Storage Activities")
# Add y-axis label
plt.ylabel("Units of Measurement")
# Add a title
plt.title("Storage Dynamics by Unit of Measurement")
# Show the plot
plt.tight_layout()
plt.show()