import pandas as pd
import altair as alt
pd.to_datetime("today")
pd.to_datetime('2020-05-07 3pm')
pd.to_datetime('12.6.1929')
pd.to_datetime('12.6.1929', dayfirst=True)
sessions=["2.4.2020", "9.4.2020", "16.4.2020", "23.4.2020", "7.5.2020", "14.5.2020", "28.5.2020", "4.6.2020", "11.6.2020", "25.6.2020", "2.7.2020", "9.7.2020"]
pd.to_datetime(sessions, dayfirst=True)
pd.to_datetime('2020-05-07', format="%Y-%m-%d")
# pd.to_datetime('8.5.1945 23:01', format="")
# data downloaded from OPSD - see how to filter elements on this page:
# https://data.open-power-system-data.org/time_series/2020-10-06
url = "https://data.open-power-system-data.org/index.php?package=time_series&version=2020-10-06&action=customDownload&resource=3&filter%5BRegion%5D%5B%5D=DE&filter%5BVariable%5D%5B%5D=load_actual_entsoe_transparency&filter%5BVariable%5D%5B%5D=solar_generation_actual&filter%5BVariable%5D%5B%5D=wind_generation_actual&downloadCSV=Download+CSV"
opsd = pd.read_csv(url, parse_dates=['utc_timestamp']) # parse timestamp column
opsd = opsd.drop(columns=["cet_cest_timestamp"])
opsd = opsd.rename({
'utc_timestamp': 'datetime',
'DE_load_actual_entsoe_transparency': 'load',
'DE_solar_generation_actual': 'solar',
'DE_wind_generation_actual': 'wind'
}, axis=1)
opsd["datetime"] = opsd["datetime"].dt.tz_convert("Europe/Berlin")
opsd
start_date = pd.to_datetime('2015-01-01').tz_localize('Europe/Berlin')
end_date = pd.to_datetime('2019-12-31').tz_localize('Europe/Berlin')
opsd = opsd[(opsd['datetime'] >= start_date) & (opsd['datetime'] <= end_date)]
opsd
opsd.sample(10)
opsd.info()
opsd = opsd.set_index("datetime")
opsd.info()
opsd.index.year.unique()
opsd.loc["2017-06-21"]
opsd.loc["2017-06"]
opsd.loc["2017-10-04":"2017-10-06"]
sums = opsd.resample("Y").sum()
sums
# first we create the sums per week
weekly_sums = opsd.resample("W").sum()
# then we generate the weekly means for each quarter
quarterly_means = weekly_sums.resample("Q").mean()
# for readability we'll revert the values back to integers
quarterly_means.astype(int)
presidents = pd.read_csv("us_presidents.csv")
presidents
presidents['start'] = pd.to_datetime(presidents['start'], format="%Y")
presidents['end'] = pd.to_datetime(presidents['end'], format="%Y")
# we add height as a parameter to make the time spans appear less clunky
bars = alt.Chart(presidents).mark_bar(height=5).encode(
# we use two columns for the x-positions where the bar starts and ends
x='start',
x2='end',
# we sort by start dates and hide the axis, as names will be added below
y=alt.Y('name').sort('x').axis(None),
color="party"
)
# the labels are added to the chart using the mark_text command
# no need to specify x and y as this is re-used from the bar chart
labels = bars.mark_text(align='right', dx=-5).encode(
text='name'
)
# both charts integrated into a layered chart through the magic of a plus sign
bars + labels
days = opsd.resample("D").sum()
opsd.reset_index().melt("datetime")
alt.data_transformers.disable_max_rows()
alt.Chart(days.reset_index().melt("datetime")).mark_circle().encode(
x='datetime',
y='value',
color='variable',
).properties(width=700, height=400)
alt.Chart(days.reset_index().melt("datetime")).mark_line(strokeWidth=1).encode(
x='datetime',
y='value',
color='variable',
).properties(width=700, height=400)
# we reduce the granularity to months and sum up the values
months = opsd.resample("M").sum()
# to make the line a bit smoother, we include an interpolate parameter
alt.Chart(months.reset_index().melt("datetime")).mark_line(opacity=0.75, interpolate="basis").encode(
x='datetime',
y='value',
color='variable',
).properties(width=700, height=400)
days = opsd.resample("D").sum()
months = days.resample("M").mean()
chart1 = alt.Chart(days.reset_index().melt("datetime")).mark_line(strokeWidth=1, opacity=0.25).encode(
x='datetime',
y='value',
color='variable',
).properties(width=700, height=400) # not necessary to repeat below (the 2nd chart uses this)
chart2 = alt.Chart(months.reset_index().melt("datetime")).mark_line(interpolate="basis", opacity=1).encode(
x='datetime',
y='value',
color='variable',
)
chart1 + chart2
rolling = days.rolling(60, center=True, win_type="triang").mean()
chart1 = alt.Chart(rolling.reset_index().melt("datetime")).mark_line(strokeWidth=1.5, opacity=1).encode(
x='datetime', y='value', color='variable',
).properties(width=700, height=400)
# same as the two charts in previous code cell, except more transparent
chart2 = alt.Chart(days.reset_index().melt("datetime")).mark_line(strokeWidth=1, opacity=0.1).encode(
x='datetime', y='value', color='variable',
)
chart3 = alt.Chart(months.reset_index().melt("datetime")).mark_line(interpolate="basis", opacity=.25).encode(
x='datetime', y='value', color='variable',
)
chart1 + chart2 + chart3
burglind = opsd.loc["2018-01-01":"2018-01-07"]
alt.Chart(burglind.reset_index().melt("datetime")).mark_line().encode(
x='datetime',
y='value',
color='variable'
).properties(width=700, height=400)
years = opsd.resample("Y").sum()
years["renewable"] = years["solar"]+years["wind"]
renewable = years.drop(columns=["load", "solar", "wind"])
renewable["year"] = years.index.year
renewable
renewable = renewable.reset_index(drop=True)
renewable
alt.Chart(renewable).mark_bar(width=30, fill="lightgreen").encode(
x='year:O', # O ~ treat the years as ordinal data
y='renewable:Q' # Q ~ treat renewable as quantitative data
).properties(width=300, height=300)
# remove solar and wind columns
weekdays = opsd.drop(columns=["solar", "wind"])
# add a column for weekdays
weekdays["weekday"] = weekdays.index.weekday
weekdays = weekdays.groupby("weekday").mean()
alt.Chart(weekdays.reset_index()).mark_bar(width=20).encode(
x='weekday:O', # O ~ treat weekday as ordinal data
y='load:Q' # Q ~ treat load as quantitative data
).properties(width=300, height=300)
weeks = opsd.resample("W").sum()
days = opsd.resample("D").sum()
brush = alt.selection_interval(encodings=['x'])
upper = alt.Chart(weeks.reset_index().melt("datetime")).mark_area(interpolate="basis").encode(
x = alt.X('datetime:T', axis=None),
y = alt.Y('value:Q', axis=None),
color='variable'
).properties(width=700, height=50).add_params(brush)
lower = alt.Chart(days.reset_index().melt("datetime")).mark_line(strokeWidth=1).encode(
x = alt.X('datetime:T', scale=alt.Scale(domain=brush)),
y='value',
color='variable',
).properties(width=700, height=300)
upper & lower