import altair as alt
import pandas as pd
# Load the semicolon-separated party data set.
df = pd.read_csv('descriptive-representation-2021.csv', sep=';')

# Columns whose names start with 'percentage', plus 'lrgen', are cleaned as
# text below: strip a trailing '%' and swap ',' for '.' so the values can be
# cast to float.
percentage_cols = [name for name in df.columns if name.startswith('percentage')]
for column in [*percentage_cols, 'lrgen']:
    as_text = df[column].str.rstrip('%')
    as_text = as_text.str.replace(',', '.')
    df[column] = as_text.astype('float')

# Print the column dtypes and non-null counts to confirm the conversion.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 political_party 17 non-null object
1 total_candidates 17 non-null int64
2 number_minorities_list 17 non-null int64
3 percentage_minorities_list 17 non-null float64
4 number_safe_list_positions 17 non-null int64
5 number_minorities_safe_list 17 non-null int64
6 percentage_minorities_safe_list 17 non-null float64
7 percentage_safe_list_is_minority 17 non-null float64
8 percentage_minorities_mp 17 non-null float64
9 number_females 17 non-null int64
10 percentage_females 17 non-null float64
11 included 17 non-null int64
12 lrgen 13 non-null float64
dtypes: float64(6), int64(6), object(1)
memory usage: 1.9+ KB
# Scatter plot of the rows where `included == 1`: one circle per row, with
# x = percentage_minorities_list and y = percentage_females (both axes fixed
# to 0-100), circle area driven by total_candidates and colour by lrgen on a
# red-to-blue scale over [0, 10].
# NOTE(review): lrgen is presumably a left-right placement score - confirm.
scatter = alt.Chart(df[df.included == 1]).mark_circle().encode(
    x=alt.X('percentage_minorities_list:Q', scale=alt.Scale(domain=[0, 100])),
    # Use the Y channel class for the y encoding (the original passed alt.X).
    y=alt.Y('percentage_females:Q', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color('lrgen:Q', scale=alt.Scale(domain=[0, 10], range=['red', 'blue'])),
    size='total_candidates:Q',
    tooltip=['political_party', 'total_candidates', 'number_minorities_list', 'number_females', 'lrgen'],
)
# Text layer derived from the scatter chart: same data and x/y/colour
# encodings, drawn as left-aligned text shifted 10px to the right of each point.
label_marks = scatter.mark_text(dx=10, baseline='middle', align='left')

# The party name is the label text. The size channel is re-bound to
# `included`, replacing the `total_candidates` size encoding inherited from
# the scatter chart.
# NOTE(review): presumably done so every label gets the same font size
# (the data is filtered to included == 1) - confirm.
label = label_marks.encode(size='included', text='political_party')
def _reference_rule(channel, value, **mark_options):
    """Return a dashed rule drawn at `value` on the given channel ('x' or 'y')."""
    frame = pd.DataFrame({channel: [value]})
    return (
        alt.Chart(frame)
        .mark_rule(strokeDash=[2, 2], **mark_options)
        .encode(**{channel: channel})
    )


# Dashed reference lines in the default rule colour.
# NOTE(review): the `_NL` names suggest national population averages
# (50% female, 24% minorities) - confirm the source of these constants.
average_females_NL = _reference_rule('y', 50)
average_minorities_NL = _reference_rule('x', 24)

# Dashed reference lines in orange.
# NOTE(review): the `_parties` names suggest averages across the parties
# (37% female, 17% minorities) - confirm the source of these constants.
average_females_parties = _reference_rule('y', 37, color='orange')
average_minorities_parties = _reference_rule('x', 17, color='orange')

# Layer everything into one chart; as the last expression this is what a
# notebook cell would display.
(
    scatter
    + label
    + average_females_NL
    + average_minorities_NL
    + average_females_parties
    + average_minorities_parties
)