import pandas as pd
import plotly.express as px
# Load crowdfunding.csv (path relative to the working directory) into a DataFrame
df = pd.read_csv('crowdfunding.csv')
# Preview the first rows to check that the file was parsed as expected
df.head()
category (object)
device (object)
0
Fashion
iOS
1
Sports
android
2
Technology
android
3
Technology
iOS
4
Sports
android
# Print a summary of the frame: index range, columns, non-null counts, dtypes and memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20658 entries, 0 to 20657
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 category 20658 non-null object
1 device 20658 non-null object
2 gender 20658 non-null object
3 age 20658 non-null object
4 amount 20658 non-null float64
dtypes: float64(1), object(4)
memory usage: 807.1+ KB
# How many of the highest-grossing categories to include in the plot
n = 3
# Total amount donated per category, then keep only the n largest totals
# (nlargest returns them ordered from highest to lowest total)
category_totals = df.groupby('category', as_index=False)[['amount']].sum()
cat_data = category_totals.nlargest(n, 'amount')
# Plain list of the selected category names, reused further down
cat_list = cat_data['category'].values.tolist()
# Keep only donations made to the selected categories; rows are ordered by
# age bracket so the x axis of the chart comes out sorted
in_top_categories = df['category'].isin(cat_list)
data = df[in_top_categories].sort_values('age')
# Visualization: one facet per category, bars grouped by device,
# 'amount' aggregated per age bracket
fig = px.histogram(
    data,
    x='age',
    y='amount',
    color='device',
    barmode='group',
    facet_col='category',
    title='Top {} categories by donation amount and device'.format(n),
)
fig.show()
# Print the total amount donated to each of the selected top categories
for category_name in cat_list:
    # All donations made to this category
    category_rows = df.loc[df['category'] == category_name]
    category_total = category_rows['amount'].sum()
    print(category_name, ': ', '€', category_total)
Games : € 165483.0
Sports : € 163528.0
Technology : € 162731.0
# Total amount donated per device; the double brackets keep the result a
# one-column DataFrame indexed by device
amount_by_device = df.groupby('device')[['amount']]
device_totals = amount_by_device.sum()
device_totals
amount (float64)
android
283545.0
iOS
530525.0
# Share of the overall donated amount that came from the '18-24' age bracket
total_sum = df.amount.sum()
young_sum = df[df['age'] == '18-24'].amount.sum()
# Fraction (0-1) of the total amount donated by the 18-24 bracket
x = young_sum / total_sum
print('total sum = €{} (thousand)'.format(total_sum / 1000))
# Scale to percent first and round last: rounding the fraction and then
# multiplying by 100 can reintroduce binary floating-point error in the
# printed value (e.g. 0.57 * 100 -> 56.99999999999999). One decimal of a
# percentage is the same precision as three decimals of the fraction.
print('percent of total donation: {}%'.format(round(x * 100, 1)))
total sum = €814.07 (thousand)
percent of total donation: 50.5%