SQL
Saved to variable
regionsSELECT *
FROM 'regions.csv'
region_idint64
region_nameobject
0
1
Africa
1
2
America
2
3
Asia
3
4
Europe
4
5
Oceania
SQL
Saved to variable
customer_nodesSELECT *
FROM 'customer_nodes.csv'
customer_idfloat64
1.0 - 500.0
region_idfloat64
1.0 - 5.0
0
1.0
3.0
1
2.0
3.0
2
3.0
5.0
3
4.0
5.0
4
5.0
3.0
5
6.0
1.0
6
7.0
2.0
7
8.0
1.0
8
9.0
4.0
9
10.0
3.0
SQL
Saved to variable
customer_transactionsSELECT *
FROM 'Customer-Transactions.csv'
customer_idobject
690.4%
504 others99.6%
Missing0%
txn_datedatetime64[ns]
2020-01-01 00:00:00 - 2020-04-28 00:00:00
0
429
2020-01-21 00:00:00
1
155
2020-01-10 00:00:00
2
398
2020-01-01 00:00:00
3
255
2020-01-14 00:00:00
4
185
2020-01-29 00:00:00
5
309
2020-01-13 00:00:00
6
312
2020-01-20 00:00:00
7
376
2020-01-03 00:00:00
8
188
2020-01-13 00:00:00
9
138
2020-01-11 00:00:00
SQL
Saved to variable
df_3SELECT COUNT( DISTINCT node_id) as unique_nodes
FROM customer_nodes
unique_nodesint64
0
5
SQL
Saved to variable
df_5SELECT c.region_id,
region_name,
count(node_id) as total_nodes
FROM customer_nodes c
JOIN regions r ON c.region_id = r.region_id
GROUP BY c.region_id,region_name
ORDER BY c.region_id
region_idfloat64
region_nameobject
0
1.0
Africa
1
2.0
America
2
3.0
Asia
3
4.0
Europe
4
5.0
Oceania
SQL
Saved to variable
df_6SELECT c.region_id,
region_name,
COUNT(distinct customer_id) as total_customers
FROM customer_nodes c
JOIN regions r ON c.region_id = r.region_id
GROUP BY c.region_id,
region_name
ORDER BY c.region_id
region_idfloat64
region_nameobject
0
1.0
Africa
1
2.0
America
2
3.0
Asia
3
4.0
Europe
4
5.0
Oceania
SQL
Saved to variable
df_7SELECT AVG(DATEDIFF(day,start_date,end_date))
FROM customer_nodes
WHERE end_date != '9999-12-31';
SQL
Saved to variable
df_8WITH CTE AS (SELECT region_id,
DATEDIFF(day,start_date,end_date) as allocation_days
FROM customer_nodes
WHERE end_date != '9999-12-31'
)
SELECT distinct region_id ,
PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY allocation_days) OVER (PARTITION BY region_id) AS median,
PERCENTILE_DISC(0.8) WITHIN GROUP (ORDER BY allocation_days) OVER (PARTITION BY region_id) AS 80th_percentile,
PERCENTILE_DISC(0.95) WITHIN GROUP (ORDER BY allocation_days) OVER (PARTITION BY region_id) AS 95TH_percentile
FROM CTE
SQL
Saved to variable
df_9SELECT txn_type,
COUNT(*) as total_transaction,
SUM(txn_amount) as total_amount
FROM customer_transactions
GROUP BY txn_type;
txn_typeobject
total_transactionint64
0
deposit
2671
1
withdrawal
1580
2
purchase
1617
3
None
7
SQL
Saved to variable
df_10WITH DEPOSIT_CTE AS (
SELECT customer_id,
COUNT(customer_id) as time_deposit,
AVG(txn_amount) as amount_deposit
FROM customer_transactions
WHERE txn_type = 'deposit'
GROUP BY customer_id
)
SELECT AVG(time_deposit) AS avg_count,
AVG(amount_deposit) AS avg_amount
FROM DEPOSIT_CTE;
avg_countfloat64
avg_amountfloat64
0
5.342
508.61278209568235
SQL
Saved to variable
df_11WITH CTE AS (
SELECT customer_id,
datepart(month,txn_date) as month,
SUM(CASE WHEN txn_type ='deposit' then 1 else 0 end) as deposit_time,
SUM(CASE WHEN txn_type ='purchase' then 1 else 0 end) as purchase_time,
SUM(CASE WHEN txn_type ='withdrawal' then 1 else 0 end) as withdrawal_time
FROM customer_transactions
GROUP BY customer_id,datepart(month,txn_date))
SELECT month, count(*)
FROM CTE
WHERE deposit_time > 1 and (purchase_time =1 or withdrawal_time =1)
GROUP BY month;
SQL
Saved to variable
df_12WITH CTE as (
SELECT customer_id,
DATEPART(MONTH,txn_date) as month,
SUM(CASE WHEN txn_type ='deposit' then txn_amount else 0 end) as deposit,
SUM(CASE WHEN txn_type ='purchase' then - txn_amount else 0 end) as purchase ,
SUM(CASE WHEN txn_type ='withdrawal' then - txn_amount else 0 end) as withdrawal
from customer_transactions
GROUP BY customer_id,DATEPART(MONTH,txn_date)
),
CTE_2 AS (
SELECT customer_id,
month,(deposit +purchase +withdrawal) as total
from CTE)
SELECT customer_id,
month,
SUM(total) OVER (PARTITION BY customer_id ORDER BY customer_id,month ROWS BETWEEN UNBOUNDED PRECEDING AND current ROW) AS balance,
total AS change_in_balance
FROM CTE_2;
SQL
Saved to variable
df_13WITH CTE as (
SELECT customer_id,
DATEPART(MONTH,txn_date) as month,
SUM(CASE WHEN txn_type ='deposit' then txn_amount else 0 end) as deposit,
SUM(CASE WHEN txn_type ='purchase' then - txn_amount else 0 end) as purchase ,
SUM(CASE WHEN txn_type ='withdrawal' then - txn_amount else 0 end) as withdrawal
from customer_transactions
GROUP BY customer_id,DATEPART(MONTH,txn_date)
),
CTE_2 AS (
SELECT customer_id,
month,(deposit +purchase +withdrawal) as total
from CTE),
CTE_3 AS (
SELECT customer_id,
month,
SUM(total) OVER (PARTITION BY customer_id ORDER BY customer_id,month ROWS BETWEEN UNBOUNDED PRECEDING AND current ROW) AS balance,total AS change_in_balance
FROM CTE_2),
CTE_4 AS ( SELECT distinct customer_id ,
first_value(balance) over (partition by customer_id order by customer_id) as start_balance,
last_value(balance) over (partition by customer_id order by customer_id) as end_balance
FROM cte_3 ),
CTE_5 AS (
SELECT *,
((end_balance - start_balance) * 100 / start_balance) as growing_rate
FROM CTE_4
WHERE ((end_balance - start_balance) * 100 / start_balance) >= 5 AND end_balance >start_balance)
SELECT CAST(COUNT (customer_id) AS FLOAT) * 100 / (SELECT COUNT (DISTINCT customer_id) from customer_transactions) as Percent_Customer
FROM CTE_5