import pandas as pd
plans = pd.read_csv('plans.csv',index_col=False)
plan_idint64
plan_nameobject
0
0
trial
1
1
basic monthly
2
2
pro monthly
3
3
pro annual
4
4
churn
SELECT *
FROM 'subscriptions.csv'
customer_idint64
1 - 1000
plan_idint64
0 - 4
0
1
0
1
1
1
2
2
0
3
2
3
4
3
0
5
3
1
6
4
0
7
4
1
8
4
4
9
5
0
DROP TABLE IF EXISTS subscriptions_demo
CREATE TABLE subscriptions_demo (
customer_id INTEGER,
plan_id INTEGER,
start_date DATE
);
INSERT INTO subscriptions_demo
(customer_id, plan_id, start_date)
VALUES
('1', '0', '2020-08-01'),
('1', '1', '2020-08-08'),
('2', '0', '2020-09-20'),
('2', '3', '2020-09-27'),
('11','0', '2020-11-19'),
('11','4' ,'2020-11-26'),
('13','0', '2020-12-15'),
('13','1' ,'2020-12-22'),
('13', '2' ,'2021-03-29'),
('15', '0' ,'2020-03-17'),
('15', '2' ,'2020-03-24'),
('15', '4' ,'2020-04-29'),
('16', '0' ,'2020-05-31'),
('16', '1' ,'2020-06-07'),
('16', '3' ,'2020-10-21'),
('18', '0' ,'2020-07-06'),
('18', '2' ,'2020-07-13'),
('19', '0' ,'2020-06-22'),
('19', '2' ,'2020-06-29'),
('19', '3', '2020-08-29');
SELECT s.customer_id,
p.plan_name,
s.start_date,
DATEDIFF(day, LAG(start_date) OVER (PARTITION BY customer_id ORDER BY start_date),start_date ) AS days_diff,
DATEDIFF(month,LAG(start_date) OVER (PARTITION BY customer_id ORDER BY start_date),start_date) as months_diff
FROM subscriptions_demo AS s
JOIN plans AS p
ON s.plan_id = p.plan_id
;
SELECT COUNT( DISTINCT customer_id) AS Number_Customer
FROM subscriptions;
Number_Customerint64
0
1000
SELECT DATEPART(month, start_date) as month ,
DATEPART(year, start_date) as year,
COUNT(customer_id) as number_of_trial
FROM subscriptions s
LEFT JOIN plans p ON s.plan_id =p.plan_id
WHERE plan_name = 'trial'
GROUP BY DATEPART(month, start_date),
DATEPART(year, start_date)
SELECT s.plan_id,
plan_name ,
COUNT(s.plan_id) AS number_of_trial
FROM subscriptions s
LEFT JOIN plans p ON s.plan_id = p.plan_id
WHERE DATEPART(year,start_date) > 2020
GROUP BY s.plan_id,plan_name;
SELECT
COUNT(DISTINCT customer_id) AS total_customers,
SUM(CASE WHEN plan_id = 4 THEN 1 ELSE NULL END) AS churned_customers,
ROUND(CAST(SUM(CASE WHEN plan_id = 4 THEN 1 ELSE NULL END) AS decimal(5,1)) /
CAST(COUNT(DISTINCT customer_id) AS Decimal(5,1)) * 100,1) AS pct_churn
FROM subscriptions;
total_customersint64
churned_customersint64
0
1000
307
WITH CTE AS(SELECT *,
LEAD(plan_id,1) OVER( PARTITION BY customer_id ORDER BY plan_id) As next_plan
FROM subscriptions
)
SELECT plan_name ,
COUNT(next_plan) as number_churn,
CAST(count(next_plan) AS FLOAT) * 100 / (select count(distinct customer_id) from subscriptions) as perc_straight_churn
FROM CTE c
LEFT JOIN plans p ON c.next_plan = p.plan_id
WHERE next_plan = 4 and c.plan_id = 0
GROUP BY plan_name;
plan_nameobject
number_churnint64
0
churn
92
WITH CTE AS(SELECT *,
LEAD(plan_id,1) OVER( PARTITION BY customer_id ORDER BY plan_id) As next_plan
FROM subscriptions)
SELECT plan_name, count(*) as num_plan, Cast(count(next_plan) as float) * 100 / (select count(distinct customer_id) from subscriptions) as perc_next_plan
FROM CTE c
LEFT JOIN plans p ON c.next_plan = p.plan_id
WHERE c.plan_id = 0 and next_plan is not NULL
GROUP BY plan_name,next_plan;
plan_nameobject
num_planint64
0
basic monthly
546
1
pro monthly
325
2
pro annual
37
3
churn
92
WITH CTE AS(SELECT *,
LEAD(start_date,1) OVER( PARTITION BY customer_id ORDER BY plan_id) As next_date
FROM subscriptions
WHERE start_date <= '2020-12-31')
SELECT C.plan_id,plan_name,
count(C.plan_id) AS customer_count,
(CAST(count(C.plan_id) AS Float) *100 / (select count(distinct customer_id) FROM subscriptions) ) as Percentage_customer
FROM CTE c
LEFT JOIN plans P ON C.plan_id= P.plan_id
WHERE next_date is NULL or next_date >'2020-12-31'
GROUP BY C.plan_id,plan_name
ORDER BY plan_id
SELECT plan_name,
COUNT(s.plan_id) as number_annual_plan
FROM subscriptions s
INNER JOIN plans p ON s.plan_id = p.plan_id
WHERE plan_name = 'pro annual' and start_date <='2020-12-31'
GROUP BY plan_name;
plan_nameobject
number_annual_planint64
0
pro annual
195
WITH START_CTE AS (SELECT customer_id,
start_date
FROM subscriptions s
INNER JOIN plans p ON s.plan_id = p.plan_id
WHERE plan_name = 'trial' ),
ANNUAL_CTE AS (SELECT customer_id,
start_date as start_annual
FROM subscriptions s
INNER JOIN plans p ON s.plan_id = p.plan_id
WHERE plan_name = 'pro annual' )
SELECT Avg(DATEDIFF(day,start_date,start_annual)) as average_day
FROM ANNUAL_CTE C2
LEFT JOIN START_CTE C1 ON C2.customer_id =C1.customer_id;
WITH START_CTE AS (
SELECT customer_id,
start_date
FROM subscriptions s
INNER JOIN plans p ON s.plan_id = p.plan_id
WHERE plan_name = 'trial' ),
ANNUAL_CTE AS ( SELECT customer_id,
start_date as start_annual
FROM subscriptions s
INNER JOIN plans p ON s.plan_id = p.plan_id
WHERE plan_name = 'pro annual' ),
DIFF_DAY_CTE AS ( SELECT DATEDIFF(day,start_date,start_annual) as diff_day
FROM ANNUAL_CTE C2
LEFT JOIN START_CTE C1 ON C2.customer_id =C1.customer_id),
GROUP_DAY_CTE AS ( SELECT*, FLOOR(diff_day/30) as group_day
FROM DIFF_DAY_CTE)
SELECT CONCAT((group_day *30) +1 , '-',(group_day +1)*30, ' days') as days,
COUNT(group_day) as number_days
FROM GROUP_DAY_CTE
GROUP BY group_day;
WITH CTE AS(SELECT *,
LEAD(plan_id,1) OVER( PARTITION BY customer_id ORDER BY plan_id) As next_plan
FROM subscriptions
WHERE start_date <= '2020-12-31')
SELECT COUNT(*) as num_downgrade
FROM CTE
WHERE next_plan = 1 and plan_id = 2;
num_downgradeint64
0
0