SSL Pulse report

import pandas_redshift as pr import pandas as pd import os # analysis timeframe # data prior 01/16 contained a bug, so only counting 2 weeks of January # and multiplying figures x2 when publishing to the website start_date = '2021-02-01' end_date = '2021-11-30' pr.connect_to_redshift(dbname = os.environ['PG_DATABASE'], host = os.environ['PG_HOST'], port = 5432, user = os.environ['PG_USER'], password=os.environ['PG_PASSWORD']) total_expiring_domains = pr.redshift_to_pandas(f""" SELECT count(distinct(domain_id)) as count FROM scan_results WHERE type='ssl' and response->>'not_after' between '{start_date}' and '{end_date}' """)['count'][0] print(f'Domains with SSL to be renewed: {total_expiring_domains}')

# "expirations" are detected and added to the DB 1 week ahead of the actual SSL expiration time: expirations = pr.redshift_to_pandas(f""" SELECT state, count(*) AS "count" FROM expirations WHERE expired_at BETWEEN '{start_date}' and '{end_date}' GROUP BY state """) # but there were domains where SSL was renewed more than a week in advance - adding them as well renewed_in_advance = total_expiring_domains - expirations['count'].sum() df = expirations.append(pd.DataFrame([["renewed_in_advance", renewed_in_advance]], columns=["state", "count"])).set_index("state") plot = df.plot.pie(y='count', figsize=(8, 8), labeldistance=None)

# we only care about the domains where SSL expiration was noticed and fixed fixed = df.loc['fixed']['count'] print("SSL expired then got fixed on {:} domains ({:.1%} of {:} domains)".format(fixed, fixed / total_expiring_domains, total_expiring_domains))

SELECT sum(duration_sec::double precision * domains.daily_uniques / 3600 / 24) :: bigint as "affected_users" FROM expirations LEFT JOIN domains ON domain_id = domains.id WHERE expired_at BETWEEN '2021-02-01' and '2021-11-30'

data = pr.redshift_to_pandas(f""" SELECT alexa_rank, (duration_sec / 3600) as hours FROM expirations JOIN domains ON domain_id = domains.id WHERE state = 'fixed' and expired_at between '{start_date}' and '{end_date}' """) data.plot.scatter(x='hours', y='alexa_rank')

more_than_hour_percentage = data[data['hours'] > 1].count() / data.count() print("Around {:.0%} websites were down for > 1 hour".format(more_than_hour_percentage[0])) more_than_day_percentage = data[data['hours'] > 24].count() / data.count() print("Around {:.0%} websites were down for > 24 hours".format(more_than_day_percentage[0]))

days = pr.redshift_to_pandas(f""" SELECT day, count(1) as cnt FROM( SELECT CASE WHEN extract(isodow from expired_at) < 5 THEN 'Mon-Thu' ELSE 'Fri-Sun' END as day FROM expirations JOIN domains ON domain_id = domains.id WHERE state = 'fixed' and expired_at between '{start_date}' and '{end_date}' AND duration_sec > 10 * 3600 ) days GROUP BY day; """) days.set_index('day').plot.pie(y='cnt', figsize=(8, 8), labeldistance=None)

data['alexa_rank'].plot.hist(bins=25)

data['alexa_rank'].where(lambda r : r < 50000).dropna().plot.hist(bins=10)