import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
tags_filename = './2022_dailyflash_tags.txt'

# Denominator used for every percentage (the script was written for a
# one-entry-per-day file covering 2022).
DAYS_IN_YEAR = 365
# How many of the most frequent primary tags are reported.
TOP_N = 10
# How many of those get their own pie slice; the rest of the top tags share one.
INDIVIDUAL_SLICES = 5


def read_primary_tags(path):
    """Return the primary (first) tag of every non-blank line in *path*.

    Each line is expected to hold space-separated tags. Blank lines are
    skipped so they are not counted as an empty-string tag.
    """
    primary_tags = []
    with open(path, encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            primary_tags.append(stripped.split(' ', 1)[0])
    return primary_tags


def percent_of(count, total):
    """Return *count* as a percentage of *total*."""
    return (count / total) * 100


def report_lines(top_tags, total):
    """Build the text report for *top_tags*, a list of ``(tag, count)`` pairs.

    The report has three parts, in order: the ranked counts, the combined
    share of all listed tags, and the ranked per-tag percentages (each
    rounded to two decimals, relative to *total*).
    """
    lines = [
        f'{rank}. {tag} - {count} tags found'
        for rank, (tag, count) in enumerate(top_tags, start=1)
    ]
    combined = sum(count for _, count in top_tags)
    lines.append(f'Top Ten % - {round(percent_of(combined, total), 2)}%')
    lines.extend(
        f'{rank}. {tag} - {round(percent_of(count, total), 2)}%'
        for rank, (tag, count) in enumerate(top_tags, start=1)
    )
    return lines


def build_pie_slices(top_tags, total, individual=INDIVIDUAL_SLICES):
    """Return ``(sizes, labels)`` for the pie chart.

    The first *individual* tags each get their own slice, the remaining
    entries of *top_tags* are merged into one slice, and a final
    'other tags' slice covers whatever is left of 100%. Labels are derived
    from *top_tags* so they always match the slice they annotate.
    """
    pcts = [percent_of(count, total) for _, count in top_tags]

    sizes = pcts[:individual]
    labels = [
        f'{tag} (#{rank})'
        for rank, (tag, _) in enumerate(top_tags[:individual], start=1)
    ]

    grouped = top_tags[individual:]
    if grouped:
        first_rank = individual + 1
        last_rank = len(top_tags)
        if last_rank > first_rank:
            rank_range = f'#{first_rank}-{last_rank}'
        else:
            rank_range = f'#{first_rank}'
        sizes.append(sum(pcts[individual:]))
        labels.append(
            ',\n'.join(tag for tag, _ in grouped) + f'\n({rank_range})'
        )

    # Use the unrounded share so the slices add up to exactly 100%.
    combined = sum(count for _, count in top_tags)
    sizes.append(100 - percent_of(combined, total))
    labels.append('other tags')
    return sizes, labels


def plot_breakdown(sizes, labels):
    """Draw the pie chart for *sizes* / *labels* and show it."""
    colors = plt.get_cmap('Paired')(np.linspace(0.1, 0.8, len(sizes)))

    _fig, ax = plt.subplots(figsize=(7, 7))
    ax.set_title('2022 Tag Breakdown')
    patches, texts, pcts = ax.pie(
        sizes, colors=colors, labels=labels,
        autopct='%.1f%%', pctdistance=.83,
        labeldistance=1.1,
        wedgeprops={"linewidth": 3, "edgecolor": "white"}, frame=False)

    # Colour each outer label like its wedge; percentages sit on the wedge.
    for patch, text in zip(patches, texts):
        text.set_color(patch.get_facecolor())
    plt.setp(texts, fontweight=600)
    plt.setp(pcts, color='white', fontweight=600)
    plt.show()


def main():
    """Read the tags file, print the top-tag report and show the pie chart."""
    primary_tags = read_primary_tags(tags_filename)
    top_tags = Counter(primary_tags).most_common(TOP_N)

    for line in report_lines(top_tags, DAYS_IN_YEAR):
        print(line)

    sizes, labels = build_pie_slices(top_tags, DAYS_IN_YEAR)
    plot_breakdown(sizes, labels)


if __name__ == '__main__':
    main()