from csv import reader
import datetime as dt
#opened_file=open("HN_posts_year_to_Sep_26_2016.csv")
# Read the whole CSV into memory as a list of rows (each row is a list of
# strings). The `with` block closes the file handle as soon as the rows have
# been read; `newline=""` is what the csv module asks for so that newlines
# embedded in quoted fields are handled by the parser.
# NOTE(review): the text encoding is left at the platform default, as before;
# pass `encoding=...` explicitly if the file's encoding is known.
with open("hacker_news.csv", newline="") as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)
# Preview the first four rows (the column-name row plus three posts).
print(hn[0:4])
[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20']]
# Detach the column-name row from the front of the data; what remains in `hn`
# is post rows only.
headers = hn.pop(0)
print(headers)
['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
# Preview the first four rows of `hn` now that the column-name row is gone.
print(hn[0:4])
[['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]
# Spot-check a single row by position (index 17).
print(hn[17])
['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43']
# Sort every post into one of three buckets according to how its title begins.
ask_posts = []
show_posts = []
other_posts = []
for post in hn:
    # Lower-case the title first so the prefix test is not defeated by
    # differences in capitalisation ("Ask HN", "ask hn", "ASK HN", ...).
    title = post[1].lower()
    if title.startswith("ask hn"):
        bucket = ask_posts
    elif title.startswith("show hn"):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)
print(f"Among the total number of posts ({len(hn):,} posts), we have {len(ask_posts):,} posts of type Ask HN, {len(show_posts):,} posts of type Show HN, and {len(other_posts):,} posts of other type.")
Among the total number of posts (20,100 posts), we have 1,744 posts of type Ask HN, 1,162 posts of type Show HN, and 17,194 posts of other type.
# Preview the first five rows classified as "Ask HN".
print(ask_posts[:5])
[['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55'], ['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43'], ['11610310', 'Ask HN: Aby recent changes to CSS that broke mobile?', '', '1', '1', 'polskibus', '5/2/2016 10:14'], ['12210105', 'Ask HN: Looking for Employee #3 How do I do it?', '', '1', '3', 'sph130', '8/2/2016 14:20'], ['10394168', 'Ask HN: Someone offered to buy my browser extension from me. What now?', '', '28', '17', 'roykolak', '10/15/2015 16:38']]
# Preview the first five rows classified as "Show HN".
print(show_posts[:5])
[['10627194', 'Show HN: Wio Link ESP8266 Based Web of Things Hardware Development Platform', 'https://iot.seeed.cc', '26', '22', 'kfihihc', '11/25/2015 14:03'], ['10646440', 'Show HN: Something pointless I made', 'http://dn.ht/picklecat/', '747', '102', 'dhotson', '11/29/2015 22:46'], ['11590768', 'Show HN: Shanhu.io, a programming playground powered by e8vm', 'https://shanhu.io', '1', '1', 'h8liu', '4/28/2016 18:05'], ['12178806', 'Show HN: Webscope Easy way for web developers to communicate with Clients', 'http://webscopeapp.com', '3', '3', 'fastbrick', '7/28/2016 7:11'], ['10872799', 'Show HN: GeoScreenshot Easily test Geo-IP based web pages', 'https://www.geoscreenshot.com/', '1', '9', 'kpsychwave', '1/9/2016 20:45']]
# Preview the first five rows that matched neither prefix.
print(other_posts[:5])
[['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01'], ['10301696', 'Note by Note: The Making of Steinway L1037 (2007)', 'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0', '8', '2', 'walterbell', '9/30/2015 4:12']]
def avg_comments(list_input):
    """Return the mean number of comments per post.

    Args:
        list_input: A list of post rows. The comment count is read from
            index 4 of each row and converted with ``int``.

    Returns:
        The total number of comments divided by the number of rows, as a
        float. An empty list yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    if not list_input:
        # No rows means there is nothing to average.
        return 0.0
    total = sum(int(each[4]) for each in list_input)
    return total / len(list_input)
# Report the mean comment count for each of the three post categories.
ask_avg = avg_comments(ask_posts)
print(f"In average the posts 'Ask HN' received {ask_avg:.2f} comments")
show_avg = avg_comments(show_posts)
print(f"In average the posts 'Show HN' received {show_avg:.2f} comments")
other_avg = avg_comments(other_posts)
print(f"In average the posts 'Other' received {other_avg:.2f} comments")
In average the posts 'Ask HN' received 14.04 comments
In average the posts 'Show HN' received 10.32 comments
In average the posts 'Other' received 26.87 comments
# Example of one date/time value in this data set: 9/26/2016 3:19
# Convert the `created_at` string (column 6) of every row into a datetime,
# overwriting the string in place.
for each in hn:
    time = each[6]
    # Because rows are overwritten in place, running this step a second time
    # would hand strptime() a datetime and fail with "strptime() argument 1
    # must be str, not datetime.datetime". Only parse values that are still
    # strings so the step can be re-run safely.
    if isinstance(time, str):
        each[6] = dt.datetime.strptime(time, "%m/%d/%Y %H:%M")
# Check the type now stored in column 6 of a sample row.
print(type(hn[1][6]))
<class 'datetime.datetime'>
# For each calendar month, accumulate the total comments and the number of
# posts across all rows, then derive the average comments per post.
month_dic = {}
for post in hn:
    created = post[6]
    n_comments = int(post[4])
    # The first post seen for a month starts that month's counters at zero.
    stats = month_dic.setdefault(created.month, {"comments": 0, "posts": 0})
    stats["comments"] += n_comments
    stats["posts"] += 1
for stats in month_dic.values():
    stats["avg"] = stats["comments"] / stats["posts"]
print("Dictionary with the total number of comments and posts per months and average comments per month:", "\n", "\n", month_dic)
Dictionary with the total number of comments and posts per months and average comments per month:
{8: {'comments': 43888, 'posts': 1562, 'avg': 28.09731113956466}, 1: {'comments': 38702, 'posts': 1694, 'avg': 22.846517119244393}, 6: {'comments': 43036, 'posts': 1585, 'avg': 27.15205047318612}, 9: {'comments': 64922, 'posts': 2511, 'avg': 25.855037833532457}, 10: {'comments': 34869, 'posts': 1693, 'avg': 20.59598346131128}, 11: {'comments': 37287, 'posts': 1665, 'avg': 22.394594594594594}, 3: {'comments': 42171, 'posts': 1676, 'avg': 25.161694510739856}, 5: {'comments': 41574, 'posts': 1532, 'avg': 27.137075718015666}, 4: {'comments': 43807, 'posts': 1615, 'avg': 27.125077399380803}, 7: {'comments': 39021, 'posts': 1505, 'avg': 25.927574750830566}, 2: {'comments': 33388, 'posts': 1511, 'avg': 22.096624751819988}, 12: {'comments': 35861, 'posts': 1551, 'avg': 23.12121212121212}}
# Rank the months from highest to lowest average comments per post.
# The tuples lead with the rounded average so that sorting orders by it.
list_ = [(round(stats["avg"], 2), month) for month, stats in month_dic.items()]
table_sorted = sorted(list_, reverse=True)
for avg, month in table_sorted:
    print(month, ":", avg)
8 : 28.1
6 : 27.15
5 : 27.14
4 : 27.13
7 : 25.93
9 : 25.86
3 : 25.16
12 : 23.12
1 : 22.85
11 : 22.39
2 : 22.1
10 : 20.6
# Per-month totals (comments and post count) restricted to the "Ask HN"
# posts, followed by the average comments per post for each month.
month_dic_ahn = {}
for each in ask_posts:
    time = each[6]
    comments = int(each[4])
    month = time.month
    if month in month_dic_ahn:
        month_dic_ahn[month]["comments"] += comments
        month_dic_ahn[month]["posts"] += 1
    else:
        month_dic_ahn[month] = {"comments": comments, "posts": 1}
# Iterate over this dictionary's own keys, not those of the all-posts
# `month_dic`, so that a month without any Ask HN post cannot raise KeyError.
for each in month_dic_ahn:
    month_dic_ahn[each]["avg"] = (
        month_dic_ahn[each]["comments"] / month_dic_ahn[each]["posts"]
    )
# Rank the months by average comments per "Ask HN" post, highest first.
list_ = []
for month, stats in month_dic_ahn.items():
    # Rounded average goes first so the sort is driven by it.
    list_.append((round(stats["avg"], 2), month))
table_sorted = sorted(list_, reverse=True)
for avg, month in table_sorted:
    print(month, ":", avg)
8 : 21.75
3 : 18.57
5 : 18.16
6 : 16.24
9 : 15.97
1 : 13.61
11 : 12.55
7 : 11.87
4 : 10.78
12 : 9.12
10 : 8.64
2 : 7.52
# For each hour of the day (taken from the parsed `created_at` datetime),
# accumulate the total comments and the number of posts across all rows,
# then derive the average comments per post.
hours_dic = {}
for each in hn:
    time = each[6]
    comments = int(each[4])
    hours = time.hour
    if hours in hours_dic:
        hours_dic[hours]["comments"] += comments
        hours_dic[hours]["posts"] += 1
    else:
        hours_dic[hours] = {"comments": comments, "posts": 1}
for each in hours_dic:
    hours_dic[each]["avg"] = hours_dic[each]["comments"] / hours_dic[each]["posts"]
# The label says "per hour": this dictionary is keyed by hour, not by month.
print("Dictionary with the total number of comments and posts per hour and average comments per hour:", "\n", "\n", hours_dic)
Dictionary with the total number of comments and posts per months and average comments per hour:
{11: {'comments': 20664, 'posts': 762, 'avg': 27.118110236220474}, 19: {'comments': 27894, 'posts': 1145, 'avg': 24.361572052401748}, 22: {'comments': 18684, 'posts': 875, 'avg': 21.353142857142856}, 0: {'comments': 17478, 'posts': 697, 'avg': 25.076040172166426}, 4: {'comments': 11537, 'posts': 527, 'avg': 21.891840607210625}, 9: {'comments': 15274, 'posts': 609, 'avg': 25.080459770114942}, 16: {'comments': 30857, 'posts': 1302, 'avg': 23.69969278033794}, 18: {'comments': 31587, 'posts': 1254, 'avg': 25.188995215311003}, 14: {'comments': 33545, 'posts': 1151, 'avg': 29.14422241529105}, 10: {'comments': 16818, 'posts': 686, 'avg': 24.516034985422742}, 12: {'comments': 25351, 'posts': 923, 'avg': 27.465872156013003}, 13: {'comments': 30562, 'posts': 1102, 'avg': 27.733212341197824}, 20: {'comments': 23414, 'posts': 1051, 'avg': 22.27783063748811}, 3: {'comments': 11626, 'posts': 488, 'avg': 23.82377049180328}, 17: {'comments': 34784, 'posts': 1362, 'avg': 25.53891336270191}, 1: {'comments': 12465, 'posts': 588, 'avg': 21.198979591836736}, 23: {'comments': 17582, 'posts': 778, 'avg': 22.59897172236504}, 8: {'comments': 14062, 'posts': 578, 'avg': 24.32871972318339}, 2: {'comments': 13762, 'posts': 529, 'avg': 26.015122873345934}, 21: {'comments': 22652, 'posts': 1030, 'avg': 21.992233009708738}, 15: {'comments': 35809, 'posts': 1234, 'avg': 29.01863857374392}, 6: {'comments': 9253, 'posts': 468, 'avg': 19.771367521367523}, 7: {'comments': 12576, 'posts': 508, 'avg': 24.755905511811022}, 5: {'comments': 10290, 'posts': 453, 'avg': 22.71523178807947}}
# Rank the hours of the day from highest to lowest average comments per post.
list_ = [(round(hours_dic[hour]["avg"], 2), hour) for hour in hours_dic]
# Tuples compare on the rounded average first, then on the hour.
table_sorted = sorted(list_, reverse=True)
for avg, hour in table_sorted:
    print(hour, ":", avg)
14 : 29.14
15 : 29.02
13 : 27.73
12 : 27.47
11 : 27.12
2 : 26.02
17 : 25.54
18 : 25.19
9 : 25.08
0 : 25.08
7 : 24.76
10 : 24.52
19 : 24.36
8 : 24.33
3 : 23.82
16 : 23.7
5 : 22.72
23 : 22.6
20 : 22.28
21 : 21.99
4 : 21.89
22 : 21.35
1 : 21.2
6 : 19.77
# Per-hour totals (comments and post count) restricted to the "Ask HN"
# posts, followed by the average comments per post for each hour.
hours_dic_ahn = {}
for post in ask_posts:
    created = post[6]
    n_comments = int(post[4])
    hour = created.hour
    try:
        entry = hours_dic_ahn[hour]
    except KeyError:
        # First post seen for this hour: start its counters.
        hours_dic_ahn[hour] = {"comments": n_comments, "posts": 1}
    else:
        entry["comments"] += n_comments
        entry["posts"] += 1
for entry in hours_dic_ahn.values():
    entry["avg"] = entry["comments"] / entry["posts"]
# Rank the hours of the day by average comments per "Ask HN" post,
# highest first.
list_ = []
for hour, entry in hours_dic_ahn.items():
    rounded_avg = round(entry["avg"], 2)
    list_.append((rounded_avg, hour))
table_sorted = sorted(list_, reverse=True)
for rounded_avg, hour in table_sorted:
    print(hour, ":", rounded_avg)
#print(hours_dic_ahn)
15 : 38.59
2 : 23.81
20 : 21.52
16 : 16.8
21 : 16.01
13 : 14.74
10 : 13.44
14 : 13.23
18 : 13.2
17 : 11.46
1 : 11.38
11 : 11.05
19 : 10.8
8 : 10.25
5 : 10.09
12 : 9.41
6 : 9.02
0 : 8.13
23 : 7.99
7 : 7.85
3 : 7.8
4 : 7.17
22 : 6.75
9 : 5.58