import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
# Load the historical draws. The code below reads column 0 as the round,
# column 1 as the date and columns 2..8 as the seven drawn numbers.
data = pd.read_csv("lottery.csv")
data.head()

nb_loto = range(len(data))  # positional index of every draw (reused further down)
columns = range(2, 9)       # positional indices of the seven number columns
numbers = range(1, 46)      # every number that can be drawn

# Count how often each number was drawn, in one vectorised pass instead of a
# per-cell iloc loop.
number_occurences = np.zeros(len(numbers))
drawn = data.iloc[:, list(columns)].to_numpy().ravel()
if drawn.size:
    # Reject out-of-range values explicitly: with plain index arithmetic a
    # drawn value of 0 would wrap around and be counted as number 45.
    if drawn.min() < numbers[0] or drawn.max() > numbers[-1]:
        raise ValueError("drawn numbers must lie between 1 and 45")
    # bincount slot 0 is unused because drawn numbers start at 1.
    number_occurences += np.bincount(drawn, minlength=len(numbers) + 1)[1:]

df_numbers = pd.DataFrame({"Number": numbers, "Occurence": number_occurences})
df_numbers = df_numbers.sort_values('Occurence', ascending=False)

# Print the ranking, most frequently drawn number first.
for number, occurence in zip(df_numbers["Number"], df_numbers["Occurence"]):
    print(f"{number} -> {occurence} times ")
43 -> 174.0 times
27 -> 166.0 times
34 -> 164.0 times
1 -> 163.0 times
17 -> 162.0 times
13 -> 161.0 times
4 -> 161.0 times
12 -> 160.0 times
33 -> 159.0 times
39 -> 158.0 times
10 -> 157.0 times
20 -> 157.0 times
40 -> 156.0 times
38 -> 154.0 times
18 -> 154.0 times
14 -> 153.0 times
26 -> 153.0 times
2 -> 152.0 times
37 -> 151.0 times
31 -> 151.0 times
3 -> 151.0 times
11 -> 150.0 times
24 -> 150.0 times
45 -> 148.0 times
21 -> 148.0 times
19 -> 148.0 times
16 -> 148.0 times
8 -> 147.0 times
15 -> 147.0 times
7 -> 147.0 times
5 -> 146.0 times
6 -> 146.0 times
36 -> 146.0 times
35 -> 145.0 times
42 -> 145.0 times
44 -> 144.0 times
30 -> 144.0 times
25 -> 140.0 times
32 -> 134.0 times
28 -> 134.0 times
41 -> 133.0 times
23 -> 131.0 times
29 -> 128.0 times
22 -> 124.0 times
9 -> 123.0 times
from random import seed
from random import choice
# Fix the seed of the `random` module so the choice()-based draws below are
# reproducible from one run to the next.
seed(1)
def generate_number_combination():
    """Draw seven distinct numbers between 1 and 45 at random.

    Returns:
        A list of seven integers in the order they were drawn. Each number
        is removed from the pool once picked, so no value repeats.
    """
    pool = list(range(1, 46))
    drawn = []
    while len(drawn) < 7:
        pick = choice(pool)
        # Take the picked number out of the pool so it cannot come up again.
        pool.remove(pick)
        drawn.append(pick)
    return drawn
def check_wrong_combination(winner, loser):
    """Tell whether ``loser`` is a different combination from ``winner``.

    The comparison is positional: the same numbers in a different order
    count as a different combination.

    Both arguments are materialised as lists before comparing. This keeps the
    check positional for a pandas Series whose index holds column labels
    (integer keys on such a Series are not a reliable positional lookup), and
    it compares the full length of the sequences rather than a fixed count.

    Args:
        winner: Ordered sequence of drawn numbers (list, tuple, Series, ...).
        loser: Ordered sequence of numbers to compare against ``winner``.

    Returns:
        True if the sequences differ in any position or in length, False if
        they are identical.
    """
    return list(winner) != list(loser)
# Every row loaded from the file is a real draw, so flag it as a winner.
data['win'] = 1

# For each real draw, generate one random combination that differs from it
# and record it as a losing row carrying the same round and date.
# The rows are collected in a list and concatenated once at the end:
# DataFrame.append no longer exists in pandas 2.x, and growing a frame one
# row at a time copies the whole frame on every iteration.
losing_rows = []
for i in nb_loto:
    # Plain list, so the comparison below is positional, not label-based.
    win_combi = data.iloc[i, 2:9].tolist()
    lose_combi = generate_number_combination()
    # Redraw in the unlikely case the random combination equals the winner.
    while not check_wrong_combination(win_combi, lose_combi):
        lose_combi = generate_number_combination()
    losing_rows.append({'round': data.iloc[i, 0],
                        'date': data.iloc[i, 1],
                        'first': lose_combi[0],
                        'second': lose_combi[1],
                        'third': lose_combi[2],
                        'fourth': lose_combi[3],
                        'fifth': lose_combi[4],
                        'sixth': lose_combi[5],
                        'bonus': lose_combi[6],
                        'win': 0})

if losing_rows:
    data = pd.concat([data, pd.DataFrame(losing_rows)], ignore_index=True)

# Most recent rounds first.
data = data.sort_values('round', ascending=False)
data.head(20)