import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
# Load the lottery history. The positional columns 2..8 are treated below as
# the drawn numbers of each row, each expected to lie in 1..45.
data = pd.read_csv("lottery.csv")
data.head()

nb_loto = range(len(data))
columns = range(2, 9)
numbers = range(1, 46)

# Flatten every drawn number into one integer vector and count all values in
# a single vectorised pass instead of one ``iloc`` lookup per cell.
drawn_numbers = data.iloc[:, list(columns)].to_numpy().ravel().astype(np.int64)
if drawn_numbers.size and (
    drawn_numbers.min() < numbers[0] or drawn_numbers.max() > numbers[-1]
):
    # Fail loudly: a value outside 1..45 would otherwise be counted in the
    # wrong slot (or silently grow the counter array).
    raise ValueError("lottery.csv contains a drawn number outside 1..45")

# Number k is stored in slot k-1. The cast to float keeps the dtype produced
# by the original np.zeros-based counter, so the printed values
# (e.g. "12.0") and the sort result stay the same.
number_occurences = np.bincount(
    drawn_numbers - 1, minlength=len(numbers)
).astype(float)

df_numbers = pd.DataFrame({"Number": numbers, "Occurence": number_occurences})
df_numbers = df_numbers.sort_values("Occurence", ascending=False)

# Print the ranking, most frequently drawn number first.
for number, occurence in zip(df_numbers["Number"], df_numbers["Occurence"]):
    print(f"{number} -> {occurence} times ")
from random import choice, seed

# Seed the global random generator with a fixed value so that every run of
# the script draws the same sequence of random combinations.
seed(1)
def generate_number_combination(pool_size=45, count=7):
    """Draw ``count`` distinct random numbers from ``1..pool_size``.

    With the default arguments this draws 7 distinct numbers out of 1..45.
    Numbers are picked one at a time with ``random.choice`` and removed from
    the pool, so for a given ``random.seed`` the default call yields the same
    sequence as the previous hard-coded implementation.

    Args:
        pool_size: Highest number that can be drawn (the pool is
            ``1..pool_size`` inclusive).
        count: How many distinct numbers to draw.

    Returns:
        A list of ``count`` distinct integers, in the order they were drawn.

    Raises:
        ValueError: If ``count`` is negative or larger than ``pool_size``.
    """
    if not 0 <= count <= pool_size:
        raise ValueError(
            f"cannot draw {count} distinct numbers from a pool of {pool_size}"
        )

    remaining = list(range(1, pool_size + 1))
    combination = []
    for _ in range(count):
        picked = choice(remaining)
        # Remove the pick so the same number cannot be drawn twice.
        remaining.remove(picked)
        combination.append(picked)
    return combination
def check_wrong_combination(winner, loser):
    """Return True if ``loser`` differs from ``winner`` in any position.

    The comparison is positional: the same numbers in a different order count
    as a different combination. Both arguments are materialised as lists
    first, so any iterable works, including a pandas Series with a non-integer
    index (where ``series[i]`` with an integer key is deprecated). Inputs of
    different lengths are reported as different instead of raising.

    Args:
        winner: Iterable holding the winning combination.
        loser: Iterable holding the candidate combination.

    Returns:
        ``True`` when the two combinations differ, ``False`` when they are
        identical element by element.
    """
    return list(winner) != list(loser)
# Flag every row loaded so far as a winning combination.
data['win'] = 1

# For each existing row, generate one random combination that differs from
# that row's winning combination and record it with win=0, reusing the row's
# round and date. Rows are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and calling it per row copied
# the whole frame on every iteration.
losing_rows = []
for i in nb_loto:
    # A plain list keeps the positional comparison independent of how a
    # Series handles integer keys.
    win_combi = data.iloc[i, 2:9].tolist()
    lose_combi = generate_number_combination()
    while not check_wrong_combination(win_combi, lose_combi):
        lose_combi = generate_number_combination()
    new_row = {
        'round': data.iloc[i, 0],
        'date': data.iloc[i, 1],
        'first': lose_combi[0],
        'second': lose_combi[1],
        'third': lose_combi[2],
        'fourth': lose_combi[3],
        'fifth': lose_combi[4],
        'sixth': lose_combi[5],
        'bonus': lose_combi[6],
        'win': 0,
    }
    losing_rows.append(new_row)

if losing_rows:
    # ignore_index renumbers the combined frame 0..n-1, as the per-row
    # append(..., ignore_index=True) did.
    data = pd.concat([data, pd.DataFrame(losing_rows)], ignore_index=True)

data = data.sort_values('round', ascending=False)
data.head(20)