import pandas as pd
import sidetable
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth
# Load the groceries dataset from the public gist.
GROCERIES_CSV_URL = 'https://gist.githubusercontent.com/ahbanavi/0b1f62713bdf8bd32da0e17ae07e0fad/raw/3974126661c5d25505ead0b23dee70eee2a4f83a/Groceries_dataset.csv'
groceries_df = pd.read_csv(GROCERIES_CSV_URL)
groceries_df.head()

# Normalise the column names and discard the purchase date, which is not used
# by the analysis below.
groceries_df = groceries_df.rename(
    columns={'Member_number': 'customer_id', 'itemDescription': 'item'}
).drop(columns=['Date'])

# With the date gone, rows that are identical in the remaining columns carry
# no extra information, so collapse them and report the row count on each side.
print("Items count before deleting duplicates: ", len(groceries_df))
groceries_df = groceries_df.drop_duplicates()
print("Items count after deleting duplicates: ", len(groceries_df))
display(groceries_df)

# Build one transaction per customer: the list of every item on their rows.
items_set = (
    groceries_df
    .groupby(by=['customer_id'])['item']
    .apply(list)
    .reset_index()
)
display(items_set)

# Encode the transactions as a table with one column per distinct item.
items_list = list(items_set['item'])
te = TransactionEncoder()
te_ary = te.fit_transform(items_list)
items_df = pd.DataFrame(te_ary, columns=te.columns_)
display(items_df)

# Mine frequent itemsets with Apriori, record how many items each itemset
# holds, and list the most strongly supported itemsets first.
frequent_itemsets = apriori(items_df, min_support=0.01, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)
display(frequent_itemsets)
def display_itemset(n, itemsets=None):
    """Display the frequent itemsets that contain exactly ``n`` items.

    Args:
        n: Value the ``length`` column must equal for a row to be shown.
        itemsets: Optional DataFrame with a ``length`` column to filter.
            When omitted, the module-level ``frequent_itemsets`` is used,
            looked up at call time.
    """
    if itemsets is None:
        # Resolve the global lazily so the function always reflects whichever
        # mining result is currently bound to ``frequent_itemsets``.
        itemsets = frequent_itemsets
    # NOTE(review): ``display`` is not imported in this file; presumably the
    # IPython/Jupyter builtin -- confirm before running outside a notebook.
    display(str(n) + " Members Frequent Itemsets", itemsets[itemsets['length'] == n])
# Show the current frequent itemsets grouped by size, from pairs up to
# five-item sets.
for itemset_size in range(2, 6):
    display_itemset(itemset_size)

# Mine the same encoded transactions again with FP-Growth, using the same
# support threshold, and rebind ``frequent_itemsets`` to the new result.
frequent_itemsets = fpgrowth(items_df, min_support=0.01, use_colnames=True)
# Add length of itemsets to dataframe
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)
display(frequent_itemsets)

# Same per-size breakdown as above, now over the FP-Growth result.
for itemset_size in range(2, 6):
    display_itemset(itemset_size)