{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "464f673b-5811-4553-abad-f5bcf52a71f8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from mlxtend.frequent_patterns import apriori, association_rules"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1722e071-d107-4d98-b24e-3f973cc5db44",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" products | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" MILK,BREAD,BISCUIT | \n",
"
\n",
" \n",
" 1 | \n",
" BREAD,MILK,BISCUIT,CORNFLAKES | \n",
"
\n",
" \n",
" 2 | \n",
" BREAD,TEA,BOURNVITA | \n",
"
\n",
" \n",
" 3 | \n",
" JAM,MAGGI,BREAD,MILK | \n",
"
\n",
" \n",
" 4 | \n",
" MAGGI,TEA,BISCUIT | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" products\n",
"0 MILK,BREAD,BISCUIT\n",
"1 BREAD,MILK,BISCUIT,CORNFLAKES\n",
"2 BREAD,TEA,BOURNVITA\n",
"3 JAM,MAGGI,BREAD,MILK\n",
"4 MAGGI,TEA,BISCUIT"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"GroceryStoreDataSet.csv\", names = ['products'], sep = ',')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2bef844f-21ea-4b83-87fb-c86aa04a1ea8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(20, 1)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "25e35043-beff-4212-a61e-072e2f9fbc70",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['MILK', 'BREAD', 'BISCUIT'],\n",
" ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['BREAD', 'TEA', 'BOURNVITA'],\n",
" ['JAM', 'MAGGI', 'BREAD', 'MILK'],\n",
" ['MAGGI', 'TEA', 'BISCUIT'],\n",
" ['BREAD', 'TEA', 'BOURNVITA'],\n",
" ['MAGGI', 'TEA', 'CORNFLAKES'],\n",
" ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],\n",
" ['JAM', 'MAGGI', 'BREAD', 'TEA'],\n",
" ['BREAD', 'MILK'],\n",
" ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['COFFEE', 'SUGER', 'BOURNVITA'],\n",
" ['BREAD', 'COFFEE', 'COCK'],\n",
" ['BREAD', 'SUGER', 'BISCUIT'],\n",
" ['COFFEE', 'SUGER', 'CORNFLAKES'],\n",
" ['BREAD', 'SUGER', 'BOURNVITA'],\n",
" ['BREAD', 'COFFEE', 'SUGER'],\n",
" ['BREAD', 'COFFEE', 'SUGER'],\n",
" ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = list(df[\"products\"].apply(lambda x:x.split(\",\") ))\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "166d480f-ca69-4dd3-bba0-ceaf176d6c6e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" BISCUIT | \n",
" BOURNVITA | \n",
" BREAD | \n",
" COCK | \n",
" COFFEE | \n",
" CORNFLAKES | \n",
" JAM | \n",
" MAGGI | \n",
" MILK | \n",
" SUGER | \n",
" TEA | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 5 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 6 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 7 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 10 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 11 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 12 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 13 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 14 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 15 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 16 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 17 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 18 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 19 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" BISCUIT BOURNVITA BREAD COCK COFFEE CORNFLAKES JAM MAGGI MILK \\\n",
"0 1 0 1 0 0 0 0 0 1 \n",
"1 1 0 1 0 0 1 0 0 1 \n",
"2 0 1 1 0 0 0 0 0 0 \n",
"3 0 0 1 0 0 0 1 1 1 \n",
"4 1 0 0 0 0 0 0 1 0 \n",
"5 0 1 1 0 0 0 0 0 0 \n",
"6 0 0 0 0 0 1 0 1 0 \n",
"7 1 0 1 0 0 0 0 1 0 \n",
"8 0 0 1 0 0 0 1 1 0 \n",
"9 0 0 1 0 0 0 0 0 1 \n",
"10 1 0 0 1 1 1 0 0 0 \n",
"11 1 0 0 1 1 1 0 0 0 \n",
"12 0 1 0 0 1 0 0 0 0 \n",
"13 0 0 1 1 1 0 0 0 0 \n",
"14 1 0 1 0 0 0 0 0 0 \n",
"15 0 0 0 0 1 1 0 0 0 \n",
"16 0 1 1 0 0 0 0 0 0 \n",
"17 0 0 1 0 1 0 0 0 0 \n",
"18 0 0 1 0 1 0 0 0 0 \n",
"19 0 0 0 0 1 1 0 0 1 \n",
"\n",
" SUGER TEA \n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 1 \n",
"3 0 0 \n",
"4 0 1 \n",
"5 0 1 \n",
"6 0 1 \n",
"7 0 1 \n",
"8 0 1 \n",
"9 0 0 \n",
"10 0 0 \n",
"11 0 0 \n",
"12 1 0 \n",
"13 0 0 \n",
"14 1 0 \n",
"15 1 0 \n",
"16 1 0 \n",
"17 1 0 \n",
"18 1 0 \n",
"19 0 1 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Let's transform the list, with one-hot encoding\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
"a = TransactionEncoder()\n",
"a_data = a.fit(data).transform(data)\n",
"df = pd.DataFrame(a_data,columns=a.columns_)\n",
"df = df.replace(False,0)\n",
"df = df.replace(True,1)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6ce5c732-f9bb-4f89-80a1-55bdeffb5248",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing 42 combinations | Sampling itemset size 3\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/periklis/Library/Python/3.9/lib/python/site-packages/mlxtend/frequent_patterns/fpcommon.py:109: DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" support | \n",
" itemsets | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.35 | \n",
" (BISCUIT) | \n",
"
\n",
" \n",
" 1 | \n",
" 0.20 | \n",
" (BOURNVITA) | \n",
"
\n",
" \n",
" 2 | \n",
" 0.65 | \n",
" (BREAD) | \n",
"
\n",
" \n",
" 3 | \n",
" 0.40 | \n",
" (COFFEE) | \n",
"
\n",
" \n",
" 4 | \n",
" 0.30 | \n",
" (CORNFLAKES) | \n",
"
\n",
" \n",
" 5 | \n",
" 0.25 | \n",
" (MAGGI) | \n",
"
\n",
" \n",
" 6 | \n",
" 0.25 | \n",
" (MILK) | \n",
"
\n",
" \n",
" 7 | \n",
" 0.30 | \n",
" (SUGER) | \n",
"
\n",
" \n",
" 8 | \n",
" 0.35 | \n",
" (TEA) | \n",
"
\n",
" \n",
" 9 | \n",
" 0.20 | \n",
" (BISCUIT, BREAD) | \n",
"
\n",
" \n",
" 10 | \n",
" 0.20 | \n",
" (BREAD, MILK) | \n",
"
\n",
" \n",
" 11 | \n",
" 0.20 | \n",
" (BREAD, SUGER) | \n",
"
\n",
" \n",
" 12 | \n",
" 0.20 | \n",
" (BREAD, TEA) | \n",
"
\n",
" \n",
" 13 | \n",
" 0.20 | \n",
" (CORNFLAKES, COFFEE) | \n",
"
\n",
" \n",
" 14 | \n",
" 0.20 | \n",
" (SUGER, COFFEE) | \n",
"
\n",
" \n",
" 15 | \n",
" 0.20 | \n",
" (TEA, MAGGI) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" support itemsets\n",
"0 0.35 (BISCUIT)\n",
"1 0.20 (BOURNVITA)\n",
"2 0.65 (BREAD)\n",
"3 0.40 (COFFEE)\n",
"4 0.30 (CORNFLAKES)\n",
"5 0.25 (MAGGI)\n",
"6 0.25 (MILK)\n",
"7 0.30 (SUGER)\n",
"8 0.35 (TEA)\n",
"9 0.20 (BISCUIT, BREAD)\n",
"10 0.20 (BREAD, MILK)\n",
"11 0.20 (BREAD, SUGER)\n",
"12 0.20 (BREAD, TEA)\n",
"13 0.20 (CORNFLAKES, COFFEE)\n",
"14 0.20 (SUGER, COFFEE)\n",
"15 0.20 (TEA, MAGGI)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = apriori(df, min_support = 0.2, use_colnames = True, verbose = 1)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "196907eb-d967-440b-9f58-754013fc77d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" antecedents | \n",
" consequents | \n",
" antecedent support | \n",
" consequent support | \n",
" support | \n",
" confidence | \n",
" lift | \n",
" leverage | \n",
" conviction | \n",
" zhangs_metric | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (MILK) | \n",
" (BREAD) | \n",
" 0.25 | \n",
" 0.65 | \n",
" 0.2 | \n",
" 0.800000 | \n",
" 1.230769 | \n",
" 0.0375 | \n",
" 1.75 | \n",
" 0.250000 | \n",
"
\n",
" \n",
" 1 | \n",
" (SUGER) | \n",
" (BREAD) | \n",
" 0.30 | \n",
" 0.65 | \n",
" 0.2 | \n",
" 0.666667 | \n",
" 1.025641 | \n",
" 0.0050 | \n",
" 1.05 | \n",
" 0.035714 | \n",
"
\n",
" \n",
" 2 | \n",
" (CORNFLAKES) | \n",
" (COFFEE) | \n",
" 0.30 | \n",
" 0.40 | \n",
" 0.2 | \n",
" 0.666667 | \n",
" 1.666667 | \n",
" 0.0800 | \n",
" 1.80 | \n",
" 0.571429 | \n",
"
\n",
" \n",
" 3 | \n",
" (SUGER) | \n",
" (COFFEE) | \n",
" 0.30 | \n",
" 0.40 | \n",
" 0.2 | \n",
" 0.666667 | \n",
" 1.666667 | \n",
" 0.0800 | \n",
" 1.80 | \n",
" 0.571429 | \n",
"
\n",
" \n",
" 4 | \n",
" (MAGGI) | \n",
" (TEA) | \n",
" 0.25 | \n",
" 0.35 | \n",
" 0.2 | \n",
" 0.800000 | \n",
" 2.285714 | \n",
" 0.1125 | \n",
" 3.25 | \n",
" 0.750000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" antecedents consequents antecedent support consequent support support \\\n",
"0 (MILK) (BREAD) 0.25 0.65 0.2 \n",
"1 (SUGER) (BREAD) 0.30 0.65 0.2 \n",
"2 (CORNFLAKES) (COFFEE) 0.30 0.40 0.2 \n",
"3 (SUGER) (COFFEE) 0.30 0.40 0.2 \n",
"4 (MAGGI) (TEA) 0.25 0.35 0.2 \n",
"\n",
" confidence lift leverage conviction zhangs_metric \n",
"0 0.800000 1.230769 0.0375 1.75 0.250000 \n",
"1 0.666667 1.025641 0.0050 1.05 0.035714 \n",
"2 0.666667 1.666667 0.0800 1.80 0.571429 \n",
"3 0.666667 1.666667 0.0800 1.80 0.571429 \n",
"4 0.800000 2.285714 0.1125 3.25 0.750000 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Let's view our interpretation values using the Associan rule function.\n",
"df_ar = association_rules(df, metric = \"confidence\", min_threshold = 0.6)\n",
"df_ar"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5cc755a-814c-43c4-b189-5184a21607be",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}