{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "464f673b-5811-4553-abad-f5bcf52a71f8", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from mlxtend.frequent_patterns import apriori, association_rules" ] }, { "cell_type": "code", "execution_count": 3, "id": "1722e071-d107-4d98-b24e-3f973cc5db44", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
products
0MILK,BREAD,BISCUIT
1BREAD,MILK,BISCUIT,CORNFLAKES
2BREAD,TEA,BOURNVITA
3JAM,MAGGI,BREAD,MILK
4MAGGI,TEA,BISCUIT
\n", "
" ], "text/plain": [ " products\n", "0 MILK,BREAD,BISCUIT\n", "1 BREAD,MILK,BISCUIT,CORNFLAKES\n", "2 BREAD,TEA,BOURNVITA\n", "3 JAM,MAGGI,BREAD,MILK\n", "4 MAGGI,TEA,BISCUIT" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"GroceryStoreDataSet.csv\", names = ['products'], sep = ',')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "2bef844f-21ea-4b83-87fb-c86aa04a1ea8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20, 1)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "25e35043-beff-4212-a61e-072e2f9fbc70", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[['MILK', 'BREAD', 'BISCUIT'],\n", " ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],\n", " ['BREAD', 'TEA', 'BOURNVITA'],\n", " ['JAM', 'MAGGI', 'BREAD', 'MILK'],\n", " ['MAGGI', 'TEA', 'BISCUIT'],\n", " ['BREAD', 'TEA', 'BOURNVITA'],\n", " ['MAGGI', 'TEA', 'CORNFLAKES'],\n", " ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],\n", " ['JAM', 'MAGGI', 'BREAD', 'TEA'],\n", " ['BREAD', 'MILK'],\n", " ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n", " ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n", " ['COFFEE', 'SUGER', 'BOURNVITA'],\n", " ['BREAD', 'COFFEE', 'COCK'],\n", " ['BREAD', 'SUGER', 'BISCUIT'],\n", " ['COFFEE', 'SUGER', 'CORNFLAKES'],\n", " ['BREAD', 'SUGER', 'BOURNVITA'],\n", " ['BREAD', 'COFFEE', 'SUGER'],\n", " ['BREAD', 'COFFEE', 'SUGER'],\n", " ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = list(df[\"products\"].apply(lambda x:x.split(\",\") ))\n", "data" ] }, { "cell_type": "code", "execution_count": 8, "id": "166d480f-ca69-4dd3-bba0-ceaf176d6c6e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BISCUITBOURNVITABREADCOCKCOFFEECORNFLAKESJAMMAGGIMILKSUGERTEA
010100000100
110100100100
201100000001
300100011100
410000001001
501100000001
600000101001
710100001001
800100011001
900100000100
1010011100000
1110011100000
1201001000010
1300111000000
1410100000010
1500001100010
1601100000010
1700101000010
1800101000010
1900001100101
\n", "
" ], "text/plain": [ " BISCUIT BOURNVITA BREAD COCK COFFEE CORNFLAKES JAM MAGGI MILK \\\n", "0 1 0 1 0 0 0 0 0 1 \n", "1 1 0 1 0 0 1 0 0 1 \n", "2 0 1 1 0 0 0 0 0 0 \n", "3 0 0 1 0 0 0 1 1 1 \n", "4 1 0 0 0 0 0 0 1 0 \n", "5 0 1 1 0 0 0 0 0 0 \n", "6 0 0 0 0 0 1 0 1 0 \n", "7 1 0 1 0 0 0 0 1 0 \n", "8 0 0 1 0 0 0 1 1 0 \n", "9 0 0 1 0 0 0 0 0 1 \n", "10 1 0 0 1 1 1 0 0 0 \n", "11 1 0 0 1 1 1 0 0 0 \n", "12 0 1 0 0 1 0 0 0 0 \n", "13 0 0 1 1 1 0 0 0 0 \n", "14 1 0 1 0 0 0 0 0 0 \n", "15 0 0 0 0 1 1 0 0 0 \n", "16 0 1 1 0 0 0 0 0 0 \n", "17 0 0 1 0 1 0 0 0 0 \n", "18 0 0 1 0 1 0 0 0 0 \n", "19 0 0 0 0 1 1 0 0 1 \n", "\n", " SUGER TEA \n", "0 0 0 \n", "1 0 0 \n", "2 0 1 \n", "3 0 0 \n", "4 0 1 \n", "5 0 1 \n", "6 0 1 \n", "7 0 1 \n", "8 0 1 \n", "9 0 0 \n", "10 0 0 \n", "11 0 0 \n", "12 1 0 \n", "13 0 0 \n", "14 1 0 \n", "15 1 0 \n", "16 1 0 \n", "17 1 0 \n", "18 1 0 \n", "19 0 1 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Let's transform the list, with one-hot encoding\n", "from mlxtend.preprocessing import TransactionEncoder\n", "a = TransactionEncoder()\n", "a_data = a.fit(data).transform(data)\n", "df = pd.DataFrame(a_data,columns=a.columns_)\n", "df = df.replace(False,0)\n", "df = df.replace(True,1)\n", "df" ] }, { "cell_type": "code", "execution_count": 9, "id": "6ce5c732-f9bb-4f89-80a1-55bdeffb5248", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processing 42 combinations | Sampling itemset size 3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/periklis/Library/Python/3.9/lib/python/site-packages/mlxtend/frequent_patterns/fpcommon.py:109: DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
supportitemsets
00.35(BISCUIT)
10.20(BOURNVITA)
20.65(BREAD)
30.40(COFFEE)
40.30(CORNFLAKES)
50.25(MAGGI)
60.25(MILK)
70.30(SUGER)
80.35(TEA)
90.20(BISCUIT, BREAD)
100.20(BREAD, MILK)
110.20(BREAD, SUGER)
120.20(BREAD, TEA)
130.20(CORNFLAKES, COFFEE)
140.20(SUGER, COFFEE)
150.20(TEA, MAGGI)
\n", "
" ], "text/plain": [ " support itemsets\n", "0 0.35 (BISCUIT)\n", "1 0.20 (BOURNVITA)\n", "2 0.65 (BREAD)\n", "3 0.40 (COFFEE)\n", "4 0.30 (CORNFLAKES)\n", "5 0.25 (MAGGI)\n", "6 0.25 (MILK)\n", "7 0.30 (SUGER)\n", "8 0.35 (TEA)\n", "9 0.20 (BISCUIT, BREAD)\n", "10 0.20 (BREAD, MILK)\n", "11 0.20 (BREAD, SUGER)\n", "12 0.20 (BREAD, TEA)\n", "13 0.20 (CORNFLAKES, COFFEE)\n", "14 0.20 (SUGER, COFFEE)\n", "15 0.20 (TEA, MAGGI)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = apriori(df, min_support = 0.2, use_colnames = True, verbose = 1)\n", "df" ] }, { "cell_type": "code", "execution_count": 10, "id": "196907eb-d967-440b-9f58-754013fc77d4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftleverageconvictionzhangs_metric
0(MILK)(BREAD)0.250.650.20.8000001.2307690.03751.750.250000
1(SUGER)(BREAD)0.300.650.20.6666671.0256410.00501.050.035714
2(CORNFLAKES)(COFFEE)0.300.400.20.6666671.6666670.08001.800.571429
3(SUGER)(COFFEE)0.300.400.20.6666671.6666670.08001.800.571429
4(MAGGI)(TEA)0.250.350.20.8000002.2857140.11253.250.750000
\n", "
" ], "text/plain": [ " antecedents consequents antecedent support consequent support support \\\n", "0 (MILK) (BREAD) 0.25 0.65 0.2 \n", "1 (SUGER) (BREAD) 0.30 0.65 0.2 \n", "2 (CORNFLAKES) (COFFEE) 0.30 0.40 0.2 \n", "3 (SUGER) (COFFEE) 0.30 0.40 0.2 \n", "4 (MAGGI) (TEA) 0.25 0.35 0.2 \n", "\n", " confidence lift leverage conviction zhangs_metric \n", "0 0.800000 1.230769 0.0375 1.75 0.250000 \n", "1 0.666667 1.025641 0.0050 1.05 0.035714 \n", "2 0.666667 1.666667 0.0800 1.80 0.571429 \n", "3 0.666667 1.666667 0.0800 1.80 0.571429 \n", "4 0.800000 2.285714 0.1125 3.25 0.750000 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Let's view our interpretation values using the Associan rule function.\n", "df_ar = association_rules(df, metric = \"confidence\", min_threshold = 0.6)\n", "df_ar" ] }, { "cell_type": "code", "execution_count": null, "id": "f5cc755a-814c-43c4-b189-5184a21607be", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }