In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# See https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/
# Five toy transactions with three items each.
dataset = [
    ['basketball', 'Gutenberg-Gym', 'Relationship'],
    ['Fußball', 'HSG', 'Single'],
    ['Fußball', 'Gutenberg-Gym', 'Relationship'],
    ['Fußball', 'HSG', 'Single'],
    ['Fußball', 'HSG', 'Single'],
]

# Encode the transactions as a boolean table with one column per distinct item.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,Fußball,Gutenberg-Gym,HSG,Relationship,Single,basketball
0,False,True,False,True,False,True
1,True,False,True,False,True,False
2,True,True,False,True,False,False
3,True,False,True,False,True,False
4,True,False,True,False,True,False


In [3]:
# Mine the frequent itemsets with a minimum support of 0.6 (reported by item
# name rather than column index), then record how many items each one contains.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Fußball),1
1,0.6,(HSG),1
2,0.6,(Single),1
3,0.6,"(Fußball, HSG)",2
4,0.6,"(Fußball, Single)",2
5,0.6,"(HSG, Single)",2
6,0.6,"(Fußball, HSG, Single)",3


In [5]:
# Keep only the two-item itemsets whose support is at least 0.6.
frequent_itemsets.loc[
    frequent_itemsets['length'].eq(2) & frequent_itemsets['support'].ge(0.6)
]

Unnamed: 0,support,itemsets,length
3,0.6,"(Fußball, HSG)",2
4,0.6,"(Fußball, Single)",2
5,0.6,"(HSG, Single)",2


In [24]:
# Select the row(s) whose itemset is exactly {'Fußball', 'HSG', 'Single'};
# each stored itemset is compared for equality against this plain set.
frequent_itemsets[ frequent_itemsets['itemsets'] == {'Fußball', 'HSG', 'Single'} ]


Unnamed: 0,support,itemsets,length
6,0.6,"(Fußball, HSG, Single)",3


In [8]:
# See https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/
# Derive association rules from the frequent itemsets, filtered on the
# "confidence" metric with a threshold of 0.8. `association_rules` is imported
# in the notebook's import cell at the top.
association_rules(frequent_itemsets, metric="confidence", min_threshold=0.8)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HSG),(Fußball),0.6,0.8,0.6,1.0,1.25,0.12,inf
1,(Single),(Fußball),0.6,0.8,0.6,1.0,1.25,0.12,inf
2,(HSG),(Single),0.6,0.6,0.6,1.0,1.666667,0.24,inf
3,(Single),(HSG),0.6,0.6,0.6,1.0,1.666667,0.24,inf
4,"(Fußball, HSG)",(Single),0.6,0.6,0.6,1.0,1.666667,0.24,inf
5,"(Fußball, Single)",(HSG),0.6,0.6,0.6,1.0,1.666667,0.24,inf
6,"(HSG, Single)",(Fußball),0.6,0.8,0.6,1.0,1.25,0.12,inf
7,(HSG),"(Fußball, Single)",0.6,0.6,0.6,1.0,1.666667,0.24,inf
8,(Single),"(Fußball, HSG)",0.6,0.6,0.6,1.0,1.666667,0.24,inf


In [9]:
# Derive the rules again, this time filtered on the "lift" metric with a
# threshold of 1.5, and keep them in `rules` for the cells below.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.5,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HSG),(Single),0.6,0.6,0.6,1.0,1.666667,0.24,inf
1,(Single),(HSG),0.6,0.6,0.6,1.0,1.666667,0.24,inf
2,"(Fußball, HSG)",(Single),0.6,0.6,0.6,1.0,1.666667,0.24,inf
3,"(Fußball, Single)",(HSG),0.6,0.6,0.6,1.0,1.666667,0.24,inf
4,(HSG),"(Fußball, Single)",0.6,0.6,0.6,1.0,1.666667,0.24,inf
5,(Single),"(Fußball, HSG)",0.6,0.6,0.6,1.0,1.666667,0.24,inf


In [11]:
# Tag every rule with the number of items in its antecedent (stored as a new
# column on `rules`), then show the rules with at least two antecedent items,
# confidence above 0.75 and lift above 1.2.
rules["antecedent_len"] = rules["antecedents"].map(len)
rules.query("antecedent_len >= 2 and confidence > 0.75 and lift > 1.2")

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
2,"(Fußball, HSG)",(Single),0.6,0.6,0.6,1.0,1.666667,0.24,inf,2
3,"(Fußball, Single)",(HSG),0.6,0.6,0.6,1.0,1.666667,0.24,inf,2
