-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrequentitemset.py
62 lines (38 loc) · 1.14 KB
/
frequentitemset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import math
from itertools import combinations
def read_data_in_dict(filename):
    """Load a transaction table from a comma-separated text file.

    The first line is the header naming the items. Every following
    non-blank line is one transaction whose comma-separated fields are
    parsed as integers.

    Args:
        filename: Path of the file to read.

    Returns:
        A dict with two keys: 'items' (list of header names with
        surrounding whitespace removed) and 'transactions' (list of
        lists of ints, one inner list per data line). An empty file
        yields empty lists for both keys.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data field cannot be parsed as an integer.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename) as f:
        lines = f.read().splitlines()

    if not lines:
        return {'items': [], 'transactions': []}

    # Strip each name so the last header field does not keep a stray
    # newline or padding, which would break later name lookups.
    items = [name.strip() for name in lines[0].split(',')]

    # Blank lines (typically a trailing one) carry no transaction.
    transactions = [
        [int(field) for field in line.split(',')]
        for line in lines[1:]
        if line.strip()
    ]

    return {'items': items, 'transactions': transactions}
# Load the dataset from 'itemsets.csv' at module level and echo it.
data = read_data_in_dict('itemsets.csv')
print(data)
def get_freq(s, items, transactions):
    """Count the transactions that contain every item of an itemset.

    Args:
        s: Iterable of item names forming the candidate itemset.
        items: Sequence of all item names; position i names column i of
            each transaction row.
        transactions: Iterable of rows, each indexable by column
            position. A row contains item i when its column i equals 1.

    Returns:
        The number of rows whose entry equals 1 for every item in s.
        An empty itemset matches every row.

    Raises:
        ValueError: If an item of s does not appear in items.
    """
    # Resolve each name to its column once; list.index is a linear
    # search and does not depend on the transaction being examined.
    columns = [items.index(item) for item in s]

    # Test each column explicitly instead of multiplying the entries:
    # a product can equal 1 without every factor being 1 (-1 * -1).
    return sum(
        1 for row in transactions
        if all(row[col] == 1 for col in columns)
    )
def frequent_itemsets(data, level, min_support):
    """Return the itemsets of a given size that meet a minimum support.

    Args:
        data: Dict with an 'items' sequence of item names and a
            'transactions' list of rows, as consumed by get_freq.
        level: Number of items in each candidate itemset.
        min_support: Minimum fraction of transactions that must contain
            an itemset for it to be reported.

    Returns:
        A list of tuples of item names, in the order produced by
        itertools.combinations over data['items'].
    """
    items = data['items']
    transactions = data['transactions']
    total = len(transactions)
    candidates = list(combinations(items, level))

    if total == 0:
        # Without transactions every candidate has frequency 0 and the
        # count threshold is 0 as well, so all candidates qualify.
        return candidates

    # Compare the support ratio directly rather than rounding
    # min_support * total up to a count: that product can land just
    # above an integer (0.07 * 100 == 7.000000000000001), which would
    # raise the threshold by one and drop itemsets that sit exactly at
    # the requested support.
    return [
        candidate for candidate in candidates
        if get_freq(candidate, items, transactions) / total >= min_support
    ]
# Print the 2-item sets found in at least half of the loaded transactions.
print("frequent item sets")
print(frequent_itemsets(data, level=2, min_support=0.5))