-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmovie_count.py
94 lines (76 loc) · 2.01 KB
/
movie_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from imdb import IMDb
import pandas as pd
import numpy as np
import csv
import pickle
import string
import nltk
from nltk.tokenize import word_tokenize,sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: pickle.load executes arbitrary code embedded in the file; only
    # point this at pickle files produced by a trusted step of this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Per-movie inputs. The rest of the script indexes all three objects with the
# keys of `movies`, so they are expected to share the same key set.
movies = _read_pickle("movies_words.pickle")    # words for each movie
movies1 = _read_pickle("movies_genre.pickle")   # genres for each movie
movies2 = _read_pickle("movies_cast.pickle")    # cast for each movie
def _overlap_ratio(items, others):
    """Return the fraction of *items* that also occur in *others*.

    Every element of *items* is counted (duplicates included) and the count
    is divided by ``len(items)``.  An empty *items* yields 0.0 instead of
    raising ZeroDivisionError.
    """
    if not items:
        return 0.0
    return sum(1 for item in items if item in others) / len(items)


# Similarity tables, each keyed as table[str(i)][str(j)] for every ordered
# pair of distinct movies (i, j).  The score is directional: it is normalised
# by the size of movie i's own lists, so table[i][j] != table[j][i] in general.
d = {}    # words + genre + cast
d1 = {}   # genre + cast
d2 = {}   # cast only
d3 = {}   # words only

# Scratch rows for the movie currently being processed (one per table above).
c = {}
c1 = {}
c2 = {}
c3 = {}

new = {}  # movie key -> list of that movie's distinct words
l = 0     # number of movies processed so far (progress counter)

# Build each movie's distinct-word set once so the pairwise loop can use O(1)
# membership tests instead of scanning a list for every word of every pair.
# Assumes the word entries are hashable (the original already passed them
# through set()).
word_sets = {key: set(words) for key, words in movies.items()}
for i in movies:
    new[i] = list(word_sets[i])

for i in movies:
    for j in movies:
        if str(i) == str(j):
            continue

        word_score = _overlap_ratio(new[i], word_sets[j])
        genre_score = _overlap_ratio(movies1[i], movies1[j])
        cast_score = _overlap_ratio(movies2[i], movies2[j])

        # The original wrote the cast term on a separate line beginning with
        # "+", which Python parses as a discarded standalone expression; the
        # cast score was therefore never added to `c` or `c1`.  Summing in a
        # single expression restores the intended totals.
        c[str(j)] = word_score + genre_score + cast_score
        c1[str(j)] = genre_score + cast_score
        c2[str(j)] = cast_score
        c3[str(j)] = word_score

    d[str(i)] = c
    d1[str(i)] = c1
    d2[str(i)] = c2
    d3[str(i)] = c3

    # Start fresh rows for the next movie; the stored rows must not be reused.
    c = {}
    c1 = {}
    c2 = {}
    c3 = {}

    # NOTE(review): the source's indentation was lost; the counter is assumed
    # to tick once per outer-loop movie as a progress indicator - confirm.
    l = l + 1
    print(l)
# Persist each similarity table to its own pickle file, reporting "DONE"
# after every file has been written.
for out_path, table in (
    ("sim.pickle", d),
    ("sim_gc.pickle", d1),
    ("sim_c.pickle", d2),
    ("sim_w.pickle", d3),
):
    with open(out_path, 'wb') as dic:
        pickle.dump(table, dic, protocol=pickle.HIGHEST_PROTOCOL)
    print("DONE")