-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate.py
172 lines (151 loc) · 5.15 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# -*- coding: utf-8 -*-
"""
@Time : 2021/5/27 9:51
@Author : Wanglulu
@File : evaluate.py
@IDE :PyCharm Community Edition
"""
"""
Evaluation script for the scholar profiling competition.
"""
import json
import argparse
def remove_null(list):
    """Return a new list with every empty-string item filtered out.

    The remaining items keep their original order. Only values equal to
    ``''`` are dropped; other falsy values (``None``, ``0``) are kept.

    Note: the parameter name shadows the ``list`` builtin; it is kept as-is
    so existing callers are unaffected.
    """
    kept = []
    for item in list:
        if item != '':
            kept.append(item)
    return kept
def compute_other_score(prediction, reference):
    """Score an exact-match field.

    Returns 1.0 when ``prediction`` equals ``reference`` and 0.0 otherwise.
    """
    return 1.0 if prediction == reference else 0.0
def compute_Jaccrad(prediction, reference):
    """Return the Jaccard similarity of two answer collections.

    ``prediction`` is the submitted answer and ``reference`` is the gold
    answer. Empty strings are ignored and duplicates are collapsed before
    the comparison. When both collections end up empty they are treated as
    identical and the score is 1.0.
    """
    # Drop empty strings and de-duplicate in one step.
    predicted = {item for item in prediction if item != ''}
    expected = {item for item in reference if item != ''}

    shared = len(predicted & expected)
    # Union size via inclusion-exclusion.
    union_size = len(predicted) + len(expected) - shared
    if union_size == 0:
        return 1.0
    return float(shared / union_size)
def _normalize_homepage(url):
    """Return ``url`` without a leading http(s) scheme or trailing slashes."""
    # NOTE: str.strip("https://") would strip a *set of characters* from both
    # ends (eating e.g. the leading "s" of "stats.example.com"), so the scheme
    # is removed as a real prefix instead.
    for scheme in ("https://", "http://"):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    return url.rstrip("/")


def home_compute_Jaccrad(prediction, reference, max_extra_chars=10):
    """Return a fuzzy Jaccard similarity between two homepage URL lists.

    URLs are compared after removing the ``http://`` / ``https://`` scheme
    and trailing slashes. A reference URL and a predicted URL match when:

    * they are equal, or
    * the predicted URL starts with the reference URL, or
    * the reference URL starts with the predicted URL and is fewer than
      ``max_extra_chars`` characters longer.

    Each predicted URL can be matched to at most one reference URL, so the
    result always lies in ``[0.0, 1.0]``.

    Args:
        prediction: iterable of predicted homepage URL strings.
        reference: iterable of gold homepage URL strings.
        max_extra_chars: exclusive upper bound on how many extra trailing
            characters the reference may have over a shorter prediction.

    Returns:
        ``matched / (len(pred) + len(ref) - matched)`` as a float, or 1.0
        when both sides are empty after dropping empty entries.
    """
    # Normalise first, then de-duplicate, so that "http://a.com" and
    # "https://a.com/" count as a single homepage.
    predicted = {_normalize_homepage(u) for u in prediction if u != ''}
    expected = {_normalize_homepage(u) for u in reference if u != ''}
    # A bare scheme such as "http://" normalises to '' and carries no
    # information; an empty string would prefix-match everything.
    predicted.discard('')
    expected.discard('')

    # Exact matches are paired first so fuzzy matching cannot steal them.
    exact = predicted & expected
    matched = len(exact)

    # Greedy one-to-one fuzzy pairing; sorted for a deterministic result
    # regardless of set iteration order.
    unused = sorted(predicted - exact)
    for ref_url in sorted(expected - exact):
        for idx, pred_url in enumerate(unused):
            pred_extends_ref = pred_url.startswith(ref_url)
            ref_slightly_longer = (
                ref_url.startswith(pred_url)
                and len(ref_url) - len(pred_url) < max_extra_chars
            )
            if pred_extends_ref or ref_slightly_longer:
                matched += 1
                del unused[idx]
                break

    # Union size via inclusion-exclusion.
    union_size = len(predicted) + len(expected) - matched
    if union_size == 0:
        return 1.0
    return float(matched / union_size)
def evaluate(hypothesis_data, reference_data):
    """Score a submission against the reference, record by record.

    The submission is expected to have the same structure as the reference:
    record ``k`` of ``hypothesis_data`` is compared with record ``k`` of
    ``reference_data``. Each record must provide the keys ``"homepage"``,
    ``"title"`` and ``"gender"``. Extra trailing hypothesis records are
    ignored.

    Args:
        hypothesis_data: indexable sequence of predicted records.
        reference_data: iterable of gold records.

    Returns:
        A 4-tuple ``(overall, homepage, title, gender)`` of mean scores,
        where the per-record overall score is the average of the three
        field scores.

    Raises:
        IndexError: if the hypothesis has fewer records than the reference.
        ValueError: if the reference contains no records.
        KeyError: if a record lacks one of the required keys.
    """
    scores = []
    home_scores = []
    title_scores = []
    gender_scores = []

    for index, refer in enumerate(reference_data):
        try:
            pred = hypothesis_data[index]
        except IndexError:
            # Same exception type as before, but with an actionable message.
            raise IndexError(
                "hypothesis has no record for reference line {}".format(index + 1)
            ) from None

        homepage_score = home_compute_Jaccrad(pred["homepage"], refer["homepage"])
        title_score = compute_other_score(pred["title"], refer["title"])
        gender_score = compute_other_score(pred["gender"], refer["gender"])

        scores.append((homepage_score + title_score + gender_score) / 3)
        home_scores.append(homepage_score)
        title_scores.append(title_score)
        gender_scores.append(gender_score)

    if not scores:
        # Avoid an opaque ZeroDivisionError when there is nothing to score.
        raise ValueError("reference data is empty; nothing to evaluate")

    total = len(scores)
    return (
        sum(scores) / total,
        sum(home_scores) / total,
        sum(title_scores) / total,
        sum(gender_scores) / total,
    )
def _load_json_lines(path):
    """Read ``path`` as UTF-8 JSON Lines and return the decoded records."""
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            # Skip blank lines (e.g. a stray empty line at the end of the
            # file) instead of failing the whole load on them.
            if raw.strip():
                records.append(json.loads(raw))
    return records


def main(hypothesis_path, reference_path):
    """Load both files and score the hypothesis against the reference.

    Both files are read as UTF-8 text with one JSON document per line.

    Args:
        hypothesis_path: path of the submitted predictions file.
        reference_path: path of the gold answers file.

    Returns:
        On success, the 4-tuple returned by ``evaluate``:
        ``(score, homepage_score, title_score, gender_score)``.
        On failure, the 3-tuple ``(0.0, False, error_message)``.
        NOTE: the two shapes differ; callers must tell them apart (for
        example by length). This is kept for backward compatibility.
    """
    is_success = False
    score = 0.

    # Broad catches are deliberate here: this is the top-level boundary and
    # every failure is reported back to the caller as a message.
    try:
        hypothesis_data = _load_json_lines(hypothesis_path)
    except Exception as e:
        err_info = "hypothesis file load failed. please upload a json file such as 'train.json'. err: {}".format(e)
        return score, is_success, err_info

    try:
        reference_data = _load_json_lines(reference_path)
    except Exception as e:
        err_info = "reference file load failed. please upload a json file. err: {}".format(e)
        return score, is_success, err_info

    try:
        score_, home_score_, title_score_, gender_score_ = evaluate(hypothesis_data, reference_data)
    except Exception as e:
        return score, is_success, str(e)
    return score_, home_score_, title_score_, gender_score_
if __name__ == '__main__':
    # Command-line entry point:
    #   --hp <hypothesis file>  --rf <reference file>
    # Both options are optional for argparse; an omitted option is passed to
    # main() as None.
    cli = argparse.ArgumentParser()
    cli.add_argument("--hp", type=str, help="hypothesis file")
    cli.add_argument("--rf", type=str, help="reference file")
    options = cli.parse_args()

    # Print whatever main() returns (scores on success, error tuple on failure).
    print(main(options.hp, options.rf))