16
16
from drain3 .simple_profiler import SimpleProfiler , NullProfiler , Profiler
17
17
from drain3 .template_miner_config import TemplateMinerConfig
18
18
from src .common_config import CLUSTER_COUNT_KEY , DEFAULT_STR_VALUE , USE_OLD_FUNCTION_EXTRACT_PARAMETER ,\
19
- STAR_CHAR , CLUSTER_ID_KEY ,CLUSTER_SIZE_KEY ,TEMPLATE_MINED_KEY ,LOG_TEMPLATE_TOKENS_KEY
19
+ TOKEN_LIST_KEY , CLUSTER_ID_KEY ,CLUSTER_SIZE_KEY ,TEMPLATE_MINED_KEY ,LOG_TEMPLATE_TOKENS_KEY , ENABLE_MASK_CONTENT
20
20
21
21
logger = logging .getLogger (__name__ )
22
22
@@ -139,6 +139,15 @@ def get_snapshot_reason(self, change_type, cluster_id):
139
139
140
140
return None
141
141
142
def make_result_dict(self, cluster, tokenize_result):
    """Build the per-cluster part of a result dictionary.

    :param cluster: cluster object; its ``cluster_id``, ``size`` and
        ``log_template_tokens`` attributes and its ``get_template()``
        method are read.
    :param tokenize_result: extra entries merged in last via
        ``dict.update``, so they override the cluster entries on a key
        collision.
    :return: a new dict holding the cluster fields plus the entries of
        ``tokenize_result``.
    """
    summary = {}
    summary[CLUSTER_ID_KEY] = cluster.cluster_id
    # Number of log lines matched by this cluster so far.
    summary[CLUSTER_SIZE_KEY] = cluster.size
    summary[LOG_TEMPLATE_TOKENS_KEY] = cluster.log_template_tokens
    # The mined log template for this cluster.
    summary[TEMPLATE_MINED_KEY] = cluster.get_template()
    summary.update(tokenize_result)
    return summary
150
+
142
151
def add_log_message (self , log_message : str ) -> dict :
143
152
"""
144
153
yd。功能:根据当前传入的日志内容,获取对应的日志模板的logCluster
@@ -147,7 +156,7 @@ def add_log_message(self, log_message: str) -> dict:
147
156
"""
148
157
self .profiler .start_section ("total" )
149
158
150
- if 0 :
159
+ if ENABLE_MASK_CONTENT :
151
160
self .profiler .start_section ("mask" )
152
161
# yd。将log_message字符串中正则匹配的子串,用特定符号替换。
153
162
# 比如将"connected to 10.0.0.1"中的ip数字用"<:IP:>"替换,返回"connected to <:IP:>"
@@ -163,19 +172,11 @@ def add_log_message(self, log_message: str) -> dict:
163
172
164
173
result = {
165
174
"change_type" : change_type ,
166
- #"cluster_id": cluster.cluster_id,
167
- CLUSTER_ID_KEY : cluster .cluster_id ,
168
- #"cluster_size": cluster.size, #yd。用于统计当前cluster匹配的日志条数
169
- CLUSTER_SIZE_KEY : cluster .size , #yd。用于统计当前cluster匹配的日志条数
170
- #"log_template_tokens": cluster.log_template_tokens,
171
- LOG_TEMPLATE_TOKENS_KEY : cluster .log_template_tokens ,
172
- #"template_mined": cluster.get_template(), #yd。返回挖掘处理的日志模板
173
- TEMPLATE_MINED_KEY : cluster .get_template (), # yd。返回挖掘处理的日志模板
174
- #"cluster_count": len(self.drain.clusters) #yd。统计当前已经挖掘的模板的 总数
175
175
CLUSTER_COUNT_KEY : len (self .drain .clusters ) # yd。统计当前已经挖掘的模板的 总数
176
-
177
176
}
178
- result .update (tokenize_result )
177
+ result_dict = self .make_result_dict (cluster , tokenize_result )
178
+ result .update (result_dict )
179
+
179
180
#yd。这里是将当前的日志模板信息的快照保存下来
180
181
if self .persistence_handler is not None :
181
182
self .profiler .start_section ("save_state" )
@@ -208,10 +209,15 @@ def match(self, log_message: str, full_search_strategy="never") -> LogCluster:
208
209
count of wildcard matches.
209
210
:return: Matched cluster or None if no match found.
210
211
"""
212
+ if ENABLE_MASK_CONTENT :
213
+ # yd。将log_message字符串中正则匹配的子串,用特定符号替换。
214
+ # 比如将"connected to 10.0.0.1"中的ip数字用"<:IP:>"替换,返回"connected to <:IP:>"
215
+ masked_content = self .masker .mask (log_message )
216
+ else :
217
+ masked_content = log_message
211
218
212
- masked_content = self .masker .mask (log_message )
213
- matched_cluster = self .drain .match (masked_content , full_search_strategy )
214
- return matched_cluster
219
+ matched_cluster , tokenize_result = self .drain .match (masked_content , full_search_strategy )
220
+ return matched_cluster , tokenize_result
215
221
216
222
def get_parameter_list (self , log_template : str , log_message : str ) -> List [str ]:
217
223
"""
@@ -230,6 +236,18 @@ def get_parameter_list(self, log_template: str, log_message: str) -> List[str]:
230
236
return []
231
237
return [parameter .value for parameter in extracted_parameters ]
232
238
239
def get_parameter(self, result_dict, log_line):
    """Extract the parameters of a log line from a result dictionary.

    The strategy is chosen by ``USE_OLD_FUNCTION_EXTRACT_PARAMETER``:

    * truthy - call ``self.extract_parameters`` with the mined template
      (``TEMPLATE_MINED_KEY``, defaulting to ``DEFAULT_STR_VALUE``) and
      the raw ``log_line``;
    * falsy - call ``self.extract_parameters_by_compare`` with the token
      list of the message (``TOKEN_LIST_KEY``) and the template tokens
      (``LOG_TEMPLATE_TOKENS_KEY``), each defaulting to an empty list.
      ``log_line`` is not used on this path.

    :param result_dict: dict-like object supporting ``.get``.
    :param log_line: the original log line.
    :return: whatever the selected extraction method returns.
    """
    if not USE_OLD_FUNCTION_EXTRACT_PARAMETER:
        message_tokens = result_dict.get(TOKEN_LIST_KEY, [])
        template_tokens = result_dict.get(LOG_TEMPLATE_TOKENS_KEY, [])
        return self.extract_parameters_by_compare(message_tokens, template_tokens)

    mined_template = result_dict.get(TEMPLATE_MINED_KEY, DEFAULT_STR_VALUE)
    return self.extract_parameters(mined_template, log_line)
250
+
233
251
def extract_parameters_by_compare (self , content_tokens , log_template_tokens ):
234
252
parameter_list = []
235
253
for token1 , token2 in zip (content_tokens , log_template_tokens ):
0 commit comments