-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathfofa_spider_ext.py
149 lines (141 loc) · 4.68 KB
/
fofa_spider_ext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/02/28
# @Author : r4v3zn
# @Site : FOFA 爬虫大法
import sys
import requests
import sys
import time
import json
import base64
import traceback
import logging
import pymysql
# Register PyMySQL under the MySQLdb module name.
pymysql.install_as_MySQLdb()
# Silence urllib3's InsecureRequestWarning (requests below are sent with verify=False).
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Configure log output: INFO level, "[time] :LEVEL: message" format.
logging.basicConfig(level = logging.INFO,format='[%(asctime)s] :%(levelname)s: %(message)s')
# Python 2 only: reload(sys) makes setdefaultencoding available again so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
from DBUtils.PooledDB import PooledDB
'''
Celery
'''
from celery import platforms,Celery
# NOTE(review): presumably lets the Celery worker run as root - confirm against the Celery docs.
platforms.C_FORCE_ROOT = True
# Redis broker URL; no change is needed when Redis runs on this machine.
broker = 'redis://127.0.0.1:6379/0'
# Celery application used to register and dispatch the crawl task.
app = Celery('fofa_spider_ext',broker=broker)
# Database host
host = ''
# Database user name
user = ''
# Database password
pwd = ''
# Database name
db_name = ''
# Database port
port = 3306
# Database connection charset
charset = 'utf8'
# FOFA account name (sent as the "email" query parameter)
fofa_name = ''
# FOFA API key
fofa_key = ''
# Results requested per FOFA page; defaults to 10000, adjust as needed.
page_size = 10000
# First page to fetch
page_start = 1
# Last page; main() derives the real page count from the API response.
page_end = 1
# Fields to crawl: host, ip, port, protocol, country, region (province), city
fields = ['host','ip','port','protocol','country','region','city']
# port,protocol,country,region,city,host
# Connection pool built on pymysql.
# NOTE(review): 20 is the second positional argument, presumably the number of
# connections opened up front - confirm against the DBUtils PooledDB docs.
pool = PooledDB(pymysql,20,host=host,user=user,passwd=pwd,db=db_name,port=port,charset=charset)
# One module-level connection and cursor, created at import time.
connection = pool.connection()
cursor = connection.cursor()
# HTTP session created once at import time.
session = requests.session()
# Request headers
headers = {
'Upgrade-Insecure-Requests': '1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
}
def fofa_requests(url):
    """Request hub: perform a FOFA API GET and return the decoded JSON body.

    Every HTTP request of the program goes through here. The call never gives
    up: when the request fails, the body is not valid JSON, or the payload
    carries a non-empty ``error`` value that does not contain ``'None'``, the
    problem is logged and the request is retried after a 30 second sleep.

    @param url request URL
    @return the parsed JSON response of the first successful attempt
    """
    # NOTE(review): session.get() is called without a timeout, so a stalled
    # connection can block this loop indefinitely.
    while True:
        # Reset per attempt so a failure never logs the previous attempt's body.
        body = ''
        try:
            # NOTE(review): the full URL is logged as received; callers that
            # embed credentials in the query string will see them in the log.
            logging.info(url)
            response = session.get(url, verify=False, headers=headers)
            body = response.text
            payload = json.loads(body)
            error = payload['error']
            if not error or 'None' in error:
                return payload
            logging.error(u'fofa 错误:' + error + u' 休眠30s')
        except Exception as e:
            logging.error(body)
            # str(e) instead of e.message: the latter is deprecated, empty for
            # multi-argument exceptions and missing on Python 3.
            logging.error(u'fofa 错误:' + str(e) + u' 休眠30s')
            traceback.print_exc()
        # Shared back-off for both the API-error and the exception path.
        time.sleep(30)
def batch_insert_db(results, fofa_sql):
    """Bulk-insert one page of FOFA rows into the ``fofa_spider`` table.

    Errors are logged together with a traceback and then swallowed, so a
    failing batch never propagates to the caller.

    @param results  iterable of result rows; items 0-6 of each row are stored
                    as host, ip, port, protocol, country_name, region_name
                    and city_name
    @param fofa_sql FOFA query string recorded with every inserted row
    """
    sql = (
        "INSERT IGNORE INTO fofa_spider(id,host,ip,port,protocol,country_name,"
        "region_name,city_name,fofa_sql,create_date,update_date) "
        "VALUES(DEFAULT,%s,%s,%s,%s,%s,%s,%s,%s,NOW(),NOW())"
    )
    try:
        # fofa_sql is passed raw: bound parameters are escaped by the driver,
        # so escaping it beforehand would store doubled backslashes.
        rows = [
            tuple(str(row[i]) for i in range(7)) + (fofa_sql,)
            for row in results
        ]
        cursor.executemany(sql, rows)
        connection.commit()
        logging.info(u'存入数据库ok,总数量为:' + str(len(rows)))
    except Exception as e:
        # str(e) keeps the driver's (errno, message) details; e.message is
        # empty for multi-argument exceptions and missing on Python 3.
        logging.error(u"存入数据库错误,错误信息:" + str(e))
        traceback.print_exc()
@app.task
def celery_spider(api_url, fofa_sql):
    """Celery crawl task: fetch one FOFA result page and store its rows.

    @param api_url  URL of the page to crawl
    @param fofa_sql FOFA query string, handed on to the database insert
    """
    page = fofa_requests(api_url)
    rows = page['results']
    batch_insert_db(rows, fofa_sql)
def _build_search_url(page_no, qbase64):
    """Return the FOFA search-API URL for result page ``page_no``."""
    # NOTE(review): the account key is sent in the query string over plain HTTP.
    return ('http://fofa.so/api/v1/search/all?email=' + fofa_name
            + '&key=' + fofa_key
            + '&fields=' + ','.join(fields)
            + '&size=' + str(page_size)
            + '&page=' + str(page_no)
            + '&qbase64=' + qbase64)


def main(fofa_sql):
    """FOFA crawler entry point.

    Fetches page ``page_start`` synchronously to learn the total result
    count, stores that page, then queues one ``celery_spider`` task for each
    remaining page.

    @param fofa_sql FOFA query string
    """
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    # b64encode needs bytes; text input is encoded as UTF-8 first.
    raw_query = fofa_sql if isinstance(fofa_sql, bytes) else fofa_sql.encode('utf-8')
    # Base64 output may contain '+', '/' and '=', which must be percent-encoded
    # to survive query-string decoding on the server.
    qbase64 = quote(base64.b64encode(raw_query), safe='')

    first_page = fofa_requests(_build_search_url(page_start, qbase64))
    total_size = first_page['size']
    # Ceiling division: number of pages needed to cover total_size results.
    last_page = (total_size + page_size - 1) // page_size
    # Store the page that was just fetched.
    batch_insert_db(first_page['results'], fofa_sql)
    # Start after page_start: that page is already stored, so queueing it
    # again would only repeat the API request.
    for page_no in range(page_start + 1, last_page + 1):
        api_url = _build_search_url(page_no, qbase64)
        logging.info('send task -->' + api_url)
        celery_spider.delay(api_url, fofa_sql)
if __name__ == '__main__':
    # Example FOFA query string; edit it to crawl a different target.
    fofa_sql = 'app="大华-视频监控"'
    main(fofa_sql)