-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathmureq.py
393 lines (315 loc) · 14.1 KB
/
mureq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
"""
mureq is a replacement for python-requests, intended to be vendored
in-tree by Linux systems software and other lightweight applications.
mureq is copyright 2021 by its contributors and is released under the
0BSD ("zero-clause BSD") license.
"""
import contextlib
import io
import os.path
import socket
import ssl
import sys
import urllib.parse
from http.client import HTTPConnection, HTTPSConnection, HTTPMessage, HTTPException
__version__ = '0.2.0'

# Public names exported by a star-import of this module. HTTPErrorStatus is
# included because Response.raise_for_status() raises it, so callers need the
# name in order to catch it.
__all__ = ['HTTPException', 'TooManyRedirects', 'HTTPErrorStatus', 'Response',
           'yield_response', 'request', 'get', 'post', 'head', 'put', 'patch', 'delete']

# Timeout, in seconds, used when the caller does not pass one.
DEFAULT_TIMEOUT = 15.0

# Default User-Agent value, e.g. "Python 3.8.10"
DEFAULT_UA = "Python " + sys.version.split()[0]
def request(method, url, *, read_limit=None, **kwargs):
    """Send an HTTP request and buffer the response body in memory.

    :param str method: HTTP method to use (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param read_limit: upper bound on the number of body bytes to read, or None to read everything
    :type read_limit: int or None
    :param kwargs: any keyword argument accepted by yield_response
    :return: the fully consumed response
    :rtype: Response
    :raises: HTTPException
    """
    with yield_response(method, url, **kwargs) as raw:
        try:
            payload = raw.read(read_limit)
        except IOError as exc:
            # Errors that already are HTTPExceptions pass through untouched;
            # every other I/O failure is wrapped so callers see one error type.
            if isinstance(exc, HTTPException):
                raise
            raise HTTPException(str(exc)) from exc
        merged_headers = _prepare_incoming_headers(raw.headers)
        return Response(raw.url, raw.status, merged_headers, payload)
def get(url, **kwargs):
    """Issue an HTTP GET for ``url``; keyword arguments are forwarded to request()."""
    return request('GET', url, **kwargs)
def post(url, body=None, **kwargs):
    """Issue an HTTP POST to ``url`` with the optional ``body`` payload;
    remaining keyword arguments are forwarded to request()."""
    return request('POST', url, body=body, **kwargs)
def head(url, **kwargs):
    """Issue an HTTP HEAD for ``url``; keyword arguments are forwarded to request()."""
    return request('HEAD', url, **kwargs)
def put(url, body=None, **kwargs):
    """Issue an HTTP PUT to ``url`` with the optional ``body`` payload;
    remaining keyword arguments are forwarded to request()."""
    return request('PUT', url, body=body, **kwargs)
def patch(url, body=None, **kwargs):
    """Issue an HTTP PATCH to ``url`` with the optional ``body`` payload;
    remaining keyword arguments are forwarded to request()."""
    return request('PATCH', url, body=body, **kwargs)
def delete(url, **kwargs):
    """Issue an HTTP DELETE for ``url``; keyword arguments are forwarded to request()."""
    return request('DELETE', url, **kwargs)
@contextlib.contextmanager
def yield_response(method, url, *, unix_socket=None, timeout=DEFAULT_TIMEOUT, headers=None,
                   params=None, body=None, form=None, json=None, verify=True, source_address=None,
                   max_redirects=None, ssl_context=None):
    """yield_response is a low-level API that exposes the actual
    http.client.HTTPResponse via a contextmanager.

    Note that unlike mureq.Response, http.client.HTTPResponse does not
    automatically canonicalize multiple appearances of the same header by
    joining them together with a comma delimiter. To retrieve canonicalized
    headers from the response, use response.getheader():
    https://docs.python.org/3/library/http.client.html#http.client.HTTPResponse.getheader

    When redirects are followed and the server answers 303 See Other, the
    follow-up request is sent as a GET without the original payload (a HEAD
    request stays a HEAD request).

    :param str method: HTTP method to request (e.g. 'GET', 'POST')
    :param str url: URL to request
    :param unix_socket: path to Unix domain socket to query, or None for a normal TCP request
    :type unix_socket: str or None
    :param timeout: timeout in seconds, or None for no timeout (default: 15 seconds)
    :type timeout: float or None
    :param headers: HTTP headers as a mapping or list of key-value pairs
    :param params: parameters to be URL-encoded and added to the query string, as a mapping or list of key-value pairs
    :param body: payload body of the request
    :type body: bytes or None
    :param form: parameters to be form-encoded and sent as the payload body, as a mapping or list of key-value pairs
    :param json: object to be serialized as JSON and sent as the payload body
    :param bool verify: whether to verify TLS certificates (default: True)
    :param source_address: source address to bind to for TCP
    :type source_address: str or tuple(str, int) or None
    :param max_redirects: maximum number of redirects to follow, or None (the default) for no redirection
    :type max_redirects: int or None
    :param ssl_context: TLS config to control certificate validation, or None for default behavior
    :type ssl_context: ssl.SSLContext or None
    :return: http.client.HTTPResponse, yielded as context manager
    :rtype: http.client.HTTPResponse
    :raises: HTTPException
    """
    method = method.upper()
    headers = _prepare_outgoing_headers(headers)
    enc_params = _prepare_params(params)
    body = _prepare_body(body, form, json, headers)

    visited_urls = []

    while max_redirects is None or len(visited_urls) <= max_redirects:
        url, conn, path = _prepare_request(method, url, enc_params=enc_params, timeout=timeout,
                                           unix_socket=unix_socket, verify=verify,
                                           source_address=source_address, ssl_context=ssl_context)
        enc_params = ''  # don't reappend enc_params if we get redirected
        visited_urls.append(url)
        try:
            try:
                conn.request(method, path, headers=headers, body=body)
                response = conn.getresponse()
            except HTTPException:
                raise
            except IOError as e:
                # wrap any IOError that is not already an HTTPException
                # in HTTPException, exposing a uniform API for remote errors
                raise HTTPException(str(e)) from e
            redirect_url = _check_redirect(url, response.status, response.headers)
            if max_redirects is None or redirect_url is None:
                response.url = url  # https://bugs.python.org/issue42062
                yield response
                return
            url = redirect_url
            if response.status == 303 and method != 'HEAD':
                # 303 See Other: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/303
                # The follow-up is a plain retrieval, so the original payload
                # and the headers describing it must not be replayed.
                method = 'GET'
                body = None
                for entity_header in ('Content-Type', 'Content-Length', 'Transfer-Encoding'):
                    # deleting a header that is absent is a no-op on HTTPMessage
                    del headers[entity_header]
        finally:
            # runs on every exit path: after the caller's with-block finishes,
            # before following a redirect, and when an error propagates
            conn.close()

    raise TooManyRedirects(visited_urls)
class Response:
    """A fully read HTTP response.

    :ivar str url: the URL that was finally retrieved (differs from the requested one after a redirect)
    :ivar int status_code: the HTTP status code
    :ivar http.client.HTTPMessage headers: the HTTP headers
    :ivar bytes body: the payload body of the response
    """

    __slots__ = ('url', 'status_code', 'headers', 'body')

    def __init__(self, url, status_code, headers, body):
        self.url = url
        self.status_code = status_code
        self.headers = headers
        self.body = body

    def __repr__(self):
        return "Response(status_code={:d})".format(self.status_code)

    @property
    def ok(self):
        """True unless the status code is in the 400-599 error range."""
        code = self.status_code
        return code < 400 or code >= 600

    @property
    def content(self):
        """Alias of ``body``, kept for compatibility with requests.Response."""
        return self.body

    def raise_for_status(self):
        """Raise HTTPErrorStatus if the status code signals an error;
        otherwise do nothing."""
        if self.ok:
            return
        raise HTTPErrorStatus(self.status_code)

    def json(self):
        """Parse the response body as JSON and return the resulting object."""
        import json as jsonlib
        return jsonlib.loads(self.body)

    def _debugstr(self):
        """Render the status line, headers and body as one printable string."""
        lines = [f"HTTP {self.status_code}"]
        lines.extend(f"{k}: {v}" for k, v in self.headers.items())
        lines.append("")  # blank line separating headers from the body
        try:
            lines.append(self.body.decode('utf-8'))
        except UnicodeDecodeError:
            # body is not UTF-8 text; show a placeholder instead
            lines.append(f"<{len(self.body)} bytes binary data>")
        return "\n".join(lines) + "\n"
class TooManyRedirects(HTTPException):
    """Raised when redirect following is enabled but the server keeps
    redirecting past the permitted number of hops without ever producing a
    final response."""
class HTTPErrorStatus(HTTPException):
    """Signals an HTTP error status (a 40x or a 50x).

    Only Response.raise_for_status() raises this: a well-formed response that
    carries an error code is returned normally unless that method is called
    explicitly.
    """

    def __init__(self, status_code):
        self.status_code = status_code

    def __str__(self):
        return "HTTP response returned error code {:d}".format(self.status_code)
# end public API, begin internal implementation details
# Content-Type set by default when the request body is built from `json`.
_JSON_CONTENTTYPE = 'application/json'
# Content-Type set by default when the request body is built from `form`.
_FORM_CONTENTTYPE = 'application/x-www-form-urlencoded'
class UnixHTTPConnection(HTTPConnection):
    """An HTTPConnection that talks over a Unix domain stream socket rather
    than a TCP address.

    The base class is initialised with the placeholder host 'localhost'; the
    socket path is what actually determines where the connection goes.
    """

    def __init__(self, path, timeout=DEFAULT_TIMEOUT):
        super().__init__('localhost', timeout=timeout)
        self._unix_path = path

    def connect(self):
        """Open the Unix socket and store it as this connection's ``sock``."""
        unix_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            unix_sock.settimeout(self.timeout)
            unix_sock.connect(self._unix_path)
        except Exception:
            # don't leak the descriptor when the connection attempt fails
            unix_sock.close()
            raise
        self.sock = unix_sock
def _check_redirect(url, status, response_headers):
"""Return the URL to redirect to, or None for no redirection."""
if status not in (301, 302, 303, 307, 308):
return None
location = response_headers.get('Location')
if not location:
return None
parsed_location = urllib.parse.urlparse(location)
if parsed_location.scheme:
# absolute URL
return location
old_url = urllib.parse.urlparse(url)
if location.startswith('/'):
# absolute path on old hostname
return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
parsed_location.path, parsed_location.params,
parsed_location.query, parsed_location.fragment))
# relative path on old hostname
old_dir, _old_file = os.path.split(old_url.path)
new_path = os.path.join(old_dir, location)
return urllib.parse.urlunparse((old_url.scheme, old_url.netloc,
new_path, parsed_location.params,
parsed_location.query, parsed_location.fragment))
def _prepare_outgoing_headers(headers):
    """Normalise caller-supplied request headers into an HTTPMessage.

    Accepts None, an existing HTTPMessage (used as-is, not copied), a mapping
    with an ``items()`` method, or an iterable of (name, value) pairs. A
    default User-Agent is added when none is present.
    """
    if headers is None:
        prepared = HTTPMessage()
    elif isinstance(headers, HTTPMessage):
        prepared = headers
    else:
        prepared = HTTPMessage()
        pairs = headers.items() if hasattr(headers, 'items') else iter(headers)
        for name, value in pairs:
            prepared[name] = value
    _setdefault_header(prepared, 'User-Agent', DEFAULT_UA)
    return prepared
# XXX join multi-headers together so that get(), __getitem__(),
# etc. behave intuitively, then stuff them back in an HTTPMessage.
def _prepare_incoming_headers(headers):
headers_dict = {}
for k, v in headers.items():
headers_dict.setdefault(k, []).append(v)
result = HTTPMessage()
# note that iterating over headers_dict preserves the original
# insertion order in all versions since Python 3.6:
for k, vlist in headers_dict.items():
result[k] = ','.join(vlist)
return result
def _setdefault_header(headers, name, value):
if name not in headers:
headers[name] = value
def _prepare_body(body, form, json, headers):
if body is not None:
if not isinstance(body, bytes):
raise TypeError('body must be bytes or None', type(body))
return body
if json is not None:
_setdefault_header(headers, 'Content-Type', _JSON_CONTENTTYPE)
import json as jsonlib
return jsonlib.dumps(json).encode('utf-8')
if form is not None:
_setdefault_header(headers, 'Content-Type', _FORM_CONTENTTYPE)
return urllib.parse.urlencode(form, doseq=True)
return None
def _prepare_params(params):
if params is None:
return ''
return urllib.parse.urlencode(params, doseq=True)
def _prepare_request(method, url, *, enc_params='', timeout=DEFAULT_TIMEOUT, source_address=None, unix_socket=None, verify=True, ssl_context=None):
    """Parse the URL and build the matching (unconnected) connection object.

    :param str method: HTTP method (not used by this function)
    :param str url: URL to request; the scheme must be http, https or http+unix
    :param str enc_params: already URL-encoded parameters to append to the query string
    :param timeout: timeout in seconds handed to the connection
    :param source_address: source address to bind to; a bare str is paired with port 0
    :param unix_socket: Unix socket path; when None and the scheme is
        http+unix, the path is taken from the percent-decoded netloc
    :param bool verify: whether to verify TLS certificates; only consulted
        when no ssl_context is supplied
    :param ssl_context: ssl.SSLContext to use as-is, or None to create a default one
    :return: tuple of (URL including the appended parameters, connection
        object, request target to pass to ``conn.request``)
    :raises ValueError: for https+unix or any other unsupported scheme
    """
    # urlsplit, unlike urlparse, leaves a ";params" suffix attached to the
    # path, so it is sent to the server and stays in front of the query.
    parsed_url = urllib.parse.urlsplit(url)

    is_unix = (unix_socket is not None)
    scheme = parsed_url.scheme.lower()
    if scheme.endswith('+unix'):
        scheme = scheme[:-5]
        is_unix = True
        if scheme == 'https':
            raise ValueError("https+unix is not implemented")

    if scheme not in ('http', 'https'):
        raise ValueError("unrecognized scheme", scheme)

    is_https = (scheme == 'https')
    host = parsed_url.hostname
    port = 443 if is_https else 80
    if parsed_url.port:
        port = parsed_url.port

    if is_unix and unix_socket is None:
        unix_socket = urllib.parse.unquote(parsed_url.netloc)

    path = parsed_url.path
    query = '&'.join(part for part in (parsed_url.query, enc_params) if part)
    if query:
        # A request target must begin with "/", so an empty path becomes the
        # root path once a query string has to be attached to it.
        path = f"{path or '/'}?{query}"

    if isinstance(source_address, str):
        source_address = (source_address, 0)

    if is_unix:
        conn = UnixHTTPConnection(unix_socket, timeout=timeout)
    elif is_https:
        if ssl_context is None:
            ssl_context = ssl.create_default_context()
            if not verify:
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
        conn = HTTPSConnection(host, port, source_address=source_address, timeout=timeout,
                               context=ssl_context)
    else:
        conn = HTTPConnection(host, port, source_address=source_address, timeout=timeout)

    # `path` already carries the query string, so the query slot stays empty.
    munged_url = urllib.parse.urlunsplit((parsed_url.scheme, parsed_url.netloc,
                                          path, '', parsed_url.fragment))
    return munged_url, conn, path