1
- /**
2
- * @license
3
- *
4
- * MIT License
5
- *
6
- * Copyright (c) 2019 Richie Bendall and 2016 - 2019 The Node Fetch Team
7
- *
8
- * Permission is hereby granted, free of charge, to any person obtaining a copy
9
- * of this software and associated documentation files (the 'Software'), to deal
10
- * in the Software without restriction, including without limitation the rights
11
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
- * copies of the Software, and to permit persons to whom the Software is
13
- * furnished to do so, subject to the following conditions:
14
- *
15
- * The above copyright notice and this permission notice shall be included in all
16
- * copies or substantial portions of the Software.
17
- *
18
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
- * SOFTWARE.
25
- */
26
-
27
1
import getCharset from "./utils/get-charset"
28
2
import { decode } from "iconv-lite"
29
3
import { load as $ } from "cheerio"
@@ -36,40 +10,40 @@ import is from "@sindresorhus/is"
36
10
* @param content The content to convert.
37
11
* @param headers HTTP Headers provided with a request.
38
12
*/
39
- export default function convertBody ( content : Buffer | string , headers ?: Headers ) : string {
40
- // Try to extract content-type header
41
- const contentType = ! is . nullOrUndefined ( headers ) ? headers . get ( "content-type" ) : null
42
-
43
- // Resulting charset
44
- let charset : string
45
-
46
- // Convert to buffer
47
- if ( is . string ( content ) ) content = Buffer . from ( content )
48
-
49
- // Header
50
- if ( contentType ) charset = getCharset ( contentType )
51
-
52
- // No charset in content type, peek at response body for at most 1024 bytes
53
- const res = content . slice ( 0 , 1024 ) . toString ( )
54
-
55
- // HTML5, HTML4 and XML
56
- if ( ! charset && res ) {
57
- charset = getCharset (
58
- $ ( res ) ( "meta[charset]" ) . attr ( "charset" ) || // HTML5
59
- $ ( res ) ( "meta[http-equiv][content]" ) . attr ( "content" ) || // HTML4
60
- $ ( res . replace ( / < \? ( .* ) \? > / im, "<$1>" ) , { xmlMode : true } ) . root ( ) . find ( "xml" ) . attr ( "encoding" ) , // XML
61
- )
62
- }
63
-
64
- // Prevent decode issues when sites use incorrect encoding
65
- // ref: https://hsivonen.fi/encoding-menu/
66
- if ( charset && [ "gb2312" , "gbk" ] . includes ( charset . toLowerCase ( ) ) ) charset = "gb18030"
67
-
68
- // Turn raw buffers into a single utf-8 buffer
69
- return decode (
70
- content ,
71
- charset || "utf-8" ,
72
- )
13
function convertBody(content: Buffer | string, headers?: Headers): string {
	// Charset resolution order: the charset announced by the content-type header,
	// then a sniff of the first 1024 bytes of the body (HTML5 meta, HTML4 meta,
	// XML declaration), and finally a utf-8 fallback.

	// Convert to buffer (kept in a local so the parameter is never reassigned)
	const buffer = is.string(content) ? Buffer.from(content) : content

	// Try to extract content-type header
	const contentType = is.nullOrUndefined(headers) ? null : headers.get("content-type")

	// Resulting charset, seeded from the header when one is present
	let charset = contentType ? getCharset(contentType) : undefined

	if (!charset) {
		// No charset in content type, peek at response body for at most 1024 bytes
		const head = buffer.subarray(0, 1024).toString()

		if (head) {
			// Parse the sniffed markup once and reuse it for both meta lookups
			const html = $(head)

			charset = getCharset(
				html("meta[charset]").attr("charset") || // HTML5
				html("meta[http-equiv][content]")
					// Only the content-type pragma declares an encoding; other
					// http-equiv metas (refresh, X-UA-Compatible, ...) must be skipped
					.filter((_, element) => (html(element).attr("http-equiv") || "").trim().toLowerCase() === "content-type")
					.attr("content") || // HTML4
				// Rewrite the `<?xml ... ?>` declaration into a regular `<xml ...>` tag
				// so that its `encoding` attribute can be read from the parsed tree
				$(head.replace(/<\?(.*)\?>/im, "<$1>"), { xmlMode: true }).root().find("xml").attr("encoding"), // XML
			)
		}
	}

	// Prevent decode issues when sites use incorrect encoding
	// ref: https://hsivonen.fi/encoding-menu/
	if (charset && ["gb2312", "gbk"].includes(charset.toLowerCase())) charset = "gb18030"

	// Decode the raw bytes into a string, defaulting to utf-8 when nothing was detected
	return decode(buffer, charset || "utf-8")
}
74
48
75
- module . exports = convertBody
49
+ export = convertBody
0 commit comments