-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.js
115 lines (99 loc) · 2.34 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/**
* @typedef {[string, number]} TrigramTuple
* @typedef {TrigramTuple[]} TrigramTuples
* @typedef {Record<string, number>} TrigramDictionary
*/
import {trigram} from 'n-gram'
import {collapseWhiteSpace} from 'collapse-white-space'
// Shared reference to `Object.prototype.hasOwnProperty`, invoked through
// `.call` so the check never relies on a method found on the inspected object.
const own = Object.prototype.hasOwnProperty
/**
 * Normalize `value` before trigram extraction.
 *
 * Each run of characters in the range U+0021–U+0040 (ASCII punctuation,
 * symbols, and digits, which carry no weight for language detection) is
 * replaced by a single space.
 * The text is then passed through `collapseWhiteSpace`, trimmed, and
 * lowercased.
 *
 * @param {string|null} [value]
 *   Text to clean; `null` and `undefined` produce an empty string.
 * @returns {string}
 *   Cleaned text.
 */
export function clean(value) {
  // `== null` matches exactly `null` and `undefined`.
  if (value == null) {
    return ''
  }

  const withoutSymbols = String(value).replace(/[\u0021-\u0040]+/g, ' ')
  const collapsed = collapseWhiteSpace(withoutSymbols)

  return collapsed.trim().toLowerCase()
}
/**
 * List the trigrams of `value`.
 *
 * The value is first normalized with `clean` and then padded with one space
 * on each side before it is split into trigrams.
 *
 * @param {string} [value]
 *   Text to split.
 * @returns {string[]}
 *   Trigrams of the cleaned, padded text.
 */
export function trigrams(value) {
  const padded = ` ${clean(value)} `

  return trigram(padded)
}
/**
 * Count how often each trigram occurs in `value`.
 *
 * @param {string} value
 *   Text to analyze.
 * @returns {TrigramDictionary}
 *   Object whose keys are trigrams and whose values are their occurrence
 *   counts.
 */
export function asDictionary(value) {
  /** @type {TrigramDictionary} */
  const counts = {}

  for (const key of trigrams(value)) {
    // Own-property check: first sighting starts at 1, later ones increment.
    counts[key] = own.call(counts, key) ? counts[key] + 1 : 1
  }

  return counts
}
/**
 * Build a list of `[trigram, count]` tuples for `value`, ordered with the
 * module's `sort` comparator.
 *
 * @param {string} value
 *   Text to analyze.
 * @returns {TrigramTuples}
 *   Sorted trigram--count tuples.
 */
export function asTuples(value) {
  const counts = asDictionary(value)
  /** @type {TrigramTuples} */
  const pairs = []

  // `Object.keys` yields only own enumerable keys, so no extra guard is needed.
  for (const key of Object.keys(counts)) {
    pairs.push([key, counts[key]])
  }

  // `Array.prototype.sort` sorts in place and returns the same array.
  return pairs.sort(sort)
}
/**
 * Turn a list of trigram--count tuples into an `Object` that maps each
 * trigram to its count.
 *
 * When the same trigram appears in several tuples, the count of the last one
 * is kept.
 *
 * @param {TrigramTuples} tuples
 *   Tuples of the form `[trigram, count]`.
 * @returns {TrigramDictionary}
 *   Object with trigrams as keys and counts as values.
 */
export function tuplesAsDictionary(tuples) {
  /** @type {TrigramDictionary} */
  const result = {}

  for (const [key, count] of tuples) {
    result[key] = count
  }

  return result
}
/**
 * Comparator for trigram tuples: orders by the count stored at index `1`,
 * lowest count first.
 *
 * @param {TrigramTuple} a
 *   Left tuple.
 * @param {TrigramTuple} b
 *   Right tuple.
 * @returns {number}
 *   Negative when `a` has the lower count, positive when `b` has, `0` when
 *   the counts are equal.
 */
function sort(a, b) {
  const left = a[1]
  const right = b[1]

  return left - right
}