-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathparseRows.js
29 lines (25 loc) · 843 Bytes
/
parseRows.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
var pdfreader = require('pdfreader');
//pdfreader.LOG.toggle(true); // uncomment this to get DEBUG logs
// Text fragments collected for the page currently being read, keyed by the
// y-position of each fragment; replaced with a fresh object for every page.
let rows = {};
/**
 * Writes one row to stdout: every text fragment stored under the given
 * y-position, concatenated without a separator. A y-position with no stored
 * fragments produces an empty line.
 *
 * @param {string} y - Key into `rows` (a y-position).
 */
function printRow(y) {
  const fragments = rows[y] || [];
  const line = fragments.join('');
  console.log(line);
}
/**
 * Prints every accumulated row, ordered by ascending y-position.
 *
 * The keys of `rows` are strings, so they are compared by their parsed float
 * value rather than lexicographically.
 */
function printRows() {
  const byNumericPosition = (a, b) => parseFloat(a) - parseFloat(b);
  const positions = Object.keys(rows);
  positions.sort(byNumericPosition);
  for (const y of positions) {
    printRow(y);
  }
}
// Read the PDF and print its text page by page. The callback runs once per
// parsed item; a falsy item is treated as end of file, an item with a `page`
// property as a page boundary, and an item with `text` as a fragment to store
// under its y-position.
new pdfreader.PdfReader().parseFileItems('CV_ErhanYasar.pdf', function (err, item) {
  if (err) {
    console.error(err);
    return;
  }

  if (!item) {
    // End of file: flush the rows of the last page. There is no item here,
    // so there is no page number to print (reading item.page would throw).
    printRows();
    rows = {};
    return;
  }

  if (item.page) {
    // Page boundary: flush what was collected so far, then announce the page.
    printRows();
    console.log('\n -- PAGE', item.page, '-- \n');
    rows = {}; // clear rows for next page
    return;
  }

  if (item.text) {
    // Accumulate text items into the rows object, one entry per line.
    (rows[item.y] = rows[item.y] || []).push(item.text);
  }
});