-
Notifications
You must be signed in to change notification settings - Fork 192
/
Copy pathimport.js
79 lines (63 loc) · 1.78 KB
/
import.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
var jsdom = require("jsdom")
, async = require("async")
, fs = require('fs')
, request = require('request')
, stringify = require("csv-stringify")
, countries = require('country-data').countries;
const { JSDOM } = jsdom;
// Loop counter shared between readPage (which resets it and advances it while
// scanning a page) and loadList (which adds it to the running total and stops
// paging once it is below 50).
let count;
// innerHTML of the first result link on the most recently processed page;
// readPage compares against it to detect that the same page was served again.
let lastPage;
/**
 * Parses one search-result page and reports every university link on it.
 *
 * Module state touched: `count` is set to the number of result links found
 * (0 when the page repeats the previous one, or has no links), and `lastPage`
 * is updated to the markup of the first link so a repeated page can be
 * detected on the next call.
 *
 * @param {string} body - HTML of the result page.
 * @param {function(string, string): void} write - Invoked once per result
 *   link with the link text and the link's `href`.
 * @param {function(): void} cb - Invoked with no arguments when the page has
 *   been processed.
 */
function readPage(body, write, cb) {
  const { document } = (new JSDOM(body)).window;
  const allItems = document.querySelectorAll('ol li a');

  // Reset first so an empty or repeated page reports zero results.
  count = 0;

  if (allItems.length > 0) {
    const currentPage = allItems[0].innerHTML;
    if (currentPage === lastPage) {
      // Same first entry as the previous page: nothing new to report.
      return cb();
    }
    lastPage = currentPage;
  }

  // Visit every link exactly once. Incrementing the index both in the loop
  // header and in the body would skip every second entry.
  for (let i = 0; i < allItems.length; i++) {
    const item = allItems[i];
    // textContent yields the decoded link text; innerHTML would leak markup
    // and entities such as "&amp;" into the CSV.
    write(item.textContent, item.href);
  }

  count = allItems.length;
  cb();
}
// CSV rows are serialized by the stringifier and appended to the output file.
const fileStream = fs.createWriteStream('world-universities.csv');
const output = stringify();

// Each time the stringifier becomes readable, drain everything it has buffered
// into the file.
output.on('readable', function() {
  // Declared locally so the loop does not rely on an implicit global, which is
  // a ReferenceError in strict mode / ES modules.
  let row;
  // read() returns null once the internal buffer is empty.
  while ((row = output.read()) !== null) {
    fileStream.write(row);
  }
});
/**
 * Fetches the search-result pages for one domain, 50 entries at a time, and
 * writes a `[country, name, url]` row to the CSV stream for every entry that
 * readPage reports. Progress is printed to stdout: the country label, one dot
 * per page, then the accumulated total.
 *
 * Paging stops as soon as a page yields fewer than 50 entries (as reported
 * through the module-level `count`).
 *
 * @param {string} dom - Value for the `dom` query parameter of the search URL.
 * @param {string} country - Country code written to the first CSV column and
 *   shown in the progress output.
 * @param {function(): void} cb - Invoked with no arguments once all pages for
 *   this domain have been processed.
 */
function loadList(dom, country, cb) {
  let total = 0;
  let start = 1;
  process.stdout.write("["+country+"] ");
  // NOTE(review): the test below returns its result synchronously, which
  // assumes an `async` release whose doUntil accepts a synchronous test —
  // confirm against the installed version.
  async.doUntil(function(next) {
    const pageUrl = "http://univ.cc/search.php?dom=" + dom + "&key=&start=" + start;
    request(pageUrl, function (error, response, body) {
      if (error) {
        // Best effort: report the failure instead of swallowing it silently,
        // then fall through. readPage still runs so that `count` is reset and
        // paging for this country ends.
        console.error(`[${country}] request failed at start=${start}: ${error}`);
      }
      readPage(body, function (name, url) {
        output.write([country, name, url]);
      }, next);
    });
  }, function() {
    // Runs after each page: advance to the next page and decide whether the
    // page just read was the last one.
    start += 50;
    total += count;
    process.stdout.write('.');
    return count < 50;
  }, function () {
    process.stdout.write(total + '\n');
    cb();
  });
}
// Process the country-data keys strictly one after another; close the CSV
// stringifier once the last one has finished.
const countriesCodes = Object.keys(countries);
async.eachSeries(countriesCodes, (code, done) => {
  // Only two-character keys are looked up; everything else is skipped.
  if (code.length !== 2) {
    return done();
  }

  // "US" is searched under the "edu" domain; every other key is used as-is.
  let dom = code;
  if (code === "US") {
    dom = "edu";
  }

  loadList(dom.toLowerCase(), code, done);
}, () => {
  output.end();
});