-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetchPaths.js
103 lines (73 loc) · 3.23 KB
/
fetchPaths.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import fs from 'fs'
import fetch from 'node-fetch'
import { XMLParser } from 'fast-xml-parser';
import * as cheerio from 'cheerio';
import { JSDOM } from 'jsdom'
// Module-level configuration. Everything here is read once at load time and
// never reassigned, so the bindings are declared with `const`.

// Astro page template that each generated page is derived from.
const template = fs.readFileSync('./astro.template', 'utf8');
// Snapshot of the entries in ./src/pages taken before any page is regenerated.
const existingFiles = fs.readdirSync('./src/pages');
// Base URL of the site being mirrored; stripped from sitemap/page URLs to get local paths.
const nSite = 'https://graphics-for-good.wixsite.com/graphics-for-good';
// File name of the sitemap index, used both remotely and under ./public.
const fName = 'sitemap.xml';
// Absolute URL of the sitemap index.
const sitemap = `${nSite}/${fName}`;
/**
 * Escape text so it can be interpolated into HTML element content or a
 * double-quoted attribute value without breaking the markup.
 *
 * @param {unknown} value - Value to escape (coerced to string).
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Normalize a parsed XML node that may be absent, a single object, or an array
 * into an array (the parser output is not always an array, which is why the
 * URL list needs the same treatment as the sitemap list).
 *
 * @param {unknown} value - Parsed node(s).
 * @returns {Array} Always an array; empty when `value` is null/undefined.
 */
function toArray(value) {
  if (value == null) return [];
  return Array.isArray(value) ? value : [value];
}

/**
 * Fetch a URL and return its body as text, failing loudly on HTTP errors
 * instead of silently processing an error page.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {Promise<string>} Response body.
 * @throws {Error} When the response status is not 2xx.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Request for ${url} failed: ${response.status} ${response.statusText}`);
  }
  return response.text();
}

/**
 * Download one page, extract its title, icon and text description, and write
 * the corresponding `.astro` file under ./src/pages.
 *
 * @param {string} pageUrl - Absolute page URL taken from a sitemap `<loc>`.
 * @returns {Promise<void>}
 */
async function writeAstroPage(pageUrl) {
  const html = await fetchText(pageUrl);
  const { document } = new JSDOM(html).window;
  const { head } = document;

  // Plain-text description built from the page container; empty when the
  // container is missing rather than crashing on a null dereference.
  let description = '';
  const container = document.body.querySelector('.PAGES_CONTAINER');
  if (container) {
    container
      .querySelectorAll('script, style, meta, link, :empty')
      .forEach((el) => el.remove());
    const $ = cheerio.load(container.innerHTML);
    // Collapse line breaks only; the previous character class also contained
    // literal commas and therefore stripped every comma from the text.
    description = $.text().replace(/[\r\n]+/g, ' ');
  }

  const titleEl = head.querySelector('title');
  const title = titleEl ? titleEl.textContent : '';
  const iconEl = head.querySelector('link[rel="icon"]');
  const icon = iconEl ? iconEl.href : '';

  // Site-relative path without the leading slash; the site root maps to "index".
  let pagePath = pageUrl.startsWith(nSite) ? pageUrl.slice(nSite.length) : pageUrl;
  if (pagePath.startsWith('/')) pagePath = pagePath.slice(1);
  if (pagePath === '') pagePath = 'index';

  const headTags = [
    ` <title>${escapeHtml(title)}</title>`,
    ` <meta name="description" content="${escapeHtml(description)}" />`,
  ];
  if (icon) headTags.push(` <link rel="icon" href="${escapeHtml(icon)}" />`);

  // Replacer functions are used so that `$&`, `$'` etc. occurring in scraped
  // text are inserted literally instead of being treated as replacement patterns.
  const routePath = `/${pagePath === 'index' ? '' : pagePath}`;
  const page = template
    .replace("var path = ''", () => `var path = ${JSON.stringify(routePath)}`)
    .replace('</head>', () => `${headTags.join('\n')}\n</head>`);

  // Make sure the parent directory chain exists for nested routes.
  const lastSlash = pagePath.lastIndexOf('/');
  if (lastSlash !== -1) {
    fs.mkdirSync(`./src/pages/${pagePath.slice(0, lastSlash)}`, { recursive: true });
  }
  fs.writeFileSync(`./src/pages/${pagePath}.astro`, page);
}

/**
 * Download one nested sitemap, store a copy under ./public, and generate a
 * page for every URL it lists.
 *
 * @param {{ loc: string }} nSitemap - Entry from the sitemap index.
 * @returns {Promise<void>} Resolves once every listed page has been written.
 */
async function mirrorNestedSitemap(nSitemap) {
  const nSitemapLoc = nSitemap.loc;
  const sitemapName = nSitemapLoc.startsWith(nSite)
    ? nSitemapLoc.slice(nSite.length)
    : nSitemapLoc;

  const nText = await fetchText(nSitemapLoc);
  // Store the nested sitemap's own content (previously the index was written here).
  fs.writeFileSync(`./public/${sitemapName}`, nText);

  const nJson = new XMLParser().parse(nText);
  const urls = toArray(nJson.urlset ? nJson.urlset.url : undefined);
  await Promise.all(urls.map((url) => writeAstroPage(url.loc)));
}

/**
 * Regenerate ./src/pages from the remote site's sitemap.
 *
 * Steps: download the sitemap index and save it under ./public, delete the
 * previously generated top-level page files (keeping `404.astro` and
 * directories), then mirror every nested sitemap and write one `.astro` file
 * per listed URL.
 *
 * @returns {Promise<void>} Resolves only after every page has been written;
 *   rejects with the first download, parse or file-system error.
 */
async function fetchPaths() {
  const text = await fetchText(sitemap);
  fs.writeFileSync(`./public/${fName}`, text);

  const json = new XMLParser().parse(text);
  if (!json.sitemapindex) {
    throw new Error(`No <sitemapindex> found in ${sitemap}`);
  }
  const nestedSitemaps = toArray(json.sitemapindex.sitemap);

  // Clear stale generated pages; directories and the hand-written 404 page stay.
  for (const file of existingFiles) {
    if (file === '404.astro') continue;
    const filePath = `./src/pages/${file}`;
    if (!fs.lstatSync(filePath).isDirectory()) {
      fs.unlinkSync(filePath);
    }
  }

  // Await all work so callers can observe completion and failures
  // (async callbacks passed to forEach are never awaited).
  await Promise.all(nestedSitemaps.map((nSitemap) => mirrorNestedSitemap(nSitemap)));
}
// Entry point: run the generator. The returned promise must not be left
// floating, so failures are logged and reported through the process exit code.
fetchPaths().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});