From 06be7f7567fa23d85619369b48eb62b056a60656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcos=20C=C3=A1ceres?= Date: Wed, 9 Jun 2021 18:32:43 +1000 Subject: [PATCH] fix(validate/links): use 'link-checker' temporarily (#78) Co-authored-by: Sid Vishnoi <8426945+sidvishnoi@users.noreply.github.com> --- docs/options.md | 2 ++ src/validate-links.ts | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/docs/options.md b/docs/options.md index 492e0a0..98952de 100644 --- a/docs/options.md +++ b/docs/options.md @@ -102,6 +102,8 @@ The Action will try to make use of metadata/config from previously published ver Whether or not to check for broken hyperlinks. +**Warning:** This feature is experimental. + **Possible values:** true, false **Default:** false diff --git a/src/validate-links.ts b/src/validate-links.ts index c0f8144..a011de4 100644 --- a/src/validate-links.ts +++ b/src/validate-links.ts @@ -1,24 +1,34 @@ import { env, exit, install, sh, yesOrNo } from "./utils.js"; -import { PUPPETEER_ENV } from "./constants.js"; - import { BuildResult } from "./build.js"; type Input = Pick<BuildResult, "dest" | "file">; +const URL_IGNORE = [ + // Doesn't like robots + "https://ev.buaa.edu.cn/", + // The to-be published /TR URL. + // Ideally should include shortname, but may be good enough. 
+ `/TR/.+${new Date().toISOString().slice(0, 10).replace(/-/g, "")}/$`, +]; + if (module === require.main) { if (yesOrNo(env("INPUTS_VALIDATE_LINKS")) === false) { exit("Skipped", 0); } - exit("Link validator is currently disabled due to some bugs.", 0); const input: Input = JSON.parse(env("OUTPUTS_BUILD")); main(input).catch(err => exit(err.message || "Failed", err.code)); } -export default async function main({ dest, file }: Input) { - await install(`href-checker`, PUPPETEER_ENV); - await sh(`href-checker ${file} --no-same-site`, { - output: "stream", - cwd: dest, - env: PUPPETEER_ENV, - }); +export default async function main({ dest: dir }: Input) { + await install("link-checker"); + const opts = getLinkCheckerOptions(URL_IGNORE); + // Note: link-checker checks a directory, not a file. + await sh(`link-checker ${opts} ${dir}`, "stream"); +} + +function getLinkCheckerOptions(ignoreList: string[]) { + return ignoreList + .map(url => `--url-ignore="${url}"`) + .concat(["--http-timeout=50000", "--http-redirects=3", "--http-always-get"]) + .join(" "); }