parseHtml: add link and img extraction
This commit is contained in:
parent
e738cfae89
commit
18948cc6df
|
|
@ -0,0 +1 @@
|
|||
node_modules
|
||||
|
|
@ -1,11 +1,17 @@
|
|||
{
|
||||
"name": "ad_pubfiles_pub.d",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "ad_pubfiles_pub.d",
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@weborigami/origami": "^0.6.12"
|
||||
"@weborigami/origami": "^0.6.12",
|
||||
"hast-util-from-html": "^2.0.3",
|
||||
"unist-util-visit": "^5.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@acemir/cssom": {
|
||||
|
|
@ -685,6 +691,21 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/hast": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
|
||||
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/unist": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
|
||||
"integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@weborigami/async-tree": {
|
||||
"version": "0.6.12",
|
||||
"resolved": "https://registry.npmjs.org/@weborigami/async-tree/-/async-tree-0.6.12.tgz",
|
||||
|
|
@ -747,6 +768,16 @@
|
|||
"require-from-string": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/comma-separated-tokens": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz",
|
||||
"integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/css-tree": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.1.0.tgz",
|
||||
|
|
@ -811,6 +842,15 @@
|
|||
"integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/dequal": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
|
||||
"integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/detect-libc": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
|
||||
|
|
@ -826,6 +866,19 @@
|
|||
"integrity": "sha512-ZVyjhAJ7sCe1PNXEGveObOH9AC8QvMga3HJIghHawtG7mE4K5pW9nz/vDGAr/U7a3LWgdOzEE7ac9MURnyfaTA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/devlop": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
|
||||
"integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dequal": "^2.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/entities": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz",
|
||||
|
|
@ -849,6 +902,86 @@
|
|||
"integrity": "sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/hast-util-from-html": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz",
|
||||
"integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/hast": "^3.0.0",
|
||||
"devlop": "^1.1.0",
|
||||
"hast-util-from-parse5": "^8.0.0",
|
||||
"parse5": "^7.0.0",
|
||||
"vfile": "^6.0.0",
|
||||
"vfile-message": "^4.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/hast-util-from-html/node_modules/parse5": {
|
||||
"version": "7.3.0",
|
||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
|
||||
"integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"entities": "^6.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/hast-util-from-parse5": {
|
||||
"version": "8.0.3",
|
||||
"resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.3.tgz",
|
||||
"integrity": "sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/hast": "^3.0.0",
|
||||
"@types/unist": "^3.0.0",
|
||||
"devlop": "^1.0.0",
|
||||
"hastscript": "^9.0.0",
|
||||
"property-information": "^7.0.0",
|
||||
"vfile": "^6.0.0",
|
||||
"vfile-location": "^5.0.0",
|
||||
"web-namespaces": "^2.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/hast-util-parse-selector": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz",
|
||||
"integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/hast": "^3.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/hastscript": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/hastscript/-/hastscript-9.0.1.tgz",
|
||||
"integrity": "sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/hast": "^3.0.0",
|
||||
"comma-separated-tokens": "^2.0.0",
|
||||
"hast-util-parse-selector": "^4.0.0",
|
||||
"property-information": "^7.0.0",
|
||||
"space-separated-tokens": "^2.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/highlight.js": {
|
||||
"version": "11.11.1",
|
||||
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz",
|
||||
|
|
@ -1035,6 +1168,16 @@
|
|||
"integrity": "sha512-B+b+kQ1YrYS7zO7P7bQcoqqMUizP06BOyNSBEnB5VJKDSWo8fsVuDkfSmwdjF0JsRtaNh83so5MMFJ95soH5jg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/property-information": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz",
|
||||
"integrity": "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
|
||||
|
|
@ -1140,6 +1283,16 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/space-separated-tokens": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz",
|
||||
"integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/stubborn-fs": {
|
||||
"version": "1.2.5",
|
||||
"resolved": "https://registry.npmjs.org/stubborn-fs/-/stubborn-fs-1.2.5.tgz",
|
||||
|
|
@ -1218,6 +1371,103 @@
|
|||
"node": ">=20.18.1"
|
||||
}
|
||||
},
|
||||
"node_modules/unist-util-is": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz",
|
||||
"integrity": "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/unist-util-stringify-position": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz",
|
||||
"integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/unist-util-visit": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.1.0.tgz",
|
||||
"integrity": "sha512-m+vIdyeCOpdr/QeQCu2EzxX/ohgS8KbnPDgFni4dQsfSCtpz8UqDyY5GjRru8PDKuYn7Fq19j1CQ+nJSsGKOzg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0",
|
||||
"unist-util-is": "^6.0.0",
|
||||
"unist-util-visit-parents": "^6.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/unist-util-visit-parents": {
|
||||
"version": "6.0.2",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.2.tgz",
|
||||
"integrity": "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0",
|
||||
"unist-util-is": "^6.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/vfile": {
|
||||
"version": "6.0.3",
|
||||
"resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
|
||||
"integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0",
|
||||
"vfile-message": "^4.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/vfile-location": {
|
||||
"version": "5.0.3",
|
||||
"resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz",
|
||||
"integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0",
|
||||
"vfile": "^6.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/vfile-message": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz",
|
||||
"integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/unist": "^3.0.0",
|
||||
"unist-util-stringify-position": "^4.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/w3c-xmlserializer": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz",
|
||||
|
|
@ -1240,6 +1490,16 @@
|
|||
"tiny-readdir": "^2.7.2"
|
||||
}
|
||||
},
|
||||
"node_modules/web-namespaces": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz",
|
||||
"integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "8.0.1",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz",
|
||||
|
|
|
|||
16
package.json
16
package.json
|
|
@ -1,6 +1,18 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"@weborigami/origami": "^0.6.12"
|
||||
"@weborigami/origami": "^0.6.12",
|
||||
"hast-util-from-html": "^2.0.3",
|
||||
"unist-util-visit": "^5.1.0"
|
||||
},
|
||||
"type": "module"
|
||||
"type": "module",
|
||||
"name": "ad_pubfiles_pub.d",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "addFilenameData.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
import {fromHtml} from 'hast-util-from-html';
|
||||
import {visit} from 'unist-util-visit';
|
||||
|
||||
export default async function(value) {
|
||||
const tree = fromHtml(value);
|
||||
const links = [];
|
||||
visit(tree, [{tagName: 'link'}, {tagName: 'img'}], function(node) {
|
||||
links.push(node);
|
||||
})
|
||||
const hrefs = links.map(link => link.properties.href || link.properties.src)
|
||||
|
||||
return hrefs;
|
||||
// return tree;
|
||||
}
|
||||
8
site.ori
8
site.ori
|
|
@ -51,6 +51,14 @@
|
|||
}.final
|
||||
|
||||
renderedPages: Tree.map(pages, (page) => <page.ori>(page, all))
|
||||
/*
|
||||
the rendered pages contain links and images.
|
||||
We want to find those, and insert a tree of their maps into the output.
|
||||
First test is whether that works.
|
||||
Second test is whether the paths will be correct.
|
||||
*/
|
||||
hast = Tree.map(renderedPages, parseHtml.js)
|
||||
links: Tree.flat(hast, 2) → (a) => [...new Set(a)]
|
||||
|
||||
/*
|
||||
assets are relative to the pubfiles directory
|
||||
|
|
|
|||
Loading…
Reference in New Issue