adding recursive fetching and hashing
This commit is contained in:
parent
346bded903
commit
a18b50f4ae
43
index.js
43
index.js
@ -4,6 +4,8 @@ const axios = require('axios');
|
||||
const { createHash } = require('crypto');
|
||||
const archiver = require('archiver');
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const { parse: parseUrl } = require('url');
|
||||
const { parse: parseHtml } = require('node-html-parser');
|
||||
|
||||
const app = express();
|
||||
const port = process.env.PORT || 3000;
|
||||
@ -42,7 +44,6 @@ app.use(
|
||||
app.get('/', (req, res) => {
|
||||
res.send('Hello There!');
|
||||
})
|
||||
|
||||
// hashContent function to hash the content of a file
|
||||
async function hashContent(content) {
|
||||
const hash = createHash('sha256');
|
||||
@ -50,6 +51,38 @@ async function hashContent(content) {
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
/**
 * Recursively fetches `url` and every stylesheet/script it references, and
 * combines their text into one string.
 *
 * Despite its name, this function does not compute a digest: it returns the
 * concatenated content, which the caller is expected to hash.
 *
 * The fetched body is parsed as HTML and every `link[rel="stylesheet"]` and
 * `script[src]` element is followed. Each reference is resolved against the
 * URL of the document that contains it, so relative (`b.js`), root-relative
 * (`/a.css`), protocol-relative (`//cdn/x.js`) and absolute references all
 * resolve the way a browser would resolve them.
 *
 * NOTE(review): every referenced URL is fetched as-is; no host allow-list is
 * applied in this function.
 *
 * @param {string} url - Absolute URL of the document to fetch.
 * @param {Set<string>} [visitedUrls] - URLs already fetched during this
 *   traversal; shared across recursive calls to break reference cycles.
 * @returns {Promise<string>} The document text, followed by `_` and one
 *   `<path+query>_<combined content>` entry per linked resource (joined with
 *   `_`). Returns `''` when `url` was already visited.
 * @throws Rejects if any fetch fails or a reference cannot be parsed as a URL.
 */
async function fetchAndHashContent(url, visitedUrls = new Set()) {
  if (visitedUrls.has(url)) {
    return ''; // Already fetched in this traversal; prevents infinite loops.
  }

  visitedUrls.add(url);

  const response = await axios.get(url, { responseType: 'arraybuffer', timeout: 10000 });
  const content = response.data.toString('utf-8');

  // Parse the body as HTML to find the stylesheets and scripts it links to.
  const root = parseHtml(content);
  const linkedResources = root.querySelectorAll('link[rel="stylesheet"], script[src]');

  const linkedResourceHashes = await Promise.all(
    linkedResources
      .map((resource) => resource.getAttribute('href') || resource.getAttribute('src'))
      // An element without a usable href/src has nothing to fetch.
      .filter(Boolean)
      .map(async (reference) => {
        // Resolve against the containing document instead of concatenating
        // strings, and never mutate `url`: it is shared by every callback.
        const resolved = new URL(reference, url);
        // A fragment does not change what is fetched; drop it so the same
        // resource is not visited twice under different fragments.
        resolved.hash = '';

        const resourceContent = await fetchAndHashContent(resolved.href, visitedUrls);
        return `${resolved.pathname}${resolved.search}_${resourceContent}`;
      }),
  );

  // Combine the document text with the entries of its linked resources.
  return `${content}_${linkedResourceHashes.join('_')}`;
}
|
||||
|
||||
|
||||
// API endpoint to start the recursive download and hashing
|
||||
app.post('/hash', async (req, res) => {
|
||||
try {
|
||||
@ -62,15 +95,17 @@ app.post('/hash', async (req, res) => {
|
||||
url = [url];
|
||||
|
||||
const promises = url.map(async (url) => {
|
||||
const response = await axios.get(url, { responseType: 'arraybuffer', timeout: 10000 });
|
||||
const fileHash = await hashContent(response.data);
|
||||
const hashedContent = await fetchAndHashContent(url);
|
||||
const fileHash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
|
||||
return { url, fileHash };
|
||||
})
|
||||
});
|
||||
|
||||
let results = await Promise.all(promises);
|
||||
results = results.reduce((acc, { url, fileHash }) => {
|
||||
acc[url] = fileHash;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
res.json(results);
|
||||
} catch (error) {
|
||||
console.error('Error:', error.message);
|
||||
|
||||
2
index.min.js
vendored
2
index.min.js
vendored
@ -1 +1 @@
|
||||
require("dotenv").config();const express=require("express"),axios=require("axios"),{createHash:createHash}=require("crypto"),archiver=require("archiver"),rateLimit=require("express-rate-limit"),app=express(),port=process.env.PORT||3e3;app.use(express.json());const allowedDomains=process.env.ALLOWED_DOMAINS.split(",");async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function downloadGitHubRepo(owner,repo){if(!owner||!repo)throw new Error("Missing owner or repo");const zipUrl=`https://github.com/${owner}/${repo}/archive/refs/heads/master.zip`;return(await axios.get(zipUrl,{responseType:"arraybuffer"})).data}app.use(rateLimit({windowMs:6e4,max:1})),app.get("/",((req,res)=>{res.send("Hello There!")})),app.post("/hash",(async(req,res)=>{try{console.log("Request:",req.body);let{url:url}=req.body;if(!url)return res.status(400).json({error:"Missing URL in the request parameters"});Array.isArray(url)||(url=[url]);const promises=url.map((async url=>{const response=await axios.get(url,{responseType:"arraybuffer",timeout:1e4});return{url:url,fileHash:await hashContent(response.data)}}));let results=await Promise.all(promises);results=results.reduce(((acc,{url:url,fileHash:fileHash})=>(acc[url]=fileHash,acc)),{}),res.json(results)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.post("/download-repos",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing urls in the request parameters"});Array.isArray(urls)||(urls=[urls]);const archive=archiver("zip");res.attachment("repos.zip");const downloadPromises=urls.map((async url=>{const[owner,name]=url.split("/").slice(-2);if(!owner||!name)return void console.error(`Invalid url format: ${url}`);const zipBuffer=await downloadGitHubRepo(owner,name);archive.append(zipBuffer,{name:`${owner}-${name}.zip`})}));await 
Promise.all(downloadPromises),archive.finalize(),archive.pipe(res)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.listen(port,(()=>{console.log(`Server is running at http://localhost:${port}`)})),module.exports=app;
|
||||
require("dotenv").config();const express=require("express"),axios=require("axios"),{createHash:createHash}=require("crypto"),archiver=require("archiver"),rateLimit=require("express-rate-limit"),{parse:parseUrl}=require("url"),{parse:parseHtml}=require("node-html-parser"),app=express(),port=process.env.PORT||3e3;app.use(express.json());const allowedDomains=process.env.ALLOWED_DOMAINS.split(",");async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function fetchAndHashContent(url,visitedUrls=new Set){if(visitedUrls.has(url))return"";visitedUrls.add(url);const content=(await axios.get(url,{responseType:"arraybuffer",timeout:1e4})).data.toString("utf-8"),linkedResources=parseHtml(content).querySelectorAll('link[rel="stylesheet"], script[src]');return`${content}_${(await Promise.all(linkedResources.map((async resource=>{const resourceUrl=parseUrl(resource.getAttribute("href")||resource.getAttribute("src"),!0);let absoluteResourceUrl=resourceUrl.href;resourceUrl.hostname||(resourceUrl.path.startsWith("/")||url.endsWith("/")||(url+="/"),absoluteResourceUrl=`${url}${resourceUrl.path}`);const resourceContent=await fetchAndHashContent(absoluteResourceUrl,visitedUrls);return`${resourceUrl.path}_${resourceContent}`})))).join("_")}`}async function downloadGitHubRepo(owner,repo){if(!owner||!repo)throw new Error("Missing owner or repo");const zipUrl=`https://github.com/${owner}/${repo}/archive/refs/heads/master.zip`;return(await axios.get(zipUrl,{responseType:"arraybuffer"})).data}app.use(rateLimit({windowMs:6e4,max:1})),app.get("/",((req,res)=>{res.send("Hello There!")})),app.post("/hash",(async(req,res)=>{try{console.log("Request:",req.body);let{url:url}=req.body;if(!url)return res.status(400).json({error:"Missing URL in the request parameters"});Array.isArray(url)||(url=[url]);const promises=url.map((async url=>{const hashedContent=await fetchAndHashContent(url);return{url:url,fileHash:await 
hashContent(Buffer.from(hashedContent,"utf-8"))}}));let results=await Promise.all(promises);results=results.reduce(((acc,{url:url,fileHash:fileHash})=>(acc[url]=fileHash,acc)),{}),res.json(results)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.post("/download-repos",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing urls in the request parameters"});Array.isArray(urls)||(urls=[urls]);const archive=archiver("zip");res.attachment("repos.zip");const downloadPromises=urls.map((async url=>{const[owner,name]=url.split("/").slice(-2);if(!owner||!name)return void console.error(`Invalid url format: ${url}`);const zipBuffer=await downloadGitHubRepo(owner,name);archive.append(zipBuffer,{name:`${owner}-${name}.zip`})}));await Promise.all(downloadPromises),archive.finalize(),archive.pipe(res)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.listen(port,(()=>{console.log(`Server is running at http://localhost:${port}`)})),module.exports=app;
|
||||
124
package-lock.json
generated
124
package-lock.json
generated
@ -13,7 +13,8 @@
|
||||
"axios": "^1.6.2",
|
||||
"crypto": "^1.0.1",
|
||||
"express": "^4.18.2",
|
||||
"express-rate-limit": "^7.1.5"
|
||||
"express-rate-limit": "^7.1.5",
|
||||
"node-html-parser": "^6.1.11"
|
||||
},
|
||||
"devDependencies": {
|
||||
"dotenv": "^16.3.1",
|
||||
@ -151,6 +152,11 @@
|
||||
"npm": "1.2.8000 || >= 1.4.16"
|
||||
}
|
||||
},
|
||||
"node_modules/boolbase": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
@ -326,6 +332,32 @@
|
||||
"integrity": "sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==",
|
||||
"deprecated": "This package is no longer supported. It's now a built-in Node module. If you've depended on crypto, you should switch to the one that's built-in."
|
||||
},
|
||||
"node_modules/css-select": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
|
||||
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
|
||||
"dependencies": {
|
||||
"boolbase": "^1.0.0",
|
||||
"css-what": "^6.1.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"domutils": "^3.0.1",
|
||||
"nth-check": "^2.0.1"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
},
|
||||
"node_modules/css-what": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
|
||||
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
@ -372,6 +404,57 @@
|
||||
"npm": "1.2.8000 || >= 1.4.16"
|
||||
}
|
||||
},
|
||||
"node_modules/dom-serializer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
|
||||
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"entities": "^4.2.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domelementtype": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
|
||||
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
]
|
||||
},
|
||||
"node_modules/domhandler": {
|
||||
"version": "5.0.3",
|
||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
|
||||
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/domhandler?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domutils": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
|
||||
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
|
||||
"dependencies": {
|
||||
"dom-serializer": "^2.0.0",
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/domutils?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.3.1.tgz",
|
||||
@ -397,6 +480,17 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/entities": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
|
||||
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
|
||||
"engines": {
|
||||
"node": ">=0.12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/entities?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/escape-html": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
|
||||
@ -706,6 +800,14 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/he": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
|
||||
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
|
||||
"bin": {
|
||||
"he": "bin/he"
|
||||
}
|
||||
},
|
||||
"node_modules/http-errors": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz",
|
||||
@ -938,6 +1040,15 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/node-html-parser": {
|
||||
"version": "6.1.11",
|
||||
"resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-6.1.11.tgz",
|
||||
"integrity": "sha512-FAgwwZ6h0DSDWxfD0Iq1tsDcBCxdJB1nXpLPPxX8YyVWzbfCjKWEzaynF4gZZ/8hziUmp7ZSaKylcn0iKhufUQ==",
|
||||
"dependencies": {
|
||||
"css-select": "^5.1.0",
|
||||
"he": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/nodemon": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.0.2.tgz",
|
||||
@ -1012,6 +1123,17 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/nth-check": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
|
||||
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
|
||||
"dependencies": {
|
||||
"boolbase": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/object-inspect": {
|
||||
"version": "1.13.1",
|
||||
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
|
||||
|
||||
@ -14,7 +14,8 @@
|
||||
"axios": "^1.6.2",
|
||||
"crypto": "^1.0.1",
|
||||
"express": "^4.18.2",
|
||||
"express-rate-limit": "^7.1.5"
|
||||
"express-rate-limit": "^7.1.5",
|
||||
"node-html-parser": "^6.1.11"
|
||||
},
|
||||
"devDependencies": {
|
||||
"dotenv": "^16.3.1",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user