adding hash get route

This commit is contained in:
sairaj mote 2024-01-12 17:04:43 +05:30
parent d7bccffa42
commit b73bdf2490
2 changed files with 32 additions and 23 deletions

View File

@ -61,38 +61,47 @@ async function fetchAndHashContent(url, visitedUrls = new Set()) {
} }
const hashCache = new Map(); const hashCache = new Map();
/**
 * GET / — hash the URL(s) supplied in the `url` query parameter.
 *
 * Responds with the array produced by `generateHash` (one `{ url, hash }`
 * entry per URL), 400 when `url` is missing, or 500 with the error message
 * when hashing fails.
 */
router.get("/", async (req, res) => {
    try {
        const { url } = req.query;
        if (!url) {
            return res.status(400).json({ error: 'Missing <url> in the query parameters' });
        }
        res.json(await generateHash(url));
    } catch (error) {
        // generateHash rejects on malformed URLs or failed downloads; without
        // this catch the rejection is unhandled and the client never gets a
        // response. Mirrors the error shape used by the POST handler.
        res.status(500).json({ error: error.message });
    }
});
// API endpoint to start the recursive download and hashing // API endpoint to start the recursive download and hashing
router.post('/', async (req, res) => { router.post('/', async (req, res) => {
try { try {
let { urls } = req.body; const { urls } = req.body;
if (!urls) { if (!urls) {
return res.status(400).json({ error: 'Missing <urls> in the request parameters' }); return res.status(400).json({ error: 'Missing <urls> in the request parameters' });
} }
if (!Array.isArray(urls)) res.json(await generateHash(urls));
urls = [urls];
const promises = urls.map(async (url) => {
const urlWithoutHashAndQuery = parseUrlWithoutHashAndQuery(url);
let hash;
// regex to identify owner and repo name from https://owner.github.io/repo-name
const githubRepoRegex = /https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;
if (githubRepoRegex.test(urlWithoutHashAndQuery) && urlWithoutHashAndQuery.match(githubRepoRegex)[1] === 'ranchimall') {
if (!hashCache.has(urlWithoutHashAndQuery)) {
await fetchAndSaveAppHash(urlWithoutHashAndQuery)
}
hash = hashCache.get(urlWithoutHashAndQuery).hash;
} else {
const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
}
return { url, hash };
});
const results = await Promise.all(promises);
res.json(results);
} catch (error) { } catch (error) {
res.status(500).json({ error: error.message }); res.status(500).json({ error: error.message });
} }
}); });
/**
 * Compute a content hash for one URL or a list of URLs.
 *
 * Each URL is normalised with `parseUrlWithoutHashAndQuery` first. URLs that
 * point at a `ranchimall` GitHub Pages site are served from `hashCache`
 * (populated on demand through `fetchAndSaveAppHash`); every other URL is
 * fetched and hashed on each call.
 *
 * @param {string|string[]} [urls=[]] - A single URL or an array of URLs.
 * @returns {Promise<Array<{url: string, hash: string}>>} One entry per input
 *   URL, in input order; `url` is the caller's original, un-normalised value.
 * @throws Rejects if normalising, fetching or hashing any URL fails.
 */
async function generateHash(urls = []) {
    const urlList = Array.isArray(urls) ? urls : [urls];
    // Captures owner and repo name from https://owner.github.io/repo-name.
    // Anchored at the start so that a URL on another host which merely embeds
    // such an address in its path is not mistaken for a GitHub Pages site
    // (and therefore never stored in the long-lived cache).
    const githubRepoRegex = /^https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;

    return Promise.all(urlList.map(async (url) => {
        const urlWithoutHashAndQuery = parseUrlWithoutHashAndQuery(url);
        const repoMatch = urlWithoutHashAndQuery.match(githubRepoRegex);

        if (repoMatch && repoMatch[1] === 'ranchimall') {
            if (!hashCache.has(urlWithoutHashAndQuery)) {
                await fetchAndSaveAppHash(urlWithoutHashAndQuery);
            }
            return { url, hash: hashCache.get(urlWithoutHashAndQuery).hash };
        }

        const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
        const hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
        return { url, hash };
    }));
}
async function fetchAndSaveAppHash(url, lastUpdated = Date.now()) { async function fetchAndSaveAppHash(url, lastUpdated = Date.now()) {
const hashedContent = await fetchAndHashContent(url); const hashedContent = await fetchAndHashContent(url);
const hash = await hashContent(Buffer.from(hashedContent, 'utf-8')); const hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));

2
routes/hash.min.js vendored
View File

@ -1 +1 @@
const express=require("express"),router=express.Router(),axios=require("axios"),{createHash:createHash}=require("crypto"),{parse:parseUrl,URL:URL}=require("url"),{parse:parseHtml}=require("node-html-parser");function addProtocolToUrl(url){return url.startsWith("http://")||url.startsWith("https://")||(url="https://"+url),url}function parseUrlWithoutHashAndQuery(fullUrl){fullUrl=addProtocolToUrl(fullUrl);const parsedUrl=new URL(fullUrl);parsedUrl.hash="",parsedUrl.search="";return parsedUrl.toString()}async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function fetchAndHashContent(url,visitedUrls=new Set){if(visitedUrls.has(url))return"";visitedUrls.add(url);const content=(await axios.get(url,{responseType:"arraybuffer"})).data.toString("utf-8"),linkedResources=parseHtml(content).querySelectorAll('link[rel="stylesheet"], script[src]');return`${content}_${(await Promise.all(linkedResources.map((async resource=>{const resourceUrl=parseUrl(resource.getAttribute("href")||resource.getAttribute("src"),!0);let absoluteResourceUrl=resourceUrl.href;resourceUrl.hostname||(resourceUrl.path.startsWith("/")||url.endsWith("/")||(url+="/"),absoluteResourceUrl=`${url}${resourceUrl.path}`);const resourceContent=await fetchAndHashContent(absoluteResourceUrl,visitedUrls);return`${resourceUrl.path}_${resourceContent}`})))).join("_")}`}const hashCache=new Map;async function fetchAndSaveAppHash(url,lastUpdated=Date.now()){const hashedContent=await fetchAndHashContent(url),hash=await hashContent(Buffer.from(hashedContent,"utf-8"));hashCache.set(url,{hash:hash,lastUpdated:lastUpdated})}router.post("/",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing <urls> in the request parameters"});Array.isArray(urls)||(urls=[urls]);const promises=urls.map((async url=>{const urlWithoutHashAndQuery=parseUrlWithoutHashAndQuery(url);let hash;const 
githubRepoRegex=/https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;if(githubRepoRegex.test(urlWithoutHashAndQuery)&&"ranchimall"===urlWithoutHashAndQuery.match(githubRepoRegex)[1])hashCache.has(urlWithoutHashAndQuery)||await fetchAndSaveAppHash(urlWithoutHashAndQuery),hash=hashCache.get(urlWithoutHashAndQuery).hash;else{const hashedContent=await fetchAndHashContent(urlWithoutHashAndQuery);hash=await hashContent(Buffer.from(hashedContent,"utf-8"))}return{url:url,hash:hash}})),results=await Promise.all(promises);res.json(results)}catch(error){res.status(500).json({error:error.message})}})),router.post("/gitwh",(async(req,res)=>{try{if(!req.headers["user-agent"].startsWith("GitHub-Hookshot/"))return res.json({message:"ignored"});const{repository:{pushed_at:pushed_at,organization:organization,name:name,has_pages:has_pages}}=req.body;if(!has_pages)return res.json({message:"ignored"});const url=`https://${organization}.github.io/${name}`;await fetchAndSaveAppHash(url,pushed_at),res.json({message:"success"})}catch(err){res.status(500).json({error:err.message})}})),module.exports=router; const express=require("express"),router=express.Router(),axios=require("axios"),{createHash:createHash}=require("crypto"),{parse:parseUrl,URL:URL}=require("url"),{parse:parseHtml}=require("node-html-parser");function addProtocolToUrl(url){return url.startsWith("http://")||url.startsWith("https://")||(url="https://"+url),url}function parseUrlWithoutHashAndQuery(fullUrl){fullUrl=addProtocolToUrl(fullUrl);const parsedUrl=new URL(fullUrl);parsedUrl.hash="",parsedUrl.search="";return parsedUrl.toString()}async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function fetchAndHashContent(url,visitedUrls=new Set){if(visitedUrls.has(url))return"";visitedUrls.add(url);const content=(await axios.get(url,{responseType:"arraybuffer"})).data.toString("utf-8"),linkedResources=parseHtml(content).querySelectorAll('link[rel="stylesheet"], 
script[src]');return`${content}_${(await Promise.all(linkedResources.map((async resource=>{const resourceUrl=parseUrl(resource.getAttribute("href")||resource.getAttribute("src"),!0);let absoluteResourceUrl=resourceUrl.href;resourceUrl.hostname||(resourceUrl.path.startsWith("/")||url.endsWith("/")||(url+="/"),absoluteResourceUrl=`${url}${resourceUrl.path}`);const resourceContent=await fetchAndHashContent(absoluteResourceUrl,visitedUrls);return`${resourceUrl.path}_${resourceContent}`})))).join("_")}`}const hashCache=new Map;async function generateHash(urls=[]){Array.isArray(urls)||(urls=[urls]);const promises=urls.map((async url=>{const urlWithoutHashAndQuery=parseUrlWithoutHashAndQuery(url);let hash;const githubRepoRegex=/https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;if(githubRepoRegex.test(urlWithoutHashAndQuery)&&"ranchimall"===urlWithoutHashAndQuery.match(githubRepoRegex)[1])hashCache.has(urlWithoutHashAndQuery)||await fetchAndSaveAppHash(urlWithoutHashAndQuery),hash=hashCache.get(urlWithoutHashAndQuery).hash;else{const hashedContent=await fetchAndHashContent(urlWithoutHashAndQuery);hash=await hashContent(Buffer.from(hashedContent,"utf-8"))}return{url:url,hash:hash}}));return await Promise.all(promises)}async function fetchAndSaveAppHash(url,lastUpdated=Date.now()){const hashedContent=await fetchAndHashContent(url),hash=await hashContent(Buffer.from(hashedContent,"utf-8"));hashCache.set(url,{hash:hash,lastUpdated:lastUpdated})}router.get("/",(async(req,res)=>{const{url:url}=req.query;if(!url)return res.status(400).json({error:"Missing <url> in the query parameters"});res.json(await generateHash(url))})),router.post("/",(async(req,res)=>{try{const{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing <urls> in the request parameters"});res.json(await generateHash(urls))}catch(error){res.status(500).json({error:error.message})}})),router.post("/gitwh",(async(req,res)=>{try{if(!req.headers["user-agent"].startsWith("GitHub-Hookshot/"))return 
res.json({message:"ignored"});const{repository:{pushed_at:pushed_at,organization:organization,name:name,has_pages:has_pages}}=req.body;if(!has_pages)return res.json({message:"ignored"});const url=`https://${organization}.github.io/${name}`;await fetchAndSaveAppHash(url,pushed_at),res.json({message:"success"})}catch(err){res.status(500).json({error:err.message})}})),module.exports=router;