From ee94988cd056e2ea76dfd7e01c7cc40ed7a4f8f2 Mon Sep 17 00:00:00 2001 From: sairaj mote Date: Fri, 8 Dec 2023 17:52:38 +0530 Subject: [PATCH] added URL formatting --- index.js | 23 +++++++++++++++++++---- index.min.js | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/index.js b/index.js index 63d817b..fab32e2 100644 --- a/index.js +++ b/index.js @@ -5,7 +5,7 @@ const axios = require('axios'); const { createHash } = require('crypto'); const archiver = require('archiver'); const rateLimit = require('express-rate-limit'); -const { parse: parseUrl } = require('url'); +const { parse: parseUrl, URL } = require('url'); const { parse: parseHtml } = require('node-html-parser'); // Set up the allowed domains (replace with your specific domains) @@ -36,6 +36,19 @@ app.use( app.get('/', (req, res) => { res.send('Hello There!'); }) + +function parseUrlWithoutHashAndQuery(fullUrl) { + const parsedUrl = new URL(fullUrl); + + // Set the hash and search/query to empty strings + parsedUrl.hash = ''; + parsedUrl.search = ''; + + // Reconstruct the URL without hash and query + const urlWithoutHashAndQuery = parsedUrl.toString(); + + return urlWithoutHashAndQuery; +} // hashContent function to hash the content of a file async function hashContent(content) { const hash = createHash('sha256'); @@ -85,8 +98,10 @@ app.post('/hash', async (req, res) => { if (!Array.isArray(urls)) urls = [urls]; - const promises = urls.map(async (urls) => { - const hashedContent = await fetchAndHashContent(urls); + const promises = urls.map(async (url) => { + const urlWithoutHashAndQuery = parseUrlWithoutHashAndQuery(url); + console.log(url, `Fetching and hashing ${urlWithoutHashAndQuery}`); + const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery); const fileHash = await hashContent(Buffer.from(hashedContent, 'utf-8')); - return { urls, fileHash }; + return { urls: url, fileHash }; }); diff --git a/index.min.js b/index.min.js index 6482a2d..0aeea67 100644 --- a/index.min.js +++ b/index.min.js @@ -1 +1 @@ 
-require("dotenv").config();const express=require("express"),cors=require("cors"),axios=require("axios"),{createHash:createHash}=require("crypto"),archiver=require("archiver"),rateLimit=require("express-rate-limit"),{parse:parseUrl}=require("url"),{parse:parseHtml}=require("node-html-parser"),allowedDomains=process.env.ALLOWED_DOMAINS.split(","),app=express();app.use(cors());const port=process.env.PORT||3e3,host=process.env.HOST||"0.0.0.0";async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function fetchAndHashContent(url,visitedUrls=new Set){if(visitedUrls.has(url))return"";visitedUrls.add(url);const content=(await axios.get(url,{responseType:"arraybuffer",timeout:1e4})).data.toString("utf-8"),linkedResources=parseHtml(content).querySelectorAll('link[rel="stylesheet"], script[src]');return`${content}_${(await Promise.all(linkedResources.map((async resource=>{const resourceUrl=parseUrl(resource.getAttribute("href")||resource.getAttribute("src"),!0);let absoluteResourceUrl=resourceUrl.href;resourceUrl.hostname||(resourceUrl.path.startsWith("/")||url.endsWith("/")||(url+="/"),absoluteResourceUrl=`${url}${resourceUrl.path}`);const resourceContent=await fetchAndHashContent(absoluteResourceUrl,visitedUrls);return`${resourceUrl.path}_${resourceContent}`})))).join("_")}`}async function downloadGitHubRepo(owner,repo){if(!owner||!repo)throw new Error("Missing owner or repo");const zipUrl=`https://github.com/${owner}/${repo}/archive/refs/heads/master.zip`;return(await axios.get(zipUrl,{responseType:"arraybuffer"})).data}app.use(express.json()),app.use(rateLimit({windowMs:6e4,max:10})),app.get("/",((req,res)=>{res.send("Hello There!")})),app.post("/hash",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing URL in the request parameters"});Array.isArray(urls)||(urls=[urls]);const promises=urls.map((async urls=>{const hashedContent=await 
fetchAndHashContent(urls);return{urls:urls,fileHash:await hashContent(Buffer.from(hashedContent,"utf-8"))}}));let results=await Promise.all(promises);results=results.reduce(((acc,{urls:urls,fileHash:fileHash})=>(acc[urls]=fileHash,acc)),{}),res.json(results)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.post("/download-repos",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing urls in the request parameters"});Array.isArray(urls)||(urls=[urls]);const archive=archiver("zip");res.attachment("repos.zip");const downloadPromises=urls.map((async url=>{const[owner,name]=url.split("/").slice(-2);if(!owner||!name)return void console.error(`Invalid url format: ${url}`);const zipBuffer=await downloadGitHubRepo(owner,name);archive.append(zipBuffer,{name:`${owner}-${name}.zip`})}));await Promise.all(downloadPromises),archive.finalize(),archive.pipe(res)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.listen(port,host,(()=>{console.log(`Server is running at http://${host}:${port}`)})),module.exports=app; \ No newline at end of file +require("dotenv").config();const express=require("express"),cors=require("cors"),axios=require("axios"),{createHash:createHash}=require("crypto"),archiver=require("archiver"),rateLimit=require("express-rate-limit"),{parse:parseUrl,URL:URL}=require("url"),{parse:parseHtml}=require("node-html-parser"),allowedDomains=process.env.ALLOWED_DOMAINS.split(","),app=express();app.use(cors());const port=process.env.PORT||3e3,host=process.env.HOST||"0.0.0.0";function parseUrlWithoutHashAndQuery(fullUrl){const parsedUrl=new URL(fullUrl);parsedUrl.hash="",parsedUrl.search="";return parsedUrl.toString()}async function hashContent(content){const hash=createHash("sha256");return hash.update(content),hash.digest("hex")}async function fetchAndHashContent(url,visitedUrls=new 
Set){if(visitedUrls.has(url))return"";visitedUrls.add(url);const content=(await axios.get(url,{responseType:"arraybuffer",timeout:1e4})).data.toString("utf-8"),linkedResources=parseHtml(content).querySelectorAll('link[rel="stylesheet"], script[src]');return`${content}_${(await Promise.all(linkedResources.map((async resource=>{const resourceUrl=parseUrl(resource.getAttribute("href")||resource.getAttribute("src"),!0);let absoluteResourceUrl=resourceUrl.href;resourceUrl.hostname||(resourceUrl.path.startsWith("/")||url.endsWith("/")||(url+="/"),absoluteResourceUrl=`${url}${resourceUrl.path}`);const resourceContent=await fetchAndHashContent(absoluteResourceUrl,visitedUrls);return`${resourceUrl.path}_${resourceContent}`})))).join("_")}`}async function downloadGitHubRepo(owner,repo){if(!owner||!repo)throw new Error("Missing owner or repo");const zipUrl=`https://github.com/${owner}/${repo}/archive/refs/heads/master.zip`;return(await axios.get(zipUrl,{responseType:"arraybuffer"})).data}app.use(express.json()),app.use(rateLimit({windowMs:6e4,max:10})),app.get("/",((req,res)=>{res.send("Hello There!")})),app.post("/hash",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return res.status(400).json({error:"Missing URL in the request parameters"});Array.isArray(urls)||(urls=[urls]);const promises=urls.map((async url=>{const urlWithoutHashAndQuery=parseUrlWithoutHashAndQuery(url);console.log(url,`Fetching and hashing ${urlWithoutHashAndQuery}`);const hashedContent=await fetchAndHashContent(urlWithoutHashAndQuery),fileHash=await hashContent(Buffer.from(hashedContent,"utf-8"));return{urls:url,fileHash:fileHash}}));let results=await Promise.all(promises);results=results.reduce(((acc,{urls:urls,fileHash:fileHash})=>(acc[urls]=fileHash,acc)),{}),res.json(results)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.post("/download-repos",(async(req,res)=>{try{let{urls:urls}=req.body;if(!urls)return 
res.status(400).json({error:"Missing urls in the request parameters"});Array.isArray(urls)||(urls=[urls]);const archive=archiver("zip");res.attachment("repos.zip");const downloadPromises=urls.map((async url=>{const[owner,name]=url.split("/").slice(-2);if(!owner||!name)return void console.error(`Invalid url format: ${url}`);const zipBuffer=await downloadGitHubRepo(owner,name);archive.append(zipBuffer,{name:`${owner}-${name}.zip`})}));await Promise.all(downloadPromises),archive.finalize(),archive.pipe(res)}catch(error){console.error("Error:",error.message),res.status(500).json({error:"Internal Server Error"})}})),app.listen(port,host,(()=>{console.log(`Server is running at http://${host}:${port}`)})),module.exports=app; \ No newline at end of file