// File metadata (from the source viewer): 156 lines, 5.5 KiB, JavaScript
require('dotenv').config();
|
|
const express = require('express');
|
|
const cors = require('cors');
|
|
const axios = require('axios');
|
|
const { createHash } = require('crypto');
|
|
const rateLimit = require('express-rate-limit');
|
|
const { parse: parseUrl, URL } = require('url');
|
|
const { parse: parseHtml } = require('node-html-parser');
|
|
|
|
// Origins configured for this API, read from the comma-separated
// ALLOWED_DOMAINS environment variable. A missing variable yields an empty
// list instead of crashing at startup with a TypeError.
const allowedDomains = (process.env.ALLOWED_DOMAINS || '')
  .split(',')
  .map((domain) => domain.trim())
  .filter(Boolean);

const app = express();

// NOTE: CORS is currently open to every origin; allowedDomains is NOT enforced.
// To restrict access, pass the following options to cors():
//   { origin: allowedDomains, optionsSuccessStatus: 200 }
// (optionsSuccessStatus 200 because some legacy browsers -- IE11, various
// SmartTVs -- choke on 204).
app.use(cors());

const port = process.env.PORT || 3000;
const host = process.env.HOST || '0.0.0.0';

// Middleware to parse JSON requests
app.use(express.json());

// Rate limiting: each IP may make at most 20 requests per one-minute window
app.use(
  rateLimit({
    windowMs: 1 * 60 * 1000, // 1 minute
    max: 20, // limit each IP request per windowMs
  })
);
|
|
|
|
// Root route: replies with a fixed greeting, handy as a liveness check.
function sendGreeting(_req, res) {
  res.send('Hello There!');
}

app.get('/', sendGreeting);
|
|
/**
 * Ensures a URL string carries an explicit HTTP(S) scheme.
 *
 * Surrounding whitespace is removed. If the value already starts with
 * "http://" or "https://" (in any letter case, since URL schemes are
 * case-insensitive) it is returned as-is; otherwise "https://" is prepended.
 *
 * @param {string} url - URL with or without a scheme, e.g. "example.com".
 * @returns {string} The trimmed URL, guaranteed to start with an HTTP(S) scheme.
 * @throws {TypeError} If `url` is not a string.
 */
function addProtocolToUrl(url) {
  if (typeof url !== 'string') {
    throw new TypeError('URL must be a string');
  }

  const trimmedUrl = url.trim();

  // Case-insensitive check so that "HTTPS://host" is not turned into
  // "https://HTTPS://host".
  if (/^https?:\/\//i.test(trimmedUrl)) {
    return trimmedUrl;
  }

  return `https://${trimmedUrl}`;
}
|
|
|
|
/**
 * Normalises a URL and drops its query string and fragment.
 *
 * The input is first given a scheme via addProtocolToUrl, then parsed with
 * the WHATWG URL class and serialised again without `?query` and `#hash`.
 *
 * @param {string} fullUrl - URL to normalise; the scheme may be omitted.
 * @returns {string} The serialised URL without query and fragment.
 * @throws {TypeError} If the value cannot be parsed as a URL.
 */
function parseUrlWithoutHashAndQuery(fullUrl) {
  const strippedUrl = new URL(addProtocolToUrl(fullUrl));

  // Clearing both components removes them from the serialised form.
  strippedUrl.search = '';
  strippedUrl.hash = '';

  return strippedUrl.href;
}
|
|
/**
 * Computes the SHA-256 digest of the given content.
 *
 * @param {string|Buffer} content - Data to feed into the hash.
 * @returns {Promise<string>} Promise resolving to the lowercase hex digest.
 */
async function hashContent(content) {
  return createHash('sha256').update(content).digest('hex');
}
|
|
|
|
// Joins a host-less resource path onto the page URL with exactly one slash
// between them, regardless of how either side is written.
function joinResourcePath(pageUrl, resourcePath) {
  const base = pageUrl.endsWith('/') ? pageUrl.slice(0, -1) : pageUrl;
  const path = resourcePath.startsWith('/') ? resourcePath : `/${resourcePath}`;
  return `${base}${path}`;
}

/**
 * Recursively downloads a URL plus the stylesheets and scripts it links to,
 * and returns all of it combined into one string.
 *
 * Despite the name, nothing is hashed here: the caller hashes the returned
 * string. The result has the form `<content>_<path>_<resource>_...`, where
 * each linked resource contributes its path and its own (recursive) result.
 *
 * Every fetched body is parsed as HTML, and `link[rel="stylesheet"]` and
 * `script[src]` elements are followed. References without a host are appended
 * to `url`, which is treated as a directory.
 * NOTE(review): root-absolute paths ("/x.js") are therefore resolved against
 * the page URL rather than the origin, as in the original implementation --
 * confirm this is intended.
 * NOTE(review): `url` ultimately comes from the request body, so this performs
 * server-side requests to caller-chosen hosts; consider an allow-list.
 *
 * @param {string} url - Absolute URL to download.
 * @param {Set<string>} [visitedUrls] - URLs already fetched during this
 *   traversal; shared across the recursion to avoid loops and re-downloads.
 * @returns {Promise<string>} Combined content, or '' if `url` was already visited.
 * @throws Rejects if any request made through axios fails.
 */
async function fetchAndHashContent(url, visitedUrls = new Set()) {
  if (visitedUrls.has(url)) {
    return ''; // Already fetched in this traversal; prevents infinite loops
  }
  visitedUrls.add(url);

  const response = await axios.get(url, { responseType: 'arraybuffer' });
  const content = response.data.toString('utf-8');

  // Collect the references of linked stylesheets and scripts. Elements that
  // lack the attribute, or whose reference has neither host nor path (such as
  // "#fragment"), cannot be fetched and are skipped instead of throwing.
  const resourceUrls = parseHtml(content)
    .querySelectorAll('link[rel="stylesheet"], script[src]')
    .map((element) => element.getAttribute('href') || element.getAttribute('src'))
    .filter(Boolean)
    .map((reference) => parseUrl(reference))
    .filter((resourceUrl) => resourceUrl.hostname || resourceUrl.path);

  // Fetch all linked resources concurrently. `url` is never modified here, so
  // every resource resolves against the same base no matter the element order.
  const linkedResource = await Promise.all(
    resourceUrls.map(async (resourceUrl) => {
      const absoluteResourceUrl = resourceUrl.hostname
        ? resourceUrl.href
        : joinResourcePath(url, resourceUrl.path);
      const resourceContent = await fetchAndHashContent(absoluteResourceUrl, visitedUrls);
      return `${resourceUrl.path}_${resourceContent}`;
    })
  );

  // Combine the page content with the labelled content of its resources
  return `${content}_${linkedResource.join('_')}`;
}
|
|
|
|
// In-memory cache for GitHub Pages sites: normalised URL -> { hash, lastUpdated }.
// Entries are written by fetchAndSaveAppHash.
const hashCache = new Map();

// Matches GitHub Pages project URLs of the form https://owner.github.io/repo-name.
// Anchored at the start so that an unrelated host whose path merely contains
// such a URL is not mistaken for a GitHub Pages site (and cached).
const GITHUB_PAGES_URL_PATTERN = /^https?:\/\/([\w-]+)\.github\.io\/([\w-]+)/;

// API endpoint to start the recursive download and hashing.
//
// Request body: { urls: string | string[] }
// Response:     [{ url, hash }] in request order, where `url` is the value as
//               submitted and `hash` is the SHA-256 hex digest of the combined
//               content.
// Status codes: 400 when <urls> is missing or contains an unparsable entry,
//               500 when downloading or hashing fails.
app.post('/hash', async (req, res) => {
  try {
    let { urls } = req.body || {};
    if (!urls) {
      return res.status(400).json({ error: 'Missing <urls> in the request parameters' });
    }
    if (!Array.isArray(urls)) {
      urls = [urls];
    }

    // Normalise every URL before any network traffic so that malformed input
    // is reported as a client error rather than a server error.
    let targets;
    try {
      targets = urls.map((url) => ({
        url,
        urlWithoutHashAndQuery: parseUrlWithoutHashAndQuery(url),
      }));
    } catch (validationError) {
      return res.status(400).json({ error: `Invalid URL in <urls>: ${validationError.message}` });
    }

    const promises = targets.map(async ({ url, urlWithoutHashAndQuery }) => {
      let hash;
      if (GITHUB_PAGES_URL_PATTERN.test(urlWithoutHashAndQuery)) {
        // GitHub Pages sites are hashed once and then served from the cache.
        if (!hashCache.has(urlWithoutHashAndQuery)) {
          await fetchAndSaveAppHash(urlWithoutHashAndQuery);
        }
        hash = hashCache.get(urlWithoutHashAndQuery).hash;
      } else {
        // Everything else is downloaded and hashed on every request.
        const hashedContent = await fetchAndHashContent(urlWithoutHashAndQuery);
        hash = await hashContent(Buffer.from(hashedContent, 'utf-8'));
      }
      return { url, hash };
    });

    const results = await Promise.all(promises);
    res.json(results);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
|
|
/**
 * Downloads a site, hashes its combined content and stores the result in
 * hashCache under `url`, replacing any existing entry.
 *
 * @param {string} url - URL to download; also used as the cache key.
 * @param {*} [lastUpdated] - Value recorded next to the hash; defaults to the
 *   current time in milliseconds.
 * @returns {Promise<void>}
 */
async function fetchAndSaveAppHash(url, lastUpdated = Date.now()) {
  const combinedContent = await fetchAndHashContent(url);
  const contentBytes = Buffer.from(combinedContent, 'utf-8');
  const digest = await hashContent(contentBytes);

  hashCache.set(url, { hash: digest, lastUpdated });
}
|
|
|
|
// GitHub webhook endpoint: recomputes the cached hash of a repository's
// GitHub Pages site whenever GitHub notifies us about the repository.
//
// Responses:
//   403 - the request does not carry a GitHub-Hookshot user agent
//   200 { message: 'ignored' } - no repository in the payload, or no Pages site
//   400 - the payload lacks the repository owner or name
//   200 { message: 'success' } - hash recomputed and cached
//   500 - downloading or hashing failed
//
// NOTE(review): the User-Agent header is trivially spoofable. Consider
// verifying the X-Hub-Signature-256 header against a shared secret instead.
app.post('/gitwh', async (req, res) => {
  try {
    // Reject requests that are not from GitHub. A response is always sent so
    // the connection is never left hanging.
    const userAgent = req.headers['user-agent'] || '';
    if (!userAgent.startsWith('GitHub-Hookshot/')) {
      return res.status(403).json({ error: 'Forbidden' });
    }

    const { repository } = req.body || {};
    if (!repository || !repository.has_pages) {
      return res.json({ message: 'ignored' });
    }

    // `organization` appears to be set only for organisation-owned
    // repositories (confirm against GitHub's payload docs); fall back to the
    // owner login so user-owned repositories do not yield "undefined.github.io".
    const { pushed_at, organization, name } = repository;
    const owner = organization || (repository.owner && repository.owner.login);
    if (!owner || !name) {
      return res.status(400).json({ error: 'Missing repository owner or name in the payload' });
    }

    // Canonicalise exactly like the /hash endpoint does (the URL class
    // lower-cases the host), so both endpoints use the same cache key.
    const url = parseUrlWithoutHashAndQuery(`https://${owner}.github.io/${name}`);
    await fetchAndSaveAppHash(url, pushed_at);

    res.json({ message: 'success' });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Logs the address once the HTTP server is accepting connections.
const logServerAddress = () => {
  console.log(`Server is running at http://${host}:${port}`);
};

// Bind the Express app to the configured host and port
app.listen(port, host, logServerAddress);

// Expose the Express app to other modules
module.exports = app;
|