quick-musician-29561
03/31/2024, 8:28 AMquick-musician-29561
03/31/2024, 8:29 AMquick-musician-29561
03/31/2024, 8:39 AMjs
const chromium = require("@sparticuz/chromium");
const puppeteer = require('puppeteer-core');
exports.handler = async function(event, context) {
const data = JSON.parse(event.body);
const url = data.url;
if (!url) {
return {
statusCode: 400,
body: JSON.stringify({ error: 'No URL provided in the request body' })
};
}
const browser = await puppeteer.launch({
args: chromium.args,
executablePath: await chromium.executablePath(),
headless: chromium.headless,
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
await page.goto(url, { waitUntil: 'domcontentloaded' });
const headingsAndTexts = await page.evaluate(() => {
const headings = Array.from(document.querySelectorAll('h1, h2, h3, h4, h5, h6'));
const paragraphs = Array.from(document.querySelectorAll('p'));
const headingsTexts = headings.map(h => ({ tag: h.tagName, text: h.innerText }));
const paragraphsTexts = paragraphs.map(p => ({ tag: p.tagName, text: p.innerText }));
return [...headingsTexts, ...paragraphsTexts];
});
await browser.close();
return {
statusCode: 200,
body: JSON.stringify({
status: 'Ok',
content: headingsAndTexts
})
};
}
quick-musician-29561
03/31/2024, 8:40 AMjs
// Target page address, read from the `webpage` property of the ambient `workflow` object.
const fetchUrl = workflow.webpage
/**
 * POSTs `{ url }` to the scraping endpoint and stores the JSON-serialised
 * response data in `workflow.scrapeData`, retrying after a delay on failure.
 *
 * Relies on `axios` and `workflow` being available in the surrounding scope.
 *
 * @param {string} url - Page address to send to the endpoint.
 * @param {number} [retryDelay=10000] - Milliseconds to wait before each retry.
 * @param {number} [maxRetries=1] - Retries allowed after the first attempt,
 *   i.e. up to `maxRetries + 1` requests in total.
 * @returns {Promise<boolean>} `true` once a response has been stored,
 *   `false` when no URL was given or every attempt failed.
 */
async function fetchPageContentWithRetry(url, retryDelay = 10000, maxRetries = 1) {
  // Without a URL the endpoint can only reject the request; don't waste a
  // round trip plus a retry delay on it.
  if (!url) {
    console.error('No URL provided; skipping page content fetch.');
    return false;
  }

  for (let attempts = 0; ; attempts++) {
    try {
      const response = await axios.post('https://botpress-puppeteer.netlify.app/.netlify/functions/meta', { url });
      console.log(response.data);
      workflow.scrapeData = JSON.stringify(response.data);
      return true;
    } catch (error) {
      console.error(`Attempt ${attempts + 1}: Error fetching page content:`, error);
      if (attempts < maxRetries) {
        console.log(`Waiting for ${retryDelay / 1000} seconds before retrying...`);
        await new Promise(resolve => setTimeout(resolve, retryDelay));
        continue;
      }
      // Report the number of requests actually made (first try + retries).
      const made = attempts + 1;
      console.log(`Failed to fetch page content after ${made} attempt${made === 1 ? '' : 's'}.`);
      return false;
    }
  }
}
// Run the fetch with the function's default retry delay and retry count; the boolean result is not used here.
await fetchPageContentWithRetry(fetchUrl);
quick-musician-29561
03/31/2024, 8:45 AMquick-musician-29561
03/31/2024, 9:37 AMincalculable-book-10414
03/31/2024, 5:05 PMincalculable-book-10414
03/31/2024, 5:05 PMfamous-jewelry-85388
04/01/2024, 4:56 PMquick-musician-29561
04/02/2024, 6:00 AMquick-musician-29561
04/02/2024, 6:26 AMquick-musician-29561
04/02/2024, 6:29 AMquick-musician-29561
04/02/2024, 6:57 AMquick-musician-29561
04/03/2024, 10:28 AMquick-musician-29561
04/03/2024, 4:20 PMquick-musician-29561
04/03/2024, 4:39 PMquick-musician-29561
04/03/2024, 4:50 PMcold-jewelry-54343
04/04/2024, 7:49 AMquick-musician-29561
04/04/2024, 5:54 PMquick-musician-29561
04/04/2024, 5:55 PMquick-musician-29561
04/04/2024, 5:55 PMquick-musician-29561
04/04/2024, 5:59 PMquick-musician-29561
04/05/2024, 4:25 PMflat-waiter-90531
04/11/2024, 1:54 PMstraight-wolf-37371
04/11/2024, 10:31 PMflat-waiter-90531
04/12/2024, 8:07 AMlimited-orange-3544
08/15/2024, 1:32 PMlimited-orange-3544
08/15/2024, 1:33 PMlimited-orange-3544
08/15/2024, 1:37 PMlimited-orange-3544
08/15/2024, 1:38 PM