I log in to a site and it sets a browser cookie.
I go to a URL and it is a json response.
How do I scrape the page after calling `await page.goto('blahblahblah.json');`?
2 Answers
Another way, which doesn't give you intermittent issues, is to evaluate the body when it becomes available and return it as JSON, e.g.:
const puppeteer = require('puppeteer');
/**
 * Opens a JSON endpoint in a Puppeteer-controlled browser, parses the text of
 * the page's <body> as JSON inside the page, and logs the parsed value.
 *
 * The browser is always closed, even when navigation or parsing fails.
 *
 * @returns {Promise<void>} Resolves once the JSON has been logged and the
 *   browser has been closed.
 * @throws Rejects if launching, navigating, or parsing the body as JSON fails.
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: false, // change to true in prod!
  });
  try {
    const page = await browser.newPage();
    // NOTE(review): placeholder URL — the original address was missing from
    // the snippet; substitute the real JSON endpoint here.
    await page.goto('https://example.com/blahblahblah.json');
    // I would leave this here as a fail safe
    await page.content();
    // The callback runs in the browser context; its return value is
    // serialized back to Node.
    const innerText = await page.evaluate(() => {
      return JSON.parse(document.querySelector("body").innerText);
    });
    console.log("innerText now contains the JSON");
    console.log(innerText);
    // I will leave this as an exercise for you to write out to FS...
  } finally {
    // Release the browser process even if one of the steps above threw.
    await browser.close();
  }
}
run();

2

You can intercept the network response, like this:
const puppeteer = require('puppeteer');
const fs = require('fs');
/**
 * Navigates to a JSON endpoint with Puppeteer, intercepts the matching network
 * response, and writes its raw body to /tmp/response.json.
 *
 * Only the successful response for the target URL is saved: redirect
 * responses have no body to buffer, and unrelated responses would otherwise
 * overwrite the same output file.
 */
(async () => {
  // NOTE(review): placeholder URL — the original address was missing from the
  // snippet; substitute the real JSON endpoint here. The listener compares
  // URLs exactly, so this must match the final response URL.
  const targetUrl = 'https://example.com/blahblahblah.json';
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const pendingWrites = [];

    // Reads the response body and persists it to disk.
    const saveResponse = async (response) => {
      const data = await response.buffer();
      fs.writeFileSync('/tmp/response.json', data);
    };

    page.on('response', (response) => {
      if (response.url() !== targetUrl || !response.ok()) {
        return;
      }
      console.log('got response', response.url());
      const write = saveResponse(response);
      // Mark the promise as handled so an early failure is not reported as an
      // unhandled rejection; the error is still surfaced by Promise.all below.
      write.catch(() => {});
      pendingWrites.push(write);
    });

    await page.goto(targetUrl, { waitUntil: 'networkidle0' });
    // Make sure the body has been written before the browser is closed.
    await Promise.all(pendingWrites);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});