diff --git a/packages/scraper/README.md b/packages/scraper/README.md index 7ff6990..f1d2aea 100644 --- a/packages/scraper/README.md +++ b/packages/scraper/README.md @@ -8,5 +8,3 @@ Clone repo and run the command. Resulting data can be found in `db.json`. yarn install yarn start ``` - -**NOTE**: You may need to run it multiple times (4 or 5 should do it), since some strains will get skipped the first few times. I don't know if it's leafly's endpoint or some weird race condition in the scraping code though. diff --git a/packages/scraper/src/leafly.mjs b/packages/scraper/src/leafly.mjs index dfd6865..353f432 100644 --- a/packages/scraper/src/leafly.mjs +++ b/packages/scraper/src/leafly.mjs @@ -8,14 +8,14 @@ const adapter = new FileAsync('db.json'); const xhr = axios.create({ headers: { Accept: 'application/json, text/plain, */*', - Referer: 'https://www.leafly.com/explore', + Referer: 'https://www.leafly.com/explore/sort-alpha', }, }); const pSeries = tasks => tasks.reduce((c, task) => c.then(task), Promise.resolve()); const getPage = async num => { - const url = `https://www.leafly.com/explore/page-${num}`; + const url = `https://www.leafly.com/explore/page-${num}/sort-alpha`; const response = await xhr.get(url, { responseType: 'json', });