fix: scrape strains alphabetically
paginating over the default listing can skip strains when its ordering shifts between requests; sorting alphabetically gives a stable page order and ensures that we get everything
@@ -8,5 +8,3 @@ Clone repo and run the command. Resulting data can be found in `db.json`.
 yarn install
 yarn start
 ```
-
-**NOTE**: You may need to run it multiple times (4 or 5 should do it), since some strains will get skipped the first few times. I don't know if it's leafly's endpoint or some weird race condition in the scraping code though.
@@ -8,14 +8,14 @@ const adapter = new FileAsync('db.json');
 const xhr = axios.create({
   headers: {
     Accept: 'application/json, text/plain, */*',
-    Referer: 'https://www.leafly.com/explore',
+    Referer: 'https://www.leafly.com/explore/sort-alpha',
   },
 });

 const pSeries = tasks => tasks.reduce((c, task) => c.then(task), Promise.resolve());

 const getPage = async num => {
-  const url = `https://www.leafly.com/explore/page-${num}`;
+  const url = `https://www.leafly.com/explore/page-${num}/sort-alpha`;
   const response = await xhr.get(url, {
     responseType: 'json',
   });
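For context, a minimal sketch of how `pSeries` and `getPage` might be wired together to walk the sorted pages one at a time. `TOTAL_PAGES` and the logging are hypothetical, not from the repo, which presumably determines the page count elsewhere:

```js
const axios = require('axios');

const xhr = axios.create({
  headers: {
    Accept: 'application/json, text/plain, */*',
    Referer: 'https://www.leafly.com/explore/sort-alpha',
  },
});

// Chains the tasks so each request starts only after the previous one resolves.
const pSeries = tasks => tasks.reduce((c, task) => c.then(task), Promise.resolve());

const getPage = async num => {
  const url = `https://www.leafly.com/explore/page-${num}/sort-alpha`;
  const response = await xhr.get(url, { responseType: 'json' });
  return response.data;
};

const TOTAL_PAGES = 5; // hypothetical page count

// One thunk per page, so pSeries controls when each request fires.
const tasks = Array.from({ length: TOTAL_PAGES }, (_, i) => () =>
  getPage(i + 1).then(() => console.log(`fetched page ${i + 1}`)));

pSeries(tasks).then(() => console.log('done'));
```

Because each page is requested serially against a stable alphabetical sort, entries can't drift between pages mid-scrape, which is presumably what made the old version skip strains.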