diff --git a/packages/scraper/src/leafly.mjs b/packages/scraper/src/leafly.mjs index 539d5ec..dfd6865 100644 --- a/packages/scraper/src/leafly.mjs +++ b/packages/scraper/src/leafly.mjs @@ -12,6 +12,8 @@ const xhr = axios.create({ }, }); +const pSeries = tasks => tasks.reduce((c, task) => c.then(task), Promise.resolve()); + const getPage = async num => { const url = `https://www.leafly.com/explore/page-${num}`; const response = await xhr.get(url, { @@ -45,28 +47,63 @@ export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) { let finished = false; const db = await low(adapter); - await db.defaults({ strains: [] }).write(); + async function writeTag(type, tag) { + const res = await db + .get(type) + .indexOf(tag) + .value(); + + if (res < 0) { + await db + .get(type) + .push(tag) + .write(); + } + } + + async function writeTags(type, tags) { + await pSeries(tags.map(tag => () => writeTag(type, tag))); + } + + async function writeDoc(strain) { + // check for value + const doc = db + .get('strains') + .filter({ id: strain.id }) + .first() + .value(); + + if (!doc) { + console.log(`Adding ${strain.id}, ${strain.name}`); + await db + .get('strains') + .push(strain) + .write(); + + await writeTags('effects', strain.effects); + await writeTags('negative_effects', strain.negative_effects); + await writeTags('uses', strain.uses); + await writeTags('conditions', strain.conditions); + await writeTags('flavors', strain.flavors); + } + } + + await db + .defaults({ + strains: [], + effects: [], + negative_effects: [], + uses: [], + conditions: [], + flavors: [], + }) + .write(); while (!finished) { console.log(`Fetching page ${pageNum}`); const data = await getPage(pageNum); - data.strains.forEach(async strain => { - // check for value - const doc = db - .get('strains') - .filter({ id: strain.id }) - .first() - .value(); - - if (!doc) { - console.log(`Adding ${strain.id}, ${strain.name}`); - await db - .get('strains') - .push(strain) - .write(); - } - }); + await pSeries(data.strains.map(strain => () => writeDoc(strain))); if (pageNum >= endAt || !data.strains.length || data.page.isLastPage) finished = true; pageNum += 1; diff --git a/packages/search-site/src/index.ejs b/packages/search-site/src/index.ejs index 274df33..cae9890 100644 --- a/packages/search-site/src/index.ejs +++ b/packages/search-site/src/index.ejs @@ -10,104 +10,137 @@ +
-
-

- Strain Search -

+
+
+

+ Strain Search +

- -
- -
- + +
+ +
+ +
-
- -
-
-
-
-
- -
- + +
+
+
+
+
+ +
+ +
+
+
+
+
+ +
+ +
-
-
- -
- +
+ +
+
+
+
+ +
+ +
+
+
+ +
+
+ +
+ +
-
-
-
-
- -
- -
-
-
- -
-
- -
- -
-
-
+ +
+
+ +
+
+
- - -
-
- -
-
- -
-
-
- +
+
- - + + + \ No newline at end of file