import axios from 'axios';
import get from 'lodash/get';
import low from 'lowdb';
import FileAsync from 'lowdb/adapters/FileAsync';

// Storage adapter for the local JSON database file.
const adapter = new FileAsync('db.json');

// Shared HTTP client; every request carries these Accept/Referer headers.
const xhr = axios.create({
  headers: {
    Accept: 'application/json, text/plain, */*',
    Referer: 'https://www.leafly.com/explore/sort-alpha',
  },
});

// Names of the tag collections kept next to `strains` in the database.
// Each name is also the property on a transformed strain that holds its tags.
const TAG_COLLECTIONS = ['effects', 'negative_effects', 'uses', 'conditions', 'flavors'];

/**
 * Runs promise-returning thunks one after another.
 *
 * @param {Array<function(): (Promise<*>|*)>} tasks - Thunks to run in order.
 * @returns {Promise<*>} Resolves with the last task's result (undefined for an empty list).
 */
const pSeries = tasks => tasks.reduce((chain, task) => chain.then(task), Promise.resolve());

/**
 * Lower-cases a string value, or returns `fallback` when the value is not a string.
 *
 * lodash `get` only substitutes its default for `undefined`, so an explicit
 * `null` in the payload would otherwise crash on `.toLowerCase()`.
 *
 * @param {*} value - Raw value from the payload.
 * @param {string} fallback - Value to use when `value` is missing or not a string.
 * @returns {string}
 */
const lowerOr = (value, fallback) => (typeof value === 'string' ? value.toLowerCase() : fallback);

/**
 * Extracts the lower-cased `DisplayLabel` of every tag stored under `key`.
 *
 * @param {object} strain - Raw strain record from the API.
 * @param {string} key - Property holding an array of tag objects.
 * @returns {string[]} Labels; empty when the property is missing, null or not an array.
 */
const labelsOf = (strain, key) => {
  const tags = get(strain, key);
  if (!Array.isArray(tags)) {
    return [];
  }
  return tags
    .filter(tag => tag != null && typeof tag.DisplayLabel === 'string')
    .map(tag => tag.DisplayLabel.toLowerCase());
};

/**
 * Converts a raw API strain record into the shape stored in the database.
 *
 * @param {object} strain - Raw strain record from the API.
 * @returns {object} Normalized strain document.
 */
const toStrainDoc = strain => ({
  id: parseInt(strain.Id, 10),
  name: strain.Name,
  symbol: strain.Symbol,
  category: lowerOr(get(strain, 'Category'), 'unknown'),
  profile: lowerOr(get(strain, 'CannabinoidProfile'), 'none'),
  rating: strain.Rating,
  rating_count: strain.RatingCount,
  effects: labelsOf(strain, 'Tags'),
  negative_effects: labelsOf(strain, 'NegativeEffects'),
  flavors: labelsOf(strain, 'Flavors'),
  uses: labelsOf(strain, 'Symptoms'),
  conditions: labelsOf(strain, 'Conditions'),
});

/**
 * Fetches one page of the alphabetical strain listing and normalizes it.
 *
 * @param {number} num - Page number to fetch.
 * @returns {Promise<{strains: object[], page: *}>} The normalized strains
 *   (records without a name are dropped) and the response's `PagingContext`.
 */
const getPage = async num => {
  const url = `https://www.leafly.com/explore/page-${num}/sort-alpha`;
  const response = await xhr.get(url, {
    responseType: 'json',
  });
  const model = response.data.Model;

  // transform strain data
  const strains = model.Strains.filter(strain => strain.Name != null).map(toStrainDoc);

  return {
    strains,
    page: model.PagingContext,
  };
};

/**
 * Scrapes strain listing pages and stores previously unseen strains, plus any
 * new tag values, in the lowdb database.
 *
 * At least one page (`startFrom`) is always fetched. Scraping stops after the
 * page numbered `endAt` or higher, after a page with no strains, or after a
 * page flagged as the last one.
 *
 * @param {number} [startFrom=1] - First page number to fetch.
 * @param {number} [endAt=Infinity] - Last page number to fetch.
 * @returns {Promise<void>}
 */
export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) {
  const db = await low(adapter);

  /**
   * Appends the tags not yet present in collection `type`.
   * Only the in-memory state is changed; the caller persists it.
   */
  function stageTags(type, tags) {
    tags.forEach(tag => {
      const position = db
        .get(type)
        .indexOf(tag)
        .value();
      if (position < 0) {
        db.get(type)
          .push(tag)
          .value();
      }
    });
  }

  /**
   * Stores `strain` and its tags unless a strain with the same id already exists.
   */
  async function writeDoc(strain) {
    // check for value
    const existing = db
      .get('strains')
      .find({ id: strain.id })
      .value();
    if (existing) {
      return;
    }

    console.log(`Adding ${strain.id}, ${strain.name}`);
    db.get('strains')
      .push(strain)
      .value();
    TAG_COLLECTIONS.forEach(type => stageTags(type, strain[type]));

    // Persist the strain together with its tags in a single write: a stored
    // strain is skipped on later runs, so saving it before its tags could
    // leave those tags permanently unindexed after an interruption.
    await db.write();
  }

  // Make sure every collection exists before reading from or appending to it.
  const defaults = { strains: [] };
  TAG_COLLECTIONS.forEach(type => {
    defaults[type] = [];
  });
  await db.defaults(defaults).write();

  for (let pageNum = startFrom; ; pageNum += 1) {
    console.log(`Fetching page ${pageNum}`);
    const data = await getPage(pageNum);
    await pSeries(data.strains.map(strain => () => writeDoc(strain)));

    // NOTE(review): the other payload fields read here are PascalCase
    // (Id, Name, PagingContext); confirm `isLastPage` is the actual key.
    if (pageNum >= endAt || !data.strains.length || data.page.isLastPage) {
      break;
    }
  }
}