112 lines
3.0 KiB
JavaScript
112 lines
3.0 KiB
JavaScript
import axios from 'axios';
|
|
import get from 'lodash/get';
|
|
import low from 'lowdb';
|
|
import FileAsync from 'lowdb/adapters/FileAsync';
|
|
|
|
// lowdb file adapter pointing at db.json; handed to `low()` when scraping starts.
const adapter = new FileAsync('db.json');

// Headers attached to every request made through the shared axios instance.
const requestHeaders = {
  Accept: 'application/json, text/plain, */*',
  Referer: 'https://www.leafly.com/explore/sort-alpha',
};

// Pre-configured axios instance used for all listing requests.
const xhr = axios.create({ headers: requestHeaders });
|
|
|
|
/**
 * Run promise-returning tasks strictly one after another.
 *
 * Every task is attached with `.then`, so it is called with the previous
 * task's resolved value. The returned promise resolves with the last task's
 * result (`undefined` for an empty list) and rejects as soon as a task fails,
 * in which case the remaining tasks are not called.
 *
 * @param {Function[]} tasks - Functions to invoke in order.
 * @returns {Promise<*>} Promise for the final task's result.
 */
const pSeries = tasks => {
  let chain = Promise.resolve();
  tasks.forEach(step => {
    chain = chain.then(step);
  });
  return chain;
};
|
|
|
|
/**
 * Fetch one page of the alphabetical strain listing and normalize its strains.
 *
 * Entries without a `Name` are dropped. `Category` and `CannabinoidProfile`
 * are lower-cased and fall back to 'unknown' / 'none'; the tag collections
 * are flattened to arrays of lower-cased `DisplayLabel` strings.
 *
 * @param {number} num - Page number interpolated into the listing URL.
 * @returns {Promise<{strains: Object[], page: *}>} The normalized strains and
 *   the response's `Model.PagingContext`, passed through unchanged.
 * @throws {Error} When the response carries no `Model.Strains` array. Errors
 *   raised by the HTTP request itself propagate unchanged.
 */
const getPage = async num => {
  const url = `https://www.leafly.com/explore/page-${num}/sort-alpha`;
  const response = await xhr.get(url, { responseType: 'json' });

  // Fail with a descriptive message instead of an opaque TypeError when the
  // body is not the expected JSON document.
  const model = response.data == null ? undefined : response.data.Model;
  if (model == null || !Array.isArray(model.Strains)) {
    throw new Error(`Unexpected response shape for ${url}: missing Model.Strains`);
  }

  // lodash `get` only substitutes its default for `undefined`, so a JSON
  // `null` would slip through and crash on `.toLowerCase()`. Treat null as
  // missing as well.
  const lowerText = (strain, key, fallback) => {
    const value = get(strain, key);
    return (value == null ? fallback : value).toLowerCase();
  };

  // Same null handling for tag lists; tags without a display label are skipped.
  const labelsOf = (strain, key) => {
    const tags = get(strain, key);
    if (!Array.isArray(tags)) return [];
    return tags
      .filter(tag => tag != null && tag.DisplayLabel != null)
      .map(tag => tag.DisplayLabel.toLowerCase());
  };

  // transform strain data
  const strains = model.Strains
    .filter(strain => strain != null && strain.Name != null)
    .map(strain => ({
      id: parseInt(strain.Id, 10),
      name: strain.Name,
      symbol: strain.Symbol,
      category: lowerText(strain, 'Category', 'unknown'),
      profile: lowerText(strain, 'CannabinoidProfile', 'none'),
      rating: strain.Rating,
      rating_count: strain.RatingCount,
      effects: labelsOf(strain, 'Tags'),
      negative_effects: labelsOf(strain, 'NegativeEffects'),
      flavors: labelsOf(strain, 'Flavors'),
      uses: labelsOf(strain, 'Symptoms'),
      conditions: labelsOf(strain, 'Conditions'),
    }));

  return {
    strains,
    page: model.PagingContext,
  };
};
|
|
|
|
/**
 * Scrape the strain listing page by page into the lowdb database.
 *
 * Pages are fetched sequentially starting at `startFrom`; that first page is
 * always fetched. Scraping stops after the page numbered `endAt` (or beyond),
 * after a page that yields no strains, or after a page whose paging context
 * reports `isLastPage`.
 *
 * Strains whose id is already stored are skipped. A new strain is appended to
 * `strains`, and any of its tags not yet present are appended to the matching
 * tag collection. The strain and its tags are persisted together in a single
 * write, so an interrupted run cannot leave a stored strain whose tags were
 * never recorded.
 *
 * @param {number} [startFrom=1] - First page number to fetch.
 * @param {number} [endAt=Infinity] - Last page number to fetch.
 * @returns {Promise<void>} Resolves once the final page has been processed.
 */
export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) {
  // Each name is both a top-level db collection and a property on a strain.
  const tagTypes = ['effects', 'negative_effects', 'uses', 'conditions', 'flavors'];
  const db = await low(adapter);

  // Make sure every collection exists before anything reads from it.
  await db
    .defaults({
      strains: [],
      effects: [],
      negative_effects: [],
      uses: [],
      conditions: [],
      flavors: [],
    })
    .write();

  // In-memory indexes of what is already stored, so duplicate checks do not
  // rescan the stored arrays for every strain and tag.
  const knownIds = new Set(db.get('strains').value().map(doc => doc.id));
  const knownTags = new Map(tagTypes.map(type => [type, new Set(db.get(type).value())]));

  // Append the not-yet-known tags to their collection in memory (no disk write).
  function stageTags(type, tags) {
    const seen = knownTags.get(type);
    const fresh = [];
    (tags || []).forEach(tag => {
      if (!seen.has(tag)) {
        seen.add(tag);
        fresh.push(tag);
      }
    });
    if (fresh.length > 0) {
      db.get(type).push(...fresh).value();
    }
  }

  // Store a strain unless its id is already known, then persist once.
  async function writeDoc(strain) {
    if (knownIds.has(strain.id)) return;

    console.log(`Adding ${strain.id}, ${strain.name}`);
    db.get('strains').push(strain).value();
    knownIds.add(strain.id);
    tagTypes.forEach(type => stageTags(type, strain[type]));

    // One write covers the strain and all of its tags.
    await db.write();
  }

  let pageNum = startFrom;
  for (;;) {
    console.log(`Fetching page ${pageNum}`);
    const data = await getPage(pageNum);

    await pSeries(data.strains.map(strain => () => writeDoc(strain)));

    // A missing paging context is treated as "not the last page"; the empty
    // page check still terminates the loop.
    const isLastPage = Boolean(data.page && data.page.isLastPage);
    if (pageNum >= endAt || data.strains.length === 0 || isLastPage) break;
    pageNum += 1;
  }
}
|