feat: add tags to scraper output
This commit is contained in:
@@ -12,6 +12,8 @@ const xhr = axios.create({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Runs a list of task functions strictly one after another.
 *
 * Every task is attached to the previous one with `.then`, so it is invoked
 * with whatever the previous task resolved to (the first task receives
 * `undefined`) and does not start until that previous task has settled.
 *
 * @param {Array<Function>} tasks - Functions that may return a value or a promise.
 * @returns {Promise<*>} Resolves with the last task's result (`undefined` for
 *   an empty list); rejects as soon as any task throws or rejects.
 */
const pSeries = tasks => {
  let chain = Promise.resolve();
  tasks.forEach(task => {
    chain = chain.then(task);
  });
  return chain;
};
|
||||
|
||||
const getPage = async num => {
|
||||
const url = `https://www.leafly.com/explore/page-${num}`;
|
||||
const response = await xhr.get(url, {
|
||||
@@ -45,28 +47,63 @@ export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) {
|
||||
let finished = false;
|
||||
const db = await low(adapter);
|
||||
|
||||
await db.defaults({ strains: [] }).write();
|
||||
/**
 * Appends `tag` to the `type` collection of the enclosing `db` unless it is
 * already present, then persists the change.
 *
 * @param {string} type - Name of the collection to update (e.g. 'effects').
 * @param {*} tag - Value to store; compared with the collection's `indexOf`.
 * @returns {Promise<void>} Settles once the write (if any) has completed.
 */
async function writeTag(type, tag) {
  // The lookup is a plain synchronous read, same as the strain lookup in
  // writeDoc, so it is not awaited.
  const position = db
    .get(type)
    .indexOf(tag)
    .value();

  // Already recorded: nothing to write.
  if (!(position < 0)) return;

  await db
    .get(type)
    .push(tag)
    .write();
}
|
||||
|
||||
/**
 * Records every tag in `tags` under the `type` collection, one at a time and
 * in order, by delegating to `writeTag`.
 *
 * A missing or non-array `tags` value is treated as "no tags": callers pass
 * strain properties straight through, and a strain without one of the lists
 * should not abort the run with a TypeError after the strain was stored.
 *
 * @param {string} type - Name of the collection to update.
 * @param {Array<*>} [tags] - Tags to record; anything that is not an array is ignored.
 * @returns {Promise<void>} Settles once all tags have been processed.
 */
async function writeTags(type, tags) {
  const list = Array.isArray(tags) ? tags : [];
  // Wrap each call in a thunk so pSeries starts it only after the previous
  // write has finished.
  await pSeries(list.map(tag => () => writeTag(type, tag)));
}
|
||||
|
||||
/**
 * Stores `strain` in the `strains` collection of the enclosing `db` if no
 * entry with the same `id` exists yet, then records its tag lists.
 *
 * Strains that are already stored are left untouched (their tags are not
 * re-processed).
 *
 * @param {Object} strain - Scraped strain record; `id` and `name` are read
 *   here, along with the `effects`, `negative_effects`, `uses`, `conditions`
 *   and `flavors` lists.
 * @returns {Promise<void>} Settles once the strain and its tags are written.
 */
async function writeDoc(strain) {
  // Look for an existing entry with the same id.
  const existing = db
    .get('strains')
    .find({ id: strain.id })
    .value();

  if (existing) return;

  console.log(`Adding ${strain.id}, ${strain.name}`);
  await db
    .get('strains')
    .push(strain)
    .write();

  // Tag collections are written sequentially, in this fixed order.
  const categories = ['effects', 'negative_effects', 'uses', 'conditions', 'flavors'];
  for (const category of categories) {
    await writeTags(category, strain[category]);
  }
}
|
||||
|
||||
await db
|
||||
.defaults({
|
||||
strains: [],
|
||||
effects: [],
|
||||
negative_effects: [],
|
||||
uses: [],
|
||||
conditions: [],
|
||||
flavors: [],
|
||||
})
|
||||
.write();
|
||||
|
||||
while (!finished) {
|
||||
console.log(`Fetching page ${pageNum}`);
|
||||
const data = await getPage(pageNum);
|
||||
|
||||
data.strains.forEach(async strain => {
|
||||
// check for value
|
||||
const doc = db
|
||||
.get('strains')
|
||||
.filter({ id: strain.id })
|
||||
.first()
|
||||
.value();
|
||||
|
||||
if (!doc) {
|
||||
console.log(`Adding ${strain.id}, ${strain.name}`);
|
||||
await db
|
||||
.get('strains')
|
||||
.push(strain)
|
||||
.write();
|
||||
}
|
||||
});
|
||||
await pSeries(data.strains.map(strain => () => writeDoc(strain)));
|
||||
|
||||
if (pageNum >= endAt || !data.strains.length || data.page.isLastPage) finished = true;
|
||||
pageNum += 1;
|
||||
|
||||
Reference in New Issue
Block a user