chore: monorepo

2018-08-30 19:00:59 -07:00
parent cc5e15dbb1
commit 9684309e6a
6 changed files with 57 additions and 23 deletions

index.js

@@ -0,0 +1,5 @@
/* eslint no-global-assign: 0 */
// CommonJS entry point: swap in the `esm` loader so the ESM source can be required
require = require('esm')(module);
const mod = require('./src/index.mjs').default;
module.exports = mod;

index.mjs

@@ -0,0 +1,3 @@
import mod from './src/index.mjs';
export default mod;

package.json

@@ -0,0 +1,46 @@
{
  "name": "leafly-scraper",
  "version": "0.0.0",
  "description": "scrapes strain info, stores for later reference",
  "main": "index",
  "module": "index.mjs",
  "scripts": {
    "start": "node .",
    "dev": "nodemon --ignore db.json ."
  },
  "keywords": [],
  "files": [
    "src/",
    "index.js",
    "index.mjs",
    "CHANGELOG.md",
    "AUTHORS.md"
  ],
  "author": "joe fleming (https://github.com/w33ble)",
  "license": "MIT",
  "lint-staged": {
    "*.{js,mjs}": [
      "eslint --fix"
    ],
    "*.{js,mjs,json,css}": [
      "prettier --write"
    ]
  },
  "prettier": {
    "printWidth": 100,
    "singleQuote": true,
    "trailingComma": "es5"
  },
  "esm": {
    "cjs": true
  },
  "dependencies": {
    "axios": "^0.18.0",
    "esm": "^3.0.17",
    "lodash": "^4.17.10",
    "lowdb": "^1.0.0"
  },
  "devDependencies": {
    "nodemon": "^1.18.4"
  }
}

src/index.mjs

@@ -0,0 +1,7 @@
import leafly from './leafly.mjs';

async function scrape() {
  await leafly();
}

scrape();

src/leafly.mjs

@@ -0,0 +1,74 @@
import axios from 'axios';
import get from 'lodash/get';
import low from 'lowdb';
import FileAsync from 'lowdb/adapters/FileAsync';

// lowdb adapter: persist scraped strains to db.json asynchronously
const adapter = new FileAsync('db.json');

// shared axios client with Accept and Referer headers for the explore endpoint
const xhr = axios.create({
  headers: {
    Accept: 'application/json, text/plain, */*',
    Referer: 'https://www.leafly.com/explore',
  },
});

// fetch a single explore page and normalize its strain records
const getPage = async num => {
  const url = `https://www.leafly.com/explore/page-${num}`;
  const response = await xhr.get(url, {
    responseType: 'json',
  });

  // transform strain data
  const strains = response.data.Model.Strains.filter(strain => strain.Name != null).map(strain => ({
    id: parseInt(strain.Id, 10),
    name: strain.Name,
    symbol: strain.Symbol,
    category: get(strain, 'Category', 'unknown').toLowerCase(),
    profile: get(strain, 'CannabinoidProfile', 'none').toLowerCase(),
    rating: strain.Rating,
    rating_count: strain.RatingCount,
    effects: get(strain, 'Tags', []).map(tag => tag.DisplayLabel.toLowerCase()),
    negative_effects: get(strain, 'NegativeEffects', []).map(tag => tag.DisplayLabel.toLowerCase()),
    flavors: get(strain, 'Flavors', []).map(tag => tag.DisplayLabel.toLowerCase()),
    uses: get(strain, 'Symptoms', []).map(tag => tag.DisplayLabel.toLowerCase()),
    conditions: get(strain, 'Conditions', []).map(tag => tag.DisplayLabel.toLowerCase()),
  }));

  return {
    strains,
    page: response.data.Model.PagingContext,
  };
};

export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) {
  let pageNum = startFrom;
  let finished = false;

  const db = await low(adapter);
  await db.defaults({ strains: [] }).write();

  while (!finished) {
    console.log(`Fetching page ${pageNum}`);
    const data = await getPage(pageNum);

    // insert any strains that aren't already in the db, one write at a time
    for (const strain of data.strains) {
      const doc = db
        .get('strains')
        .filter({ id: strain.id })
        .first()
        .value();

      if (!doc) {
        console.log(`Adding ${strain.id}, ${strain.name}`);
        await db
          .get('strains')
          .push(strain)
          .write();
      }
    }

    if (pageNum >= endAt || !data.strains.length || data.page.isLastPage) finished = true;
    pageNum += 1;
  }
}