feat: working scraper
This commit is contained in:
@@ -35,6 +35,8 @@
|
|||||||
"index"
|
"index"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
|
"no-await-in-loop": 0,
|
||||||
|
"no-console": 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -9,3 +9,4 @@ coverage
|
|||||||
.nyc_output
|
.nyc_output
|
||||||
coverage.lcov
|
coverage.lcov
|
||||||
/lib
|
/lib
|
||||||
|
db.json
|
||||||
27
package.json
27
package.json
@@ -8,7 +8,8 @@
|
|||||||
"lint": "eslint \"*.{js,mjs}\" \"src/**/*.{js,mjs}\"",
|
"lint": "eslint \"*.{js,mjs}\" \"src/**/*.{js,mjs}\"",
|
||||||
"precommit": "lint-staged",
|
"precommit": "lint-staged",
|
||||||
"version": "auto-changelog -p && auto-authors && git add CHANGELOG.md AUTHORS.md",
|
"version": "auto-changelog -p && auto-authors && git add CHANGELOG.md AUTHORS.md",
|
||||||
"start": "node ."
|
"start": "node .",
|
||||||
|
"dev": "nodemon --ignore db.json ."
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
@@ -34,10 +35,14 @@
|
|||||||
"template": ".changelog_template.hbs",
|
"template": ".changelog_template.hbs",
|
||||||
"ignoreCommitPattern": "(chore|test):",
|
"ignoreCommitPattern": "(chore|test):",
|
||||||
"commitLimit": false
|
"commitLimit": false
|
||||||
},
|
},
|
||||||
"lint-staged": {
|
"lint-staged": {
|
||||||
"*.{js,mjs}": ["eslint --fix"],
|
"*.{js,mjs}": [
|
||||||
"*.{js,mjs,json,css}": ["prettier --write"]
|
"eslint --fix"
|
||||||
|
],
|
||||||
|
"*.{js,mjs,json,css}": [
|
||||||
|
"prettier --write"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"prettier": {
|
"prettier": {
|
||||||
"printWidth": 100,
|
"printWidth": 100,
|
||||||
@@ -48,20 +53,24 @@
|
|||||||
"cjs": true
|
"cjs": true
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"esm": "^3.0.17"
|
"axios": "^0.18.0",
|
||||||
|
"esm": "^3.0.17",
|
||||||
|
"lodash": "^4.17.10",
|
||||||
|
"lowdb": "^1.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"auto-authors": "^0.1.1",
|
"auto-authors": "^0.1.1",
|
||||||
"auto-changelog": "^1.7.0",
|
"auto-changelog": "^1.7.0",
|
||||||
"eslint": "^4.9.0",
|
"eslint": "^4.9.0",
|
||||||
|
"eslint-config-airbnb": "^16.1.0",
|
||||||
"eslint-config-prettier": "^2.9.0",
|
"eslint-config-prettier": "^2.9.0",
|
||||||
"eslint-plugin-prettier": "^2.3.1",
|
|
||||||
"eslint-plugin-import": "^2.7.0",
|
"eslint-plugin-import": "^2.7.0",
|
||||||
|
"eslint-plugin-jsx-a11y": "^6.0.2",
|
||||||
|
"eslint-plugin-prettier": "^2.3.1",
|
||||||
|
"eslint-plugin-react": "^7.1.0",
|
||||||
"husky": "^0.14.3",
|
"husky": "^0.14.3",
|
||||||
"lint-staged": "^7.0.4",
|
"lint-staged": "^7.0.4",
|
||||||
"eslint-config-airbnb": "^16.1.0",
|
"nodemon": "^1.18.4",
|
||||||
"eslint-plugin-jsx-a11y": "^6.0.2",
|
|
||||||
"eslint-plugin-react": "^7.1.0",
|
|
||||||
"prettier": "^1.9.0"
|
"prettier": "^1.9.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
export default function() {
|
import leafly from './leafly.mjs';
|
||||||
// es6 module code goes here
|
|
||||||
|
/**
 * Run the Leafly scraper to completion.
 *
 * @returns {Promise<void>} Settles when the imported `leafly` scraper settles.
 */
async function scrape() {
  await leafly();
}

// Entry point. The promise is handled explicitly so that a failed request or
// database write is reported and reflected in the exit code instead of
// surfacing as an unhandled rejection.
scrape().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
|
||||||
|
|||||||
74
src/leafly.mjs
Normal file
74
src/leafly.mjs
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
import get from 'lodash/get';
|
||||||
|
import low from 'lowdb';
|
||||||
|
import FileAsync from 'lowdb/adapters/FileAsync';
|
||||||
|
|
||||||
|
// lowdb storage adapter backed by the file `db.json`.
const adapter = new FileAsync('db.json');

// Headers attached to every request made through `xhr`.
const requestHeaders = {
  Accept: 'application/json, text/plain, */*',
  Referer: 'https://www.leafly.com/explore',
};

// Shared axios instance used for all Leafly requests.
const xhr = axios.create({ headers: requestHeaders });
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the Leafly explore listing and normalise its strains.
 *
 * Strains whose `Name` is null/undefined are dropped. Text fields are
 * lower-cased, and tag collections are reduced to their lower-cased
 * `DisplayLabel` values.
 *
 * @param {number} num - Page number interpolated into the explore URL.
 * @returns {Promise<{strains: Object[], page: *}>} The normalised strains and
 *   the response's `Model.PagingContext` value, passed through untouched.
 */
const getPage = async num => {
  const url = `https://www.leafly.com/explore/page-${num}`;
  const response = await xhr.get(url, { responseType: 'json' });

  // lodash `get` only applies its default when the resolved value is
  // `undefined`; a field that is present but `null` would slip through and
  // crash on `.toLowerCase()` / `.map()`. These helpers treat both the same.
  const valueOr = (strain, key, fallback) => {
    const value = get(strain, key);
    return value == null ? fallback : value;
  };
  const lowerText = (strain, key, fallback) => valueOr(strain, key, fallback).toLowerCase();
  const lowerLabels = (strain, key) =>
    valueOr(strain, key, []).map(tag => tag.DisplayLabel.toLowerCase());

  // transform strain data
  const strains = response.data.Model.Strains.filter(strain => strain.Name != null).map(strain => ({
    id: strain.Id,
    name: strain.Name,
    symbol: strain.Symbol,
    category: lowerText(strain, 'Category', 'unknown'),
    profile: lowerText(strain, 'CannabinoidProfile', 'none'),
    rating: strain.Rating,
    rating_count: strain.RatingCount,
    effects: lowerLabels(strain, 'Tags'),
    negative_effects: lowerLabels(strain, 'NegativeEffects'),
    flavors: lowerLabels(strain, 'Flavors'),
    uses: lowerLabels(strain, 'Symptoms'),
    conditions: lowerLabels(strain, 'Conditions'),
  }));

  return {
    strains,
    page: response.data.Model.PagingContext,
  };
};
|
||||||
|
|
||||||
|
/**
 * Scrape Leafly explore pages and store previously unseen strains in the
 * lowdb database under the `strains` collection.
 *
 * Pages are fetched one at a time starting at `startFrom`. Scraping stops
 * after page `endAt`, after a page that yields no strains, or when the page's
 * paging context reports `isLastPage`.
 *
 * @param {number} [startFrom=1] - First page number to fetch.
 * @param {number} [endAt=Infinity] - Last page number to fetch (inclusive).
 * @returns {Promise<void>} Resolves once every page has been fetched and all
 *   database writes have completed.
 */
export default async function scrapeLeafly(startFrom = 1, endAt = Infinity) {
  const db = await low(adapter);

  await db.defaults({ strains: [] }).write();

  // Ids already stored, collected once so each scraped strain is checked in
  // constant time instead of scanning the whole collection.
  const knownIds = new Set(
    db
      .get('strains')
      .map('id')
      .value(),
  );

  let pageNum = startFrom;
  let finished = false;

  while (!finished) {
    console.log(`Fetching page ${pageNum}`);
    const data = await getPage(pageNum);

    // Keep only strains not stored yet; recording the id immediately also
    // guards against the same strain appearing twice on one page.
    const fresh = data.strains.filter(strain => {
      if (knownIds.has(strain.id)) return false;
      knownIds.add(strain.id);
      console.log(`Adding ${strain.id}, ${strain.name}`);
      return true;
    });

    // One awaited write per page: an async callback inside `forEach` would
    // leave the write promises unawaited, letting this function resolve
    // before the data is persisted and hiding write failures.
    if (fresh.length) {
      await db
        .get('strains')
        .push(...fresh)
        .write();
    }

    // NOTE(review): other fields read from the API are PascalCase; confirm the
    // paging flag really is `isLastPage` and not `IsLastPage`.
    if (pageNum >= endAt || !data.strains.length || data.page.isLastPage) finished = true;
    pageNum += 1;
  }
}
|
||||||
Reference in New Issue
Block a user