mirror of
https://github.com/stashapp/CommunityScrapers.git
synced 2025-12-11 11:18:56 -06:00
357 lines
10 KiB
JavaScript
Executable File
357 lines
10 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
'use strict';
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const safeRequire = (name) => {
|
|
try {
|
|
return require(name);
|
|
} catch (error) {
|
|
if (error && error.code === 'MODULE_NOT_FOUND') {
|
|
console.log(`Error: Cannot find module '${name}', have you installed the dependencies?`);
|
|
process.exit(1);
|
|
}
|
|
throw error;
|
|
}
|
|
};
|
|
|
|
const Ajv = safeRequire('ajv').default;
|
|
const betterAjvErrors = safeRequire('better-ajv-errors').default;
|
|
const chalk = safeRequire('chalk');
|
|
const YAML = safeRequire('yaml');
|
|
const addFormats = safeRequire('ajv-formats');
|
|
|
|
// https://www.peterbe.com/plog/nodejs-fs-walk-or-glob-or-fast-glob
|
|
function walk(directory, ext, filepaths = []) {
|
|
const files = fs.readdirSync(directory);
|
|
for (const filename of files) {
|
|
const filepath = path.join(directory, filename);
|
|
if (fs.statSync(filepath).isDirectory()) {
|
|
walk(filepath, ext, filepaths);
|
|
} else if (path.extname(filename) === ext) {
|
|
filepaths.push(filepath);
|
|
}
|
|
}
|
|
return filepaths;
|
|
}
|
|
|
|
// https://stackoverflow.com/a/53833620
|
|
const isSorted = arr => arr.every((v,i,a) => !i || a[i-1] <= v);
|
|
|
|
class Validator {
|
|
constructor(flags) {
|
|
this.allowDeprecations = flags.includes('-d');
|
|
this.stopOnError = !flags.includes('-a');
|
|
this.sortedURLs = flags.includes('-s');
|
|
this.verbose = flags.includes('-v');
|
|
|
|
const schemaPath = path.resolve(__dirname, './scraper.schema.json');
|
|
this.schema = JSON.parse(fs.readFileSync(schemaPath, 'utf8'));
|
|
this.ajv = new Ajv({
|
|
// allErrors: true,
|
|
strict: true,
|
|
});
|
|
addFormats(this.ajv);
|
|
|
|
this.mappingPattern = /^([a-z]+)By(Fragment|Name|URL)$/;
|
|
}
|
|
|
|
run(files) {
|
|
let scrapers;
|
|
|
|
if (files && Array.isArray(files) && files.length > 0) {
|
|
scrapers = files.map(file => path.resolve(file));
|
|
} else {
|
|
const scrapersDir = path.resolve(__dirname, '../scrapers');
|
|
scrapers = walk(scrapersDir, '.yml');
|
|
}
|
|
|
|
const yamlLoadOptions = {
|
|
prettyErrors: true,
|
|
version: '1.2',
|
|
merge: true,
|
|
};
|
|
|
|
let result = true;
|
|
const validate = this.ajv.compile(this.schema);
|
|
|
|
for (const file of scrapers) {
|
|
const relPath = path.relative(process.cwd(), file);
|
|
let contents, data;
|
|
try {
|
|
contents = fs.readFileSync(file, 'utf8');
|
|
data = YAML.parse(contents, yamlLoadOptions);
|
|
} catch (error) {
|
|
console.error(`${chalk.red(chalk.bold('ERROR'))} in: ${relPath}:`);
|
|
error.stack = null;
|
|
console.error(error);
|
|
result = result && false;
|
|
if (this.stopOnError) break;
|
|
else continue;
|
|
}
|
|
|
|
let valid = validate(data);
|
|
|
|
// If schema validation did not pass, don't try to validate mappings.
|
|
if (valid) {
|
|
const mappingErrors = this.getMappingErrors(data);
|
|
const validMapping = mappingErrors.length === 0;
|
|
if (!validMapping) {
|
|
validate.errors = (validate.errors || []).concat(mappingErrors);
|
|
}
|
|
|
|
valid = valid && validMapping;
|
|
}
|
|
|
|
// Output validation errors
|
|
if (!valid) {
|
|
const output = betterAjvErrors('scraper', data, validate.errors, { indent: 2 });
|
|
console.log(output);
|
|
}
|
|
|
|
if (this.verbose || !valid) {
|
|
const validColor = valid ? chalk.green : chalk.red;
|
|
console.log(`${relPath} Valid: ${validColor(valid)}`);
|
|
}
|
|
|
|
result = result && valid;
|
|
|
|
if (!valid && this.stopOnError) break;
|
|
}
|
|
|
|
if (!this.verbose && result) {
|
|
console.log(chalk.green('Validation passed!'));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
getMappingErrors(data) {
|
|
return [].concat(
|
|
this._collectConfigMappingErrors(data),
|
|
this._collectScraperDefinitionErrors(data),
|
|
this._collectCookieErrors(data),
|
|
);
|
|
}
|
|
|
|
_collectConfigMappingErrors(data) {
|
|
const errors = [];
|
|
|
|
if (data.sceneByName && !data.sceneByQueryFragment) {
|
|
errors.push({
|
|
keyword: 'sceneByName',
|
|
message: `a \`sceneByQueryFragment\` configuration is required for \`sceneByName\` to work`,
|
|
params: { keyword: 'sceneByName' },
|
|
dataPath: '/sceneByName',
|
|
});
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
_collectScraperDefinitionErrors(data) {
|
|
const hasStashServer = Object.keys(data).includes('stashServer');
|
|
const xPathScrapers = data.xPathScrapers ? Object.keys(data.xPathScrapers) : [];
|
|
const jsonScrapers = data.jsonScrapers ? Object.keys(data.jsonScrapers) : [];
|
|
|
|
let needsStashServer = false;
|
|
const configuredXPathScrapers = [];
|
|
const configuredJsonScrapers = [];
|
|
|
|
const errors = [];
|
|
|
|
Object.entries(data).forEach(([key, value]) => {
|
|
const match = this.mappingPattern.exec(key);
|
|
if (!match) {
|
|
return;
|
|
}
|
|
|
|
const seenURLs = {};
|
|
|
|
const type = match[1];
|
|
|
|
const multiple = value instanceof Array;
|
|
(multiple ? value : [value]).forEach(({ action, scraper, url }, idx) => {
|
|
const dataPath = `/${key}${multiple ? `/${idx}` : ''}`;
|
|
|
|
if (action === 'stash') {
|
|
needsStashServer = true;
|
|
if (!hasStashServer) {
|
|
errors.push({
|
|
keyword: 'action',
|
|
message: `root object should contain a \`stashServer\` definition`,
|
|
params: { keyword: 'action' },
|
|
dataPath: dataPath + '/action',
|
|
});
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (action === 'scrapeXPath') {
|
|
configuredXPathScrapers.push(scraper);
|
|
if (!xPathScrapers.includes(scraper)) {
|
|
errors.push({
|
|
keyword: 'scraper',
|
|
message: `xPathScrapers should contain a XPath scraper definition for \`${scraper}\``,
|
|
params: { keyword: 'scraper' },
|
|
dataPath: dataPath + '/scraper',
|
|
});
|
|
} else if (!data.xPathScrapers || !data.xPathScrapers[scraper][type]) {
|
|
errors.push({
|
|
keyword: scraper,
|
|
message: `\`${scraper}\` should create an object of type \`${type}\``,
|
|
params: { keyword: scraper },
|
|
dataPath: `/xPathScrapers/${scraper}`,
|
|
});
|
|
}
|
|
|
|
if (url) {
|
|
url.forEach((u, uIdx) => {
|
|
const exists = seenURLs[u];
|
|
if (exists) {
|
|
errors.push({
|
|
keyword: 'url',
|
|
message: `URLs for type \`${type}\` should be unique, already exists on ${exists}`,
|
|
params: { keyword: 'url' },
|
|
dataPath: `${dataPath}/url/${uIdx}`,
|
|
});
|
|
} else {
|
|
seenURLs[u] = `${dataPath}/url/${uIdx}`;
|
|
}
|
|
});
|
|
|
|
if (this.sortedURLs && !isSorted(url)) {
|
|
errors.push({
|
|
keyword: 'url',
|
|
message: 'URL list should be sorted in ascending alphabetical order',
|
|
params: { keyword: 'url' },
|
|
dataPath: dataPath + '/url',
|
|
});
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (action === 'scrapeJson') {
|
|
configuredJsonScrapers.push(scraper);
|
|
if (!jsonScrapers.includes(scraper)) {
|
|
errors.push({
|
|
keyword: 'scraper',
|
|
message: `jsonScrapers should contain a JSON scraper definition for \`${scraper}\``,
|
|
params: { keyword: 'scraper' },
|
|
dataPath: dataPath + '/scraper',
|
|
});
|
|
} else if (!data.jsonScrapers || !data.jsonScrapers[scraper][type]) {
|
|
errors.push({
|
|
keyword: scraper,
|
|
message: `\`${scraper}\` should create an object of type \`${type}\``,
|
|
params: { keyword: scraper },
|
|
dataPath: `/jsonScrapers/${scraper}`,
|
|
});
|
|
}
|
|
|
|
if (url) {
|
|
url.forEach((u, uIdx) => {
|
|
const exists = seenURLs[u];
|
|
if (exists) {
|
|
errors.push({
|
|
keyword: 'url',
|
|
message: `URLs for type \`${type}\` should be unique, already exists on ${exists}`,
|
|
params: { keyword: 'url' },
|
|
dataPath: `${dataPath}/url/${uIdx}`,
|
|
});
|
|
} else {
|
|
seenURLs[u] = `${dataPath}/url/${uIdx}`;
|
|
}
|
|
});
|
|
|
|
if (this.sortedURLs && !isSorted(url)) {
|
|
errors.push({
|
|
keyword: 'url',
|
|
message: 'URL list should be sorted in ascending alphabetical order',
|
|
params: { keyword: 'url' },
|
|
dataPath: dataPath + '/url',
|
|
});
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// if (action === 'script') {
|
|
// return;
|
|
// }
|
|
//
|
|
// errors.push({
|
|
// keyword: 'action',
|
|
// message: `unsupported action \`${action}\``,
|
|
// params: { keyword: 'action' },
|
|
// dataPath: dataPath + '/action',
|
|
// });
|
|
});
|
|
});
|
|
|
|
// Check for unused definitions
|
|
|
|
if (!needsStashServer && hasStashServer) {
|
|
errors.unshift({
|
|
keyword: 'stashServer',
|
|
message: '`stashServer` is defined, but never used',
|
|
params: { keyword: 'stashServer' },
|
|
dataPath: '/stashServer',
|
|
});
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
_collectCookieErrors(data) {
|
|
const errors = [];
|
|
|
|
const cookies = data.driver && data.driver.cookies;
|
|
if (cookies) {
|
|
const usesCDP = Boolean(data.driver && data.driver.useCDP);
|
|
cookies.forEach((cookieItem, idx) => {
|
|
const hasCookieURL = 'CookieURL' in cookieItem;
|
|
if (!usesCDP && !hasCookieURL) {
|
|
errors.push({
|
|
keyword: 'CookieURL',
|
|
message: '`CookieURL` is required because useCDP is `false`',
|
|
params: { keyword: 'CookieURL' },
|
|
dataPath: `/driver/cookies/${idx}`,
|
|
});
|
|
} else if (usesCDP && hasCookieURL) {
|
|
errors.push({
|
|
keyword: 'CookieURL',
|
|
message: 'Should not have `CookieURL` because useCDP is `true`',
|
|
params: { keyword: 'CookieURL' },
|
|
dataPath: `/driver/cookies/${idx}/CookieURL`,
|
|
});
|
|
}
|
|
});
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
}
|
|
|
|
function main(flags, files) {
|
|
const args = process.argv.slice(2)
|
|
flags = (flags === undefined) ? args.filter(arg => arg.startsWith('-')) : flags;
|
|
files = (files === undefined) ? args.filter(arg => !arg.startsWith('-')) : files;
|
|
const validator = new Validator(flags);
|
|
const result = validator.run(files);
|
|
if (flags.includes('--ci')) {
|
|
process.exit(result ? 0 : 1);
|
|
}
|
|
}
|
|
|
|
if (require.main === module) {
|
|
main();
|
|
}
|
|
|
|
module.exports = main;
|
|
module.exports.Validator = Validator;
|