From 7f1b61be4df2fbecf828b2d398717cda60f661e6 Mon Sep 17 00:00:00 2001
From: gitgiggety <79809426+gitgiggety@users.noreply.github.com>
Date: Tue, 28 Sep 2021 01:58:24 +0200
Subject: [PATCH] Add plugin which parses scene and gallery filenames on add
(#16)
Add a plugin which tries to parse the names of scenes and galleries when
they're added / scanned. The primary supported format is
`{studio}.{year}.{month}.{day}.{performer}.{performer}.{title}`. Studio
can optionally include ".com". Some "parts" are ignored, such as
resolution and some release group names. Performers can be separated by
an optional "and", which is ignored as well. Performers are searched by
both name and aliases; aliases are split on `,` and `;`, and a match
must be "exact", meaning "First Last" won't match a performer whose name
or alias is just "First". When there are multiple matches (for example
when the filename is "first1.first2.last2" and multiple performers have
the "first1" alias), all matched performers will be added.
---
PLUGINS-LIST.md | 1 +
plugins/filenameParser/filenameParser.js | 397 ++++++++++++++++++++++
plugins/filenameParser/filenameParser.yml | 13 +
3 files changed, 411 insertions(+)
create mode 100644 plugins/filenameParser/filenameParser.js
create mode 100644 plugins/filenameParser/filenameParser.yml
diff --git a/PLUGINS-LIST.md b/PLUGINS-LIST.md
index 7c1b97f..b08c533 100644
--- a/PLUGINS-LIST.md
+++ b/PLUGINS-LIST.md
@@ -9,6 +9,7 @@ Category|Triggers|Plugin Name|Description|Minimum Stash version
Scraper|Task|[GHScraper_Checker](plugins/GHScraper_Checker)|Compare local file against github file from the community scraper repo.|v0.8
Maintenance|Scene.Update|[renamerOnUpdate](plugins/renamerOnUpdate)|Rename your file based on Stash metadata.|v0.7
Scenes|SceneMarker.Create<br />SceneMarker.Update|[markerTagToScene](plugins/markerTagToScene)|Adds primary tag of Scene Marker to the Scene on marker create/update.|v0.8 ([46bbede](https://github.com/stashapp/stash/commit/46bbede9a07144797d6f26cf414205b390ca88f9))
+Scanning|Scene.Create<br />Gallery.Create|[filenameParser](plugins/filenameParser)|Tries to parse filenames, primarily in {studio}.{year}.{month}.{day}.{performer1firstname}.{performer1lastname}.{performer2}.{title} format, into the respective fields|v0.10
## Utility Scripts
diff --git a/plugins/filenameParser/filenameParser.js b/plugins/filenameParser/filenameParser.js
new file mode 100644
index 0000000..999f29b
--- /dev/null
+++ b/plugins/filenameParser/filenameParser.js
@@ -0,0 +1,397 @@
+/**
+ * Builds the result object that main() returns on every exit path.
+ *
+ * @returns {{output: string}} A fresh object whose `output` is "ok".
+ */
+function ok() {
+    var response = {};
+    response.output = "ok";
+    return response;
+}
+
+/**
+ * Hook entry point.
+ *
+ * Reads the hook context from the global `input`, selects the filename
+ * fetcher / saver pair that belongs to the triggering hook type, parses the
+ * cleaned filename and hands the parse result to the saver.
+ *
+ * ok() is returned on every path, including when the context carries no ID,
+ * the hook type is not handled, or no filename could be fetched.
+ *
+ * @returns {{output: string}} The value produced by ok().
+ */
+function main() {
+    var context = input.Args.hookContext;
+    var hookType = context.type;
+    var targetID = context.ID;
+
+    if (!targetID) {
+        return ok();
+    }
+
+    var fetchFilename;
+    var save;
+    switch (hookType) {
+        case 'Scene.Create.Post':
+            fetchFilename = getSceneFilename;
+            save = updateScene;
+            break;
+        case 'Gallery.Create.Post':
+            fetchFilename = getGalleryFilename;
+            save = updateGallery;
+            break;
+        default:
+            // Any other hook type is not handled by this plugin
+            return ok();
+    }
+
+    var rawFilename = fetchFilename(targetID);
+    if (!rawFilename) {
+        return ok();
+    }
+
+    save(targetID, parseFilename(cleanFilename(rawFilename)));
+
+    return ok();
+}
+
+/**
+ * Looks up a scene through the global `gql` client and returns the last
+ * segment of its path (the filename).
+ *
+ * @param sceneID - Value sent as the `id` variable of the findScene query.
+ * @returns {string|null} The filename, or null when the query yields no
+ *     scene or the scene has no path.
+ */
+function getSceneFilename(sceneID) {
+    var query = "\
+query findScene($id: ID) {\
+ findScene(id: $id) {\
+ path\
+ }\
+}";
+
+    var variables = {
+        id: sceneID
+    };
+
+    var result = gql.Do(query, variables);
+    var findScene = result.findScene;
+    // A missing path would otherwise throw on the string calls below
+    if (!findScene || !findScene.path) {
+        return null;
+    }
+
+    var path = findScene.path;
+    // Cut after whichever separator occurs last so both "/" and "\" delimited paths yield the bare filename
+    var lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
+    return path.substring(lastSeparator + 1);
+}
+
+/**
+ * Sends the parse result to the sceneUpdate mutation through the global
+ * `gql` client.
+ *
+ * The scene ID is written onto `parseResult` itself, so the caller's object
+ * gains an `id` property.
+ *
+ * @param sceneID - ID stored as `input.id` of the mutation.
+ * @param {Object} parseResult - Fields to update; used as the mutation input.
+ */
+function updateScene(sceneID, parseResult) {
+    var mutation = "\
+mutation SceneUpdate($input: SceneUpdateInput!) {\
+ sceneUpdate(input: $input) {\
+ id\
+ }\
+}";
+
+    parseResult.id = sceneID;
+
+    gql.Do(mutation, { input: parseResult });
+}
+
+/**
+ * Looks up a gallery through the global `gql` client and returns the last
+ * segment of its path (the filename).
+ *
+ * @param galleryID - Value sent as the `id` variable of the findGallery query.
+ * @returns {string|null} The filename, or null when the query yields no
+ *     gallery or the gallery has no path.
+ */
+function getGalleryFilename(galleryID) {
+    var query = "\
+query findGallery($id: ID!) {\
+ findGallery(id: $id) {\
+ path\
+ }\
+}";
+
+    var variables = {
+        id: galleryID
+    };
+
+    var result = gql.Do(query, variables);
+    var findGallery = result.findGallery;
+    // A gallery without a path would otherwise throw on the string calls below
+    if (!findGallery || !findGallery.path) {
+        return null;
+    }
+
+    var path = findGallery.path;
+    // Cut after whichever separator occurs last so both "/" and "\" delimited paths yield the bare filename
+    var lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
+    return path.substring(lastSeparator + 1);
+}
+
+/**
+ * Sends the parse result to the galleryUpdate mutation through the global
+ * `gql` client.
+ *
+ * The gallery ID is written onto `parseResult` itself, so the caller's
+ * object gains an `id` property.
+ *
+ * @param galleryID - ID stored as `input.id` of the mutation.
+ * @param {Object} parseResult - Fields to update; used as the mutation input.
+ */
+function updateGallery(galleryID, parseResult) {
+    var mutation = "\
+mutation GalleryUpdate($input: GalleryUpdateInput!) {\
+ galleryUpdate(input: $input) {\
+ id\
+ }\
+}";
+
+    parseResult.id = galleryID;
+
+    gql.Do(mutation, { input: parseResult });
+}
+
+/**
+ * Counts how many leading entries of `parts`, taken together, spell out
+ * exactly one of the given names.
+ *
+ * Every candidate (the name followed by each alias) is stripped of '.', ' ',
+ * '-' and '_' and lower-cased. Parts are then removed from the front of the
+ * candidate one after another; a candidate only counts at the point where
+ * it has been consumed completely, so the part "first" on its own does not
+ * match the name "First Last".
+ *
+ * @param {string[]} parts - Filename parts, in order.
+ * @param {string} name - Primary name to compare against.
+ * @param {string[]} aliases - Alternative names; may be empty.
+ * @returns {number} The largest number of leading parts that exactly spell
+ *     a name or alias, or 0 when nothing matches.
+ */
+function matchNames(parts, name, aliases) {
+    var names = [name].concat(aliases);
+
+    // Parts come from filenames, so characters such as "+", "(" or "?" must match literally
+    // instead of being interpreted as (possibly invalid) regex syntax
+    var metaCharacters = /[\\\^\$\.\*\+\?\(\)\[\]\{\}\|]/g;
+    var escapeCharacter = function (character) {
+        return '\\' + character;
+    };
+
+    var partRegexes = [];
+    for (var i = 0; i < parts.length; i++) {
+        var literalPart = parts[i].toLowerCase().replace(metaCharacters, escapeCharacter);
+        partRegexes[i] = new RegExp('^' + literalPart + '[. \\-_]*');
+    }
+
+    var cleanRegex = /[. \-_]/g;
+    var longestMatch = 0;
+    for (var n = 0; n < names.length; n++) {
+        // A missing name or alias list ends up as a null/undefined entry; there is nothing to compare
+        if (typeof names[n] !== 'string') {
+            continue;
+        }
+
+        var candidate = names[n].replace(cleanRegex, '').toLowerCase();
+        for (var j = 0; j < partRegexes.length; j++) {
+            if (!partRegexes[j].test(candidate)) {
+                break;
+            }
+
+            candidate = candidate.replace(partRegexes[j], '');
+
+            // Only a fully consumed candidate is an exact match
+            if (candidate.length === 0 && j + 1 > longestMatch) {
+                longestMatch = j + 1;
+            }
+        }
+    }
+
+    return longestMatch;
+}
+
+/**
+ * Strips noise from a filename before it is split into parts.
+ *
+ * Steps, in order: drop an "imageset-...[rarbg]" marker, unwrap [...], (...)
+ * and {...}, remove every block-listed token (container names, resolutions,
+ * frame rates, ...) that stands on its own between separators, drop a
+ * leading "<name>.com" suffix, and trim non-alphanumeric characters from
+ * both ends.
+ *
+ * @param {string} name - Raw filename.
+ * @returns {string} The cleaned filename.
+ */
+function cleanFilename(name) {
+    name = name
+        // remove imageset-...[rarbg]
+        .replace(/imageset-[\w\d]+\[rarbg]/i, '')
+        // replace [...] with just ...
+        .replace(/\[(.*?)]/g, '$1')
+        // replace (...) with just ...
+        .replace(/\((.*?)\)/g, '$1')
+        // replace {...} with just ...
+        .replace(/{(.*?)}/g, '$1')
+    ;
+
+    var blockList = [
+        'mp4',
+        'mov',
+        'xxx',
+        '4k',
+        '4096x2160',
+        '3840x2160',
+        '2160p',
+        '1080p',
+        '1920x1080',
+        '60fps',
+        '30fps',
+        'repack',
+        'ktr',
+    ];
+    // A token is only removed when it is delimited on both sides; the delimiters themselves are kept
+    var regExp = new RegExp('(_|[^\\w\\d]|^)(' + blockList.join('|') + ')(_|[^\\w\\d]|$)', 'i');
+    while (regExp.test(name)) {
+        name = name.replace(regExp, '$1$3');
+    }
+
+    // If name starts with <...>.com remove the .com (sometimes names include studio name as site/domain).
+    // "com" must be a whole part: without the trailing delimiter "studio.community" would lose ".com" too.
+    name = name.replace(/^([\w\d-]+?)\.com(_|[^\w\d]|$)/, '$1$2');
+
+    name = name
+        // Remove everything except letters and digits at the start
+        .replace(/^(_|[^\w\d])+/, '')
+        // Remove everything except letters and digits at the end
+        .replace(/(_|[^\w\d])+$/, '')
+    ;
+
+    return name;
+}
+
+/**
+ * Tries to recognise a studio at the start of `parts`.
+ *
+ * Studios whose name or aliases include a two-character fragment of the
+ * first part are fetched through the global `gql` client and compared with
+ * matchNames(). The studio covering the most leading parts wins (the first
+ * one on a tie) and its id is stored in `result.studio_id`.
+ *
+ * @param {string[]} parts - Remaining filename parts; must not be empty.
+ * @param {Object} result - Parse result; `studio_id` is set on a match.
+ * @returns {number} Number of leading parts covered by the studio name, or
+ *     0 when no studio matched.
+ */
+function matchStudio(parts, result) {
+    var query = "\
+query findStudios($studio_filter: StudioFilterType, $filter: FindFilterType!) {\
+ findStudios(studio_filter: $studio_filter, filter: $filter) {\
+ studios {\
+ id\
+ name\
+ aliases\
+ }\
+ }\
+}";
+
+    // Parts starting with 'a' are searched by their 2nd and 3rd character, all others by their first two.
+    // The former retry for 'a' parts repeated this exact query, so a single request gives the same result.
+    var firstPart = parts[0];
+    var searchTerm = firstPart.substring(0, 1) === 'a'
+        ? firstPart.substring(1, 3)
+        : firstPart.substring(0, 2);
+
+    var variables = {
+        filter: {
+            per_page: -1,
+        },
+        studio_filter: {
+            name: {
+                modifier: "INCLUDES",
+                value: searchTerm
+            },
+            OR: {
+                aliases: {
+                    modifier: "INCLUDES",
+                    value: searchTerm
+                }
+            }
+        }
+    };
+
+    var queryResult = gql.Do(query, variables);
+    var studios = queryResult.findStudios.studios;
+
+    // Prefer the studio that covers the most parts so "Foo Bar" beats "Foo" for "foo.bar...."
+    var bestMatch = 0;
+    for (var i = 0; i < studios.length; i++) {
+        var studio = studios[i];
+        var matchingParts = matchNames(parts, studio.name, studio.aliases);
+        if (matchingParts > bestMatch) {
+            bestMatch = matchingParts;
+            result.studio_id = studio.id;
+        }
+    }
+
+    return bestMatch;
+}
+
+/**
+ * Tries to read a date from the first three parts, in year, month, day order.
+ *
+ * The year may have two or four digits (two-digit years are taken as 20xx);
+ * month and day must have exactly two digits. Only years 2000-2100 and days
+ * that exist in the given month are accepted.
+ *
+ * @param {string[]} parts - Remaining filename parts.
+ * @param {Object} result - Parse result; `date` is set to "YYYY-MM-DD" on a match.
+ * @returns {number} 3 when a date was recognised, otherwise 0.
+ */
+function matchDate(parts, result) {
+    if (
+        parts.length < 3 ||
+        !/^(\d{2}|\d{4})$/.test(parts[0]) ||
+        !/^\d{2}$/.test(parts[1]) ||
+        !/^\d{2}$/.test(parts[2])
+    ) {
+        return 0;
+    }
+
+    var year = parseInt(parts[0], 10);
+    var month = parseInt(parts[1], 10);
+    var day = parseInt(parts[2], 10);
+
+    if (year < 100) {
+        year += 2000;
+    }
+
+    if (
+        year < 2000 || year > 2100 ||
+        month < 1 || month > 12 ||
+        day < 1
+    ) {
+        return 0;
+    }
+
+    // Reject days that do not exist in the month (e.g. 02.31) instead of storing an impossible date
+    var isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
+    var daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
+    if (day > daysInMonth[month - 1]) {
+        return 0;
+    }
+
+    result.date = year + "-" + (month < 10 ? "0" + month : month) + "-" + (day < 10 ? "0" + day : day);
+
+    return 3;
+}
+
+/**
+ * Tries to recognise one or more performers at the start of `parts`.
+ *
+ * For the current first part, performers whose name or aliases include its
+ * first two characters are fetched through the global `gql` client and
+ * compared with matchNames(); aliases are split on ',' and ';'. All
+ * performers sharing the longest match are added. A part for which the
+ * search returns nothing is stepped over, and "and" / "&" directly after a
+ * performer is consumed as a separator. Matching stops at the first part
+ * that has candidates but no exact match.
+ *
+ * `parts` itself is never modified. The returned count runs from the start
+ * of `parts` up to the last matched performer (including stepped-over parts
+ * in front of it and separators behind it), so `parts.slice(count)` is what
+ * remains for later matchers.
+ *
+ * @param {string[]} parts - Remaining filename parts.
+ * @param {Object} result - Parse result; `performer_ids` is reset to an
+ *     array and filled with the ids of matched performers.
+ * @returns {number} Number of leading parts consumed, or 0 when no
+ *     performer matched.
+ */
+function matchPerformers(parts, result) {
+    var query = "\
+query findPerformers($performer_filter: PerformerFilterType, $filter: FindFilterType!) {\
+ findPerformers(performer_filter: $performer_filter, filter: $filter) {\
+ performers {\
+ id\
+ name\
+ aliases\
+ }\
+ }\
+}"
+    var variables = {
+        filter: {
+            per_page: -1
+        },
+        performer_filter: {
+            name: {
+                modifier: "INCLUDES"
+            },
+            OR: {
+                aliases: {
+                    modifier: "INCLUDES"
+                }
+            }
+        }
+    };
+
+    // Work on a copy: shifting the caller's array would silently drop parts it still needs for the title
+    var remaining = parts.slice();
+    // Parts walked past so far, whether stepped over, matched or used as separator
+    var consumed = 0;
+    // Value of `consumed` right after the last matched performer; this is what the caller may slice off
+    var totalMatchingParts = 0;
+    result.performer_ids = [];
+
+    while (remaining.length > 0) {
+        variables.performer_filter.name.value = variables.performer_filter.OR.aliases.value = remaining[0].substring(0, 2);
+
+        var queryResult = gql.Do(query, variables);
+        var performers = queryResult.findPerformers.performers;
+        if (!performers.length) {
+            // No candidates at all: step over this part. It only counts if a later performer matches.
+            remaining.shift();
+            consumed += 1;
+            continue;
+        }
+
+        var maxMatchLength = 0;
+        var matches = [];
+        for (var i = 0; i < performers.length; i++) {
+            var performer = performers[i];
+            var aliases = performer.aliases ? performer.aliases.split(/\s*[,;]+\s*/) : [];
+            var matchingParts = matchNames(remaining, performer.name, aliases);
+            if (matchingParts === 0) {
+                continue;
+            }
+
+            if (matchingParts > maxMatchLength) {
+                maxMatchLength = matchingParts;
+                matches = [performer.id];
+            } else if (matchingParts === maxMatchLength) {
+                // Equally long match: keep every performer it could be
+                matches.push(performer.id);
+            }
+        }
+
+        if (maxMatchLength === 0) {
+            break;
+        }
+
+        result.performer_ids = result.performer_ids.concat(matches);
+
+        consumed += maxMatchLength;
+        remaining = remaining.slice(maxMatchLength);
+        while (remaining.length > 0 && (remaining[0].toLowerCase() === 'and' || remaining[0] === '&')) {
+            remaining.shift();
+            consumed += 1;
+        }
+
+        totalMatchingParts = consumed;
+    }
+
+    return totalMatchingParts;
+}
+
+/**
+ * Splits a cleaned filename into parts and runs the studio, date and
+ * performer matchers over them, in that order.
+ *
+ * Each matcher reports how many leading parts it consumed; those parts are
+ * removed before the next matcher runs. When a whole pass matches nothing,
+ * the first part is dropped and the pass is repeated, for at most three
+ * passes. If anything matched, the parts left over become the title, with a
+ * space inserted in front of every capital letter that follows a
+ * non-whitespace character.
+ *
+ * @param {string} name - Cleaned filename.
+ * @returns {Object} Parse result with whatever fields the matchers set,
+ *     plus `title` when parts were left over after a match.
+ */
+function parseFilename(name) {
+    var remaining = name.split(/[. \-_,]+/);
+    var matchers = [matchStudio, matchDate, matchPerformers];
+
+    var result = {};
+    var matchedAny = false;
+    var attempt = 0;
+    while (attempt < 3 && !matchedAny && remaining.length) {
+        for (var m = 0; m < matchers.length && remaining.length > 0; m++) {
+            var consumed = matchers[m](remaining, result);
+            if (consumed > 0) {
+                matchedAny = true;
+                remaining = remaining.slice(consumed);
+            }
+        }
+
+        // Nothing recognised in this pass: the leading part may be noise, so retry without it
+        if (!matchedAny) {
+            remaining.shift();
+        }
+
+        attempt++;
+    }
+
+    if (!matchedAny || remaining.length === 0) {
+        return result;
+    }
+
+    var title = remaining.join(' ');
+    // One replace pass is not enough: in 'FooABar' the 'A' is used up by the first hit ('oA'), so the
+    // following 'B' is only separated on the next pass. Repeat until nothing is left to split.
+    var missingSpace = /[^\s][A-Z]/;
+    while (missingSpace.test(title)) {
+        title = title.replace(/([^\s])([A-Z])/g, '$1 $2');
+    }
+    result.title = title.trim();
+
+    return result;
+}
+
+// Top-level call: runs the hook handler as soon as the script is evaluated
+main();
diff --git a/plugins/filenameParser/filenameParser.yml b/plugins/filenameParser/filenameParser.yml
new file mode 100644
index 0000000..2d4e47c
--- /dev/null
+++ b/plugins/filenameParser/filenameParser.yml
@@ -0,0 +1,13 @@
+name: Filename parser
+description: Parses filename into studio, date, performers and title
+url:
+version: 0.1
+exec:
+ - filenameParser.js
+interface: js
+hooks:
+ - name: Prepopulates data based on filename
+ description:
+ triggeredBy:
+ - Scene.Create.Post
+ - Gallery.Create.Post