From 00f9129b324600ceaf12f94ea74a40cc48b19da0 Mon Sep 17 00:00:00 2001 From: Christian Rupp Date: Sat, 22 Apr 2023 11:36:29 +0200 Subject: [PATCH] Add a ComicRack Metadata Extractor for cbz based Stash Galleries --- plugins/comicInfoExtractor/README.md | 12 ++ .../comicInfoExtractor/comicInfoExtractor.py | 124 ++++++++++++++++++ .../comicInfoExtractor/comicInfoExtractor.yml | 18 +++ plugins/comicInfoExtractor/config.yml | 11 ++ plugins/comicInfoExtractor/requirements.txt | 2 + 5 files changed, 167 insertions(+) create mode 100644 plugins/comicInfoExtractor/README.md create mode 100644 plugins/comicInfoExtractor/comicInfoExtractor.py create mode 100644 plugins/comicInfoExtractor/comicInfoExtractor.yml create mode 100644 plugins/comicInfoExtractor/config.yml create mode 100644 plugins/comicInfoExtractor/requirements.txt diff --git a/plugins/comicInfoExtractor/README.md b/plugins/comicInfoExtractor/README.md new file mode 100644 index 0000000..759952c --- /dev/null +++ b/plugins/comicInfoExtractor/README.md @@ -0,0 +1,12 @@ +# Comic Archive Metadata Extractor +Follows the ComicRack standard for saving comic metadata in .cbz files by reading the ComicInfo.xml file in the archive and writing the result into the stash gallery. +Use the ImportList in config.yml to define which XML element names are mapped to which gallery fields. +Currently, Bookmark and Type are recognized as chapters that are imported as well. +The current configuration will overwrite any value you try to set that is already set in the ComicInfo.xml. To change that, change the hook condition in the yml. + +### Installation +Move the `comicInfoExtractor` directory into Stash's plugins directory, reload plugins. + +### Tasks +* Load all cbz Metadata - Fetch metadata for all galleries. 
+* Post update hook - Fetch metadata for that gallery
diff --git a/plugins/comicInfoExtractor/comicInfoExtractor.py b/plugins/comicInfoExtractor/comicInfoExtractor.py
new file mode 100644
index 0000000..1fda7f1
--- /dev/null
+++ b/plugins/comicInfoExtractor/comicInfoExtractor.py
@@ -0,0 +1,149 @@
+import stashapi.log as log
+from stashapi.stashapp import StashInterface
+import stashapi.marker_parse as mp
+import yaml
+import json
+import os
+import sys
+import xml.etree.ElementTree as ET
+import zipfile
+
+per_page = 100
+
+#Stash expects ids for related objects, so the names used in config.yml
+#are translated to the matching gallery update fields.
+ID_FIELDS = {"tags": "tag_ids", "performers": "performer_ids", "studio": "studio_id"}
+
+def _readComicInfo(path):
+    """Return the parsed ComicInfo.xml root of the archive at path, or None.
+
+    The member is matched case-insensitively and read under the name it is
+    actually stored with. Unreadable archives and malformed XML are logged
+    and reported as None, so one broken file does not abort a full scan.
+    """
+    try:
+        with zipfile.ZipFile(path, 'r') as archive:
+            for archivefile in archive.namelist():
+                if archivefile.lower() == "comicinfo.xml":
+                    return ET.fromstring(archive.read(archivefile))
+    except (OSError, zipfile.BadZipFile, ET.ParseError) as exc:
+        log.error("Could not read Comic Metadata from " + path + ": " + str(exc))
+    return None
+
+def _splitNames(text):
+    """Split a comma separated string into stripped, non-empty names."""
+    return [name.strip() for name in text.split(",") if name.strip()]
+
+def _convertValue(field, text):
+    """Turn the text of a ComicInfo.xml element into the value stored for field."""
+    if field == "tag_ids":
+        return [stash.find_tag(name, create=True)["id"] for name in _splitNames(text)]
+    if field == "performer_ids":
+        return [stash.find_performer(name, create=True)["id"] for name in _splitNames(text)]
+    if field == "studio_id":
+        return stash.find_studio(text, create=True)["id"]
+    if field == "date":
+        #The shipped config maps Year to date, so the value is padded to a full date
+        return text + "-01-01"
+    if field == "organized":
+        #SECURITY: this used to eval() text taken from the archive, which lets a
+        #crafted cbz run arbitrary code. Only plain boolean words are accepted now.
+        return text.strip().lower() in ("true", "1", "yes")
+    if field == "rating100":
+        return int(text)
+    return text
+
+def processGallery(g):
+    """Import the ComicInfo.xml metadata of gallery g into Stash.
+
+    g is a gallery dict as returned by stash.find_gallery / stash.find_galleries.
+    Galleries without files, or whose first file has no readable ComicInfo.xml,
+    are skipped with a log message. Mapped fields are written with
+    stash.update_gallery, and pages carrying a Bookmark or Type attribute
+    become gallery chapters unless an identical chapter already exists.
+    """
+    #Read ComicInfo.xml File
+    if not g.get("files"):
+        log.info(g["id"] + " is not an archive. No scanning for Comic Metadata.")
+        return
+    path = g["files"][0]["path"]
+    comicInfo = _readComicInfo(path)
+    #Compare with None: an Element without child elements is falsy
+    if comicInfo is None:
+        log.info(path + " does not contain a ComicInfo.xml file. No scan will be triggered.")
+        return
+
+    #Get Metadata from ComicInfo.xml
+    galleryData = {"id": g["id"]}
+    for item, target in ImportList.items():
+        value = comicInfo.find(item)
+        #Empty elements have no text, there is nothing to import from them
+        if value is None or not (value.text or "").strip():
+            continue
+        field = ID_FIELDS.get(target, target)
+        galleryData[field] = _convertValue(field, value.text)
+    chapterData = []
+    pageData = comicInfo.find("Pages")
+    if pageData is not None:
+        for page in pageData:
+            image = page.get("Image")
+            #A page without an Image number cannot be turned into a chapter
+            if image is None:
+                continue
+            for attribute in ("Bookmark", "Type"):
+                if page.get(attribute):
+                    chapterData.append({"image_index": int(image) + 1, "title": page.get(attribute)})
+
+    #Add Chapter if it does not exist and finally update Gallery Metadata
+    existing = {(c["title"], c["image_index"]) for c in g.get("chapters") or []}
+    for chapter in chapterData:
+        key = (chapter["title"], chapter["image_index"])
+        if key in existing:
+            continue
+        existing.add(key)
+        stash.create_gallery_chapter({"title": chapter["title"], "image_index": chapter["image_index"], "gallery_id": g["id"]})
+    stash.update_gallery(galleryData)
+
+
+
+def processAll():
+    """Run processGallery over every gallery known to Stash, one page at a time."""
+    log.info('Getting gallery count')
+    count=stash.find_galleries(f={},filter={"per_page": 1},get_count=True)[0]
+    log.info(str(count)+' galleries to scan.')
+    #Round up so the last, partially filled page is scanned as well
+    pages = (count + per_page - 1) // per_page
+    for r in range(1, pages + 1):
+        log.info('processing '+str(min(r*per_page, count))+ ' - '+str(count))
+        galleries=stash.find_galleries(f={},filter={"page":r,"per_page": per_page})
+        for g in galleries:
+            processGallery(g)
+
+
+
+#Start of the Program
+json_input = json.loads(sys.stdin.read())
+FRAGMENT_SERVER = json_input["server_connection"]
+stash = StashInterface(FRAGMENT_SERVER)
+
+#Load Config
+with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.yml"), "r") as f:
+    try:
+        config = yaml.safe_load(f)
+    except yaml.YAMLError as exc:
+        log.error("Could not load config.yml: " + str(exc))
+        sys.exit(1)
+#An empty config.yml loads as None, so check the type before looking up the key
+if not isinstance(config, dict) or "ImportList" not in config:
+    log.error("'ImportList' is not defined in config.yml, but is needed for this script to proceed")
+    sys.exit(1)
+ImportList=config["ImportList"] or {}
+
+if 'mode' in json_input['args']:
+    PLUGIN_ARGS = json_input['args']["mode"]
+    if 'process' in PLUGIN_ARGS:
+        processAll()
+elif 'hookContext' in json_input['args']:
+    gallery_id=json_input['args']['hookContext']['id']
+    gallery=stash.find_gallery(gallery_id)
+    processGallery(gallery)
diff --git a/plugins/comicInfoExtractor/comicInfoExtractor.yml b/plugins/comicInfoExtractor/comicInfoExtractor.yml
new file mode 100644
index 0000000..fc10bf3
--- /dev/null
+++ b/plugins/comicInfoExtractor/comicInfoExtractor.yml
@@ -0,0 +1,18 @@
+name: Comic Info Extractor
+description: Extract the metadata from cbz with the Comicrack standard (ComicInfo.xml)
+version: 0.1
+url: https://github.com/stashapp/CommunityScripts/
+exec:
+  - "/usr/bin/python3"
+  - "{pluginDir}/comicInfoExtractor.py"
+interface: raw
+hooks:
+  - name: Add
Metadata to Gallery + description: Update Metadata for Gallery by evaluating the ComicInfo.xml. + triggeredBy: + - Gallery.Update.Post +tasks: + - name: Load all cbz Metadata + description: Get Metadata for all Galleries by looking for ComicInfo.xml files in the Archive. + defaultArgs: + mode: process diff --git a/plugins/comicInfoExtractor/config.yml b/plugins/comicInfoExtractor/config.yml new file mode 100644 index 0000000..51c7d1f --- /dev/null +++ b/plugins/comicInfoExtractor/config.yml @@ -0,0 +1,11 @@ +#ImportList is a dictionary +#that maps an XML element name from ComicInfo.xml to the corresponding field in stash (using the graphql naming) +#Fields that refer to different types of media are resolved by name and created if necessary (tags, studio, performers) +#Fields that can contain multiple values (tags, performers) are expected as a comma separated string, like +#Outdoor, Blonde +ImportList: + Genre: tags + Title: title + Writer: studio + Year: date + Summary: details diff --git a/plugins/comicInfoExtractor/requirements.txt b/plugins/comicInfoExtractor/requirements.txt new file mode 100644 index 0000000..4e5ec4c --- /dev/null +++ b/plugins/comicInfoExtractor/requirements.txt @@ -0,0 +1,2 @@ +stashapp-tools +pyyaml