From 00f9129b324600ceaf12f94ea74a40cc48b19da0 Mon Sep 17 00:00:00 2001
From: Christian Rupp <rupp@vonaffenfels.de>
Date: Sat, 22 Apr 2023 11:36:29 +0200
Subject: [PATCH] Add a ComicRack Metadata Extractor for cbz based Stash
 Galleries

---
 plugins/comicInfoExtractor/README.md          |  12 ++
 .../comicInfoExtractor/comicInfoExtractor.py  | 124 ++++++++++++++++++
 .../comicInfoExtractor/comicInfoExtractor.yml |  18 +++
 plugins/comicInfoExtractor/config.yml         |  11 ++
 plugins/comicInfoExtractor/requirements.txt   |   2 +
 5 files changed, 167 insertions(+)
 create mode 100644 plugins/comicInfoExtractor/README.md
 create mode 100644 plugins/comicInfoExtractor/comicInfoExtractor.py
 create mode 100644 plugins/comicInfoExtractor/comicInfoExtractor.yml
 create mode 100644 plugins/comicInfoExtractor/config.yml
 create mode 100644 plugins/comicInfoExtractor/requirements.txt

diff --git a/plugins/comicInfoExtractor/README.md b/plugins/comicInfoExtractor/README.md
new file mode 100644
index 0000000..759952c
--- /dev/null
+++ b/plugins/comicInfoExtractor/README.md
@@ -0,0 +1,12 @@
+# Comic Archive Metadata Extractor
+Follows the ComicRack standard for saving comic metadata in .cbz files: the plugin reads the ComicInfo.xml file in the archive and writes the result into the Stash gallery.
+Use the ImportList in config.yml to define which XML element names are mapped to which gallery fields.
+Currently, Bookmark and Type are recognized as chapters that are imported as well.
+With the current configuration, any value that is set in the ComicInfo.xml overwrites the value you set on the gallery, because the hook runs after every gallery update. To change that, adjust the hook trigger in comicInfoExtractor.yml.
+
+### Installation 
+Move the `comicInfoExtractor` directory into Stash's plugins directory, reload plugins.
+
+### Tasks
+* Load all cbz Metadata - Fetch metadata for all galleries.
+* Post update hook - Fetch metadata for that gallery
diff --git a/plugins/comicInfoExtractor/comicInfoExtractor.py b/plugins/comicInfoExtractor/comicInfoExtractor.py
new file mode 100644
index 0000000..1fda7f1
--- /dev/null
+++ b/plugins/comicInfoExtractor/comicInfoExtractor.py
@@ -0,0 +1,124 @@
+import stashapi.log as log
+from stashapi.stashapp import StashInterface
+import stashapi.marker_parse as mp
+import yaml
+import json
+import os
+import sys
+import xml.etree.ElementTree as ET
+import zipfile
+
+per_page = 100
+
+def processGallery(g):
+    """Import ComicInfo.xml metadata from a gallery's archive into Stash.
+
+    g is a gallery dict as returned by stash.find_gallery/find_galleries; this
+    function reads g["id"], g["files"][0]["path"] and g["chapters"]. Fields
+    named in the global ImportList are copied into the gallery, and every page
+    with a Bookmark or Type attribute becomes a chapter unless it already exists.
+    """
+    if len(g["files"]) == 0:
+        log.info(g["id"] + " is not an archive. No scanning for Comic Metadata.")
+        return
+    #Read ComicInfo.xml File
+    path = g["files"][0]["path"]
+    comicInfo = None
+    try:
+        with zipfile.ZipFile(path, 'r') as archive:
+            for archivefile in archive.namelist():
+                #Match case-insensitively, but read the entry under its stored name
+                if archivefile.lower() == "comicinfo.xml":
+                    comicInfo = ET.fromstring(archive.read(archivefile))
+                    break
+    except (zipfile.BadZipFile, OSError, ET.ParseError) as err:
+        log.error(path + " could not be read as a comic archive: " + str(err))
+        return
+    #Compare with None: an Element without children is falsy
+    if comicInfo is None:
+        log.info(path + " does not contain a ComicInfo.xml file. No scan will be triggered.")
+        return
+
+    #Adjust names for giving ids (local copy, the global ImportList stays untouched)
+    idNames = {"tags": "tag_ids", "performers": "performer_ids", "studio": "studio_id"}
+    fieldMap = {key: idNames.get(target, target) for key, target in ImportList.items()}
+
+    #Get Metadata from ComicInfo.xml, skipping absent or empty elements
+    galleryData = {"id": g["id"]}
+    for item, field in fieldMap.items():
+        value = comicInfo.find(item)
+        if value is not None and value.text and value.text.strip():
+            galleryData[field] = value.text
+    chapterData = []
+    pageData = comicInfo.find("Pages")
+    if pageData is not None:
+        for page in pageData:
+            for attribute in ("Bookmark", "Type"):
+                if page.get(attribute):
+                    chapterData.append({"image_index": int(page.get("Image")) + 1, "title": page.get(attribute)})
+
+    #Adjust the retrieved data if necessary
+    for data in galleryData.keys():
+        if data in ["tag_ids", "performer_ids"]:
+            galleryData[data] = [x.strip() for x in galleryData[data].split(",") if x.strip()]
+        if data == "tag_ids":
+            galleryData[data] = [stash.find_tag(tag, create=True)["id"] for tag in galleryData[data]]
+        if data == "performer_ids":
+            galleryData[data] = [stash.find_performer(performer, create=True)["id"] for performer in galleryData[data]]
+        if data == "studio_id":
+            galleryData[data] = stash.find_studio(galleryData[data], create=True)["id"]
+        if data == "date":
+            galleryData[data] = galleryData[data].strip() + "-01-01"
+        if data == "organized":
+            #Plain string comparison; the previous eval() executed text taken from the archive
+            galleryData[data] = galleryData[data].strip().lower() == "true"
+        if data == "rating100":
+            galleryData[data] = int(galleryData[data])
+
+    #Add Chapter if it does not exist and finally update Gallery Metadata
+    for chapter in chapterData:
+        exists = any(c["title"] == chapter["title"] and c["image_index"] == chapter["image_index"] for c in g["chapters"])
+        if not exists:
+            stash.create_gallery_chapter({"title": chapter["title"], "image_index": chapter["image_index"], "gallery_id": g["id"]})
+    stash.update_gallery(galleryData)
+
+
+
+def processAll():
+    """Run processGallery on every gallery in Stash, fetched one page of per_page at a time."""
+    log.info('Getting gallery count')
+    count = stash.find_galleries(f={}, filter={"per_page": 1}, get_count=True)[0]
+    log.info(str(count) + ' galleries to scan.')
+    #Ceiling division so the last, partially filled page is scanned too
+    for r in range(1, (count + per_page - 1) // per_page + 1):
+        log.info('processing ' + str(r * per_page) + ' - ' + str(count))
+        for g in stash.find_galleries(f={}, filter={"page": r, "per_page": per_page}):
+            processGallery(g)
+
+
+#Start of the Program
+json_input = json.loads(sys.stdin.read())
+FRAGMENT_SERVER = json_input["server_connection"]
+stash = StashInterface(FRAGMENT_SERVER)
+
+#Load Config
+with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.yml"), "r") as f:
+    try:
+        config = yaml.safe_load(f)
+    except yaml.YAMLError as exc:
+        log.error("Could not load config.yml: " + str(exc))
+        sys.exit(1)
+#An empty config.yml loads as None, so check the types before using the mapping
+if not isinstance(config, dict) or not isinstance(config.get("ImportList"), dict):
+    log.error("'ImportList' is not defined in config.yml, but is needed for this script to proceed")
+    sys.exit(1)
+ImportList = config["ImportList"]
+
+if 'mode' in json_input['args']:
+    PLUGIN_ARGS = json_input['args']["mode"]
+    if 'process' in PLUGIN_ARGS:
+        processAll()
+elif 'hookContext' in json_input['args']:
+    gallery_id = json_input['args']['hookContext']['id']
+    gallery = stash.find_gallery(gallery_id)
+    processGallery(gallery)
diff --git a/plugins/comicInfoExtractor/comicInfoExtractor.yml b/plugins/comicInfoExtractor/comicInfoExtractor.yml
new file mode 100644
index 0000000..fc10bf3
--- /dev/null
+++ b/plugins/comicInfoExtractor/comicInfoExtractor.yml
@@ -0,0 +1,18 @@
+name: Comic Info Extractor
+description: Extract the metadata from cbz with the Comicrack standard (ComicInfo.xml)
+version: 0.1
+url: https://github.com/stashapp/CommunityScripts/
+exec:
+  - "/usr/bin/python3"
+  - "{pluginDir}/comicInfoExtractor.py"
+interface: raw
+hooks:
+  - name: Add Metadata to Gallery
+    description: Update Metadata for Gallery by evaluating the ComicInfo.xml.
+    triggeredBy:
+      - Gallery.Update.Post
+tasks:
+  - name: Load all cbz Metadata
+    description: Get Metadata for all Galleries by looking for ComicInfo.xml files in the Archive.
+    defaultArgs:
+      mode: process
diff --git a/plugins/comicInfoExtractor/config.yml b/plugins/comicInfoExtractor/config.yml
new file mode 100644
index 0000000..51c7d1f
--- /dev/null
+++ b/plugins/comicInfoExtractor/config.yml
@@ -0,0 +1,11 @@
+#ImportList is a dictionary
+#that maps an XML element name from ComicInfo.xml to the corresponding gallery field in stash (using the graphql naming)
+#Fields that refer to different types of media are resolved by name and created if necessary (tags, studio, performers)
+#Fields that can contain multiple values (tags, performers) will be expected as a comma separated string, like
+#<Genre>Outdoor, Blonde</Genre>
+ImportList:
+  Genre: tags
+  Title: title
+  Writer: studio
+  Year: date
+  Summary: details
diff --git a/plugins/comicInfoExtractor/requirements.txt b/plugins/comicInfoExtractor/requirements.txt
new file mode 100644
index 0000000..4e5ec4c
--- /dev/null
+++ b/plugins/comicInfoExtractor/requirements.txt
@@ -0,0 +1,2 @@
+stashapp-tools
+pyyaml