Add a ComicRack Metadata Extractor for cbz based Stash Galleries

This commit is contained in:
Christian Rupp 2023-04-22 11:36:29 +02:00
parent 4763938207
commit 00f9129b32
5 changed files with 167 additions and 0 deletions

View File

@ -0,0 +1,12 @@
# Comic Archive Metadata Extractor
Follows the ComicRack standard for storing comic metadata in .cbz files: the plugin reads the ComicInfo.xml file inside the archive and writes the result into the Stash gallery.
Use the `ImportList` in config.yml to define which XML element names are mapped to which gallery fields.
Currently, the `Bookmark` and `Type` attributes of the page entries are also recognized and imported as gallery chapters.
With the current configuration, any gallery value that is also set in ComicInfo.xml will be overwritten by the value from the file whenever the gallery is updated. To change this behavior, adjust the hook condition in the plugin's yml file.
### Installation
Move the `comicInfoExtractor` directory into Stash's plugins directory, reload plugins.
### Tasks
* Load all cbz Metadata - Fetch metadata for all galleries.
* Post update hook - Fetch metadata for that gallery

View File

@ -0,0 +1,124 @@
import stashapi.log as log
from stashapi.stashapp import StashInterface
import stashapi.marker_parse as mp
import yaml
import json
import os
import sys
import xml.etree.ElementTree as ET
import zipfile
# Number of galleries requested per page when scanning all galleries
per_page = 100
def _read_comic_info(archive_path):
    """Return the parsed ComicInfo.xml root element of a zip archive, or None if it has none."""
    with zipfile.ZipFile(archive_path, 'r') as archive:
        for member in archive.namelist():
            # Match case-insensitively, but read the entry under its real name;
            # reading a hard-coded "ComicInfo.xml" fails for e.g. "comicinfo.xml".
            if member.lower() == "comicinfo.xml":
                return ET.fromstring(archive.read(member))
    return None


def _collect_chapters(comicInfo):
    """Build chapter dicts from the Bookmark/Type attributes of the <Pages> children."""
    chapters = []
    pages = comicInfo.find("Pages")
    # Compare against None: an Element without children is falsy
    if pages is None:
        return chapters
    for page in pages:
        try:
            # ComicInfo image numbers are shifted by one for the chapter index
            image_index = int(page.get("Image")) + 1
        except (TypeError, ValueError):
            # Missing or non-numeric Image attribute: no usable chapter position
            continue
        for attribute in ("Bookmark", "Type"):
            title = page.get(attribute)
            if title:
                chapters.append({"image_index": image_index, "title": title})
    return chapters


def processGallery(g):
    """Import the ComicInfo.xml metadata of a gallery's archive into Stash.

    The first file of the gallery is opened as a zip archive. Every XML
    element listed in the module-level ``ImportList`` is mapped to the
    configured gallery field, page bookmarks/types are added as chapters
    (unless an identical chapter already exists) and the gallery is updated.

    Args:
        g: Gallery dict; this function reads ``g["id"]``, ``g["files"]``
            (each entry providing a ``"path"``) and ``g["chapters"]``
            (entries with ``"title"`` and ``"image_index"``).

    Returns:
        None. Galleries without files, without a readable archive or
        without a ComicInfo.xml are skipped with a log message.
    """
    # Read ComicInfo.xml file
    if len(g["files"]) == 0:
        log.info(g["id"] + " is not an archive. No scanning for Comic Metadata.")
        return
    archive_path = g["files"][0]["path"]
    try:
        comicInfo = _read_comic_info(archive_path)
    except (zipfile.BadZipFile, ET.ParseError) as exc:
        # One broken archive must not abort a scan over all galleries
        log.error(archive_path + " could not be read: " + str(exc))
        return
    # Explicit None check: a root element without children is falsy
    if comicInfo is None:
        log.info(archive_path + " does not contain a ComicInfo.xml file. No scan will be triggered.")
        return

    # Adjust names for fields that are submitted as ids; done on a local
    # mapping so the shared ImportList configuration is left untouched.
    id_fields = {"tags": "tag_ids", "performers": "performer_ids", "studio": "studio_id"}

    # Get metadata from ComicInfo.xml
    galleryData = {"id": g["id"]}
    for xml_name, stash_name in ImportList.items():
        element = comicInfo.find(xml_name)
        # Skip absent elements and empty ones such as <Genre/>
        if element is None or element.text is None:
            continue
        galleryData[id_fields.get(stash_name, stash_name)] = element.text
    chapterData = _collect_chapters(comicInfo)

    # Adjust the retrieved data if necessary
    for field, raw in list(galleryData.items()):
        if field in ("tag_ids", "performer_ids"):
            # Comma separated names; blank entries (e.g. trailing comma) are dropped
            names = [name.strip() for name in raw.split(",") if name.strip()]
            finder = stash.find_tag if field == "tag_ids" else stash.find_performer
            galleryData[field] = [finder(name, create=True)["id"] for name in names]
        elif field == "studio_id":
            galleryData[field] = stash.find_studio(raw, create=True)["id"]
        elif field == "date":
            # Only a year is available, so use the first day of that year
            galleryData[field] = raw + "-01-01"
        elif field == "organized":
            # The text comes from inside the archive and must never be passed
            # to eval(); interpret it as a plain boolean flag instead.
            galleryData[field] = raw.strip().lower() in ("true", "1", "yes")
        elif field == "rating100":
            galleryData[field] = int(raw)

    # Add chapter if it does not exist and finally update gallery metadata
    for chapter in chapterData:
        already_present = any(
            existing["title"] == chapter["title"]
            and existing["image_index"] == chapter["image_index"]
            for existing in g["chapters"]
        )
        if not already_present:
            stash.create_gallery_chapter({"title": chapter["title"], "image_index": chapter["image_index"], "gallery_id": g["id"]})
    stash.update_gallery(galleryData)
def processAll():
    """Run processGallery on every gallery returned by Stash, page by page.

    The gallery count is queried first and then all pages of ``per_page``
    galleries are fetched, including a final page that is only partly filled.
    """
    log.info('Getting gallery count')
    count = stash.find_galleries(f={}, filter={"per_page": 1}, get_count=True)[0]
    log.info(str(count) + ' galleries to scan.')
    # Ceiling division: truncating count/per_page skipped the last partial
    # page and processed nothing at all when count < per_page.
    total_pages = (count + per_page - 1) // per_page
    for page in range(1, total_pages + 1):
        log.info('processing ' + str(min(page * per_page, count)) + ' - ' + str(count))
        galleries = stash.find_galleries(f={}, filter={"page": page, "per_page": per_page})
        for g in galleries:
            processGallery(g)
# Start of the program: the plugin input is read as JSON from stdin
json_input = json.loads(sys.stdin.read())
FRAGMENT_SERVER = json_input["server_connection"]
stash = StashInterface(FRAGMENT_SERVER)

# Load config.yml from the directory this script lives in
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.yml"), "r") as f:
    try:
        config = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        log.error("Could not load config.yml: " + str(exc))
        sys.exit(1)

# An empty config.yml is loaded as None, so check the type as well as the key
if not isinstance(config, dict) or not isinstance(config.get("ImportList"), dict):
    log.error("'ImportList' is not defined in config.yml, but is needed for this script to proceed")
    sys.exit(1)
ImportList = config["ImportList"]

# Dispatch: a task passes a "mode" argument, a hook passes a "hookContext"
if 'mode' in json_input['args']:
    PLUGIN_ARGS = json_input['args']["mode"]
    if 'process' in PLUGIN_ARGS:
        processAll()
elif 'hookContext' in json_input['args']:
    # Named gallery_id so the builtin id() is not shadowed
    gallery_id = json_input['args']['hookContext']['id']
    gallery = stash.find_gallery(gallery_id)
    if gallery is None:
        log.error("Gallery " + str(gallery_id) + " could not be found.")
    else:
        processGallery(gallery)

View File

@ -0,0 +1,18 @@
name: Comic Info Extractor
description: Extract the metadata from cbz with the Comicrack standard (ComicInfo.xml)
version: 0.1
url: https://github.com/stashapp/CommunityScripts/
exec:
- "/usr/bin/python3"
- "{pluginDir}/comicInfoExtractor.py"
interface: raw
hooks:
- name: Add Metadata to Gallery
description: Update Metadata for Gallery by evaluating the ComicInfo.xml.
triggeredBy:
- Gallery.Update.Post
tasks:
- name: Load all cbz Metadata
description: Get Metadata for all Galleries by looking for ComicInfo.xml files in the Archive.
defaultArgs:
mode: process

View File

@ -0,0 +1,11 @@
#ImportList is a dictionary
#that maps an XML element name from ComicInfo.xml to the corresponding gallery field in Stash (using the GraphQL naming).
#Fields that refer to other Stash objects (tags, studio, performers) are resolved by name and created if necessary.
#Fields that can contain multiple values (tags, performers) are expected as a comma-separated string, e.g.
#<Genre>Outdoor, Blonde</Genre>
ImportList:
Genre: tags
Title: title
Writer: studio
Year: date
Summary: details

View File

@ -0,0 +1,2 @@
stashapp-tools
pyyaml