Adding requested option to create collections from ALL scene tags (#47)

* Adding requested option to create collections from ALL scene tags
2026-05-03 02:01:29 -05:00 · 2022-07-13 19:17:33 -04:00
parent 91b7bc5cf8
commit d0c6a34914
6 changed files with 365 additions and 2 deletions
--- a/party/StashPlexAgent.bundle/Contents/Code/init.py
+++ b/party/StashPlexAgent.bundle/Contents/Code/init.py
@@ -45,7 +45,7 @@ class StashPlexAgent(Agent.Movies):
    name = 'Stash Plex Agent'
    languages = [Locale.Language.English]
    primary_provider = True
-    accepts_from = ['com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.phoenixadult', 'com.plexapp.agents.data18-phoenix', 'com.plexapp.agents.adultdvdempire']
+    accepts_from = ['com.plexapp.agents.localmedia', 'com.plexapp.agents.xbmcnfo', 'com.plexapp.agents.phoenixadult', 'com.plexapp.agents.data18-phoenix', 'com.plexapp.agents.adultdvdempire']

    def search(self, results, media, lang):
        DEBUG = Prefs['debug']
@@ -222,7 +222,14 @@ class StashPlexAgent(Agent.Movies):
                    for genre in genres:
                        if not genre["id"] in ignore_tags and "ambiguous" not in genre["name"].lower():
                            metadata.genres.add(genre["name"])
-                            if genre["id"] in collection_tags:
+                            if not Prefs["CreateAllTagCollectionTags"] and genre["id"] in collection_tags:
+                                try:
+                                    if DEBUG:
+                                        Log("Adding Tag Collection: " + genre["name"])
+                                    metadata.collections.add(genre["name"])
+                                except:
+                                    pass
+                            elif Prefs["CreateAllTagCollectionTags"] and genre["id"] not in collection_tags:
                                try:
                                    if DEBUG:
                                        Log("Adding Tag Collection: " + genre["name"])
--- a/party/StashPlexAgent.bundle/Contents/DefaultPrefs.json
+++ b/party/StashPlexAgent.bundle/Contents/DefaultPrefs.json
@@ -53,6 +53,12 @@
        "type": "text",
        "default": "0"
    },
+    {
+        "id": "CreateAllTagCollectionTags",
+        "label": "Create Collections from ALL Tags (If TRUE then option above will exclude instead of include tags)",
+        "type": "bool",
+        "default": false
+    },
    {
        "id": "CreateSiteCollectionTags",
        "label": "Auto create Plex Collection tags for scene Site",
--- a/plugins/phashDuplicateTagger/README.md
+++ b/plugins/phashDuplicateTagger/README.md
@@ -0,0 +1,27 @@
+This plugin has four functions:
+
+1) It will create the tags used for review, [Dupe: Keep] and [Dupe: Remove], plus [Dupe: Ignore] (scenes carrying it are skipped when duplicates are tagged)
+
+2) It will auto assign those tags to scenes with matching PHashes (exact, high or medium match, depending on the task) based on (and in this order):
+      a) Keep the larger resolution
+      b) Keep the higher bitrate (if same resolution)
+      c) Keep the larger file size (if same bitrate)
+      d) Keep the older scene (if same file size.)
+          (Older scene is kept since it's more likely to have been organized if they're the same file)
+   With this order of precedence one scene is determined to be the "Keeper" and the rest are assigned for Removal
+   When the scenes are tagged, the titles are also modified to add '[Dupe: {SceneID}K/R]'
+   The SceneID put into the title is the one determined to be the "Keeper", and is put into all matching scenes
+   This way you can sort by title after matching and verify the scenes are actually the same thing, and the Keeper
+   will be the first scene in the set. (Since you'll have [Dupe: 72412K], [Dupe: 72412R], [Dupe: 72412R] as an example.)
+
+   What I have personally done is essentially set a filter on the two Dupe tags, then sort by title.  Then I spot check the
+   'K' scenes versus the 'R' scenes.  If everything looks good then I just drop [Dupe: Keep] out of the filter (leaving only
+   [Dupe: Remove]), Select All and delete the files.
+
+3) It will remove the [Dupe: Keep] and [Dupe: Remove] tags from Stash
+4) It will remove the [Dupe: ######K/R] tags from the titles
+  (These last two options are obviously for after you have removed the scenes you don't want any longer)
+
+PS. This script is essentially a hack and slash job on scripts from Belley and WithoutPants, thanks guys!
+
+PPS. The original plugin has been rewritten by stg-annon, and now requires his stashapp-tools module (pip install stashapp-tools)
+     (Yes, this works with the Stash Docker)
--- a/plugins/phashDuplicateTagger/phashDuplicateTagger.py
+++ b/plugins/phashDuplicateTagger/phashDuplicateTagger.py
@@ -0,0 +1,269 @@
+import json
+import sys
+import re
+import datetime as dt
+
+try:
+    import stashapi.log as log
+    from stashapi.tools import human_bytes
+    from stashapi.types import PhashDistance
+    from stashapi.stashapp import StashInterface
+except ModuleNotFoundError:
+    # StashInterface is used at import time further down, so nothing can work
+    # without stashapi: report the missing dependency and stop here instead of
+    # failing a few lines later with an unrelated NameError.
+    print("You need to install the stashapi module. (pip install stashapp-tools)",
+     file=sys.stderr)
+    sys.exit(1)
+
+
+# Ordered list of comparison criteria. StashScene.compare() walks this list and
+# calls the matching compare_<name> method; the first criterion that picks a
+# winner decides. 'encoding' has a compare method but is left out of the list.
+PRIORITY = ['resolution', 'bitrate', 'size', 'age'] # 'encoding'
+# Codec names ordered from most to least preferred: StashScene.compare_encoding
+# favours the scene whose codec has the lower index in this list.
+CODEC_PRIORITY = ['H265','HEVC','H264','MPEG4']
+
+# The plugin input is read as a single JSON document from stdin at import time.
+FRAGMENT = json.loads(sys.stdin.read())
+# Task selector taken from the plugin arguments; main() branches on it.
+MODE = FRAGMENT['args']['mode']
+# Client built from the connection details supplied in the plugin input.
+stash = StashInterface(FRAGMENT["server_connection"])
+
+# Scene fields requested when searching for duplicates; these are the fields
+# read by StashScene.__init__ and process_duplicates().
+SLIM_SCENE_FRAGMENT = """
+	id
+	title
+	path
+	file_mod_time
+	tags { id }
+	file {
+		size
+		height
+		bitrate
+		video_codec
+	}
+"""
+
+def main():
+	"""Run the plugin task selected by the module-level ``MODE``.
+
+	Supported modes:
+	  create     - make sure the three [Dupe: ...] tags exist
+	  remove     - destroy the [Dupe: Keep] and [Dupe: Remove] tags
+	  tagexact / taghigh / tagmid
+	             - find duplicate scenes at the matching PhashDistance and tag them
+	  cleantitle - strip the [Dupe: ...] title prefix and the Keep/Remove tags
+
+	Any other mode is reported with a warning. The function always finishes by
+	calling ``log.exit``.
+	"""
+	# Maps each tagging mode to the phash distance passed to the duplicate search.
+	phash_distances = {
+		"tagexact": PhashDistance.EXACT,
+		"taghigh": PhashDistance.HIGH,
+		"tagmid": PhashDistance.MEDIUM,
+	}
+
+	if MODE == "create":
+		for tag_name in ('[Dupe: Keep]', '[Dupe: Remove]', '[Dupe: Ignore]'):
+			stash.find_tag(tag_name, create=True)
+	elif MODE == "remove":
+		for tag_name in ('[Dupe: Keep]', '[Dupe: Remove]'):
+			tag = stash.find_tag(tag_name)
+			# find_tag gives back nothing when the tag does not exist (for
+			# example when "remove" is run twice); skip it instead of crashing
+			# on .get() of a missing result.
+			if tag:
+				stash.destroy_tag(tag.get("id"))
+			else:
+				log.debug(f"Tag {tag_name} not found, nothing to remove")
+	elif MODE in phash_distances:
+		duplicate_list = stash.find_duplicate_scenes(phash_distances[MODE], fragment=SLIM_SCENE_FRAGMENT)
+		process_duplicates(duplicate_list)
+	elif MODE == "cleantitle":
+		clean_titles()
+	else:
+		log.warning(f"Unknown mode: {MODE}")
+
+	log.exit("Plugin exited normally.")
+
+
+def parse_timestamp(ts, format="%Y-%m-%dT%H:%M:%S%z"):
+	"""Parse the timestamp string ``ts`` into a ``datetime``.
+
+	Every ".<digits>" run (the fractional seconds) is removed first, then the
+	remainder is parsed with ``format``.
+	"""
+	without_fraction = re.sub(r'\.\d+', "", ts)
+	return dt.datetime.strptime(without_fraction, format)
+
+class StashScene:
+	"""Comparable view of one Stash scene, used to pick which duplicate to keep.
+
+	Built from a scene dict that carries ``id``, ``title``, ``path``,
+	``file_mod_time`` and a ``file`` dict with ``size``, ``height``, ``bitrate``
+	and ``video_codec``.
+	"""
+
+	def __init__(self, scene=None) -> None:
+		"""Copy the fields used for comparison out of ``scene``."""
+		self.id = int(scene['id'])
+		self.mod_time = parse_timestamp(scene['file_mod_time'])
+		self.height = scene['file']['height']
+		self.size = int(scene['file']['size'])
+		self.bitrate = int(scene['file']['bitrate'])
+		# replace any existing tagged title
+		# NOTE(review): presumably a scene can come back without a title (None);
+		# fall back to '' so re.sub does not raise - confirm against the API.
+		self.title = re.sub(r'^\[Dupe: \d+[KR]\]\s+', '', scene['title'] or '')
+		self.path = scene['path']
+
+		# codec becomes its index in CODEC_PRIORITY when known; otherwise it
+		# stays the upper-cased name and compare_encoding skips the scene.
+		self.codec = scene['file']['video_codec'].upper()
+		if self.codec in CODEC_PRIORITY:
+			self.codec = CODEC_PRIORITY.index(self.codec)
+		else:
+			log.warning(f"could not find codec {self.codec}")
+
+	def __repr__(self) -> str:
+		return f'<StashScene ({self.id})>'
+
+	def __str__(self) -> str:
+		return f'id:{self.id}, height:{self.height}, size:{human_bytes(self.size)}, file_mod_time:{self.mod_time}, title:{self.title}'
+
+	def compare(self, other):
+		"""Decide which of ``self`` and ``other`` is the better scene.
+
+		The criteria named in PRIORITY are tried in order through the matching
+		``compare_<name>`` method; the first one that picks a winner decides.
+		A criterion that raises is logged and skipped.
+
+		Returns a ``(better_scene, reason)`` tuple. ``better_scene`` is None
+		when both are the same scene or when no criterion separates them.
+
+		Raises Exception when ``other`` is not a StashScene.
+		"""
+		if not (isinstance(other, StashScene)):
+			raise Exception(f"can only compare to <StashScene> not <{type(other)}>")
+
+		# Check if same scene
+		if self.id == other.id:
+			return None, f"Matching IDs {self.id}=={other.id}"
+
+		# Fallback for a PRIORITY entry without a compare_<name> method. It is
+		# called like the real comparers, so it has to accept the other scene.
+		def compare_not_found(_other):
+			raise Exception("comparison not found")
+
+		# The loop variable must not be called "type": that would make the
+		# builtin used in the error message above a local of this function.
+		for criterion in PRIORITY:
+			try:
+				compare_function = getattr(self, f'compare_{criterion}', compare_not_found)
+				best, msg = compare_function(other)
+				if best:
+					return best, msg
+			except Exception as e:
+				log.error(f"Issue Comparing <{criterion}> {e}")
+
+		return None, f"{self.id} worse than {other.id}"
+
+	def compare_resolution(self, other):
+		"""Prefer the greater height; ``(None, None)`` when heights are equal."""
+		# Checking Resolution
+		if self.height != other.height:
+			if self.height > other.height:
+				return self, f"Better Resolution {self.height} > {other.height} | {self.id}>{other.id}"
+			else:
+				return other, f"Better Resolution {other.height} > {self.height} | {other.id}>{self.id}"
+		return None, None
+
+	def compare_bitrate(self, other):
+		"""Prefer the greater bitrate; ``(None, None)`` when bitrates are equal."""
+		# Checking Bitrate
+		if self.bitrate != other.bitrate:
+			if self.bitrate > other.bitrate:
+				return self, f"Better Bitrate {human_bytes(self.bitrate)} > {human_bytes(other.bitrate)} Δ:({human_bytes(self.bitrate-other.bitrate)}) | {self.id}>{other.id}"
+			else:
+				return other, f"Better Bitrate {human_bytes(other.bitrate)} > {human_bytes(self.bitrate)} Δ:({human_bytes(other.bitrate-self.bitrate)}) | {other.id}>{self.id}"
+		return None, None
+
+	def compare_size(self, other):
+		"""Prefer the greater file size; ``(None, None)`` when sizes are equal."""
+		# Checking Size
+		if self.size != other.size:
+			if self.size > other.size:
+				return self, f"Better Size {human_bytes(self.size)} > {human_bytes(other.size)} Δ:({human_bytes(self.size-other.size)}) | {self.id} > {other.id}"
+			else:
+				return other, f"Better Size {human_bytes(other.size)} > {human_bytes(self.size)} Δ:({human_bytes(other.size-self.size)}) | {other.id} > {self.id}"
+		return None, None
+
+	def compare_age(self, other):
+		"""Prefer the earlier modification time; ``(None, None)`` when equal."""
+		# Checking Age
+		if self.mod_time != other.mod_time:
+			if self.mod_time < other.mod_time:
+				return self, f"Choose Oldest: Δ:{other.mod_time-self.mod_time} | {self.id} older than {other.id}"
+			else:
+				return other, f"Choose Oldest: Δ:{self.mod_time-other.mod_time} | {other.id} older than {self.id}"
+		return None, None
+
+	def compare_encoding(self, other):
+		"""Prefer the codec listed earlier in CODEC_PRIORITY.
+
+		Returns ``(None, None)`` when the codecs match or when either codec is
+		not in CODEC_PRIORITY.
+		"""
+		# could not find one of the codecs in priority list
+		if not isinstance(self.codec, int) or not isinstance(other.codec, int):
+			return None, None
+		if self.codec != other.codec:
+			if self.codec < other.codec:
+				return self, f"Preferred Codec {CODEC_PRIORITY[self.codec]} over {CODEC_PRIORITY[other.codec]} | {self.id} better than {other.id}"
+			else:
+				return other, f"Preferred Codec {CODEC_PRIORITY[other.codec]} over {CODEC_PRIORITY[self.codec]} | {other.id} better than {self.id}"
+		return None, None
+
+
+def process_duplicates(duplicate_list):
+	"""Tag every duplicate group in ``duplicate_list``.
+
+	Scenes carrying the [Dupe: Ignore] tag are dropped from their group first;
+	a group is only handed to tag_files() when more than one scene remains.
+	Progress is reported once per group.
+	"""
+	ignore_tag_id = stash.find_tag('[Dupe: Ignore]', create=True).get("id")
+	total = len(duplicate_list)
+	log.info(f"There is {total} sets of duplicates found.")
+
+	for index, scene_group in enumerate(duplicate_list):
+		log.progress(index/total)
+
+		candidates = []
+		for scene in scene_group:
+			if any(ignore_tag_id == tag['id'] for tag in scene['tags']):
+				log.debug(f"Ignore {scene['id']} {scene['title']}")
+				continue
+			candidates.append(scene)
+
+		# A single remaining scene has nothing left to be compared against.
+		if len(candidates) <= 1:
+			continue
+		tag_files(candidates)
+
+def tag_files(group):
+	"""Pick the scene to keep from ``group`` and tag every scene accordingly.
+
+	The first scene starts as the keeper; each later scene is compared against
+	the current keeper and replaces it when the comparison names a winner.
+	Afterwards every scene gets a '[Dupe: <keeper id>K]' or
+	'[Dupe: <keeper id>R]' title prefix and the matching Keep/Remove tag.
+	"""
+	tag_keep = stash.find_tag('[Dupe: Keep]', create=True).get("id")
+	tag_remove = stash.find_tag('[Dupe: Remove]', create=True).get("id")
+
+	scenes = [StashScene(raw_scene) for raw_scene in group]
+
+	keep_scene = scenes[0]
+	keep_reasons = []
+	for candidate in scenes[1:]:
+		winner, reason = candidate.compare(keep_scene)
+		if winner:
+			keep_scene = winner
+		keep_reasons.append(reason)
+	keep_scene.reasons = keep_reasons
+
+	scene_ids = [s.id for s in scenes]
+	log.info(f"{keep_scene.id} best of:{scene_ids} {keep_scene.reasons}")
+
+	for scene in scenes:
+		# Both titles carry the keeper's id so the whole set sorts together.
+		if scene.id == keep_scene.id:
+			new_title = f'[Dupe: {keep_scene.id}K] {scene.title}'
+			dupe_tag = tag_keep
+		else:
+			new_title = f'[Dupe: {keep_scene.id}R] {scene.title}'
+			dupe_tag = tag_remove
+
+		stash.update_scenes({
+			'ids': [scene.id],
+			'title': new_title,
+			'tag_ids': {
+				'mode': 'ADD',
+				'ids': [dupe_tag]
+			}
+		})
+
+def clean_titles():
+	"""Undo the tagging: strip the title prefix and detach the Keep/Remove tags.
+
+	First every scene whose title starts with '[Dupe: <id>K]' or
+	'[Dupe: <id>R]' has that marker removed. Then, for [Dupe: Keep] and
+	[Dupe: Remove] in that order, the tag is removed from all scenes that
+	include it; a tag that does not exist is skipped.
+	"""
+	title_filter = {
+		"title": {
+			"modifier": "MATCHES_REGEX",
+			"value": "^\\[Dupe: (\\d+)([KR])\\]"
+		}
+	}
+	marked_scenes = stash.find_scenes(f=title_filter, fragment="id title")
+
+	log.info(f"Cleaning Titles/Tags of {len(marked_scenes)} Scenes ")
+
+	for scene in marked_scenes:
+		title = re.sub(r'\[Dupe: \d+[KR]\]\s+', '', scene['title'])
+		log.info(f"Removing Dupe Title String from: [{scene['id']}] {scene['title']}")
+		stash.update_scenes({
+			'ids': [scene['id']],
+			'title': title
+		})
+
+	for tag_name in ('[Dupe: Keep]', '[Dupe: Remove]'):
+		tag = stash.find_tag(tag_name)
+		if not tag:
+			continue
+
+		tag_id = tag['id']
+		tag_filter = {
+			"tags": {
+				"value": [tag_id],
+				"modifier": "INCLUDES",
+				"depth": 0
+			}
+		}
+		tagged_scenes = stash.find_scenes(f=tag_filter, fragment="id title")
+		stash.update_scenes({
+			'ids': [s['id'] for s in tagged_scenes],
+			'tag_ids': {
+				'mode': 'REMOVE',
+				'ids': [tag_id]
+			}
+		})
+
+# Run the selected plugin task only when this file is executed as a script.
+if __name__ == '__main__':
+	main()
--- a/plugins/phashDuplicateTagger/phashDuplicateTagger.yml
+++ b/plugins/phashDuplicateTagger/phashDuplicateTagger.yml
@@ -0,0 +1,53 @@
+# This plugin has four functions:
+#
+# 1) It will create two tags for review, [Dupe: Keep] and [Dupe: Remove]
+# 2) It will auto assign those tags to scenes with different degrees of matching PHashes based on (and in this order):
+#       a) Keep the larger resolution
+#       b) Keep the higher bitrate (if same resolution)
+#       c) Keep the larger file size (if same bitrate)
+#       d) Keep the older scene (if same file size.)
+#           (Older scene is kept since it's more likely to have been organized if they're the same file)
+#    With this order of precedence one scene is determined to be the "Keeper" and the rest are assigned for Removal
+#    When the scenes are tagged, the titles are also modified to add '[Dupe: {SceneID}K/R]'
+#    The SceneID put into the title is the one determined to be the "Keeper", and is put into all matching scenes
+#    This way you can sort by title after matching and verify the scenes are actually the same thing, and the Keeper
+#    will be the first scene in the set. (Since you'll have [Dupe: 72412K], [Dupe: 72412R], [Dupe: 72412R] as an example.)
+#
+# 3) It will remove the [Dupe: Keep] and [Dupe: Remove] tags from Stash
+# 4) It will remove the [Dupe: ######K/R] tags from the titles
+#   (These last two options are obviously for after you have removed the scenes you don't want any longer)
+#
+# PS. This script is essentially a hack and slash job on scripts from Belley and WithoutPants, thanks guys!
+
+name: "PHash Duplicate Tagger"
+description: Will tag scenes based on duplicate PHashes for easier/safer removal.
+version: 0.1.0
+url: https://github.com/Darklyter/CommunityScripts
+exec:
+  - python
+  - "{pluginDir}/phashDuplicateTagger.py"
+interface: raw
+tasks:
+  - name: 'Create [Dupe] Tags'
+    description: 'Create [Dupe: Keep] and [Dupe: Remove] scene tags for filtering '
+    defaultArgs:
+      mode: create
+  - name: 'Set Dupe Tags (EXACT)'
+    description: 'Assign duplicates tags to Exact Match (Dist 0) scenes'
+    defaultArgs:
+      mode: tagexact
+  - name: 'Set Dupe Tags (HIGH)'
+    description: 'Assign duplicates tags to High Match (Dist 3) scenes'
+    defaultArgs:
+      mode: taghigh
+  - name: 'Set Dupe Tags (MEDIUM)'
+    description: 'Assign duplicates tags to Medium Match (Dist 6) scenes (BE CAREFUL WITH THIS LEVEL)'
+    defaultArgs:
+      mode: tagmid
+  - name: 'Remove [Dupe] Tags'
+    description: 'Remove duplicates scene tags from Stash database'
+    defaultArgs:
+      mode: remove
+  - name: 'Strip [Dupe] From Titles'
+    description: 'Clean prefixed Dupe string from scene titles'
+    defaultArgs:
+      mode: cleantitle
--- a/plugins/phashDuplicateTagger/requirements.txt
+++ b/plugins/phashDuplicateTagger/requirements.txt
@@ -0,0 +1 @@
+stashapp-tools>=0.2.0