From a8e2d9bfb97e8b28a848ca99439e4417eb7b5ae1 Mon Sep 17 00:00:00 2001 From: stg-annon <14135675+stg-annon@users.noreply.github.com> Date: Wed, 18 Oct 2023 04:25:41 -0400 Subject: [PATCH] pdt-updates --- .../phashDuplicateTagger.py | 221 +++++++++++------- .../phashDuplicateTagger.yml | 20 +- 2 files changed, 141 insertions(+), 100 deletions(-) diff --git a/plugins/phashDuplicateTagger/phashDuplicateTagger.py b/plugins/phashDuplicateTagger/phashDuplicateTagger.py index 9f9ee53..e64226b 100644 --- a/plugins/phashDuplicateTagger/phashDuplicateTagger.py +++ b/plugins/phashDuplicateTagger/phashDuplicateTagger.py @@ -5,7 +5,7 @@ import datetime as dt try: import stashapi.log as log - from stashapi.tools import human_bytes + from stashapi.tools import human_bytes, human_bits from stashapi.stash_types import PhashDistance from stashapi.stashapp import StashInterface except ModuleNotFoundError: @@ -13,25 +13,29 @@ except ModuleNotFoundError: file=sys.stderr) -PRIORITY = ['resolution', 'bitrate', 'size', 'age'] # 'encoding' -CODEC_PRIORITY = ['H265','HEVC','H264','MPEG4'] +PRIORITY = ['resolution', 'bitrate', 'encoding', 'size', 'age'] +CODEC_PRIORITY = {'AV1':0,'H265':1,'HEVC':1,'H264':2,'MPEG4':3,'MPEG1VIDEO':3,'WMV3':4,'WMV2':5,'VC1':6,'SVQ3':7} FRAGMENT = json.loads(sys.stdin.read()) MODE = FRAGMENT['args']['mode'] stash = StashInterface(FRAGMENT["server_connection"]) SLIM_SCENE_FRAGMENT = """ - id - title - path - file_mod_time - tags { id } - file { - size - height - bitrate - video_codec - } +id +title +date +path +file_mod_time +tags { id } +file { + size + width + height + bitrate + duration + framerate + video_codec +} """ def main(): @@ -46,18 +50,18 @@ def main(): tag_id = stash.find_tag('[Dupe: Remove]').get("id") stash.destroy_tag(tag_id) - if MODE == "tagexact": - duplicate_list = stash.find_duplicate_scenes(PhashDistance.EXACT, fragment=SLIM_SCENE_FRAGMENT) - process_duplicates(duplicate_list) - if MODE == "taghigh": - duplicate_list = stash.find_duplicate_scenes(PhashDistance.HIGH, fragment=SLIM_SCENE_FRAGMENT) - process_duplicates(duplicate_list) - if MODE == "tagmid": - duplicate_list = stash.find_duplicate_scenes(PhashDistance.MEDIUM, fragment=SLIM_SCENE_FRAGMENT) - process_duplicates(duplicate_list) + if MODE == "tag_exact": + process_duplicates(PhashDistance.EXACT) + if MODE == "tag_high": + process_duplicates(PhashDistance.HIGH) + if MODE == "tag_medium": + process_duplicates(PhashDistance.MEDIUM) + + if MODE == "clean_scenes": + clean_scenes() + if MODE == "generate_phash": + generate_phash() - if MODE == "cleantitle": - clean_titles() log.exit("Plugin exited normally.") @@ -71,18 +75,31 @@ class StashScene: def __init__(self, scene=None) -> None: self.id = int(scene['id']) self.mod_time = parse_timestamp(scene['file_mod_time']) + if scene.get("date"): + self.date = parse_timestamp(scene['date'], format="%Y-%m-%d") + else: + self.date = None + self.path = scene.get("path") + self.width = scene['file']['width'] self.height = scene['file']['height'] + # File size in # of BYTES self.size = int(scene['file']['size']) + self.frame_rate = int(scene['file']['framerate']) self.bitrate = int(scene['file']['bitrate']) + self.duration = float(scene['file']['duration']) # replace any existing tagged title self.title = re.sub(r'^\[Dupe: \d+[KR]\]\s+', '', scene['title']) self.path = scene['path'] + self.tag_ids = [t["id"]for t in scene["tags"]] + + self.reason = None self.codec = scene['file']['video_codec'].upper() if self.codec in CODEC_PRIORITY: - self.codec = CODEC_PRIORITY.index(self.codec) + self.codec_priority = CODEC_PRIORITY[self.codec] else: - log.warning(f"could not find codec {self.codec}") + self.codec_priority = 99 + log.warning(f"could not find codec {self.codec} used in SceneID:{self.id}") def __repr__(self) -> str: return f'' @@ -115,65 +132,84 @@ class StashScene: # Checking Resolution if self.height != other.height: if self.height > other.height: - return self, f"Better Resolution {self.height} > {other.height} | {self.id}>{other.id}" + better, worse = self, other else: - return other, f"Better Resolution {other.height} > {self.height} | {other.id}>{self.id}" + worse, better = self, other + worse.reason = "resolution" + return better, f"Better Resolution {better.id}:{better.height}p > {worse.id}:{worse.height}p" return None, None def compare_bitrate(self, other): # Checking Bitrate if self.bitrate != other.bitrate: if self.bitrate > other.bitrate: - return self, f"Better Bitrate {human_bytes(self.bitrate)} > {human_bytes(other.bitrate)} Δ:({human_bytes(self.bitrate-other.bitrate)}) | {self.id}>{other.id}" + better, worse = self, other else: - return other, f"Better Bitrate {human_bytes(other.bitrate)} > {human_bytes(self.bitrate)} Δ:({human_bytes(other.bitrate-self.bitrate)}) | {other.id}>{self.id}" + worse, better = self, other + worse.reason = "bitrate" + return better, f"Better Bitrate {human_bits(better.bitrate)}ps > {human_bits(worse.bitrate)}ps Δ:({human_bits(better.bitrate-other.bitrate)}ps)" return None, None def compare_size(self, other): # Checking Size - if self.size != other.size: + if abs(self.size-other.size) > 100000: # diff is > than 0.1 Mb if self.size > other.size: - return self, f"Better Size {human_bytes(self.size)} > {human_bytes(other.size)} Δ:({human_bytes(self.size-other.size)}) | {self.id} > {other.id}" + better, worse = self, other else: - return other, f"Better Size {human_bytes(other.size)} > {human_bytes(self.size)} Δ:({human_bytes(other.size-self.size)}) | {other.id} > {self.id}" + worse, better = self, other + worse.reason = "file_size" + return better, f"Better Size {human_bytes(better.size)} > {human_bytes(worse.size)} Δ:({human_bytes(better.size-worse.size)})" return None, None def compare_age(self, other): # Checking Age - if self.mod_time != other.mod_time: - if self.mod_time < other.mod_time: - return self, f"Choose Oldest: Δ:{other.mod_time-self.mod_time} | {self.id} older than {other.id}" + if (self.date and other.date) and (self.date != other.date): + if self.date < other.date: + better, worse = self, other else: - return other, f"Choose Oldest: Δ:{self.mod_time-other.mod_time} | {other.id} older than {self.id}" + worse, better = self, other + worse.reason = "age" + return better, f"Choose Oldest: Δ:{worse.date-better.date} | {better.id} older than {worse.id}" return None, None def compare_encoding(self, other): - # could not find one of the codecs in priority list - if not isinstance(self.codec, int) or not isinstance(other.codec, int): - return None, None - if self.codec != other.codec: - if self.codec < other.codec: - return self, f"Preferred Codec {CODEC_PRIORITY[self.codec]} over {CODEC_PRIORITY[other.codec]} | {self.id} better than {other.id}" - else: - return other, f"Preferred Codec {CODEC_PRIORITY[other.codec]} over {CODEC_PRIORITY[self.codec]} | {other.id} better than {self.id}" + if self.codec_priority != other.codec_priority: + try: + if self.codec_priority < other.codec_priority: + better, worse = self, other + else: + worse, better = self, other + worse.reason = "video_codec" + return self, f"Prefer Codec {better.codec}({better.id}) over {worse.codec}({worse.id})" + except TypeError: + # could not find one of the codecs in priority list (comparing int to str) + pass return None, None -def process_duplicates(duplicate_list): +def process_duplicates(distance:PhashDistance=PhashDistance.EXACT): + + clean_scenes() # clean old results + ignore_tag_id = stash.find_tag('[Dupe: Ignore]', create=True).get("id") + duplicate_list = stash.find_duplicate_scenes(distance, fragment=SLIM_SCENE_FRAGMENT) + total = len(duplicate_list) log.info(f"There is {total} sets of duplicates found.") + for i, group in enumerate(duplicate_list): - log.progress(i/total) + group = [StashScene(s) for s in group] filtered_group = [] for scene in group: - tag_ids = [ t['id'] for t in scene['tags'] ] - if ignore_tag_id in tag_ids: - log.debug(f"Ignore {scene['id']} {scene['title']}") + if ignore_tag_id in scene.tag_ids: + log.debug(f"Ignore {scene.id} {scene.title}") else: filtered_group.append(scene) + if len(filtered_group) > 1: tag_files(filtered_group) + + log.progress(i/total) def tag_files(group): - tag_keep = stash.find_tag('[Dupe: Keep]', create=True).get("id") - tag_remove = stash.find_tag('[Dupe: Remove]', create=True).get("id") + keep_tag_id = stash.find_tag('[Dupe: Keep]', create=True).get("id") + remove_tag_id = stash.find_tag('[Dupe: Remove]', create=True).get("id") group = [StashScene(s) for s in group] @@ -185,6 +221,11 @@ def tag_files(group): keep_scene = better keep_reasons.append(msg) + if not keep_scene: + log.warning(f"could not determine better scene from {group}") + return + + total_size = human_bytes(total_size) keep_scene.reasons = keep_reasons log.info(f"{keep_scene.id} best of:{[s.id for s in group]} {keep_scene.reasons}") @@ -194,76 +235,76 @@ def tag_files(group): # log.debug(f"Tag for Keeping: {scene.id} {scene.path}") stash.update_scenes({ 'ids': [scene.id], - 'title': f'[Dupe: {keep_scene.id}K] {scene.title}', + 'title': f'[PDT: {total_size}|{keep_scene.id}K] {scene.title}', 'tag_ids': { 'mode': 'ADD', - 'ids': [tag_keep] + 'ids': [keep_tag_id] } }) else: - # log.debug(f"Tag for Removal: {scene.id} {scene.path}") + tag_ids = [remove_tag_id] + if scene.reason: + tag_ids.append(stash.find_tag(f'[Reason: {scene.reason}]', create=True).get('id')) stash.update_scenes({ 'ids': [scene.id], - 'title': f'[Dupe: {keep_scene.id}R] {scene.title}', + 'title': f'[PDT: {total_size}|{keep_scene.id}R] {scene.title}', 'tag_ids': { 'mode': 'ADD', - 'ids': [tag_remove] + 'ids': [remove_tag_id] } }) -def clean_titles(): +def clean_scenes(): scenes = stash.find_scenes(f={ "title": { "modifier": "MATCHES_REGEX", - "value": "^\\[Dupe: (\\d+)([KR])\\]" + "value": "^\\[PDT: .+?\\]" } },fragment="id title") log.info(f"Cleaning Titles/Tags of {len(scenes)} Scenes ") + # Clean scene Title for scene in scenes: - title = re.sub(r'\[Dupe: \d+[KR]\]\s+', '', scene['title']) + title = re.sub(r'\[PDT: .+?\]\s+', '', scene['title']) log.info(f"Removing Dupe Title String from: [{scene['id']}] {scene['title']}") stash.update_scenes({ 'ids': [scene['id']], 'title': title }) - tag_keep = stash.find_tag('[Dupe: Keep]') - if tag_keep: - tag_keep = tag_keep['id'] - scenes = stash.find_scenes(f={ - "tags":{ - "value": [tag_keep], - "modifier": "INCLUDES", - "depth": 0 - } - },fragment="id title") + # Remove Tags + tag_ids_to_remove = [] + keep_tag = stash.find_tag('[Dupe: Keep]') + if keep_tag: + tag_ids_to_remove.append(keep_tag["id"]) + remove_tag = stash.find_tag('[Dupe: Remove]') + if remove_tag: + tag_ids_to_remove.append(remove_tag["id"]) + reason_tags = stash.find_tags(f={ + "name": { + "value": "^\\[Reason", + "modifier": "MATCHES_REGEX" + }}) + tag_ids_to_remove.extend([t["id"] for t in reason_tags]) + + for tag_id in tag_ids_to_remove: + scene_filter={"tags":{"value": [tag_id],"modifier": "INCLUDES","depth": 0}} stash.update_scenes({ - 'ids': [s['id'] for s in scenes], + 'ids': [s["id"] for s in stash.find_scenes(f=scene_filter, fragment="id")], 'tag_ids': { 'mode': 'REMOVE', - 'ids': [tag_keep] + 'ids': [tag_id] } }) - tag_remove = stash.find_tag('[Dupe: Remove]') - if tag_remove: - tag_remove = tag_remove['id'] - scenes = stash.find_scenes(f={ - "tags":{ - "value": [tag_remove], - "modifier": "INCLUDES", - "depth": 0 - } - },fragment="id title") - stash.update_scenes({ - 'ids': [s['id'] for s in scenes], - 'tag_ids': { - 'mode': 'REMOVE', - 'ids': [tag_remove] - } - }) + +def generate_phash(): + query = """mutation MetadataGenerate($input: GenerateMetadataInput!) { + metadataGenerate(input: $input) + }""" + variables = {"phashes", True} + stash._callGraphQL(query, variables) if __name__ == '__main__': main() diff --git a/plugins/phashDuplicateTagger/phashDuplicateTagger.yml b/plugins/phashDuplicateTagger/phashDuplicateTagger.yml index 9a789fc..99b0a35 100644 --- a/plugins/phashDuplicateTagger/phashDuplicateTagger.yml +++ b/plugins/phashDuplicateTagger/phashDuplicateTagger.yml @@ -27,27 +27,27 @@ exec: - "{pluginDir}/phashDuplicateTagger.py" interface: raw tasks: - - name: 'Create [Dupe] Tags' - description: 'Create [Dupe: Keep] and [Dupe: Remove] scene tags for filtering ' - defaultArgs: - mode: create - name: 'Set Dupe Tags (EXACT)' description: 'Assign duplicates tags to Exact Match (Dist 0) scenes' defaultArgs: - mode: tagexact + mode: tag_exact - name: 'Set Dupe Tags (HIGH)' description: 'Assign duplicates tags to High Match (Dist 3) scenes' defaultArgs: - mode: taghigh + mode: tag_high - name: 'Set Dupe Tags (MEDIUM)' description: 'Assign duplicates tags to Medium Match (Dist 6) scenes (BE CAREFUL WITH THIS LEVEL)' defaultArgs: - mode: tagmid + mode: tag_medium - name: 'Remove [Dupe] Tags' description: 'Remove duplicates scene tags from Stash database' defaultArgs: mode: remove - - name: 'Strip [Dupe] From Titles' - description: 'Clean prefixed Dupe string from scene titles' + - name: 'Scene Cleanup' + description: 'Removes titles from scenes and any generated tags excluding [Dupe: Ignore]' defaultArgs: - mode: cleantitle + mode: clean_scenes + - name: 'Generate PHASH' + description: 'Generate PHASHs for all scenes where they are missing' + defaultArgs: + mode: generate_phash