Merge pull request #249 from stg-annon/PDT-Fix

[phashDuplicateTagger] Bugfix + Add Compare to Scene paths + Untrack config.py
This commit is contained in:
Maista
2024-02-17 00:09:20 +01:00
committed by GitHub
3 changed files with 56 additions and 13 deletions

View File

@@ -0,0 +1 @@
config.py

View File

@@ -1,3 +1,4 @@
from pathlib import Path
import stashapi.log as log
from stashapi.tools import human_bytes, human_bits
@@ -14,6 +15,8 @@ CODEC_PRIORITY = {
"VC1": 6,
"SVQ3": 7,
}
# Path priority is listed from highest to lowest and is matched against the leading directories of each file's path. To enable it, add "path" to the PRIORITY list.
PATH_PRIORITY = ["/root/most/important/path","/root/least/important/path"]
KEEP_TAG_NAME = "[PDT: Keep]"
REMOVE_TAG_NAME = "[PDT: Remove]"
@@ -113,18 +116,18 @@ def compare_size(self, other):
def compare_age(self, other):
# Prefer the older of two scenes, judged by a file timestamp attribute.
# Returns None (bare return) when either timestamp is missing/falsy or when
# both timestamps are equal. Otherwise sets worse.remove_reason = "age" and
# returns a (better, message) tuple where `better` has the earlier timestamp.
# NOTE(review): this hunk appears to show each removed `mod_time` line
# immediately followed by its `created_at` replacement; the +/- diff markers
# are not visible here, so confirm against the repository before relying on it.
if not (self.mod_time and other.mod_time):
if not (self.created_at and other.created_at):
return
if self.mod_time == other.mod_time:
if self.created_at == other.created_at:
return
# The smaller timestamp wins: the older file is kept.
if self.mod_time < other.mod_time:
if self.created_at < other.created_at:
better, worse = self, other
else:
worse, better = self, other
worse.remove_reason = "age"
return (
better,
# The delta in the message is newer minus older, so it is positive.
f"Choose Oldest: Δ:{worse.mod_time-better.mod_time} | {better.id} older than {worse.id}",
f"Choose Oldest: Δ:{worse.created_at-better.created_at} | {better.id} older than {worse.id}",
)
@@ -145,3 +148,33 @@ def compare_encoding(self, other):
self,
f"Prefer Codec {better.codec}({better.id}) over {worse.codec}({worse.id})",
)
def compare_path(self, other):
    """Prefer the scene whose file lives under the higher-priority root.

    Each scene is ranked by the index of the first PATH_PRIORITY entry that
    is a parent directory of its path; index 0 is the most preferred. A scene
    under none of the configured roots gets rank len(PATH_PRIORITY).

    Returns None when the comparison is not configured, when either scene has
    no path, or when both scenes rank the same. Otherwise sets
    worse.remove_reason = "filepath" and returns a (better, message) tuple.

    The computed rank is stored on each scene as `score`. The scenes' `path`
    attributes are read but not modified.
    """
    # An empty list or the untouched example value means "not configured".
    if not PATH_PRIORITY or PATH_PRIORITY[0] == "/root/most/important/path":
        return
    if not self.path or not other.path:
        return

    unranked = len(PATH_PRIORITY)
    roots = [Path(root) for root in PATH_PRIORITY]

    def rank(scene):
        parents = Path(scene.path).parents
        # Stop at the first hit so a nested high-priority root is not
        # overridden by a lower-priority root that also contains the file.
        for score, root in enumerate(roots):
            if root in parents:
                return score
        return unranked

    self.score = rank(self)
    other.score = rank(other)

    if self.score == other.score:
        return
    if self.score < other.score:
        better, worse = self, other
    else:
        worse, better = self, other
    worse.remove_reason = "filepath"
    # better.score is always a valid index: it is strictly below the other
    # score, which is at most len(PATH_PRIORITY).
    return better, f"Prefer Filepath {PATH_PRIORITY[better.score]} | {better.id} better than {worse.id}"

View File

@@ -12,8 +12,10 @@ except ModuleNotFoundError:
"You need to install the stashapi module. (pip install stashapp-tools)",
file=sys.stderr,
)
import config
try:
import config
except ModuleNotFoundError:
log.error("Could not import 'config.py' did you copy and rename 'config_example.py'?")
FRAGMENT = json.loads(sys.stdin.read())
MODE = FRAGMENT["args"]["mode"]
@@ -30,7 +32,7 @@ files {
width
height
bit_rate
mod_time
created_at
duration
frame_rate
video_codec
@@ -68,10 +70,12 @@ def parse_timestamp(ts, format="%Y-%m-%dT%H:%M:%S%z"):
class StashScene:
def __init__(self, scene=None) -> None:
if len(scene["files"]) != 1:
raise Exception(f"Scene has {len(scene['files'])} scene must have one file for comparing")
file = scene["files"][0]
self.id = int(scene["id"])
self.mod_time = parse_timestamp(file["mod_time"])
self.created_at = parse_timestamp(file["created_at"])
if scene.get("date"):
self.date = parse_timestamp(scene["date"], format="%Y-%m-%d")
else:
@@ -102,7 +106,7 @@ class StashScene:
return f"<StashScene ({self.id})>"
def __str__(self) -> str:
# One-line human-readable summary: id, height, size (formatted with
# human_bytes), a file timestamp and the title.
# NOTE(review): two return lines are shown because this hunk appears to list
# the removed `mod_time` version followed by its `created_at` replacement;
# diff markers are not visible here — confirm against the repository.
return f"id:{self.id}, height:{self.height}, size:{human_bytes(self.size)}, file_mod_time:{self.mod_time}, title:{self.title}"
return f"id:{self.id}, height:{self.height}, size:{human_bytes(self.size)}, file_created_at:{self.created_at}, title:{self.title}"
def compare(self, other):
if not (isinstance(other, StashScene)):
@@ -138,9 +142,14 @@ def process_duplicates(distance: PhashDistance = PhashDistance.EXACT):
log.info(f"Found {total} sets of duplicates.")
for i, group in enumerate(duplicate_list):
group = [StashScene(s) for s in group]
scene_group = []
for s in group:
try:
scene_group.append(StashScene(s))
except Exception as e:
log.warning(f"Issue parsing SceneID:{s['id']} - {e}")
filtered_group = []
for scene in group:
for scene in scene_group:
if ignore_tag_id in scene.tag_ids:
log.debug(f"Ignore {scene.id} {scene.title}")
else:
@@ -155,12 +164,12 @@ def process_duplicates(distance: PhashDistance = PhashDistance.EXACT):
def tag_files(group):
keep_reasons = []
keep_scene = None
keep_scene = group[0]
total_size = group[0].size
for scene in group[1:]:
total_size += scene.size
better, msg = scene.compare(group[0])
better, msg = scene.compare(keep_scene)
if better:
keep_scene = better
keep_reasons.append(msg)