mirror of
https://github.com/stashapp/CommunityScripts.git
synced 2026-02-04 01:52:30 -06:00
Improve e621_tagger (#615)
This commit is contained in:
parent
c1151f670b
commit
233d90c689
@ -4,77 +4,131 @@ import sys
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
import itertools
|
||||
import stashapi.log as log
|
||||
from stashapi.stashapp import StashInterface
|
||||
from typing import List
|
||||
|
||||
MD5_RE = re.compile(r"^[a-f0-9]{32}$")
|
||||
|
||||
|
||||
def get_all_images(
|
||||
def _build_filter(skip_tag_ids, exclude_organized):
|
||||
f = {}
|
||||
if skip_tag_ids:
|
||||
f["tags"] = {
|
||||
"value": [],
|
||||
"excludes": skip_tag_ids,
|
||||
"modifier": "INCLUDES_ALL",
|
||||
"depth": -1,
|
||||
}
|
||||
if exclude_organized:
|
||||
f["organized"] = False
|
||||
return f
|
||||
|
||||
|
||||
def count_images(
    client: StashInterface, skip_tag_ids: list, exclude_organized: bool
) -> int:
    """Return the number of images matching the skip-tag/organized filter.

    Uses per_page=0 so the server returns only the total count without
    materializing any image records.
    """
    image_filter = _build_filter(skip_tag_ids, exclude_organized)
    pagination = {"page": 1, "per_page": 0, "sort": "created_at", "direction": "ASC"}
    total, _ = client.find_images(f=image_filter, filter=pagination, get_count=True)
    return total
|
||||
|
||||
|
||||
def count_scenes(
    client: StashInterface, skip_tag_ids: list, exclude_organized: bool
) -> int:
    """Return the number of scenes matching the skip-tag/organized filter.

    Uses per_page=0 so the server returns only the total count without
    materializing any scene records.
    """
    scene_filter = _build_filter(skip_tag_ids, exclude_organized)
    pagination = {"page": 1, "per_page": 0, "sort": "created_at", "direction": "ASC"}
    total, _ = client.find_scenes(f=scene_filter, filter=pagination, get_count=True)
    return total
|
||||
|
||||
|
||||
def stream_images(
    client: StashInterface,
    skip_tag_ids: List[int],
    exclude_organized: bool,
    per_page: int = 100,
):
    """Generator yielding ("image", image_dict) pages from the Stash API.

    The filter is built once outside the loop so every page is fetched
    against the same criteria (a stable snapshot of the query).
    Iteration stops when a page comes back empty.
    """
    page = 1
    base_filter = _build_filter(skip_tag_ids, exclude_organized)
    while True:
        pagination = {
            "page": page,
            "per_page": per_page,
            "sort": "created_at",
            "direction": "ASC",
        }
        images = client.find_images(f=base_filter, filter=pagination)
        if not images:
            # no more pages
            break
        log.info(f"Fetched image page {page} with {len(images)} images")
        for img in images:
            yield ("image", img)
        # move to next page
        page += 1
|
||||
|
||||
|
||||
def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
|
||||
"""Process e621 metadata and update Stash records"""
|
||||
# same as before...
|
||||
image = stash.find_image(image_id)
|
||||
if any(t["name"] == "e621_tagged" for t in image.get("tags", [])):
|
||||
return
|
||||
def stream_scenes(
    client: StashInterface,
    skip_tag_ids: List[int],
    exclude_organized: bool,
    per_page: int = 100,
):
    """Generator yielding ("scene", scene_dict) pages from the Stash API.

    Mirrors stream_images: one filter built up front, pages fetched in
    created_at order until an empty page signals the end.
    """
    page = 1
    base_filter = _build_filter(skip_tag_ids, exclude_organized)
    while True:
        pagination = {
            "page": page,
            "per_page": per_page,
            "sort": "created_at",
            "direction": "ASC",
        }
        scenes = client.find_scenes(f=base_filter, filter=pagination)
        if not scenes:
            break
        log.info(f"Fetched scene page {page} with {len(scenes)} scenes")
        for sc in scenes:
            yield ("scene", sc)
        page += 1
|
||||
|
||||
if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])):
|
||||
|
||||
def process_e621_post_for_item(
|
||||
stash: StashInterface, item_type: str, item_id: str, item_md5: str
|
||||
) -> None:
|
||||
# Fetch latest object to check tags
|
||||
if item_type == "image":
|
||||
obj = stash.find_image(item_id)
|
||||
already_tagged = any(t["name"] == "e621_tagged" for t in obj.get("tags", []))
|
||||
already_failed = any(
|
||||
t["name"] == "e621_tag_failed" for t in obj.get("tags", [])
|
||||
)
|
||||
else:
|
||||
obj = stash.find_scene(item_id)
|
||||
already_tagged = any(t["name"] == "e621_tagged" for t in obj.get("tags", []))
|
||||
already_failed = any(
|
||||
t["name"] == "e621_tag_failed" for t in obj.get("tags", [])
|
||||
)
|
||||
|
||||
if already_tagged or already_failed:
|
||||
return
|
||||
|
||||
try:
|
||||
time.sleep(0.5)
|
||||
response = requests.get(
|
||||
f"https://e621.net/posts.json?md5={image_md5}",
|
||||
f"https://e621.net/posts.json?md5={item_md5}",
|
||||
headers={"User-Agent": "Stash-e621-Tagger/1.0"},
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
post_data = response.json().get("post", {})
|
||||
except Exception as e:
|
||||
log.error(f"Marking as failed. e621 API error: {str(e)}")
|
||||
e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
|
||||
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])]
|
||||
stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))})
|
||||
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])]
|
||||
if item_type == "image":
|
||||
stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
|
||||
else:
|
||||
stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
|
||||
return
|
||||
|
||||
if not post_data:
|
||||
@ -84,7 +138,7 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
|
||||
post_url = f"https://e621.net/posts/{post_data['id']}"
|
||||
|
||||
tag_ids = [e621_tag["id"]]
|
||||
for cat in ["general", "species", "character", "artist", "copyright"]:
|
||||
for cat in ["general", "species", "artist", "copyright", "meta"]:
|
||||
for tag in post_data.get("tags", {}).get(cat, []):
|
||||
clean_tag = tag.strip()
|
||||
if not clean_tag:
|
||||
@ -100,27 +154,30 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
|
||||
|
||||
performer_ids = []
|
||||
for char in post_data.get("tags", {}).get("character", []):
|
||||
name = char.split('_(')[0]
|
||||
name = char.split("_(")[0]
|
||||
perf = get_or_create_performer(stash, name)
|
||||
performer_ids.append(perf["id"])
|
||||
|
||||
try:
|
||||
stash.update_image({
|
||||
"id": image_id,
|
||||
update_payload = {
|
||||
"id": item_id,
|
||||
"organized": True,
|
||||
"urls": [post_url],
|
||||
"tag_ids": list(set(tag_ids)),
|
||||
"studio_id": studio_id,
|
||||
"performer_ids": performer_ids
|
||||
})
|
||||
log.info(f"Image updated: {image_id}")
|
||||
"performer_ids": performer_ids,
|
||||
}
|
||||
if item_type == "image":
|
||||
stash.update_image(update_payload)
|
||||
log.info(f"Image updated: {item_id}")
|
||||
else:
|
||||
stash.update_scene(update_payload)
|
||||
log.info(f"Scene updated: {item_id}")
|
||||
except Exception as e:
|
||||
log.error(f"Update failed: {str(e)}")
|
||||
|
||||
|
||||
def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
|
||||
"""Find or create tag with hierarchy handling"""
|
||||
# Validate tag name
|
||||
tag_name = tag_name.strip()
|
||||
if not tag_name:
|
||||
log.error("Attempted to create tag with empty name")
|
||||
@ -129,15 +186,17 @@ def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
|
||||
existing = stash.find_tags(f={"name": {"value": tag_name, "modifier": "EQUALS"}})
|
||||
if existing:
|
||||
return existing[0]
|
||||
|
||||
|
||||
parts = tag_name.split(":")
|
||||
parent_id = None
|
||||
for i in range(len(parts)):
|
||||
current_name = ":".join(parts[:i+1]).strip()
|
||||
current_name = ":".join(parts[: i + 1]).strip()
|
||||
if not current_name:
|
||||
continue
|
||||
|
||||
existing = stash.find_tags(f={"name": {"value": current_name, "modifier": "EQUALS"}})
|
||||
|
||||
existing = stash.find_tags(
|
||||
f={"name": {"value": current_name, "modifier": "EQUALS"}}
|
||||
)
|
||||
if not existing:
|
||||
create_data = {"name": current_name}
|
||||
if parent_id:
|
||||
@ -155,72 +214,153 @@ def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
|
||||
parent_id = existing[0]["id"]
|
||||
return {"id": parent_id}
|
||||
|
||||
|
||||
def get_or_create_studio(stash: StashInterface, name: str) -> dict:
    """Find a studio by exact name, creating it if it does not exist."""
    studios = stash.find_studios(f={"name": {"value": name, "modifier": "EQUALS"}})
    return studios[0] if studios else stash.create_studio({"name": name})
|
||||
|
||||
|
||||
def get_or_create_performer(stash: StashInterface, name: str) -> dict:
    """Find a performer by exact name, creating one if it does not exist."""
    performers = stash.find_performers(
        f={"name": {"value": name, "modifier": "EQUALS"}}
    )
    return performers[0] if performers else stash.create_performer({"name": name})
|
||||
|
||||
|
||||
def scrape_image(client: StashInterface, image_id: str) -> None:
    """Resolve an MD5 for the image and hand it to the e621 processor.

    MD5 resolution order:
      1. filename stem, when it already looks like an MD5;
      2. the image's stored "checksum" / "md5" field, if present;
      3. hashing the file contents on disk (chunked, 64 KiB reads).
    Gives up (returns) when no MD5 can be produced.
    """
    image = client.find_image(image_id)
    if not image or not image.get("visual_files"):
        return

    file_data = image["visual_files"][0]
    filename = file_data.get("basename", "")
    filename_md5 = filename.split(".")[0] if filename else ""

    if MD5_RE.match(filename_md5):
        final_md5 = filename_md5
        log.info(f"Using filename MD5 for image: {final_md5}")
    elif image.get("checksum"):
        final_md5 = image["checksum"]
        log.info(f"Using image checksum: {final_md5}")
    elif image.get("md5"):
        final_md5 = image["md5"]
        log.info(f"Using image md5: {final_md5}")
    else:
        try:
            md5_hash = hashlib.md5()
            with open(file_data["path"], "rb") as f:
                # stream in 64 KiB chunks to keep memory flat on large files
                for chunk in iter(lambda: f.read(65536), b""):
                    md5_hash.update(chunk)
            final_md5 = md5_hash.hexdigest()
            log.info(f"Generated content MD5 for image: {final_md5}")
        except Exception as e:
            log.error(f"Failed to generate MD5 for image: {str(e)}")
            return

    process_e621_post_for_item(client, "image", image_id, final_md5)
|
||||
|
||||
|
||||
def scrape_scene(client: StashInterface, scene_id: str) -> None:
    """Resolve an MD5 for the scene and hand it to the e621 processor.

    MD5 resolution order:
      1. scene-level "checksum" / "md5" fields (only if MD5-shaped);
      2. first file's "checksum" field;
      3. first file's basename stem, when it looks like an MD5;
      4. hashing the file contents on disk (chunked, 64 KiB reads).
    Logs and returns when no file exists or hashing fails.
    """
    scene = client.find_scene(scene_id)
    if not scene:
        return

    final_md5 = None

    if scene.get("checksum") and MD5_RE.match(scene.get("checksum")):
        final_md5 = scene.get("checksum")
        log.info(f"Using scene checksum: {final_md5}")
    elif scene.get("md5") and MD5_RE.match(scene.get("md5")):
        final_md5 = scene.get("md5")
        log.info(f"Using scene md5: {final_md5}")
    else:
        # field name differs between stash versions -- try both
        files = scene.get("files") or scene.get("scene_files") or []
        if files:
            file_data = files[0]
            if file_data.get("checksum") and MD5_RE.match(file_data.get("checksum")):
                final_md5 = file_data.get("checksum")
                log.info(f"Using file checksum for scene: {final_md5}")
            else:
                basename = file_data.get("basename", "")
                filename_md5 = basename.split(".")[0] if basename else ""
                if MD5_RE.match(filename_md5):
                    final_md5 = filename_md5
                    log.info(f"Using filename MD5 for scene: {final_md5}")
                else:
                    try:
                        md5_hash = hashlib.md5()
                        with open(file_data["path"], "rb") as f:
                            for chunk in iter(lambda: f.read(65536), b""):
                                md5_hash.update(chunk)
                        final_md5 = md5_hash.hexdigest()
                        log.info(f"Generated content MD5 for scene: {final_md5}")
                    except Exception as e:
                        log.error(f"Failed to generate MD5 for scene: {str(e)}")
                        return
        else:
            log.error(f"No files found for scene {scene_id}; cannot compute md5")
            return

    if final_md5:
        process_e621_post_for_item(client, "scene", scene_id, final_md5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    log.info("Starting tagger with stable pagination snapshot (streamed)...")
    # Plugin input (server connection) arrives as JSON on stdin.
    json_input = json.loads(sys.stdin.read())
    stash = StashInterface(json_input["server_connection"])

    config = stash.get_configuration().get("plugins", {})
    settings = {"SkipTags": "e621_tagged, e621_tag_failed", "ExcludeOrganized": False}
    settings.update(config.get("e621_tagger", {}))

    # Make sure the marker tags exist up front.
    e621_tagged = get_or_create_tag(stash, "e621_tagged")
    e621_failed = get_or_create_tag(stash, "e621_tag_failed")

    # Resolve configured skip-tag names to ids; unknown names are ignored.
    skip_tag_names = [n.strip() for n in settings["SkipTags"].split(",") if n.strip()]
    skip_tag_ids: List[int] = []
    for name in skip_tag_names:
        found = stash.find_tags(f={"name": {"value": name, "modifier": "EQUALS"}})
        if found:
            skip_tag_ids.append(found[0]["id"])
    # The marker tags are always skipped.
    skip_tag_ids.extend([e621_tagged["id"], e621_failed["id"]])

    per_page = 50

    # Count first so progress can be reported without buffering everything.
    log.info("Counting images (no storage)...")
    num_images = count_images(stash, skip_tag_ids, settings["ExcludeOrganized"])
    log.info("Counting scenes (no storage)...")
    num_scenes = count_scenes(stash, skip_tag_ids, settings["ExcludeOrganized"])

    # Guard against division by zero when there is nothing to do.
    total = (num_images + num_scenes) or 1
    log.info(f"Total items (images + scenes): {total}")

    stream = itertools.chain(
        stream_images(
            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
        ),
        stream_scenes(
            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
        ),
    )

    for idx, (item_type, item) in enumerate(stream, start=1):
        log.progress(float(idx - 1) / float(total))

        item_id = item["id"]
        # Server-side filtering already excludes these, but re-check in
        # case tags changed while streaming.
        current_tag_ids = [t["id"] for t in item.get("tags", [])]
        if any(tid in current_tag_ids for tid in skip_tag_ids):
            log.info(f"Skipping {item_type} {item_id} - contains skip tag")
            log.progress(float(idx) / float(total))
            continue

        if item_type == "image":
            scrape_image(stash, item_id)
        else:
            scrape_scene(stash, item_id)

        log.progress(float(idx) / float(total))

    log.progress(1.0)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
name: e621_tagger
description: Finding images and videos on e621 and tagging them.
version: 0.3
url: https://github.com/stashapp/CommunityScripts/
exec:
  - python
||||
|
||||
Loading…
x
Reference in New Issue
Block a user