From 4e201533463a7989923164e55bb3d2986b5b0a6b Mon Sep 17 00:00:00 2001 From: yokarion Date: Sat, 17 May 2025 23:52:12 +0200 Subject: [PATCH] e621_tagger new features (#558) --- plugins/e621_tagger/e621_tagger.py | 145 +++++++++++++--------------- plugins/e621_tagger/e621_tagger.yml | 2 +- 2 files changed, 68 insertions(+), 79 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index 4b66bdb..f5bd37b 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -8,54 +8,61 @@ import stashapi.log as log from stashapi.stashapp import StashInterface - def get_all_images( - client: StashInterface, - skip_tags: list[str], - exclude_organized: bool + client: StashInterface, + skip_tags: list[int], + exclude_organized: bool, + per_page: int = 100, ) -> list[dict]: """ - Get all images with proper tag exclusion and organization filter + Generator to fetch images in pages from the stash API. """ - image_filter = {} - pagination = { - "page": 1, - "per_page": -1, # -1 gets all results at once - "sort": "created_at", - "direction": "ASC", - } - - # Convert tag names to IDs - tag_ids = [] - for tag_name in skip_tags: - tag = get_or_create_tag(client, tag_name) - if tag: - tag_ids.append(tag["id"]) - - if tag_ids: - image_filter["tags"] = { - "value": [], - "excludes": tag_ids, - "modifier": "INCLUDES_ALL", - "depth": -1, + page = 1 + while True: + image_filter = {} + pagination = { + "page": page, + "per_page": per_page, + "sort": "created_at", + "direction": "ASC", } - if exclude_organized: - image_filter["organized"] = False # Correct field name + if skip_tags: + image_filter["tags"] = { + "value": [], + "excludes": skip_tags, + "modifier": "INCLUDES_ALL", + "depth": -1, + } - # Maintain original parameter structure - return client.find_images(f=image_filter, filter=pagination) + if exclude_organized: + image_filter["organized"] = False + + images = client.find_images(f=image_filter, filter=pagination) + if not images: + # no more pages + break + + log.info(f"Fetched page {page} with {len(images)} images") + for img in images: + yield img + + # move to next page + page += 1 def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None: """Process e621 metadata and update Stash records""" - # Skip already processed images + # same as before... image = stash.find_image(image_id) - if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])): + if any(t["name"] == "e621_tagged" for t in image.get("tags", [])): + return + + if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])): return try: - time.sleep(2) # Rate limiting + time.sleep(0.5) response = requests.get( f"https://e621.net/posts.json?md5={image_md5}", headers={"User-Agent": "Stash-e621-Tagger/1.0"}, @@ -64,53 +71,49 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N response.raise_for_status() post_data = response.json().get("post", {}) except Exception as e: - log.error(f"e621 API error: {str(e)}") + log.error(f"Marking as failed. e621 API error: {str(e)}") + e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed") + fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])] + stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))}) return if not post_data: return - # Create essential entities e621_tag = get_or_create_tag(stash, "e621_tagged") post_url = f"https://e621.net/posts/{post_data['id']}" - # Process tags tag_ids = [e621_tag["id"]] - for category in ["general", "species", "character", "artist", "copyright"]: - for tag in post_data.get("tags", {}).get(category, []): - # Clean and validate tag + for cat in ["general", "species", "character", "artist", "copyright"]: + for tag in post_data.get("tags", {}).get(cat, []): clean_tag = tag.strip() if not clean_tag: continue - stash_tag = get_or_create_tag(stash, clean_tag) if stash_tag: tag_ids.append(stash_tag["id"]) - # Process studio studio_id = None if artists := post_data.get("tags", {}).get("artist"): studio = get_or_create_studio(stash, artists[0]) studio_id = studio["id"] - # Process performers performer_ids = [] - for char_tag in post_data.get("tags", {}).get("character", []): - performer_name = char_tag.split('_(')[0] - performer = get_or_create_performer(stash, performer_name) - performer_ids.append(performer["id"]) + for char in post_data.get("tags", {}).get("character", []): + name = char.split('_(')[0] + perf = get_or_create_performer(stash, name) + performer_ids.append(perf["id"]) - # Update image try: stash.update_image({ "id": image_id, + "organized": True, "urls": [post_url], "tag_ids": list(set(tag_ids)), "studio_id": studio_id, "performer_ids": performer_ids }) - - log.info("Image updated: ${image_id}") + log.info(f"Image updated: {image_id}") except Exception as e: log.error(f"Update failed: {str(e)}") @@ -166,6 +169,7 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict: def scrape_image(client: StashInterface, image_id: str) -> None: """Main scraping handler""" + # same logic as before for MD5 extraction and process_e621_post call image = client.find_image(image_id) if not image or not image.get("visual_files"): return @@ -173,65 +177,50 @@ def scrape_image(client: StashInterface, image_id: str) -> None: file_data = image["visual_files"][0] filename = file_data["basename"] filename_md5 = filename.split('.')[0] - final_md5 = None - # First try filename-based MD5 if re.match(r"^[a-f0-9]{32}$", filename_md5): final_md5 = filename_md5 log.info(f"Using filename MD5: {final_md5}") else: - # Fallback to content-based MD5 try: - file_path = file_data["path"] - log.info(f"Generating MD5 from file content: {file_path}") - md5_hash = hashlib.md5() - with open(file_path, "rb") as f: - # Read file in 64kb chunks for memory efficiency + with open(file_data["path"], "rb") as f: for chunk in iter(lambda: f.read(65536), b""): md5_hash.update(chunk) - final_md5 = md5_hash.hexdigest() log.info(f"Generated content MD5: {final_md5}") except Exception as e: log.error(f"Failed to generate MD5: {str(e)}") return - if final_md5: - process_e621_post(client, image_id, final_md5) - else: - log.warning("No valid MD5 available for processing") + process_e621_post(client, image_id, final_md5) + -# Plugin setup and execution -# In the main execution block: if __name__ == "__main__": + log.info("Starting tagger with pagination...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) config = stash.get_configuration().get("plugins", {}) settings = { - "SkipTags": "e621_tagged", # Add automatic filtering + "SkipTags": "e621_tagged, e621_tag_failed", "ExcludeOrganized": False } settings.update(config.get("e621_tagger", {})) - log.info(settings) + e621_tagged = get_or_create_tag(stash, "e621_tagged") + e621_failed = get_or_create_tag(stash, "e621_tag_failed") - # Get e621_tagged ID for filtering - e621_tag = get_or_create_tag(stash, "e621_tagged") - - # Existing tags + automatic e621_tagged exclusion skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()] - skip_tags.append(e621_tag["id"]) # Filter by ID instead of name + skip_tags = [st for st in skip_tags] + skip_tags.extend([e621_tagged["id"], e621_failed["id"]]) - images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"]) - - # Rest of the loop remains the same - for i, image in enumerate(images, 1): - image_tag_names = [tag["name"] for tag in image.get("tags", [])] - if any(tag in image_tag_names for tag in skip_tags): + log.info("Fetching images in pages...") + for idx, image in enumerate(get_all_images(stash, skip_tags, settings["ExcludeOrganized"], per_page=100), start=1): + current_tags = [t["name"] for t in image.get("tags", [])] + if any(t in current_tags for t in skip_tags): log.info(f"Skipping image {image['id']} - contains skip tag") continue - log.progress(i/len(images)) + log.progress(idx) scrape_image(stash, image["id"]) diff --git a/plugins/e621_tagger/e621_tagger.yml b/plugins/e621_tagger/e621_tagger.yml index 3ad0186..9b8ecc2 100644 --- a/plugins/e621_tagger/e621_tagger.yml +++ b/plugins/e621_tagger/e621_tagger.yml @@ -1,6 +1,6 @@ name: e621_tagger description: Finding images and videos on e621 and tagging them. -version: 0.1 +version: 0.2 url: https://github.com/stashapp/CommunityScripts/ exec: - python