e621_tagger new features (#558)

2026-06-10 16:51:23 -05:00 · 2025-05-17 23:52:12 +02:00
parent 05a5e4921b
commit 4e20153346
2 changed files with 68 additions and 79 deletions
--- a/plugins/e621_tagger/e621_tagger.py
+++ b/plugins/e621_tagger/e621_tagger.py
@@ -8,54 +8,61 @@ import stashapi.log as log
 from stashapi.stashapp import StashInterface


-
 def get_all_images(
-    client: StashInterface, 
-    skip_tags: list[str], 
-    exclude_organized: bool
+    client: StashInterface,
+    skip_tags: list[int],
+    exclude_organized: bool,
+    per_page: int = 100,
 ) -> list[dict]:
    """
-    Get all images with proper tag exclusion and organization filter
+    Generator to fetch images in pages from the stash API.
    """
-    image_filter = {}
-    pagination = {
-        "page": 1,
-        "per_page": -1,  # -1 gets all results at once
-        "sort": "created_at",
-        "direction": "ASC",
-    }
-
-    # Convert tag names to IDs
-    tag_ids = []
-    for tag_name in skip_tags:
-        tag = get_or_create_tag(client, tag_name)
-        if tag:
-            tag_ids.append(tag["id"])
-
-    if tag_ids:
-        image_filter["tags"] = {
-            "value": [],
-            "excludes": tag_ids,
-            "modifier": "INCLUDES_ALL",
-            "depth": -1,
+    page = 1
+    while True:
+        image_filter = {}
+        pagination = {
+            "page": page,
+            "per_page": per_page,
+            "sort": "created_at",
+            "direction": "ASC",
        }

-    if exclude_organized:
-        image_filter["organized"] = False  # Correct field name
+        if skip_tags:
+            image_filter["tags"] = {
+                "value": [],
+                "excludes": skip_tags,
+                "modifier": "INCLUDES_ALL",
+                "depth": -1,
+            }

-    # Maintain original parameter structure
-    return client.find_images(f=image_filter, filter=pagination)
+        if exclude_organized:
+            image_filter["organized"] = False
+
+        images = client.find_images(f=image_filter, filter=pagination)
+        if not images:
+            # no more pages
+            break
+
+        log.info(f"Fetched page {page} with {len(images)} images")
+        for img in images:
+            yield img
+
+        # move to next page
+        page += 1


 def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
    """Process e621 metadata and update Stash records"""
-    # Skip already processed images
+    # same as before...
    image = stash.find_image(image_id)
-    if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])):
+    if any(t["name"] == "e621_tagged" for t in image.get("tags", [])):
+        return
+
+    if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])):
        return

    try:
-        time.sleep(2)  # Rate limiting
+        time.sleep(0.5)
        response = requests.get(
            f"https://e621.net/posts.json?md5={image_md5}",
            headers={"User-Agent": "Stash-e621-Tagger/1.0"},
@@ -64,53 +71,49 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
        response.raise_for_status()
        post_data = response.json().get("post", {})
    except Exception as e:
-        log.error(f"e621 API error: {str(e)}")
+        log.error(f"Marking as failed. e621 API error: {str(e)}")
+        e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
+        fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])]
+        stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))})
        return

    if not post_data:
        return

-    # Create essential entities
    e621_tag = get_or_create_tag(stash, "e621_tagged")
    post_url = f"https://e621.net/posts/{post_data['id']}"

-    # Process tags
    tag_ids = [e621_tag["id"]]
-    for category in ["general", "species", "character", "artist", "copyright"]:
-        for tag in post_data.get("tags", {}).get(category, []):
-            # Clean and validate tag
+    for cat in ["general", "species", "character", "artist", "copyright"]:
+        for tag in post_data.get("tags", {}).get(cat, []):
            clean_tag = tag.strip()
            if not clean_tag:
                continue
-            
            stash_tag = get_or_create_tag(stash, clean_tag)
            if stash_tag:
                tag_ids.append(stash_tag["id"])

-    # Process studio
    studio_id = None
    if artists := post_data.get("tags", {}).get("artist"):
        studio = get_or_create_studio(stash, artists[0])
        studio_id = studio["id"]

-    # Process performers
    performer_ids = []
-    for char_tag in post_data.get("tags", {}).get("character", []):
-        performer_name = char_tag.split('_(')[0]
-        performer = get_or_create_performer(stash, performer_name)
-        performer_ids.append(performer["id"])
+    for char in post_data.get("tags", {}).get("character", []):
+        name = char.split('_(')[0]
+        perf = get_or_create_performer(stash, name)
+        performer_ids.append(perf["id"])

-    # Update image
    try:
        stash.update_image({
            "id": image_id,
+            "organized": True,
            "urls": [post_url],
            "tag_ids": list(set(tag_ids)),
            "studio_id": studio_id,
            "performer_ids": performer_ids
        })
-
-        log.info("Image updated: ${image_id}")
+        log.info(f"Image updated: {image_id}")
    except Exception as e:
        log.error(f"Update failed: {str(e)}")

@@ -166,6 +169,7 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:

 def scrape_image(client: StashInterface, image_id: str) -> None:
    """Main scraping handler"""
+    # same logic as before for MD5 extraction and process_e621_post call
    image = client.find_image(image_id)
    if not image or not image.get("visual_files"):
        return
@@ -173,65 +177,50 @@ def scrape_image(client: StashInterface, image_id: str) -> None:
    file_data = image["visual_files"][0]
    filename = file_data["basename"]
    filename_md5 = filename.split('.')[0]
-    final_md5 = None

-    # First try filename-based MD5
    if re.match(r"^[a-f0-9]{32}$", filename_md5):
        final_md5 = filename_md5
        log.info(f"Using filename MD5: {final_md5}")
    else:
-        # Fallback to content-based MD5
        try:
-            file_path = file_data["path"]
-            log.info(f"Generating MD5 from file content: {file_path}")
-            
            md5_hash = hashlib.md5()
-            with open(file_path, "rb") as f:
-                # Read file in 64kb chunks for memory efficiency
+            with open(file_data["path"], "rb") as f:
                for chunk in iter(lambda: f.read(65536), b""):
                    md5_hash.update(chunk)
-            
            final_md5 = md5_hash.hexdigest()
            log.info(f"Generated content MD5: {final_md5}")
        except Exception as e:
            log.error(f"Failed to generate MD5: {str(e)}")
            return

-    if final_md5:
-        process_e621_post(client, image_id, final_md5)
-    else:
-        log.warning("No valid MD5 available for processing")
+    process_e621_post(client, image_id, final_md5)
+

-# Plugin setup and execution
-# In the main execution block:
 if __name__ == "__main__":
+    log.info("Starting tagger with pagination...")
    json_input = json.loads(sys.stdin.read())
    stash = StashInterface(json_input["server_connection"])

    config = stash.get_configuration().get("plugins", {})
    settings = {
-        "SkipTags": "e621_tagged",  # Add automatic filtering
+        "SkipTags": "e621_tagged, e621_tag_failed",
        "ExcludeOrganized": False
    }
    settings.update(config.get("e621_tagger", {}))

-    log.info(settings)
+    e621_tagged = get_or_create_tag(stash, "e621_tagged")
+    e621_failed = get_or_create_tag(stash, "e621_tag_failed")

-    # Get e621_tagged ID for filtering
-    e621_tag = get_or_create_tag(stash, "e621_tagged")
-
-    # Existing tags + automatic e621_tagged exclusion
    skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()]
-    skip_tags.append(e621_tag["id"])  # Filter by ID instead of name
+    skip_tags = [st for st in skip_tags]
+    skip_tags.extend([e621_tagged["id"], e621_failed["id"]])

-    images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"])
-
-    # Rest of the loop remains the same
-    for i, image in enumerate(images, 1):
-        image_tag_names = [tag["name"] for tag in image.get("tags", [])]
-        if any(tag in image_tag_names for tag in skip_tags):
+    log.info("Fetching images in pages...")
+    for idx, image in enumerate(get_all_images(stash, skip_tags, settings["ExcludeOrganized"], per_page=100), start=1):
+        current_tags = [t["name"] for t in image.get("tags", [])]
+        if any(t in current_tags for t in skip_tags):
            log.info(f"Skipping image {image['id']} - contains skip tag")
            continue

-        log.progress(i/len(images))
+        log.progress(idx)
        scrape_image(stash, image["id"])
--- a/plugins/e621_tagger/e621_tagger.yml
+++ b/plugins/e621_tagger/e621_tagger.yml
@@ -1,6 +1,6 @@
 name: e621_tagger
 description: Finding images and videos on e621 and tagging them.
-version: 0.1
+version: 0.2
 url: https://github.com/stashapp/CommunityScripts/
 exec:
  - python