e621_tagger new features (#558)

This commit is contained in:
yokarion
2025-05-17 23:52:12 +02:00
committed by GitHub
parent 05a5e4921b
commit 4e20153346
2 changed files with 68 additions and 79 deletions

View File

@@ -8,54 +8,61 @@ import stashapi.log as log
from stashapi.stashapp import StashInterface
def get_all_images(
client: StashInterface,
skip_tags: list[str],
exclude_organized: bool
client: StashInterface,
skip_tags: list[int],
exclude_organized: bool,
per_page: int = 100,
) -> list[dict]:
"""
Get all images with proper tag exclusion and organization filter
Generator to fetch images in pages from the stash API.
"""
image_filter = {}
pagination = {
"page": 1,
"per_page": -1, # -1 gets all results at once
"sort": "created_at",
"direction": "ASC",
}
# Convert tag names to IDs
tag_ids = []
for tag_name in skip_tags:
tag = get_or_create_tag(client, tag_name)
if tag:
tag_ids.append(tag["id"])
if tag_ids:
image_filter["tags"] = {
"value": [],
"excludes": tag_ids,
"modifier": "INCLUDES_ALL",
"depth": -1,
page = 1
while True:
image_filter = {}
pagination = {
"page": page,
"per_page": per_page,
"sort": "created_at",
"direction": "ASC",
}
if exclude_organized:
image_filter["organized"] = False # Correct field name
if skip_tags:
image_filter["tags"] = {
"value": [],
"excludes": skip_tags,
"modifier": "INCLUDES_ALL",
"depth": -1,
}
# Maintain original parameter structure
return client.find_images(f=image_filter, filter=pagination)
if exclude_organized:
image_filter["organized"] = False
images = client.find_images(f=image_filter, filter=pagination)
if not images:
# no more pages
break
log.info(f"Fetched page {page} with {len(images)} images")
for img in images:
yield img
# move to next page
page += 1
def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
"""Process e621 metadata and update Stash records"""
# Skip already processed images
# same as before...
image = stash.find_image(image_id)
if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])):
if any(t["name"] == "e621_tagged" for t in image.get("tags", [])):
return
if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])):
return
try:
time.sleep(2) # Rate limiting
time.sleep(0.5)
response = requests.get(
f"https://e621.net/posts.json?md5={image_md5}",
headers={"User-Agent": "Stash-e621-Tagger/1.0"},
@@ -64,53 +71,49 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
response.raise_for_status()
post_data = response.json().get("post", {})
except Exception as e:
log.error(f"e621 API error: {str(e)}")
log.error(f"Marking as failed. e621 API error: {str(e)}")
e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])]
stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))})
return
if not post_data:
return
# Create essential entities
e621_tag = get_or_create_tag(stash, "e621_tagged")
post_url = f"https://e621.net/posts/{post_data['id']}"
# Process tags
tag_ids = [e621_tag["id"]]
for category in ["general", "species", "character", "artist", "copyright"]:
for tag in post_data.get("tags", {}).get(category, []):
# Clean and validate tag
for cat in ["general", "species", "character", "artist", "copyright"]:
for tag in post_data.get("tags", {}).get(cat, []):
clean_tag = tag.strip()
if not clean_tag:
continue
stash_tag = get_or_create_tag(stash, clean_tag)
if stash_tag:
tag_ids.append(stash_tag["id"])
# Process studio
studio_id = None
if artists := post_data.get("tags", {}).get("artist"):
studio = get_or_create_studio(stash, artists[0])
studio_id = studio["id"]
# Process performers
performer_ids = []
for char_tag in post_data.get("tags", {}).get("character", []):
performer_name = char_tag.split('_(')[0]
performer = get_or_create_performer(stash, performer_name)
performer_ids.append(performer["id"])
for char in post_data.get("tags", {}).get("character", []):
name = char.split('_(')[0]
perf = get_or_create_performer(stash, name)
performer_ids.append(perf["id"])
# Update image
try:
stash.update_image({
"id": image_id,
"organized": True,
"urls": [post_url],
"tag_ids": list(set(tag_ids)),
"studio_id": studio_id,
"performer_ids": performer_ids
})
log.info("Image updated: ${image_id}")
log.info(f"Image updated: {image_id}")
except Exception as e:
log.error(f"Update failed: {str(e)}")
@@ -166,6 +169,7 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
def scrape_image(client: StashInterface, image_id: str) -> None:
"""Main scraping handler"""
# same logic as before for MD5 extraction and process_e621_post call
image = client.find_image(image_id)
if not image or not image.get("visual_files"):
return
@@ -173,65 +177,50 @@ def scrape_image(client: StashInterface, image_id: str) -> None:
file_data = image["visual_files"][0]
filename = file_data["basename"]
filename_md5 = filename.split('.')[0]
final_md5 = None
# First try filename-based MD5
if re.match(r"^[a-f0-9]{32}$", filename_md5):
final_md5 = filename_md5
log.info(f"Using filename MD5: {final_md5}")
else:
# Fallback to content-based MD5
try:
file_path = file_data["path"]
log.info(f"Generating MD5 from file content: {file_path}")
md5_hash = hashlib.md5()
with open(file_path, "rb") as f:
# Read file in 64kb chunks for memory efficiency
with open(file_data["path"], "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
md5_hash.update(chunk)
final_md5 = md5_hash.hexdigest()
log.info(f"Generated content MD5: {final_md5}")
except Exception as e:
log.error(f"Failed to generate MD5: {str(e)}")
return
if final_md5:
process_e621_post(client, image_id, final_md5)
else:
log.warning("No valid MD5 available for processing")
process_e621_post(client, image_id, final_md5)
# Plugin setup and execution
# In the main execution block:
if __name__ == "__main__":
log.info("Starting tagger with pagination...")
json_input = json.loads(sys.stdin.read())
stash = StashInterface(json_input["server_connection"])
config = stash.get_configuration().get("plugins", {})
settings = {
"SkipTags": "e621_tagged", # Add automatic filtering
"SkipTags": "e621_tagged, e621_tag_failed",
"ExcludeOrganized": False
}
settings.update(config.get("e621_tagger", {}))
log.info(settings)
e621_tagged = get_or_create_tag(stash, "e621_tagged")
e621_failed = get_or_create_tag(stash, "e621_tag_failed")
# Get e621_tagged ID for filtering
e621_tag = get_or_create_tag(stash, "e621_tagged")
# Existing tags + automatic e621_tagged exclusion
skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()]
skip_tags.append(e621_tag["id"]) # Filter by ID instead of name
skip_tags = [st for st in skip_tags]
skip_tags.extend([e621_tagged["id"], e621_failed["id"]])
images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"])
# Rest of the loop remains the same
for i, image in enumerate(images, 1):
image_tag_names = [tag["name"] for tag in image.get("tags", [])]
if any(tag in image_tag_names for tag in skip_tags):
log.info("Fetching images in pages...")
for idx, image in enumerate(get_all_images(stash, skip_tags, settings["ExcludeOrganized"], per_page=100), start=1):
current_tags = [t["name"] for t in image.get("tags", [])]
if any(t in current_tags for t in skip_tags):
log.info(f"Skipping image {image['id']} - contains skip tag")
continue
log.progress(i/len(images))
log.progress(idx)
scrape_image(stash, image["id"])

View File

@@ -1,6 +1,6 @@
name: e621_tagger
description: Finding images and videos on e621 and tagging them.
version: 0.1
version: 0.2
url: https://github.com/stashapp/CommunityScripts/
exec:
- python