mirror of
https://github.com/stashapp/CommunityScripts.git
synced 2026-06-10 16:51:23 -05:00
e621_tagger new features (#558)
This commit is contained in:
@@ -8,54 +8,61 @@ import stashapi.log as log
|
||||
from collections.abc import Iterator

from stashapi.stashapp import StashInterface
|
||||
|
||||
|
||||
|
||||
def get_all_images(
    client: StashInterface,
    skip_tags: list[int],
    exclude_organized: bool,
    per_page: int = 100,
) -> Iterator[dict]:
    """Yield images from the Stash API, fetching one page per request.

    Args:
        client: Stash connection whose ``find_images`` is queried.
        skip_tags: Tag IDs sent as the tag filter's ``excludes`` list. An
            empty list adds no tag filter at all.
        exclude_organized: When true, the filter requires ``organized`` to
            be ``False``.
        per_page: Page size sent with every request. A value below 1 is
            passed through unchanged and treated as a single, final request.

    Yields:
        Image dicts exactly as returned by ``client.find_images``, sorted by
        ``created_at`` ascending.

    Note:
        Each page is a separate query against the same filter. If the caller
        changes images between pages so that they stop matching (for example
        by adding one of ``skip_tags``), later pages shift and some images
        can be missed. Collect the results first in that case.
    """
    # The filter is the same for every page, so build it once.
    image_filter = {}
    if skip_tags:
        image_filter["tags"] = {
            "value": [],
            "excludes": skip_tags,
            "modifier": "INCLUDES_ALL",
            "depth": -1,
        }
    if exclude_organized:
        image_filter["organized"] = False

    page = 1
    while True:
        pagination = {
            "page": page,
            "per_page": per_page,
            "sort": "created_at",
            "direction": "ASC",
        }
        images = client.find_images(f=image_filter, filter=pagination)
        if not images:
            # no more pages
            break

        log.info(f"Fetched page {page} with {len(images)} images")
        yield from images

        # A short page is the last one, so the extra empty request is
        # skipped. A page size below 1 cannot be paged through, so asking
        # again would presumably just repeat the same result forever.
        if per_page < 1 or len(images) < per_page:
            break

        # move to next page
        page += 1
|
||||
|
||||
|
||||
def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
|
||||
"""Process e621 metadata and update Stash records"""
|
||||
# Skip already processed images
|
||||
# same as before...
|
||||
image = stash.find_image(image_id)
|
||||
if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])):
|
||||
if any(t["name"] == "e621_tagged" for t in image.get("tags", [])):
|
||||
return
|
||||
|
||||
if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])):
|
||||
return
|
||||
|
||||
try:
|
||||
time.sleep(2) # Rate limiting
|
||||
time.sleep(0.5)
|
||||
response = requests.get(
|
||||
f"https://e621.net/posts.json?md5={image_md5}",
|
||||
headers={"User-Agent": "Stash-e621-Tagger/1.0"},
|
||||
@@ -64,53 +71,49 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
|
||||
response.raise_for_status()
|
||||
post_data = response.json().get("post", {})
|
||||
except Exception as e:
|
||||
log.error(f"e621 API error: {str(e)}")
|
||||
log.error(f"Marking as failed. e621 API error: {str(e)}")
|
||||
e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
|
||||
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])]
|
||||
stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))})
|
||||
return
|
||||
|
||||
if not post_data:
|
||||
return
|
||||
|
||||
# Create essential entities
|
||||
e621_tag = get_or_create_tag(stash, "e621_tagged")
|
||||
post_url = f"https://e621.net/posts/{post_data['id']}"
|
||||
|
||||
# Process tags
|
||||
tag_ids = [e621_tag["id"]]
|
||||
for category in ["general", "species", "character", "artist", "copyright"]:
|
||||
for tag in post_data.get("tags", {}).get(category, []):
|
||||
# Clean and validate tag
|
||||
for cat in ["general", "species", "character", "artist", "copyright"]:
|
||||
for tag in post_data.get("tags", {}).get(cat, []):
|
||||
clean_tag = tag.strip()
|
||||
if not clean_tag:
|
||||
continue
|
||||
|
||||
stash_tag = get_or_create_tag(stash, clean_tag)
|
||||
if stash_tag:
|
||||
tag_ids.append(stash_tag["id"])
|
||||
|
||||
# Process studio
|
||||
studio_id = None
|
||||
if artists := post_data.get("tags", {}).get("artist"):
|
||||
studio = get_or_create_studio(stash, artists[0])
|
||||
studio_id = studio["id"]
|
||||
|
||||
# Process performers
|
||||
performer_ids = []
|
||||
for char_tag in post_data.get("tags", {}).get("character", []):
|
||||
performer_name = char_tag.split('_(')[0]
|
||||
performer = get_or_create_performer(stash, performer_name)
|
||||
performer_ids.append(performer["id"])
|
||||
for char in post_data.get("tags", {}).get("character", []):
|
||||
name = char.split('_(')[0]
|
||||
perf = get_or_create_performer(stash, name)
|
||||
performer_ids.append(perf["id"])
|
||||
|
||||
# Update image
|
||||
try:
|
||||
stash.update_image({
|
||||
"id": image_id,
|
||||
"organized": True,
|
||||
"urls": [post_url],
|
||||
"tag_ids": list(set(tag_ids)),
|
||||
"studio_id": studio_id,
|
||||
"performer_ids": performer_ids
|
||||
})
|
||||
|
||||
log.info("Image updated: ${image_id}")
|
||||
log.info(f"Image updated: {image_id}")
|
||||
except Exception as e:
|
||||
log.error(f"Update failed: {str(e)}")
|
||||
|
||||
@@ -166,6 +169,7 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
|
||||
|
||||
def scrape_image(client: StashInterface, image_id: str) -> None:
    """Work out an image's MD5 and hand it to ``process_e621_post``.

    The MD5 is taken from the file's basename when the part before the first
    dot is exactly 32 lowercase hex characters. Otherwise it is computed from
    the file contents. The function returns without doing anything when the
    image cannot be found, has no visual files, or its file cannot be read.

    Args:
        client: Stash connection used to look the image up.
        image_id: ID of the image to process.
    """
    image = client.find_image(image_id)
    if not image or not image.get("visual_files"):
        return

    file_data = image["visual_files"][0]
    filename_md5 = file_data["basename"].split(".")[0]

    # fullmatch rather than match with "^...$", which would also accept a
    # name ending in a newline.
    if re.fullmatch(r"[a-f0-9]{32}", filename_md5):
        final_md5 = filename_md5
        log.info(f"Using filename MD5: {final_md5}")
    else:
        # Fall back to hashing the file contents.
        try:
            file_path = file_data["path"]
            log.info(f"Generating MD5 from file content: {file_path}")
            # The digest is only a lookup key, not a security measure; the
            # flag keeps MD5 usable on builds that restrict it.
            md5_hash = hashlib.md5(usedforsecurity=False)
            with open(file_path, "rb") as f:
                # Read in 64 KiB chunks so large files are never held in
                # memory at once.
                for chunk in iter(lambda: f.read(65536), b""):
                    md5_hash.update(chunk)
        except (OSError, KeyError) as e:
            log.error(f"Failed to generate MD5: {str(e)}")
            return
        final_md5 = md5_hash.hexdigest()
        log.info(f"Generated content MD5: {final_md5}")

    process_e621_post(client, image_id, final_md5)
|
||||
|
||||
|
||||
# Plugin entry point: the server connection arrives as JSON on stdin.
if __name__ == "__main__":
    log.info("Starting tagger with pagination...")
    json_input = json.loads(sys.stdin.read())
    stash = StashInterface(json_input["server_connection"])

    # Plugin settings from Stash override the defaults below.
    config = stash.get_configuration().get("plugins", {})
    settings = {
        "SkipTags": "e621_tagged, e621_tag_failed",
        "ExcludeOrganized": False,
    }
    settings.update(config.get("e621_tagger", {}))
    log.info(settings)

    # The two marker tags are always skipped, whatever SkipTags was set to.
    skip_tag_names = {
        name.strip() for name in settings["SkipTags"].split(",") if name.strip()
    }
    skip_tag_names.update(("e621_tagged", "e621_tag_failed"))

    # get_all_images excludes by tag ID, so resolve every name to its ID
    # instead of passing names and IDs mixed together.
    skip_tag_ids = []
    for name in sorted(skip_tag_names):
        tag = get_or_create_tag(stash, name)
        if tag:
            skip_tag_ids.append(tag["id"])

    log.info("Fetching images in pages...")
    # Collect every page before processing. Processing adds skip tags to
    # images, which removes them from the filtered result set; paging through
    # that shrinking set while tagging would skip images. Knowing the total
    # also lets progress be reported as a fraction.
    images = list(
        get_all_images(
            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=100
        )
    )
    total = len(images)

    for idx, image in enumerate(images, start=1):
        current_tags = {t["name"] for t in image.get("tags", [])}
        if not skip_tag_names.isdisjoint(current_tags):
            log.info(f"Skipping image {image['id']} - contains skip tag")
            continue

        log.progress(idx / total)
        scrape_image(stash, image["id"])
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: e621_tagger
|
||||
description: Finding images and videos on e621 and tagging them.
|
||||
version: 0.1
|
||||
version: 0.2
|
||||
url: https://github.com/stashapp/CommunityScripts/
|
||||
exec:
|
||||
- python
|
||||
|
||||
Reference in New Issue
Block a user