bulkImageScrape plugin now uses a generator and paging to iterate images (#562)

Co-authored-by: anonymous <anonymous@anonymous.test>
Author: spaceyuck
Authored: 2025-05-24 17:52:59 +00:00
Committed by: GitHub
Parent: b3586eb08e
Commit: 498ae9cb0c
2 changed files with 65 additions and 17 deletions
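In short: instead of loading every matching image with a single find_images call using per_page: -1, the plugin now asks Stash for the total count up front and then yields images page by page through a generator, so memory stays bounded on large collections and a run can be roughly resumed via the new SkipEntriesNum setting. Below is a minimal sketch of the paging pattern the diff implements; the helper name iter_images and the StashInterface import path are illustrative rather than part of the commit, while find_images with f=/filter= is used exactly as in the committed code.

from collections.abc import Generator

from stashapi.stashapp import StashInterface  # import path assumed from stashapp-tools


def iter_images(
    client: StashInterface, image_filter: dict, page_size: int = 100
) -> Generator[dict, None, None]:
    """Yield matching images one page at a time instead of one huge list."""
    page = 1
    while True:
        paging = {
            "page": page,
            "per_page": page_size,
            "sort": "created_at",
            "direction": "ASC",
        }
        images = client.find_images(f=image_filter, filter=paging)
        if not images:
            # an empty page means we have walked past the last result
            return
        yield from images
        page += 1

The committed count_all_images/get_all_images pair below follows the same shape, with per_page: 0 plus get_count=True used for the up-front total.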

plugins/bulkImageScrape/BulkImageScrape.yml (5 changed lines; Normal file → Executable file)

@@ -1,6 +1,6 @@
 name: Bulk Image Scrape
 description: Apply an image scraper to all images
-version: 0.3
+version: 0.3.1
 url: https://github.com/stashapp/CommunityScripts/
 exec:
   - python
@@ -29,6 +29,9 @@ settings:
   ExcludeOrganized:
     displayName: Exclude images that are set as organized (default is to include)
     type: BOOLEAN
+  SkipEntriesNum:
+    displayName: number of entries to skip over (mostly for rerunning after an error on large collections)
+    type: NUMBER
 tasks:
   - name: "Bulk Image Scrape"

plugins/bulkImageScrape/bulkImageScrape.py (77 changed lines; Normal file → Executable file)

@@ -1,5 +1,7 @@
+from collections.abc import Generator
 import sys
 import json
+import math
 import time
 import stashapi.log as log
 from stashapi.stash_types import StashItem
@@ -109,19 +111,8 @@ def parse_skip_tags(client: StashInterface, skip_tags: str) -> list[str]:
     return tag_ids


-def get_all_images(
-    client: StashInterface, skip_tags: list[str], exclude_organized: bool
-) -> list[dict]:
-    """
-    Get all images from the stash
-    """
+def build_image_filter(skip_tags: list[str], exclude_organized: bool) -> dict:
     image_filter: dict = {}
-    all_results: dict = {
-        "page": 1,
-        "per_page": -1,
-        "sort": "created_at",
-        "direction": "ASC",
-    }

     if exclude_organized:
         image_filter["organized"] = False
@@ -135,7 +126,58 @@ def get_all_images(
"depth": -1,
}
return client.find_images(f=image_filter, filter=all_results)
return image_filter
def count_all_images(
client: StashInterface, skip_tags: list[str], exclude_organized: bool
) -> int:
"""
count all images from the stash
"""
image_filter: dict = build_image_filter(skip_tags=skip_tags, exclude_organized=exclude_organized)
all_results: dict = {
"page": 1,
"per_page": 0,
"sort": "created_at",
"direction": "ASC",
}
total_images, images = client.find_images(f=image_filter, filter=all_results, get_count=True)
return total_images
def get_all_images(
client: StashInterface, skip_tags: list[str], exclude_organized: bool, skip_entries: int = 0
) -> Generator[dict, None, None]:
"""
Get all images from the stash
"""
image_filter: dict = build_image_filter(skip_tags=skip_tags, exclude_organized=exclude_organized)
page_size = 100
page = 1
if skip_entries > 0:
page += math.floor(skip_entries / page_size)
log.info(f"skipping to result page {page} with {page_size} entries each to skip around {skip_entries}")
images = None
while images is None or len(images) > 0:
all_results: dict = {
"page": page,
"per_page": page_size,
"sort": "created_at",
"direction": "ASC",
}
images = client.find_images(f=image_filter, filter=all_results)
for image in images:
yield image
page += 1
def scrape_image(
@@ -240,6 +282,7 @@ settings: dict[str, any] = {
"CreateMissingTags": False,
"MergeExistingTags": False,
"ExcludeOrganized": False,
"SkipEntriesNum": 0,
}
if "BulkImageScrape" in config:
@@ -267,17 +310,19 @@ parsed_skip_tags: list[str] = parse_skip_tags(stash, settings["SkipTags"])
 #
 log.info("Querying images from stash")
-images: list[dict] = get_all_images(
+total_images: int = count_all_images(
     stash, parsed_skip_tags, settings["ExcludeOrganized"]
-)
+) - settings["SkipEntriesNum"]

-total_images: int = len(images)

 if total_images == 0:
     log.info("No images found with the given filters")
     exit(0)
 else:
     log.info(f"Found {total_images} images")

+images: Generator[dict, None, None] = get_all_images(
+    stash, parsed_skip_tags, settings["ExcludeOrganized"], settings["SkipEntriesNum"]
+)

 for i, image in enumerate(images, start=1):
     time.sleep(0.5)
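A note on the skip arithmetic in the generator above: skipping happens at page granularity, so with the fixed page size of 100 a SkipEntriesNum of, say, 250 starts on page 3 and actually skips 200 entries, and the logged total (full count minus SkipEntriesNum) is correspondingly approximate; that matches the setting's stated purpose of roughly resuming after an error on a large collection.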