BulkImageScraper: Improve Error Handling and Docs (#560)
plugins/bulkImageScrape/bulkImageScrape.yml

@@ -1,6 +1,6 @@
 name: Bulk Image Scrape
 description: Apply an image scraper to all images
-version: 0.2
+version: 0.3
 url: https://github.com/stashapp/CommunityScripts/
 exec:
   - python
plugins/bulkImageScrape/README.md

@@ -2,33 +2,46 @@
 
 https://discourse.stashapp.cc/t/bulk-image-scrape/1339
 
 Apply any image scraper to all of your images
 
 ## Install
 
-After you installed the plugin, make sure you have the latest version of stashapi installed by running `pip install -r <path to your stash>/plugins/community/bulkImageScrape/requirements.txt`.
+After installing the plugin, make sure you have the latest version of stashapi installed by running `pip install -r <path to your stash>/plugins/community/BulkImageScrape/requirements.txt`.
 
 ## Config
 
 Go into your Stash, then under `Settings > Plugins` you'll find the config for Bulk Image Scrape
 
-It is mandatory to enter the Scraper ID (the Name) of the Scraper you want to use. In this example [SHALookup](https://github.com/FansDB/metadata-scrapers) is used but you can use any Scraper that is installed in your Stash and is valid for image scraping.
+It is mandatory to enter the Scraper ID of the Scraper you want to use. In this example [SHALookup](https://github.com/FansDB/metadata-scrapers) is used, but you can use any [valid Image Scraper](#scraper-requirements) that is installed in your Stash.
 
 ![config]()
 
 - `Create Missing movies/groups from scrape result`
-  > if the scraper returns a movie/group and it is not already in your stash, the plugin will create it if enabled
+  > If enabled, the plugin will create any movie/group the scraper returns that is not already in your stash
 
 - `Create Missing performer from scrape result`
-  > if the scraper returns a performer and it is not already in your stash, the plugin will create it if enabled
+  > If enabled, the plugin will create any performer the scraper returns that is not already in your stash
 
 - `Create Missing studios from scrape result`
-  > if the scraper returns a studio and it is not already in your stash, the plugin will create it if enabled
+  > If enabled, the plugin will create any studio the scraper returns that is not already in your stash
 
 - `Exclude images that are set as organized`
   > If enabled, any image that is set as organized will be skipped
 
 - `Merge existing tags with scraped tags`
-  > merge scraped tags with existing tags instead of overwriting them when enabled
+  > If enabled, scraped tags are merged with existing tags instead of overwriting them
 
 - `The Scraper ID of the image scraper to use`
-  > Enter the ID/Name of the scraper you want to use here. If this is not set correctly the plugin will tell you in the logs when you run the plugin task
+  > Enter the ID of the scraper you want to use. If this is not set correctly, the plugin will tell you in the logs when you run the plugin task
 
 - `List of tags to skip`
   > Any image that has one or more of the tags from this setting will be skipped. Multiple tags must be comma separated. If the plugin can't find a tag you specified, it will notify you in the logs
 
 ## Task
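The settings above are stored by Stash and read back by the plugin at run time through stashapi (see the source changes below). A minimal sketch for inspecting what Stash currently has stored for the plugin, assuming a local instance on `http://localhost:9999` and `stashapp-tools` installed:

```python
# Minimal sketch: read the stored BulkImageScrape settings from outside
# the plugin. The connection details are assumptions; adjust Scheme/Host/
# Port (and add an ApiKey entry) to match your install.
from stashapi.stashapp import StashInterface

stash = StashInterface({"Scheme": "http", "Host": "localhost", "Port": 9999})
plugin_config = stash.get_configuration()["plugins"]
# Settings live under the plugin id; keys the user never changed are absent.
print(plugin_config.get("BulkImageScrape", {}))
```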
@@ -41,3 +54,19 @@ Once the Task is running you can track the progress in `Settings > Log`
 If the plugin encounters any issues you will be informed here
 
 ![logs]()
+
+## Scraper Requirements
+
+### Scraper ID
+
+You must set the Scraper ID, which you can find in `Settings > Metadata Providers > Installed Scrapers`
+
+The Scraper ID is shown below the Scraper name and is highlighted in red here:
+
+![valid scraper id](res/valid_scraper_id.png)
+
+### Valid Image Scraper
+
+The scraper you choose must be listed as an image scraper in `Settings > Metadata Providers > Scrapers > Image scrapers`
+
+![valid image scraper](res/valid_image_scraper.png)
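Both requirements can be checked programmatically with the same `list_scrapers` call the plugin itself uses; a minimal sketch, assuming the same local connection details as above:

```python
# Minimal sketch: print the id of every installed scraper that supports
# image scraping; any of these ids is a valid value for the ScraperID setting.
from stashapi.stash_types import StashItem
from stashapi.stashapp import StashInterface

stash = StashInterface({"Scheme": "http", "Host": "localhost", "Port": 9999})
for scraper in stash.list_scrapers([StashItem.IMAGE]):
    print(scraper["id"])
```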
plugins/bulkImageScrape/bulkImageScrape.py

@@ -6,20 +6,76 @@ from stashapi.stash_types import StashItem
 from stashapi.stashapp import StashInterface
 from stashapi.scrape_parser import ScrapeParser
 
-# Quick check to make sure we have the correct version of stashapi
-if StashItem.IMAGE is None or not hasattr(StashInterface, "scrape_image"):
-    log.error(
-        "It seems you are using an older version of stashapi\n"
-        "without support for image scraping.\n"
-        "Please use the requirements.txt file to install the most recent version"
-    )
-    exit(1)
-
-
 #
 # Helper functions
 #
 
+README_URL: str = "https://github.com/stashapp/CommunityScripts/blob/main/plugins/bulkImageScrape/README.md"
+
+
+def validate_scraper(client: StashInterface, scraper_id: str) -> str:
+    """
+    Check if the requested scraper exists and is able to scrape images
+    and return the id if it does
+    """
+
+    if scraper_id == "":
+        log.error(
+            "ScraperID is empty - cannot continue\n"
+            "Please set a valid ScraperID in the plugin settings at:\n"
+            "Settings -> Plugins -> Plugins -> BulkImageScrape -> ScraperID\n"
+            "as described in the README.md file at:\n"
+            f"{README_URL}"
+        )
+        exit(1)
+
+    scrapers: list[dict] = client.list_scrapers([StashItem.IMAGE])
+    valid_scraper_ids: list[str] = []
+
+    for scraper in scrapers:
+        if scraper["id"] == scraper_id:
+            valid_scraper_ids.append(scraper["id"])
+
+    if len(valid_scraper_ids) == 0:
+        log.error(
+            f"No valid image scraper found with id {scraper_id}\n"
+            "Please check the ScraperID is correct\n"
+            "Your selected scraper should be listed at:\n"
+            "Settings -> Metadata Providers -> Scrapers -> Image scrapers\n"
+            "as described in the README.md file at:\n"
+            f"{README_URL}"
+        )
+        exit(1)
+
+    if len(valid_scraper_ids) > 1:
+        log.error(
+            f"Multiple image scrapers found with id {scraper_id}\n"
+            "Scraper ID must be unique - please fix your scraper installations\n"
+            "Check your installed scrapers at:\n"
+            "Settings -> Metadata Providers -> Installed Scrapers\n"
+            "as described in the README.md file at:\n"
+            f"{README_URL}"
+        )
+        exit(1)
+
+    return valid_scraper_ids[0]
+
+
+def validate_stashapi(item: StashItem | None, client: StashInterface) -> None:
+    """
+    Quick check to make sure we have the correct version of stashapi installed
+    """
+    if item is None or not hasattr(client, "scrape_image"):
+        log.error(
+            "It seems you are using an older version of stashapi\n"
+            "without support for image scraping.\n"
+            "Please use the requirements.txt file to install the most recent version\n"
+            "as described in the README.md file at:\n"
+            f"{README_URL}"
+        )
+        exit(1)
+
+
 def get_tag_id(client: StashInterface, tag_name: str) -> str | None:
     """
@@ -37,21 +93,13 @@ def get_tag_id(client: StashInterface, tag_name: str) -> str | None:
     return tags[0]["id"]
 
 
-def get_scraper_id(client: StashInterface, scraper_name: str) -> str | None:
-    """
-    Get the id of a scraper by name or return None if the scraper is not found
-    """
-    scrapers: list[dict] = client.list_scrapers([StashItem.IMAGE])
-    for scraper in scrapers:
-        if scraper["name"] == scraper_name:
-            return scraper["id"]
-    return None
-
-
 def parse_skip_tags(client: StashInterface, skip_tags: str) -> list[str]:
     """
     Parse the skip tags to a list of tag ids
     """
     if skip_tags == "" or skip_tags is None:
         return []
 
     skip_tags = skip_tags.split(",")
     tag_ids: list[str] = []
     for tag in skip_tags:
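The hunk cuts off inside the loop, but the visible code shows the setting value being split on commas before each name is resolved to a tag id (per the README, unresolved names are reported in the logs). A hypothetical illustration, with invented ids:

```python
# Hypothetical behaviour of parse_skip_tags; the tag ids are invented.
parse_skip_tags(stash, "")               # -> [] (nothing configured)
parse_skip_tags(stash, "vacation,wip")   # -> ["12", "34"] if both tags exist
parse_skip_tags(stash, "no-such-tag")    # -> [] plus a notice in the logs
```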
@@ -87,9 +135,7 @@ def get_all_images(
         "depth": -1,
     }
 
-    result: list[dict] = client.find_images(f=image_filter, filter=all_results)
-
-    return result
+    return client.find_images(f=image_filter, filter=all_results)
 
 
 def scrape_image(
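Only the tail of `get_all_images` survives in this hunk; the `"depth": -1` fragment shows the tag exclusion is hierarchical. A sketch of how such a filter can be assembled, where every key except `depth` is an assumption modeled on Stash's image filter input:

```python
def build_image_filter(skip_tag_ids: list[str], exclude_organized: bool) -> dict:
    """
    Hedged sketch of the filter get_all_images presumably builds; only
    "depth": -1 is visible in the diff, the remaining keys are assumptions
    based on Stash's GraphQL image filter.
    """
    image_filter: dict = {}
    if skip_tag_ids:
        image_filter["tags"] = {
            "value": skip_tag_ids,   # tag ids the user wants to skip
            "modifier": "EXCLUDES",  # drop images carrying any of them
            "depth": -1,             # also match sub-tags, at any depth
        }
    if exclude_organized:
        image_filter["organized"] = False  # keep only non-organized images
    return image_filter

# usage mirroring the diff's call:
# client.find_images(f=build_image_filter(ids, True), filter={"per_page": -1})
```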
@@ -128,7 +174,7 @@ def scrape_is_valid(scrape_input: dict | list[dict] | None) -> bool:
             if value is not None and value != [] and value != {} and value != ""
         )
     else:
-        # something went wrong strangely wrong?
+        # something went strangely wrong?
         return False
 
 
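The surviving branch shows that a dict result counts as valid only when at least one field carries a non-empty value. Hypothetical inputs make the rule concrete:

```python
# Hypothetical inputs illustrating scrape_is_valid's rule.
scrape_is_valid({"title": "", "tags": [], "urls": None})  # False: all fields empty
scrape_is_valid({"title": "Beach day", "tags": []})       # True: one non-empty field
scrape_is_valid(None)                                     # False: nothing scraped
```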
@@ -183,6 +229,7 @@ def update_image(client: StashInterface, update: dict) -> dict | None:
 json_input: dict = json.loads(sys.stdin.read())
 FRAGMENT_SERVER: dict = json_input["server_connection"]
 stash: StashInterface = StashInterface(FRAGMENT_SERVER)
+log.info("Starting Bulk Image Scrape Plugin")
 
 config: dict = stash.get_configuration()["plugins"]
 settings: dict[str, any] = {
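`FRAGMENT_SERVER` is the connection fragment Stash pipes to every plugin task on stdin. A sketch of the payload's assumed shape; the field names follow Stash's plugin `server_connection` fragment, and the values are placeholders:

```python
# Assumed shape of the stdin payload a plugin task receives; only the
# "server_connection" key is confirmed by the code above, the rest is
# illustrative.
example_stdin: str = """{
    "server_connection": {"Scheme": "http", "Host": "localhost", "Port": 9999},
    "args": {}
}"""
```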
@@ -197,6 +244,7 @@ settings: dict[str, any] = {
 
 if "BulkImageScrape" in config:
     settings.update(config["BulkImageScrape"])
+log.info(f"settings: {settings}")
 
 scrape_parser = ScrapeParser(
     stash,
@@ -207,36 +255,18 @@ scrape_parser = ScrapeParser(
 )
 
 #
-# Validate input settings
+# VALIDATE ENVIRONMENT
 #
 
-
-# Exit if no ScraperID is set or we cannot resolve it
-if settings["ScraperID"] == "":
-    log.error("No ScraperID set")
-    exit(1)
-
-scraper_id: None | str = get_scraper_id(stash, settings["ScraperID"])
-if scraper_id is None:
-    log.error(f"ScraperID {settings['ScraperID']} not found - cannot continue")
-    log.error("Please check the ScraperID is correct and try again")
-    exit(1)
-
-# parse the skip tags to a list of tag ids if we have any
-parsed_skip_tags: list[str] = []
-if settings["SkipTags"] != "":
-    parsed_skip_tags = parse_skip_tags(stash, settings["SkipTags"])
-    if len(parsed_skip_tags) == 0:
-        parsed_skip_tags = []
+validate_stashapi(getattr(StashItem, "IMAGE", None), stash)
+scraper_id: str = validate_scraper(stash, settings["ScraperID"])
+parsed_skip_tags: list[str] = parse_skip_tags(stash, settings["SkipTags"])
 
 #
 # MAIN
 #
 
-log.info("Starting Bulk Image Scrape Plugin")
-log.info(f"settings: {settings=}")
 log.info("Querying images from stash")
 
 images: list[dict] = get_all_images(
     stash, parsed_skip_tags, settings["ExcludeOrganized"]
 )
@@ -246,7 +276,7 @@ if total_images == 0:
     log.info("No images found with the given filters")
     exit(0)
 else:
-    log.info(f"Found {len(images)} images")
+    log.info(f"Found {total_images} images")
 
 
 for i, image in enumerate(images, start=1):
@@ -258,7 +288,8 @@ for i, image in enumerate(images, start=1):
     valid: bool = scrape_is_valid(scrape)
     if not valid:
         log.debug(
-            f"Scraper returned invalid/empty result for image {image['id']} with scraper {scraper_id} - skipping"
+            f"Scraper returned invalid/empty result for image {image['id']} "
+            f"with scraper {scraper_id} - skipping"
         )
         continue
 
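The fix splits the long message into two adjacent f-string literals, which Python joins into one string; the trailing space inside the first literal is what keeps the words apart:

```python
# Adjacent (f-)string literals concatenate; without the trailing space the
# message would run together as "...image 42with scraper...".
image_id, scraper = 42, "SHALookup"
msg = f"Scraper returned invalid/empty result for image {image_id} " \
      f"with scraper {scraper} - skipping"
print(msg)
```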
Binary files:

Modified: three existing images, recompressed (473 KiB -> 237 KiB, 520 KiB -> 155 KiB, 199 KiB -> 54 KiB)
Added: plugins/bulkImageScrape/res/valid_image_scraper.png (31 KiB)
Added: plugins/bulkImageScrape/res/valid_scraper_id.png (54 KiB)