Mirror of https://github.com/stashapp/CommunityScripts.git, synced 2026-04-12 19:41:51 -05:00
Bugfixes and adding a feature to run a scraper on profile urls for images (#260)
Co-authored-by: Tweeticoats <Tweeticoats@github.com>
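In outline, the new feature works like this: when a performer has profile URLs and the runPerformerScraper setting is enabled, the plugin asks Stash which performer scrapers are installed and which URL patterns they claim, runs the matching scraper on each profile URL, and saves any returned images (plus a JSON sidecar describing them) into the download folder. A minimal sketch of that flow, assuming the same stashapi calls the plugin itself uses in the diff below; the connection details, helper name, and example URL are illustrative only:

    # Hedged sketch: mirrors the GraphQL query and stashapi calls from the diff below.
    import stashapi.log as log
    from stashapi.stashapp import StashInterface

    # Illustrative connection; the real plugin builds this from the JSON Stash writes to stdin.
    stash = StashInterface({"scheme": "http", "host": "localhost", "port": 9999})

    def scrapers_by_url_pattern():
        # Ask Stash which performer scrapers exist and which URL patterns they accept.
        res = stash.callGQL("""query ListPerformerScrapers {
            listScrapers(types: [PERFORMER]) {
                id
                name
                performer { urls supported_scrapes }
            }
        }""")
        patterns = {}
        for scraper in res["listScrapers"]:
            for pattern in scraper["performer"]["urls"] or []:
                patterns[pattern] = scraper
        return patterns

    patterns = scrapers_by_url_pattern()
    profile_url = "https://example.org/models/jane-doe"  # illustrative performer URL
    for pattern, scraper in patterns.items():
        if pattern in profile_url:
            log.info("Running stash scraper %s on %s" % (scraper["id"], profile_url))
            scraped = stash.scrape_performer_url(profile_url)  # None if the scrape fails
            if scraped and scraped.get("images"):
                log.debug("scraper returned %s image(s)" % len(scraped["images"]))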
@@ -1,5 +1,5 @@
import stashapi.log as log
from stashapi.stashapp import StashInterface
from stashapi.stashapp import StashInterface,StashItem
from stashapi.stashbox import StashBoxInterface
import os
import sys
@@ -15,7 +15,7 @@ import base64
per_page = 100
request_s = requests.Session()
stash_boxes = {}

scrapers={}

def processImages(img):
    log.debug("image: %s" % (img,))
@@ -34,8 +34,7 @@ def processImages(img):


def processPerformers():
    performers = stash.find_performers(
        f={
    query={
        "tags": {
            "depth": 0,
            "excludes": [],
@@ -43,7 +42,8 @@ def processPerformers():
            "value": [tag_stashbox_performer_gallery],
        }
    }
    )
    performers = stash.find_performers(f=query)

    for performer in performers:
        processPerformer(performer)

@@ -96,7 +96,11 @@ def processPerformerStashid(endpoint, stashid, p):
        images {
            id
            url
        }
    }
    urls{
        url
        type
    }
    """
    perf = stashbox.find_performer(stashid, fragment=query)
    log.debug(perf)
@@ -174,13 +178,88 @@ def processPerformerStashid(endpoint, stashid, p):
                )
            )
            r = requests.get(img["url"])
            with open(filename, "xb") as f:
            with open(filename, "wb") as f:
                f.write(r.content)
                f.close()
            # modified=True
        else:
            log.debug("image already downloaded")

        # scrape urls on the performer using the url scrapers in stash
        if settings['runPerformerScraper'] and len(perf['urls'])>0:

            # determine which scrapers are installed and which url patterns they accept; this only needs to be queried once
            if len(scrapers) == 0:
                scrapers_graphql="""query ListPerformerScrapers {
                    listScrapers(types: [PERFORMER]) {
                        id
                        name
                        performer {
                            urls
                            supported_scrapes
                        }
                    }
                }"""
                res = stash.callGQL(scrapers_graphql)
                for r in res['listScrapers']:
                    if r['performer']['urls']:
                        for url in r['performer']['urls']:
                            scrapers[url]=r

            for u in perf['urls']:
                for url in scrapers.keys():
                    if url in u['url']:
                        log.info('Running stash scraper on performer url: %s' % (u['url'],))
                        res=stash.scrape_performer_url(u['url'])
                        # Check if the scraper returned a result
                        if res is not None:
                            log.debug(res)
                            # a scraper can return multiple images, so increment a counter for each one
                            image_id = 1
                            if res['images']:
                                for image in res['images']:
                                    image_index = Path(settings["path"]) / p["id"] / ("%s-%s.json" % (scrapers[url]['id'],image_id ,))
                                    if not image_index.exists():
                                        with open(image_index, "w") as f:
                                            image_data = {
                                                "title": '%s - %s ' % (scrapers[url]['id'],image_id,),
                                                "details": "name: %s\ngender: %s\nurl: %s\ntwitter: %s\ninstagram: %s\nbirthdate: %s\nethnicity: %s\ncountry: %s\neye_color: %s\nheight: %s\nmeasurements: %s\nfake tits: %s\npenis_length: %s\n career length: %s\ntattoos: %s\npiercings: %s\nhair_color: %s\nweight: %s\n description: %s\n" % (res['name'], res['gender'], res['url'], res['twitter'], res['instagram'], res['birthdate'], res['ethnicity'], res['country'], res['eye_color'], res['height'], res['measurements'], res['fake_tits'], res['penis_length'], res['career_length'], res['tattoos'], res['piercings'], res['hair_color'], res['weight'], res['details'],),
                                                "urls": [u['url'],],
                                                "performer_ids": [p["id"]],
                                                "tag_ids": [tag_stashbox_performer_gallery],
                                                "gallery_ids": [index["galleries"][endpoint]],
                                            }
                                            json.dump(image_data, f)
                                    filename = Path(settings["path"]) / p["id"] / ("%s-%s.jpg" % (scrapers[url]['id'],image_id ,))
                                    if not filename.exists():
                                        if image.startswith('data:'):
                                            with open(filename, "wb") as f:
                                                f.write(base64.b64decode(image.split('base64,')[1]))
                                                f.close()
                                        else:
                                            with open(image_index, "w") as f:
                                                image_data = {
                                                    "title": '%s - %s ' % (scrapers[url]['id'],image_id,),
                                                    "details": "%s"% (res,),
                                                    "urls": [u['url'],image],
                                                    "performer_ids": [p["id"]],
                                                    "tag_ids": [tag_stashbox_performer_gallery],
                                                    "gallery_ids": [index["galleries"][endpoint]],
                                                }
                                                json.dump(image_data, f)
                                            filename = Path(settings["path"]) / p["id"] / ("%s.jpg" % (image_id,))
                                            r = requests.get(img["url"])
                                            if r.status_code==200:
                                                with open(filename, "wb") as f:
                                                    f.write(r.content)
                                                    f.close()
                                    image_id=image_id+1


            # log.debug('%s %s' % (url['url'],url['type'],))
            # stash.scraper
            # scrape=stash.scrape_performer_url(ur)

    else:
        log.error("endpoint %s not configured, skipping" % (endpoint,))

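For each scraped image, the hunk above writes a small JSON sidecar (title, details, source URL, performer, tag and gallery ids) and then stores the image itself, decoding data: URIs inline and fetching plain URLs over HTTP. Below is a standalone sketch of just the storage step; the helper name is hypothetical, and where the else branch above fetches img["url"], a helper like this would presumably fetch the scraped image value itself:

    import base64
    from pathlib import Path

    import requests

    def save_scraped_image(image, filename: Path):
        # Scrapers can return an image either as a data URI
        # ("data:image/jpeg;base64,...") or as an ordinary http(s) URL.
        if filename.exists():
            return  # already downloaded on a previous run
        if image.startswith("data:"):
            # Strip the "data:<mime>;base64," prefix and decode the payload.
            with open(filename, "wb") as f:
                f.write(base64.b64decode(image.split("base64,")[1]))
        else:
            r = requests.get(image)
            if r.status_code == 200:
                with open(filename, "wb") as f:
                    f.write(r.content)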
@@ -230,15 +309,20 @@ def processQueue():


def relink_images():
    images = stash.find_images(
        f={
    query={
        "path": {"modifier": "INCLUDES", "value": settings["path"]},
        "performer_count": {"modifier": "EQUALS", "value": 0},
        "is_missing": "galleries"
    }
    )
    log.debug(images)
    for img in images:
        processImages(img)
    total = stash.find_images(f=query,get_count=True)[0]
    i = 0
    images=[]
    while i < total:
        images = stash.find_images(f=query,filter={"page": 0, "per_page": per_page})
        for img in images:
            log.debug('image: %s' %(img,))
            processImages(img)
            i=i+1
            log.progress((i / total))
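The reworked relink_images above switches from a single find_images call to a counted, paged walk: it asks for the total match count first, then repeatedly pulls a page of per_page images and reports fractional progress to the Stash UI as each one is processed. The page number stays at 0, which presumably relies on processImages() removing images from the filter (by attaching performers or galleries) so the result set shrinks each pass. A minimal sketch of the same pattern, reusing the names from the hunk (stash, settings, per_page, processImages); the break guard is an added safety net, not part of the plugin:

    query = {
        "path": {"modifier": "INCLUDES", "value": settings["path"]},
        "performer_count": {"modifier": "EQUALS", "value": 0},
        "is_missing": "galleries",
    }

    total = stash.find_images(f=query, get_count=True)[0]
    done = 0
    while done < total:
        page = stash.find_images(f=query, filter={"page": 0, "per_page": per_page})
        if not page:
            break  # nothing left matches the filter; avoid looping forever
        for img in page:
            processImages(img)
            done += 1
            log.progress(done / total)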


json_input = json.loads(sys.stdin.read())
@@ -249,6 +333,7 @@ stash = StashInterface(FRAGMENT_SERVER)
config = stash.get_configuration()["plugins"]
settings = {
    "path": "/download_dir",
    "runPerformerScraper":False,
}
if "stashdb-performer-gallery" in config:
    settings.update(config["stashdb-performer-gallery"])
@@ -263,7 +348,6 @@ tag_performer_image = stash.find_tag("[Set Profile Image]", create=True).get("id
if "stasdb-performer-gallery" in config:
    settings.update(config["stasdb-performer-gallery"])


if "mode" in json_input["args"]:
    PLUGIN_ARGS = json_input["args"]["mode"]
if "performer" in json_input["args"]:
@@ -11,6 +11,10 @@ settings:
    displayName: Download parent folder
    description: Download location for files. Note that this should be in a different folder to Stash, and in a folder covered by Stash; you may need to create a new library path to cover this directory.
    type: STRING
  runPerformerScraper:
    displayName: Run stash scrapers on profile urls
    description: Run scrapers on profile urls
    type: BOOLEAN

hooks:
  - name: modify performer