Add new scrapers to TugPass

Several of these sites are better served by a secondary scraper, and Tease POV is unique enough to warrant a separate scraper entirely.
2025-12-10 17:32:09 -06:00 · 2024-04-08 03:17:53 +02:00 · 2024-04-08 03:17:53 +02:00 · 9aeaf98cf1
commit 9aeaf98cf1
parent 2cd09ad1d8
1 changed files with 62 additions and 25 deletions
--- a/scrapers/TugPass.yml
+++ b/scrapers/TugPass.yml
@@ -3,49 +3,53 @@ sceneByURL:
  - action: scrapeXPath
    url:
      - tugpass.com/videos
-    scraper: sceneScraper_network
+    scraper: network
  - action: scrapeXPath
-    url: 
-      - breedme.com/videos
+    url:
      - cumblastcity.com/videos
      - clubtug.com/videos
-      - ebonytugs.com/videos
-      - edgequeens.com/videos
      - familylust.com/videos
      - finishhim.com/videos
-      - jawbreakerz.com/videos
      - meanmassage.com/videos
-      - milfaf.com/videos
-      - mylked.com/videos
      - nookies.com/video
      - over40handjobs.com/videos
      - petite18.com/videos
      - seemomsuck.com/videos
+      - teentugs.com/videos
+    scraper: primary
+  - action: scrapeXPath
+    url:
+      - breedme.com/videos
+      - edgequeens.com/videos
+      - jawbreakerz.com/videos
+      - milfaf.com/videos
+      - mylked.com/videos
      - shadyspa.com/videos
      - shereacts.com/videos
+    scraper: secondary
+  - action: scrapeXPath
+    url:
      - teasepov.com/videos
-      - teentugs.com/videos
-    scraper: sceneScraper_sites
+    scraper: teasepov
+
 xPathScrapers:
-  sceneScraper_network:
+  network:
    scene:
      Details: &details //meta[@name="description"]/@content
      Performers: &performers
-        Name: //div[@class="featuringWrapper"]/a
+        Name: //div[@class="featuringWrapper"]/a | //div[@class="video-box"]//a[contains(@href, "/model/")]
      Title: &title
-          selector: //title/text()
-          postProcess:
-            - replace:
-                - regex: \s\|.+$
-                  with:
-                - regex: \s\-.+$
-                  with:
+        selector: //title/text()
+        postProcess:
+          - replace:
+              - regex: \s[|-].+$
+                with:
      # Scrape sanitized URL
      URL: &url //link[@rel="canonical"]/@href
      Image:
        selector: //div[@class="player"]/img/@src
        postProcess:
-           - replace:
+          - replace:
              - regex: ^
                with: https://www.tugpass.com/
      Studio:
@@ -55,7 +59,7 @@ xPathScrapers:
            - replace:
                - regex: ^\/[A-Za-z_-]+\/([A-Za-z0-9-]+)\/.+
                  with: $1
-  sceneScraper_sites:
+  primary:
    scene:
      Details: *details
      Performers: *performers
@@ -78,7 +82,7 @@ xPathScrapers:
                with: $1
              - regex: ^(https?:.+(\.com)).+__SEP__(.+)
                with: $1$3
-      Studio:
+      Studio: &studio
        Name:
          selector: //meta[@name="copyright"]/@content
          postProcess:
@@ -86,6 +90,39 @@ xPathScrapers:
                - regex: Copyright\s(.+)\s\d{4}
                  with: $1
            - map:
-                "breedme": Breed Me
-                "milfaf": MILFAF
-# Last Updated March 27, 2024
+                breedme: Breed Me
+                milfaf: MILFAF
+  secondary:
+    scene:
+      Title: >-
+        //h3[@class="top-title"] |
+        //div[@class="title-top"]//h5
+      Details: //div[contains(@class, "video-detail") or contains(@class, "title-bottom")]/p
+      Date:
+        selector: //div[@class="title-top"]//span/text()
+        postProcess:
+          - replace:
+              - regex: ^\W+
+                with: ""
+          - parseDate: January 2, 2006
+      Image: //video/@poster
+      Performers:
+        Name: //div[contains(@class, "player") or contains(@class, "title-top")]//a[contains(@href, "models")]
+      URL: *url
+      Studio: *studio
+  # This one site is different enough that it gets its own scraper
+  teasepov:
+    scene:
+      Title: //div[@class="slideTitle"]
+      Details: //div[@class="videoDescription"]
+      Image: //div[@id="player"]/img/@src
+      Date:
+        selector: //div[@class="slideDate"]
+        postProcess:
+          - replace:
+              - regex: ^\W+
+                with: ""
+          - parseDate: January 2, 2006
+      URL: *url
+      Studio: *studio
+# Last Updated April 08, 2024