From 9aeaf98cf14f529a084a06f3a40bbbe7dc5f65a7 Mon Sep 17 00:00:00 2001
From: Maista6969
Date: Mon, 8 Apr 2024 03:17:53 +0200
Subject: [PATCH] Add new scrapers to TugPass

Several of these sites are better served with a secondary scraper, and
Tease POV is unique enough to warrant a separate scraper entirely
---
 scrapers/TugPass.yml | 87 +++++++++++++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 25 deletions(-)

diff --git a/scrapers/TugPass.yml b/scrapers/TugPass.yml
index 97c29e1f..7e959d7f 100644
--- a/scrapers/TugPass.yml
+++ b/scrapers/TugPass.yml
@@ -3,49 +3,53 @@ sceneByURL:
   - action: scrapeXPath
     url:
       - tugpass.com/videos
-    scraper: sceneScraper_network
+    scraper: network
   - action: scrapeXPath
-    url: 
-      - breedme.com/videos
+    url:
       - cumblastcity.com/videos
       - clubtug.com/videos
-      - ebonytugs.com/videos
-      - edgequeens.com/videos
       - familylust.com/videos
       - finishhim.com/videos
-      - jawbreakerz.com/videos
       - meanmassage.com/videos
-      - milfaf.com/videos
-      - mylked.com/videos
       - nookies.com/video
       - over40handjobs.com/videos
       - petite18.com/videos
       - seemomsuck.com/videos
+      - teentugs.com/videos
+    scraper: primary
+  - action: scrapeXPath
+    url:
+      - breedme.com/videos
+      - edgequeens.com/videos
+      - jawbreakerz.com/videos
+      - milfaf.com/videos
+      - mylked.com/videos
       - shadyspa.com/videos
       - shereacts.com/videos
+    scraper: secondary
+  - action: scrapeXPath
+    url:
       - teasepov.com/videos
-      - teentugs.com/videos
-    scraper: sceneScraper_sites
+    scraper: teasepov
+
 xPathScrapers:
-  sceneScraper_network:
+  network:
     scene:
       Details: &details //meta[@name="description"]/@content
       Performers: &performers
-        Name: //div[@class="featuringWrapper"]/a
+        Name: //div[@class="featuringWrapper"]/a | //div[@class="video-box"]//a[contains(@href, "/model/")]
       Title: &title
-          selector: //title/text()
-          postProcess:
-            - replace:
-              - regex: \s\|.+$
-                with:
-              - regex: \s\-.+$
-                with:
+        selector: //title/text()
+        postProcess:
+          - replace:
+              - regex: \s[|-].+$
+                with:
       # Scrape sanitized URL
       URL: &url //link[@rel="canonical"]/@href
       Image:
         selector: //div[@class="player"]/img/@src
         postProcess:
-          - replace: 
+          - replace:
               - regex: ^
                 with: https://www.tugpass.com/
       Studio:
@@ -55,7 +59,7 @@ xPathScrapers:
             - replace:
                 - regex: ^\/[A-Za-z_-]+\/([A-Za-z0-9-]+)\/.+
                   with: $1
-  sceneScraper_sites:
+  primary:
     scene:
       Details: *details
       Performers: *performers
@@ -78,7 +82,7 @@ xPathScrapers:
                 with: $1
               - regex: ^(https?:.+(\.com)).+__SEP__(.+)
                 with: $1$3
-      Studio:
+      Studio: &studio
         Name:
           selector: //meta[@name="copyright"]/@content
           postProcess:
@@ -86,6 +90,39 @@ xPathScrapers:
                 - regex: Copyright\s(.+)\s\d{4}
                   with: $1
             - map:
-                "breedme": Breed Me
-                "milfaf": MILFAF
-# Last Updated March 27, 2024
+                breedme: Breed Me
+                milfaf: MILFAF
+  secondary:
+    scene:
+      Title: >-
+        //h3[@class="top-title"] |
+        //div[@class="title-top"]//h5
+      Details: //div[contains(@class, "video-detail") or contains(@class, "title-bottom")]/p
+      Date:
+        selector: //div[@class="title-top"]//span/text()
+        postProcess:
+          - replace:
+              - regex: ^\W+
+                with: ""
+          - parseDate: January 2, 2006
+      Image: //video/@poster
+      Performers:
+        Name: //div[contains(@class, "player") or contains(@class, "title-top")]//a[contains(@href, "models")]
+      URL: *url
+      Studio: *studio
+  # This one site is different enough that it gets its own scraper
+  teasepov:
+    scene:
+      Title: //div[@class="slideTitle"]
+      Details: //div[@class="videoDescription"]
+      Image: //div[@id="player"]/img/@src
+      Date:
+        selector: //div[@class="slideDate"]
+        postProcess:
+          - replace:
+              - regex: ^\W+
+                with: ""
+          - parseDate: January 2, 2006
+      URL: *url
+      Studio: *studio
+# Last Updated April 08, 2024