From a36daaff54cea2bf8190627c9908bae76c6b49d8 Mon Sep 17 00:00:00 2001
From: Maista6969
Date: Sun, 10 Mar 2024 18:43:12 +0100
Subject: [PATCH] Add XPath scraper for POVTrain

---
 SCRAPERS-LIST.md      |  1 +
 scrapers/POVTrain.yml | 58 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 scrapers/POVTrain.yml

diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md
index c270a75d..ea0583aa 100644
--- a/SCRAPERS-LIST.md
+++ b/SCRAPERS-LIST.md
@@ -1251,6 +1251,7 @@ povperverts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 povpornstars.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 povr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
 povthis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+povtrain.com|POVTrain.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 prettydirty.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 prettydirtyteens.com|Deviante/Deviante.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|Python|-
 pridestudios.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
diff --git a/scrapers/POVTrain.yml b/scrapers/POVTrain.yml
new file mode 100644
index 00000000..73d4f1ee
--- /dev/null
+++ b/scrapers/POVTrain.yml
@@ -0,0 +1,58 @@
+name: POV Train
+sceneByURL:
+  - action: scrapeXPath
+    url:
+      - povtrain.com/tour/trailer.php?id=
+    scraper: sceneScraper
+sceneByName: # "{}" in queryURL is replaced with the search text
+  action: scrapeXPath
+  queryURL: https://www.povtrain.com/tour/search.php?query={}
+  scraper: sceneSearch
+sceneByQueryFragment: # "{url}" is replaced with the URL field of the scene being queried
+  action: scrapeXPath
+  queryURL: "{url}"
+  scraper: sceneScraper
+xPathScrapers:
+  sceneSearch: # reads each <li> of the "slides" list on the search page
+    scene:
+      Title: //ul[@class="slides"]/li//h3
+      URL: //ul[@class="slides"]/li/a/@href
+      Date: # the date text sits inside an HTML comment node
+        selector: //ul[@class="slides"]/li//comment()[contains(., "Date")]
+        postProcess: &parseDate # anchor reused by sceneScraper's Date
+          - replace: # keep only the "Month D, YYYY" text, whatever markup or newlines surround it
+              - regex: '(?s).*?([A-Za-z]+ \d{1,2}, \d{4}).*'
+                with: $1
+          - parseDate: January 2, 2006
+      Image:
+        selector: //ul[@class="slides"]/li/a/img/@src
+        postProcess:
+          - replace: # NOTE(review): prefix ends in "/" here but not in sceneScraper - confirm this src has no leading slash
+              - regex: ^
+                with: https://povtrain.com/
+  sceneScraper:
+    scene:
+      Title: //h2
+      Details: //div[@class="desc"]/p
+      Date:
+        selector: //comment()[contains(., "Date Added")]
+        postProcess: *parseDate
+      Image:
+        # Sometimes they put the preview image in a script tag, sometimes it's an actual img
+        selector: //script[contains(., "image:")]/text() | //div[contains(@class, "alpha")]//img[contains(@id, "set-target")]/@src
+        postProcess:
+          - replace:
+              # This regex will only match when we have a script tag
+              # and gives the same result as the img tag, a relative URL
+              - regex: .*image:\s*"([^"]+).*
+                with: $1
+              - regex: ^
+                with: https://povtrain.com
+      Studio:
+        Name:
+          fixed: POV Train
+      Tags:
+        Name: //h5[@class="video_categories"]/a
+      Performers:
+        Name: //h5[@class="featuring_model"]/a
+# Last Updated March 10, 2024