Add new scrapers to TugPass

Several of these sites are better served with a secondary scraper, and Tease POV is
unique enough to warrant a separate scraper entirely.
This commit is contained in:
Maista6969 2024-04-08 03:17:53 +02:00
parent 2cd09ad1d8
commit 9aeaf98cf1
No known key found for this signature in database

View File

@ -3,49 +3,53 @@ sceneByURL:
- action: scrapeXPath
url:
- tugpass.com/videos
scraper: sceneScraper_network
scraper: network
- action: scrapeXPath
url:
- breedme.com/videos
url:
- cumblastcity.com/videos
- clubtug.com/videos
- ebonytugs.com/videos
- edgequeens.com/videos
- familylust.com/videos
- finishhim.com/videos
- jawbreakerz.com/videos
- meanmassage.com/videos
- milfaf.com/videos
- mylked.com/videos
- nookies.com/video
- over40handjobs.com/videos
- petite18.com/videos
- seemomsuck.com/videos
- teentugs.com/videos
scraper: primary
- action: scrapeXPath
url:
- breedme.com/videos
- edgequeens.com/videos
- jawbreakerz.com/videos
- milfaf.com/videos
- mylked.com/videos
- shadyspa.com/videos
- shereacts.com/videos
scraper: secondary
- action: scrapeXPath
url:
- teasepov.com/videos
- teentugs.com/videos
scraper: sceneScraper_sites
scraper: teasepov
xPathScrapers:
sceneScraper_network:
network:
scene:
Details: &details //meta[@name="description"]/@content
Performers: &performers
Name: //div[@class="featuringWrapper"]/a
Name: //div[@class="featuringWrapper"]/a | //div[@class="video-box"]//a[contains(@href, "/model/")]
Title: &title
selector: //title/text()
postProcess:
- replace:
- regex: \s\|.+$
with:
- regex: \s\-.+$
with:
selector: //title/text()
postProcess:
- replace:
- regex: \s[|-].+$
with:
# Scrape sanitized URL
URL: &url //link[@rel="canonical"]/@href
Image:
selector: //div[@class="player"]/img/@src
postProcess:
- replace:
- replace:
- regex: ^
with: https://www.tugpass.com/
Studio:
@ -55,7 +59,7 @@ xPathScrapers:
- replace:
- regex: ^\/[A-Za-z_-]+\/([A-Za-z0-9-]+)\/.+
with: $1
sceneScraper_sites:
primary:
scene:
Details: *details
Performers: *performers
@ -78,7 +82,7 @@ xPathScrapers:
with: $1
- regex: ^(https?:.+(\.com)).+__SEP__(.+)
with: $1$3
Studio:
Studio: &studio
Name:
selector: //meta[@name="copyright"]/@content
postProcess:
@ -86,6 +90,39 @@ xPathScrapers:
- regex: Copyright\s(.+)\s\d{4}
with: $1
- map:
"breedme": Breed Me
"milfaf": MILFAF
# Last Updated March 27, 2024
breedme: Breed Me
milfaf: MILFAF
# Alternate XPath scraper. The sceneByURL entry whose `scraper:` value is
# `secondary` routes its listed site URLs to this definition.
secondary:
scene:
# Title is a union (`|`) of two XPath expressions: an h3 with class
# "top-title", or an h5 nested under the "title-top" div.
# NOTE(review): presumably these cover two different page layouts — confirm.
Title: >-
//h3[@class="top-title"] |
//div[@class="title-top"]//h5
# Description paragraph(s) inside a div whose class contains either
# "video-detail" or "title-bottom".
Details: //div[contains(@class, "video-detail") or contains(@class, "title-bottom")]/p
# Date text comes from a span under the "title-top" div. The replace step
# removes any leading run of non-word characters (^\W+) before parsing.
# NOTE(review): presumably so the remaining text starts with the month name.
Date:
selector: //div[@class="title-top"]//span/text()
postProcess:
- replace:
- regex: ^\W+
with: ""
# Layout for dates written like "April 8, 2024" (full month, day, 4-digit year).
- parseDate: January 2, 2006
# Cover image is taken from the poster attribute of the page's <video> element.
Image: //video/@poster
# Performer names are links whose href contains "models", found inside a div
# whose class contains "player" or "title-top".
Performers:
Name: //div[contains(@class, "player") or contains(@class, "title-top")]//a[contains(@href, "models")]
# Alias of the &url anchor (the canonical <link> href) declared earlier in this file.
URL: *url
# Alias of the &studio anchor (name derived from the copyright <meta> tag)
# declared earlier in this file.
Studio: *studio
# This one site is different enough that it gets its own scraper
# The sceneByURL entry whose `scraper:` value is `teasepov` routes here.
# No Performers field is defined for this scraper.
teasepov:
scene:
# Title text is read from the div with class "slideTitle".
Title: //div[@class="slideTitle"]
# Description is read from the div with class "videoDescription".
Details: //div[@class="videoDescription"]
# Image source of the <img> directly under the element with id "player".
# NOTE(review): no URL prefix is applied here — verify the src is absolute.
Image: //div[@id="player"]/img/@src
# Date text comes from the div with class "slideDate". The replace step
# removes any leading run of non-word characters (^\W+) before parsing.
Date:
selector: //div[@class="slideDate"]
postProcess:
- replace:
- regex: ^\W+
with: ""
# Layout for dates written like "April 8, 2024" (full month, day, 4-digit year).
- parseDate: January 2, 2006
# Alias of the &url anchor (the canonical <link> href) declared earlier in this file.
URL: *url
# Alias of the &studio anchor (name derived from the copyright <meta> tag)
# declared earlier in this file.
Studio: *studio
# Last Updated April 08, 2024