Add SLRoriginals xPath scraper (#745)

This commit is contained in:
Philip Wang 2021-10-21 16:19:04 -04:00 committed by GitHub
parent 96441aab8f
commit fceee20e35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 32 deletions

View File

@ -45,6 +45,7 @@ allgirlmassage.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbi
allherluv.com|AllHerLuv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
allinternal.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
alljapanesepass.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
allvr.porn|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
alsscan.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
amateripremium.com|porncz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
amateurallure.com|AmateurAllure.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -455,6 +456,7 @@ handsonhardcore.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hanime.tv|hanime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai
hardcoreyouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardfuckgirls.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardonvr.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
hardtied.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
harmonyvision.com|HarmonyVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
@ -617,7 +619,7 @@ loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
loveherfeet.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Foot Fetish
lubed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lustery.com|Lustery.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur
lustreality.com|LustReality.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
lustreality.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
lustylina.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
madeincanada.xxx|MadeInCanada.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
madouqu.com|Madou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -911,6 +913,7 @@ sis.porn|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sislovesme.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sissypov.com|SissyPov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python >= 3.3|-
slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
soapymassage.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1094,6 +1097,7 @@ vrbgay.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
vrbtrans.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
vrconk.com|VRConk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR
vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
vrporncz.com|porncz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
wankitnow.com|wankitnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR

View File

@ -1,31 +0,0 @@
# Scene scraper for lustreality.com; every field is read with an XPath selector.
name: LustReality

sceneByURL:
  - action: scrapeXPath
    scraper: sceneScraper
    url:
      - lustreality.com

xPathScrapers:
  sceneScraper:
    scene:
      # Plain text fields: a bare string is the selector itself.
      Title: '//h1'
      Details: '//div[@class="u-mb--six u-fs--fo u-lh--normal"]'

      # Release date, written on the page in the form "Jan 02, 2006".
      Date:
        selector: '//span[@class="u-inline-block u-mr--nine"]'
        postProcess:
          - parseDate: Jan 02, 2006

      # One entry per matched anchor element.
      Performers:
        Name: '//div[@class="u-mt--three u-mb--three"]/a'
      Tags:
        Name: '//ul[@class="u-list u-list--inline u-fs--th u-ff--alt"]/li/a'

      # The cover is only present inside an inline style attribute, so the
      # first http(s) URL ending in .jpg is pulled out of that string.
      Image:
        selector: '//div[@class="splash-screen fullscreen-message is-visible"]/@style'
        postProcess:
          - replace:
              - regex: '.*?(https?:\/\/\S*\.jpg).*'
                with: $1

      # The studio is constant for this site.
      Studio:
        Name:
          fixed: LustReality
# Last Updated September 10, 2021

62
scrapers/SLRoriginals.yml Normal file
View File

@ -0,0 +1,62 @@
# XPath scraper for the sites listed under `url`. The scene and movie scrapers
# match the same URLs and share most selectors: each shared node is anchored
# where it is first defined (in sceneScraper) and aliased in movieScraper.
name: "SLROriginals"
sceneByURL:
  - action: scrapeXPath
    url: &urlAttr
      - allvr.porn
      - hardonvr.com
      - lustreality.com
      - slroriginals.com
      - vrfirsttimer.com
    scraper: sceneScraper
movieByURL:
  - action: scrapeXPath
    # Same site list as sceneByURL.
    url: *urlAttr
    scraper: movieScraper
xPathScrapers:
  sceneScraper:
    common:
      # Container that holds the title, date, description, tags and performers.
      # `$info` is substituted textually wherever it appears in a selector.
      $info: &info //div[starts-with( @class,"c-page--video-right")]
    scene:
      Title: $info//h1[1]/text()
      Date: &dateAttr
        selector: $info//span[starts-with(@class,"u-inline-block")][1]/text()
        postProcess:
          - parseDate: Jan 2, 2006
      Details: &detailsAttr
        selector: $info//div[starts-with(@class,"u-mb--six")]
      Tags:
        Name: $info//div[starts-with(@class,"u-mt--four")]//li/a/text()
      Performers:
        Name: $info//div[starts-with(@class,"u-mt--three")]//a/text()
      Studio:
        # The studio name is taken from the page's application-name meta tag.
        Name: &studioName //meta[@name="application-name"]/@content
      Image: &imageSel
        selector: //div[starts-with(@style,"background-image:")]/@style
        postProcess:
          - replace:
              # Extract the target of the CSS url(...) from the style string.
              # Leading/trailing text is optional (a value ending right at
              # ")" still matches), surrounding quotes are dropped, and the
              # lazy capture stops at the first closing parenthesis.
              - regex: .*url\(["']?(.+?)["']?\).*
                with: $1
      Movies:
        Name: &movieNameAttr
          # Union of the studio meta tag and the scene title; the matches are
          # joined with " - " to form the movie name.
          selector: //meta[@name="application-name"]/@content|$info//h1[1]/text()
          concat: " - "
        Duration: &durationAttr
          selector: $info//span[contains(text(),"Duration:")]
          postProcess:
            - replace:
                # Remove the "Duration:" label and the whitespace character
                # after it, keeping the remainder of the span text.
                - regex: Duration:\s
                  with: ""
        Date: *dateAttr
        Synopsis: *detailsAttr
  movieScraper:
    common:
      # Must be defined here as well: the aliased selectors below use `$info`.
      $info: *info
    movie:
      Name: *movieNameAttr
      Duration: *durationAttr
      Date: *dateAttr
      Synopsis: *detailsAttr
      Studio:
        Name: *studioName
      FrontImage: *imageSel
# Last Updated October 21, 2021