Add new scraper for sexselector.com & twistys.com, update multiple scrapers (#1014)

This commit is contained in:
aussiehuddo 2022-08-11 09:06:54 +10:00 committed by GitHub
parent 5f0ffb46c6
commit 12dc27b09c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 505 additions and 163 deletions

View File

@ -45,7 +45,7 @@ alexlegend.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
aliciasgoddesses.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
allanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
allanalallthetime.com|AllAnalAllTheTime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
allblackx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
allblackx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
allfinegirls.net|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
allgirlmassage.com|Algolia_AllGirlMassage.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
allherluv.com|AllHerLuv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
@ -120,7 +120,7 @@ babearchives.com|BabeArchives.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
babepedia.com|Babepedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database
baberotica.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
baberoticavr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
babes.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
babes.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|-
babesandstars.com|Babes&Stars.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
babesnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
babevr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR
@ -320,7 +320,7 @@ damnthatsbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
danejones.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
danni.com|Danni.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
darkx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -375,7 +375,7 @@ elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:
ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
eroticax.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
eroticax.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
eroticbeauty.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
eroticspice.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
erotiquetvlive.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -493,7 +493,7 @@ genuinesin.com|GenuineSin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
girlcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
girlfriendsfilms.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Lesbian
girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Lesbian
girlgirlmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
girlgrind.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Lesbian
girlsandstuds.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -531,7 +531,7 @@ hardcoreyouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardfuckgirls.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardonvr.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
hardtied.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
hardx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
hardx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
harmonyvision.com|HarmonyVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
hazel-tucker.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
hd19.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -654,7 +654,7 @@ jpnurse.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
jpshavers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
jpteacher.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
jschoolgirls.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|-
juliaannlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
karissa-diamond.com|Karissa-Diamond.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
karups.com|Karups.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -688,7 +688,7 @@ lesbea.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
lesbianass.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian
lesbianfactor.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
lesbiantribe.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lesbianx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
lesbianx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
lesworship.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
lethalhardcore.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
lethalhardcorevr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
@ -877,7 +877,7 @@ oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
outofthefamily.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
outofthefamily.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
p54u.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
pacopacomama.com|Paco.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
paintoy.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1033,6 +1033,7 @@ sexlikereal.com|SexLikeReal.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sexmex.xxx|SexMex.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
sexmywife.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sexsee.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
sexselector.com|SexSelector.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sexuallybroken.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sexvr.com|SexVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR
sexwithmuslims.com|porncz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1081,7 +1082,7 @@ spankmonster.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spearteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spermantino.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spermmania.com|SpermMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spermswallowers.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spermswallowers.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
spermswap.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
spizoo.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spoiledvirgins.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1130,7 +1131,7 @@ sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
sweetyx.com|SweetyX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
swinger-blog.xxx|SwingerBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
tabooheat.com|TabooHeat.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
tabooheat.com|Algolia_TabooHeat.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
taboopov.com|taboopov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
tacamateurs.com|TACAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
taliashepard.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1182,7 +1183,7 @@ tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
thedicksuckers.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theflourishamateurs.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theflourishfetish.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1247,8 +1248,8 @@ tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
tushyraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
tutor4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
twistedvisual.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
twistys.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
twistysnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
twistys.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
twistysnetwork.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
twotgirls.com|TwoTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
uk-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
ultrafilms.com|UltraFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@ -1328,7 +1329,7 @@ x-art.com|X-artcom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
xart.xxx|Xartxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
xconfessions.com|XConfessions.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
xcoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
xempire.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
xempire.com|Algolia_xEmpire.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
xevunleashed.com|Xevunleashed.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
xhamster.com|Xhamster.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
xlgirls.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-

View File

@ -3,6 +3,7 @@ sceneByURL:
- action: script
url:
- adulttime.com/en/video/
- outofthefamily.com/en/video/
script:
- python
- Algolia.py
@ -27,4 +28,4 @@ sceneByQueryFragment:
- Algolia.py
- girlsway
- validName
# Last Updated March 23, 2022
# Last Updated June 23, 2022

View File

@ -0,0 +1,30 @@
# Scraper definition for tabooheat.com. Every lookup below is delegated to
# the shared Algolia.py script, always with "tabooheat" as its site argument.
name: "TabooHeat"

# Scene lookup by URL: only URLs containing the listed fragment are handled.
sceneByURL:
  - action: script
    url:
      - tabooheat.com/en/video
    script:
      - python
      - Algolia.py
      - tabooheat

# Scene lookup from an existing scene fragment (no extra script argument).
sceneByFragment:
  action: script
  script:
    - python
    - Algolia.py
    - tabooheat

# Scene search by name: adds the "searchName" argument.
# NOTE(review): the meaning of "searchName" is defined in Algolia.py, which is
# not part of this file - confirm there.
sceneByName:
  action: script
  script:
    - python
    - Algolia.py
    - tabooheat
    - searchName

# Follow-up lookup for a search result: adds the "validName" argument.
# NOTE(review): semantics of "validName" also live in Algolia.py - confirm.
sceneByQueryFragment:
  action: script
  script:
    - python
    - Algolia.py
    - tabooheat
    - validName
# Last Updated June 22, 2022

View File

@ -0,0 +1,35 @@
# Scraper definition for the xEmpire group of sites. Every lookup below is
# delegated to the shared Algolia.py script with "xempire" as its site argument.
name: "xEmpire"

# Scene lookup by URL: one entry covering all six site domains listed here.
sceneByURL:
  - action: script
    url:
      - allblackx.com/en/video/
      - darkx.com/en/video/
      - eroticax.com/en/video/
      - hardx.com/en/video/
      - lesbianx.com/en/video/
      - xempire.com/en/video/
    script:
      - python
      - Algolia.py
      - xempire

# Scene lookup from an existing scene fragment (no extra script argument).
sceneByFragment:
  action: script
  script:
    - python
    - Algolia.py
    - xempire

# Scene search by name: adds the "searchName" argument.
# NOTE(review): the meaning of "searchName" is defined in Algolia.py, which is
# not part of this file - confirm there.
sceneByName:
  action: script
  script:
    - python
    - Algolia.py
    - xempire
    - searchName

# Follow-up lookup for a search result: adds the "validName" argument.
# NOTE(review): semantics of "validName" also live in Algolia.py - confirm.
sceneByQueryFragment:
  action: script
  script:
    - python
    - Algolia.py
    - xempire
    - validName
# Last Updated June 22, 2022

View File

@ -22,7 +22,12 @@ xPathScrapers:
- regex: ^
with: "https:"
Studio:
Name: //div[@class="vdoCast"]/a[1]/text()
Name:
selector: //div[@class="vdoCast"]/a[1]/text()
postProcess:
# Fix special cases
- map:
MomIsHorny: Mom is Horny
Date:
selector: //div[@class="vdoCast" and contains(text(), "Release:")]
postProcess:
@ -32,5 +37,4 @@ xPathScrapers:
- subScraper:
selector: //span[@class="thmb_mr_cmn thmb_mr_2 clearfix"]/span[@class="faTxt"]
- parseDate: Jan 2, 2006
# Last Updated November 08, 2020
# Last Updated June 08, 2022

View File

@ -7,7 +7,6 @@ sceneByURL:
# if you only have the id add an extra part after it eg for https://www.brazzers.com/video/123456/ use https://www.brazzers.com/video/123456/a
- brazzers.com/video
- men.com/sceneid/
- realitykings.com/scene/
scraper: sceneScraper
performerByURL:
@ -256,4 +255,4 @@ xPathScrapers:
driver:
useCDP: true
# Last Updated March 10, 2021
# Last Updated June 23, 2022

View File

@ -33,6 +33,12 @@ xPathScrapers:
selector: //h3[@class="site-name"]
postProcess:
- replace:
- regex: (\.com)$
with:
# Last Updated May 03, 2021
- regex: (.+?)(\.com)$
with: $1
# 'BlacksOnBlondes' => 'Blacks On Blondes'
- regex: ([a-z])-?([A-Z])
with: $1 $2
# Fix special cases
- map:
Glory Hole: Gloryhole
# Last Updated June 08, 2022

View File

@ -26,7 +26,7 @@ xPathScrapers:
Name: //div[@class="actress"]/a
Studio:
Name:
fixed: DorcelClub
fixed: Dorcel Club
Movies:
Name: //div[@class="left"]/span[@class="movie"]/a/text()
URL:
@ -55,7 +55,7 @@ xPathScrapers:
with: ":00:"
Studio:
Name:
fixed: DorcelClub
fixed: Dorcel Club
Director:
selector: //span[@class='director']/text()
postProcess:
@ -69,4 +69,4 @@ xPathScrapers:
- regex: ([^\s]*)\s1x
with: $1
Synopsis: //span[@class="full"]/p|/div[@class="content-text"]/p
# Last Updated August 07, 2021
# Last Updated June 27, 2022

View File

@ -22,9 +22,9 @@ xPathScrapers:
Image: //meta[@property="og:image"]/@content
Studio:
Name:
fixed: FreakMobMedia
fixed: Freak Mob Media
Tags:
Name: //span[@class="meta-info"][text()="Category" or text()="Tag"]/following-sibling::a/text()
Performers:
Name: //span[@class="meta-info" and text()="Model"]/following-sibling::a/text()
# Last Updated July 18, 2021
# Last Updated June 24, 2022

View File

@ -5,7 +5,6 @@ sceneByURL:
- 1000facials.com/en/scene/
- activeduty.com/en/video/
- alettaoceanempire.com/en/video/
- allblackx.com/en/video/
- analacrobats.com/en/video/
- analteenangels.com/en/video/
- ashleyfires.com/
@ -17,13 +16,11 @@ sceneByURL:
- buttman.com/
- cumshotoasis.com/
- currycreampie.com/
- darkx.com/en/video/
- devilsfilmparodies.com/
- devilsgangbangs.com/
- devonlee.com/
- dpfanatics.com/en/video/
- dylanryder.com/
- eroticax.com/en/video/
- evilangel.com/en/video/
- falconstudios.com/en/video/
- famedigital.com/
@ -37,14 +34,12 @@ sceneByURL:
- girlstryanal.com/en/video/
- grannyghetto.com/
- hairyundies.com/
- hardx.com/en/video/
- hothouse.com/en/video/
- immorallive.com/en/video/
- jaysinxxx.com/
- jonnidarkkoxxx.com/
- lanesisters.com/
- lesbianfactor.com/
- lesbianx.com/en/video/
- lexingtonsteele.com/
- lezcuties.com/en/video/
- maledigital.com/en/video/
@ -95,7 +90,6 @@ sceneByURL:
- webyoung.com/en/video/
- whiteghetto.com/
- wicked.com/en/video/
- xempire.com/en/video/
scraper: sceneScraper
performerByURL:
@ -300,4 +294,5 @@ xPathScrapers:
Name: //a[contains(@class, 'GA_Id_headerLogo')]/span[@class='linkMainCaption']/text()
FrontImage: //a[@class='frontCoverImg']/@href
BackImage: //a[@class='backCoverImg']/@href
# Last Updated February 01, 2022
# Last Updated June 22, 2022

View File

@ -9,6 +9,16 @@ sceneByURL:
- spermswallowers.com/
scraper: sceneScraper
galleryByURL:
- action: scrapeXPath
url:
- julesjordan.com/
- girlgirl.com/
- manuelferrara.com/
- theassfactory.com/
- spermswallowers.com/
scraper: sceneScraper
performerByURL:
- action: scrapeXPath
url:
@ -26,20 +36,35 @@ xPathScrapers:
common:
$details: //div[@class="backgroundcolor_info"]
$galinfo: //div[@class="gallery_info"]
scene:
Title: //div[@class="title_bar"]/span/text()
Date:
Title: &titleAttr
selector: //div[@class="title_bar"]/span/text()
Date: &dateAttr
selector: $details//div[@class="cell update_date"]/text()
postProcess:
- parseDate: 01/02/2006
Details:
Details: &detailsAttr
selector: $details//span[@class="update_description"]/text()
concat: " "
Tags:
Tags: &tagsAttr
Name: //span[@class="update_tags"]/a/text()
Performers:
Performers: &performersAttr
Name: $details//span[@class="update_models"]/a/text()|$galinfo//span[@class="update_models"]/div/a/text()
URL: $details//span[@class="update_models"]/a/@href|$galinfo//span[@class="update_models"]/div/a/@href
Studio: &studioAttr
Name:
selector: //base/@href
postProcess:
- replace:
- regex: (.+\/\/|www.|\..+)
with:
- map:
julesjordan: Jules Jordan
girlgirl: Girl Girl
manuelferrara: Manuel Ferrara
theassfactory: The Ass Factory
spermswallowers: Sperm Swallowers
Movies:
Name: $details//span[@class="update_dvds"]/a/text()
URL: $details//span[@class="update_dvds"]/a/@href
@ -52,6 +77,14 @@ xPathScrapers:
- regex: (^\/)
with: https://www.julesjordan.com/
gallery:
Title: *titleAttr
Date: *dateAttr
Details: *detailsAttr
Tags: *tagsAttr
Performers: *performersAttr
Studio: *studioAttr
performerScraper:
performer:
Name: //span[@class='title_bar_hilite']/text()
@ -94,5 +127,4 @@ xPathScrapers:
Name: //span[@class="update_date"]/text()[contains(.,"Studio")]/following-sibling::a/text()
FrontImage: //div[@class="front"]/a/img/@src0_3x
BackImage: //div[@class="back"]/a/img/@src0_3x
# Last Updated November 21, 2020
# Last Updated June 09, 2022

View File

@ -17,8 +17,6 @@ sceneByURL:
- sweetheartvideo.com/scene/
- sweetsinner.com/scene/
- teenslovehugecocks.com/scene/
- twistys.com/scene/
- twistysnetwork.com/scene/
scraper: sceneScraper
- action: scrapeXPath
url:
@ -65,14 +63,12 @@ performerByURL:
- sweetsinner.com/model/
- teenslovehugecocks.com/model/
- transsensual.com/model/
- twistys.com/model/
- twistysnetwork.com/model/
scraper: performerScraper
xPathScrapers:
sceneScraper:
common:
$section: //img[@alt=""][contains(@src,"poster")]/ancestor::section
$section: //div[contains(@class,"tg5e7m")]/ancestor::section
scene:
Title: $section//h1/text()|$section//h2/text()
Date:
@ -94,11 +90,25 @@ xPathScrapers:
selector: $section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href
postProcess:
- replace:
- regex: .*www\.
with: ""
- regex: \.com.+
with: ""
Image: //img[@alt=""][contains(@src,"poster")]/@src
- regex: (.+www\.)(\w+)(.+)
with: $2
- map:
babesnetwork: Babes
devianthardcore: Deviant Hardcore
doghousedigital: Doghouse Digital
familyhookups: Family Hookups
girlgrind: Girl Grind
kinkyspa: Kinky Spa
Lets Try Anal: Let's Try Anal
lookathernow: Look At Her Now
mofosnetwork: Mofos
realityjunkies: Reality Junkies
squirted: Squirted
sweetheartvideo: Sweetheart Video
SweetSinner: Sweet Sinner
sweetsinner: Sweet Sinner
teenslovehugecocks: Teens Love Huge Cocks
Image: $section//img[contains(@src,"poster")]/@src
scriptScraper:
common:
$script: //script[@type="application/ld+json"]
@ -123,7 +133,26 @@ xPathScrapers:
- regex: '.+"thumbnailUrl": "([^"]+)".+'
with: $1
Studio:
Name: //meta[@name="dti.network"]/@content
Name:
selector: //div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href
postProcess:
- replace:
- regex: (.+www\.)(\w+)(.+)
with: $2
- map:
digitalplayground: Digital Playground
fakehostel: Fake Hostel
faketaxi: Fake Taxi
familysinners: Family Sinners
iconmale: Icon Male
iknowthatgirl: I Know That Girl
lilhumpers: Lil Humpers
milfed: Milfed
publicagent: Public Agent
seancody: Sean Cody
transangels: Trans Angels
transsensual: TransSensual
trueamateurs: True Amateurs
Tags: *tags
Details:
selector: $script
@ -153,15 +182,13 @@ xPathScrapers:
selector: //link[@rel="canonical"]/@href
postProcess:
- replace:
- regex: .*www\.
with: ""
- regex: \.com.+
with: ""
- regex: " "
with: ""
- regex: (.+www\.)(\w+)(.+)
with: $2
- map:
digitalplayground: "Digital Playground"
transsensual: TransSensual
kinkyspa: Kinky Spa
FrontImage: $section//picture/img[@alt]/@src
performerScraper:
@ -290,4 +317,4 @@ xPathScrapers:
Image:
selector: //img[contains(@src, "model")]/@src
URL: //link[@rel="canonical"]/@href
# Last Updated February 20, 2022
# Last Updated June 08, 2022

View File

@ -33,9 +33,18 @@ xPathScrapers:
with: "hi.jpg"
Studio:
Name:
selector: //div[contains(@class,"siteName")]/a[@href]/text()
selector: //div[contains(@class,"siteName")]/a[@href]
postProcess:
- replace:
- regex: \sX\s.*
with: ""
# Last Updated August 27, 2021
- map:
GotMylf: Got Mylf
FullOfJoi: Full Of JOI
LoneMilf: Lone MILF
MilfBody: MILF Body
MomDrips: Mom Drips
MomShoot: Mom Shoot
Mylf Of The Month: Mylf of the Month
MylfBlows: Mylf Blows
MylfBoss: Mylf Boss
MylfSelects: Mylf Selects
StayHomeMilf: Stay Home Milf
# Last Updated June 27, 2022

View File

@ -15,7 +15,7 @@ xPathScrapers:
$info: //div[@class="content-page-info"]
scene:
Title: //h2[@class="title"]/text()|//h1[@class="title"]/text()
Details: $info/div[starts-with(@class,"desc")]
Details: $info/div[starts-with(@class,"desc")]|//div [@class="desc"]/text()
Performers:
Name:
selector: $info//h4[@class="models"]/a
@ -35,4 +35,4 @@ xPathScrapers:
with: "$1 $2"
- parseDate: 2 Jan 2006
Image: //video[contains(@id,"ypp-player")]/@poster|//div[@id="main-player"]/@data-screenshot|//meta[@property="og:image"]/@content
# Last Updated March 02, 2022
# Last Updated June 06, 2022

View File

@ -55,15 +55,15 @@ xPathScrapers:
- regex: (.+www\.)(\w+)(.+)
with: $2
- map:
analmom: AnalMom
badmilfs: BadMilfs
analmom: Anal Mom
badmilfs: Bad MILFs
bbcparadise: BBC Paradise
bffs: BFFs
blackvalleygirls: Black Valley Girls
dadcrush: DadCrush
daughterswap: Daughter Swap
dyked: Dyked
exxxtrasmall: ExxxtraSmall
exxxtrasmall: Exxxtra Small
familystrokes: Family Strokes
fostertapes: Foster Tapes
freeusefantasy: Freeuse Fantasy
@ -73,22 +73,22 @@ xPathScrapers:
littleasians: Little Asians
momswap: Mom Swap
mybabysittersclub: My Babysitters Club
mylfdom: MYLFDom
mylfdom: Mylfdom
notmygrandpa: Not My Grandpa
pervdoctor: PervDoctor
pervmom: PervMom
pervnana: PervNana
pervtherapy: PervTherapy
pervdoctor: Perv Doctor
pervmom: Perv Mom
pervnana: Perv Nana
pervtherapy: Perv Therapy
shoplyfter: Shoplyfter
shoplyftermylf: Shoplyfter MYLF
sislovesme: SisLovesMe
sislovesme: Sis Loves Me
sisswap: Sis Swap
stayhomepov: StayHomePOV
stepsiblings: StepSiblings
stepsiblings: Step Siblings
teencurves: Teen Curves
teenpies: Teen Pies
teensloveanal: Teens Love Anal
teensloveblackcocks: Teens Love Black Cocks
thickumz: Thickumz
tinysis: TinySis
# Last Updated February 23, 2022
tinysis: Tiny Sis
# Last Updated June 27, 2022

View File

@ -8,7 +8,7 @@ xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="shadow video-details"]/h1/text()
Details: (//div[@class="shadow video-details"]/p[not(@class)])[1]/text()
Details: //div[@class="shadow video-details"]/p[not(@class)][1]
Date:
selector: //p[@class="video-date"]/text()[2]
postProcess:
@ -17,8 +17,11 @@ xPathScrapers:
with: ","
- parseDate: January 2, 2006
Image: //section[@class="top-wrapper"]/div//video/@poster
Studio:
Name:
fixed: Producers Fun
Tags:
Name:
selector: //p[@class="video-tags"]/a/text()
# Last Updated November 08, 2020
# Last Updated June 23, 2022

View File

@ -2,7 +2,6 @@ name: "RealityKingsOL"
sceneByURL:
- action: scrapeXPath
url:
- babes.com/scene/
- bellesafilms.com/scene/
- bellesahouse.com/scene/
- danejones.com/scene/
@ -45,10 +44,21 @@ xPathScrapers:
- replace:
- regex: .*www\.(.+)\.com.*
with: $1
- map:
bellesafilms: Bellesa Films
bellesahouse: Bellesa House
danejones: Dane Jones
erito: Erito
lesbea: Lesbea
propertysex: Property Sex
shewillcheat: She Will Cheat
sexyhub: Sexy Hub
welivetogether: We Live Together
Image:
selector: //img[@alt=""][contains(@src,"poster")]/@src|//script[@type="application/ld+json"]/text()
postProcess:
- replace:
- regex: '.*"thumbnailUrl": "([^"]+)".*'
with: $1
# Last Updated May 07, 2022
# Last Updated June 23, 2022

33
scrapers/SexSelector.yml Normal file
View File

@ -0,0 +1,33 @@
# XPath scraper for sexselector.com scene pages.
name: "SexSelector"

# Only URLs containing the listed fragment are routed to sceneScraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - sexselector.com/video/
    scraper: sceneScraper

xPathScrapers:
  sceneScraper:
    scene:
      Title: //div[@class="player__info__top"]/h3/text()
      Details: //p[@class="player__description"]/text()
      Date:
        # Selects the paragraph whose text starts with "Released:".
        selector: //div[@class="player"]/p[starts-with(., "Released:")]
        postProcess:
          # Strip the "Released: " label so only the date text reaches parseDate.
          # The replacement is an explicit empty string rather than a bare
          # (null) value, matching the `with: ""` form used elsewhere.
          - replace:
              - regex: "^Released: "
                with: ""
          - parseDate: Jan 2, 2006
      Image:
        # Starts from the href of the header logo link, not from an <img>.
        selector: //a[@class="header__menu header__menu--logo"]/@href
        postProcess:
          # Keep only the captured "pg<digits>" id and rebuild a poster URL
          # around it. The second reference is written ${1} so the following
          # "_01_2160" is not read as part of the group name.
          # NOTE(review): the poster URL pattern is hard-coded - confirm it is
          # still served by the site.
          - replace:
              - regex: .+(pg\d+)
                with: https://sm-members.bangbros.com/shoots/sexselector/$1/poster/${1}_01_2160.jpg
      Studio:
        Name:
          fixed: Sex Selector
      Performers:
        Name: //div[@class="player__stats"]//a/text()
      Tags:
        Name: //div[@class="video__tags__list"]/a/text()
# Last Updated June 06, 2022

View File

@ -1,29 +0,0 @@
name: TabooHeat
sceneByURL:
- action: scrapeXPath
url:
- tabooheat.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //h3[@class="sceneTitle"]
Image:
selector: //a[@class="sceneLink "]/img/@src
Date:
selector: //li[@class="updatedDate"]
postProcess:
- replace:
- regex: .*(\d{4}-\d{2}-\d{2}).+
with: $1
- parseDate: 2006-02-01
Tags:
Name: //div[@class="sceneCol sceneColCategories"]/a
Performers:
Name:
selector: //div[@class="sceneCol sceneColActors"]/a
Studio:
Name:
fixed: TabooHeat
# Last Updated September 07, 2021

View File

@ -47,29 +47,29 @@ xPathScrapers:
selector: //meta[@property="og:url"]/@content
postProcess:
- map:
https://AbbieMaley.com: AbbieMaley
https://AbbieMaley.com: Abbie Maley
https://AnalBBC.com: AnalBBC
https://ANALIZED.com: Analized
https://analviolation.com: AnalViolation
https://BadDaddyPOV.com: BadDaddyPOV
https://BadFamilyPov.com: BadFamilyPov
https://BadMommyPov.com: BadMommyPov
https://BadMommyPOV.com: BadMommyPov
https://DownToFuckDating.com: DownToFuckDating
https://DTFsluts.com: DTFsluts
https://analviolation.com: Anal Violation
https://BadDaddyPOV.com: Bad Daddy POV
https://BadFamilyPov.com: Bad Family POV
https://BadMommyPov.com: Bad Mommy POV
https://BadMommyPOV.com: Bad Mommy POV
https://DownToFuckDating.com: Down To Fuck Dating
https://DTFsluts.com: DTF Sluts
https://Girlfaction.com: Girlfaction
https://HerGape.com: HerGape
https://HomemadeAnalWhores.com: HomemadeAnalWhores
https://JamesDeen.com: JamesDeen
https://OnlyPrince.com: OnlyPrince
https://PervertGallery.com: PervertGallery
https://PornForce.com: PornForce
https://POVPerverts.net: POVPerverts
https://publicsexdate.com: PublicSexDate
https://SlutInspection.com: SlutInspection
https://teenageanalsluts.com: TeenageAnalSluts
https://TeenageCorruption.com: TeenageCorruption
https://TwistedVisual.com: TwistedVisual
https://yourmomdoesanal.com: YourMomDoesAnal
https://YourMomDoesPorn.com: YourMomDoesPorn
# Last Updated June 05, 2022
https://HerGape.com: Her Gape
https://HomemadeAnalWhores.com: Homemade Anal Whores
https://JamesDeen.com: James Deen
https://OnlyPrince.com: Only Prince
https://PervertGallery.com: Pervert Gallery
https://PornForce.com: Porn Force
https://POVPerverts.net: POV Perverts
https://publicsexdate.com: Public Sex Date
https://SlutInspection.com: Slut Inspection
https://teenageanalsluts.com: Teenage Anal Sluts
https://TeenageCorruption.com: Teenage Corruption
https://TwistedVisual.com: Twisted Visual
https://yourmomdoesanal.com: Your Mom Does Anal
https://YourMomDoesPorn.com: Your Mom Does Porn
# Last Updated June 23, 2022

182
scrapers/Twistys.yml Normal file
View File

@ -0,0 +1,182 @@
# XPath scraper for twistys.com / twistysnetwork.com scene and model pages.
# NOTE(review): class fragments such as "edmOvr" or "jywyKe" look
# auto-generated and may change when the site is rebuilt - confirm against
# the live pages.
name: "Twistys"

# Scene pages on either domain go to sceneScraper.
sceneByURL:
  - action: scrapeXPath
    url:
      - twistys.com/scene/
      - twistysnetwork.com/scene/
    scraper: sceneScraper

# Model pages on either domain go to performerScraper.
performerByURL:
  - action: scrapeXPath
    url:
      - twistys.com/model/
      - twistysnetwork.com/model/
    scraper: performerScraper

xPathScrapers:
  sceneScraper:
    scene:
      Title: //h2[contains(@class,"edmOvr")]/text()
      Date:
        selector: //h2[contains(@class,"jywyKe")]/text()
        postProcess:
          - parseDate: January 2, 2006
      Details: //p[contains(@class,"beuOjr")]/text()
      Tags:
        Name:
          selector: //div[contains(@class,"jEorCN")]/a/text()
          postProcess:
            # Drop a trailing comma (and any whitespace after it) from each
            # tag. The replacement is an explicit empty string, consistent
            # with the Country rule further down in this file.
            - replace:
                - regex: ',\s*$'
                  with: ""
      Performers:
        Name: //h2[contains(@class,"hgImKM")]//a/text()
      Studio:
        Name:
          selector: //div[contains(@class,"fRnfXb")]//a/text()
          postProcess:
            # Translate the scraped site label into a display name.
            - map:
                anettedawn: Anette Dawn
                twistys: Twistys
                TwistysHard: Twistys Hard
                whengirlsplay: When Girls Play
      Image: //div[contains(@class,"eIJfvU")]//img/@src
  performerScraper:
    performer:
      Name: //div[contains(@class,"kpSMJE")]/text()
      Birthdate:
        selector: //li[contains(.,"Date of Birth")]/span[not(contains(text(),"Date"))]/text()
        postProcess:
          # Reduce an ordinal suffix followed by a comma ("1st," / "2nd," /
          # "3rd," / "4th,") to just "," so the value fits the parseDate
          # layout below.
          - replace:
              - regex: th,|st,|rd,|nd,
                with: ","
          - parseDate: January 2, 2006
      Measurements: //li[contains(.,"Measurements")]/span[not(contains(text(),"Measurements"))]/text()
      Height:
        selector: //li[contains(.,"Height")]/span[not(contains(text(),"Height"))]/text()
        postProcess:
          - feetToCm: true
      HairColor: //span[contains(.,"Hair Color")]/ancestor::li//a/text()
      Ethnicity:
        selector: //span[contains(.,"Ethnicity")]/ancestor::li//a/text()
        postProcess:
          - map:
              Asian: Asian
              Caucasian: White
              Black: Black
              Latina: Hispanic
      Details: //p[contains(@class,"beuOjr")]/text()
      Image:
        selector: //img[contains(@src, "model")]/@src
      URL: //link[@rel="canonical"]/@href
      Country:
        selector: //li[contains(.,"Birth Place")]/span[not(contains(text(),"Birth"))]/text()
        postProcess:
          # Remove the leading "<text>, " part(s) of the birth place so only
          # the last component is left for the map below.
          - replace:
              - regex: .*?,\s
                with: ""
          # "N/A" becomes empty; US state abbreviations and full state names
          # are all collapsed to "USA".
          - map:
              N/A: ""
              AK: "USA"
              AL: "USA"
              AR: "USA"
              AZ: "USA"
              CA: "USA"
              CO: "USA"
              CT: "USA"
              DC: "USA"
              DE: "USA"
              FL: "USA"
              GA: "USA"
              HI: "USA"
              IA: "USA"
              ID: "USA"
              IL: "USA"
              IN: "USA"
              KS: "USA"
              KY: "USA"
              LA: "USA"
              MA: "USA"
              MD: "USA"
              ME: "USA"
              MI: "USA"
              MN: "USA"
              MO: "USA"
              MS: "USA"
              MT: "USA"
              NC: "USA"
              ND: "USA"
              NE: "USA"
              NH: "USA"
              NJ: "USA"
              NM: "USA"
              NV: "USA"
              NY: "USA"
              OH: "USA"
              OK: "USA"
              OR: "USA"
              PA: "USA"
              RI: "USA"
              SC: "USA"
              SD: "USA"
              TN: "USA"
              TX: "USA"
              UT: "USA"
              VA: "USA"
              VT: "USA"
              WA: "USA"
              WI: "USA"
              WV: "USA"
              WY: "USA"
              Alabama: "USA"
              Alaska: "USA"
              Arizona: "USA"
              Arkansas: "USA"
              California: "USA"
              Colorado: "USA"
              Connecticut: "USA"
              Delaware: "USA"
              Florida: "USA"
              Georgia: "USA"
              Hawaii: "USA"
              Idaho: "USA"
              Illinois: "USA"
              Indiana: "USA"
              Iowa: "USA"
              Kansas: "USA"
              Kentucky: "USA"
              Louisiana: "USA"
              Maine: "USA"
              Maryland: "USA"
              Massachusetts: "USA"
              Michigan: "USA"
              Minnesota: "USA"
              Mississippi: "USA"
              Missouri: "USA"
              Montana: "USA"
              Nebraska: "USA"
              Nevada: "USA"
              New Hampshire: "USA"
              New Jersey: "USA"
              New Mexico: "USA"
              New York: "USA"
              North Carolina: "USA"
              North Dakota: "USA"
              Ohio: "USA"
              Oklahoma: "USA"
              Oregon: "USA"
              Pennsylvania: "USA"
              Rhode Island: "USA"
              South Carolina: "USA"
              South Dakota: "USA"
              Tennessee: "USA"
              Texas: "USA"
              Utah: "USA"
              Vermont: "USA"
              Virginia: "USA"
              Washington: "USA"
              West Virginia: "USA"
              Wisconsin: "USA"
              Wyoming: "USA"
# Last Updated June 08, 2022

View File

@ -51,7 +51,7 @@ xPathScrapers:
hookuphotshot: HookUp Hotshot
hotandtatted: Hot and Tatted
hussiepass: Hussie Pass
seehimfuck: See HIM Fuck
seehimsolo: See HIM Solo
seehimfuck: See Him Fuck
seehimsolo: See Him Solo
povpornstars: POV Pornstars
# Last Updated July 13, 2022

View File

@ -37,8 +37,12 @@ xPathScrapers:
# split: ","
Studio:
Name:
fixed: PurgatoryX
selector: //p[@class="series"]//span/text()
postProcess:
- replace:
- regex: " Series"
with: ""
# URL need to have "?trilogy=" to get the name of the Trilogy
Movies:
Name: //span[contains(text(),"Part of")]/following-sibling::text()
# Last Updated December 03, 2020
# Last Updated June 27, 2022

View File

@ -29,12 +29,12 @@ xPathScrapers:
Performers:
Name: $infodata/p[contains(text(), 'Featuring:')]/a
Studio:
Name: //div[@class="container"]/a[@class="logo"]/@title
Name:
fixed: Wild on Cam
Image:
selector: //script[contains(., "contentthumbs")]/text()|//img[@class="lazyload update_thumb thumbs stdimage"]/@src
postProcess:
- replace:
- regex: (.+)(https.+contentthumbs.+)(" width="100%)(.+)
with: $2
# Last Updated October 19, 2021
# Last Updated June 27, 2022