Create .gitattributes (#265)

This commit is contained in:
peolic 2020-11-08 21:09:03 +02:00 committed by GitHub
parent a9603e2a60
commit 6e013d1064
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 781 additions and 776 deletions

5
.gitattributes vendored Normal file
View File

@ -0,0 +1,5 @@
* text=auto
*.yml eol=lf diff=yaml linguist-detectable
*.py eol=lf diff=python
*.md eol=lf diff=markdown

View File

@ -1,34 +1,34 @@
name: "FemDomEmpire"
sceneByURL:
- action: scrapeXPath
url:
- femdomempire.com/tour/trailers/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="videoDetails clear"]/h3
Date:
selector: //p[span[contains(.,"Date Added")]]
postProcess:
- replace:
- regex: "Date Added:"
with:
- parseDate: January 2, 2006
Details: //div[@class="videoDetails clear"]/p
Performers:
Name: //li[@class="update_models"]/a
Tags:
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
Studio:
Name:
fixed: FemDomEmpire
Image:
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
postProcess:
- replace:
- regex: .*/tour/content//
with: "https://femdomempire.com/tour/content/"
# Last Updated October 10, 2020
name: "FemDomEmpire"
sceneByURL:
- action: scrapeXPath
url:
- femdomempire.com/tour/trailers/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="videoDetails clear"]/h3
Date:
selector: //p[span[contains(.,"Date Added")]]
postProcess:
- replace:
- regex: "Date Added:"
with:
- parseDate: January 2, 2006
Details: //div[@class="videoDetails clear"]/p
Performers:
Name: //li[@class="update_models"]/a
Tags:
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
Studio:
Name:
fixed: FemDomEmpire
Image:
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
postProcess:
- replace:
- regex: .*/tour/content//
with: "https://femdomempire.com/tour/content/"
# Last Updated October 10, 2020
# Note: The "Date Added" value on older scenes seems to be incorrect on the FemDomEmpire site. Newer scenes have the correct date.

View File

@ -1,34 +1,34 @@
name: firstanalquest
sceneByURL:
- action: scrapeXPath
url:
- firstanalquest.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
scene:
Title: $title
Details:
selector: //div[@class="text-desc"]
Tags:
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
Performers:
Name: $performer/text()
Image: //img[@class="player-preview"]/@src
Studio:
Name:
fixed: First Anal Quest
Date:
selector: $title
postProcess:
- replace:
- regex: \s
with: +
- regex: ^([^-]+-?).+
with: http://www.firstanalquest.com/search/?q=$1
- subScraper: //span[@class="thumb-added"]/text()
- parseDate: Jan 2, 2006
# Last Updated August 06, 2020
name: firstanalquest
sceneByURL:
- action: scrapeXPath
url:
- firstanalquest.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
scene:
Title: $title
Details:
selector: //div[@class="text-desc"]
Tags:
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
Performers:
Name: $performer/text()
Image: //img[@class="player-preview"]/@src
Studio:
Name:
fixed: First Anal Quest
Date:
selector: $title
postProcess:
- replace:
- regex: \s
with: +
- regex: ^([^-]+-?).+
with: http://www.firstanalquest.com/search/?q=$1
- subScraper: //span[@class="thumb-added"]/text()
- parseDate: Jan 2, 2006
# Last Updated August 06, 2020

View File

@ -1,25 +1,25 @@
name: fistertwister
sceneByURL:
- action: scrapeXPath
url:
- fistertwister.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="jumbotron"]/h2/text()[1]
Date:
selector: //li[contains(text(),"Released on:")]/strong/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
Tags:
Name: //a[@class="btn btn-default"]/text()
Performers:
Name: //li[contains(text(),"Featuring:")]//a/text()
Studio:
Name:
fixed: Fister Twister
#Image: //video[@id="video"]/@poster
Image: //meta[@property="og:image"]/@content
# Last Updated August 06, 2020
name: fistertwister
sceneByURL:
- action: scrapeXPath
url:
- fistertwister.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="jumbotron"]/h2/text()[1]
Date:
selector: //li[contains(text(),"Released on:")]/strong/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
Tags:
Name: //a[@class="btn btn-default"]/text()
Performers:
Name: //li[contains(text(),"Featuring:")]//a/text()
Studio:
Name:
fixed: Fister Twister
#Image: //video[@id="video"]/@poster
Image: //meta[@property="og:image"]/@content
# Last Updated August 06, 2020

View File

@ -1,42 +1,42 @@
name: "FittingRoom"
sceneByURL:
- action: scrapeXPath
url:
- fitting-room.com/videos/
scraper: sceneScraper
galleryByURL:
- action: scrapeXPath
url:
- fitting-room.com/albums/
scraper: galleryScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content
Performers:
Name: //div[@class="info-model"]//p[@class="name"]
Tags:
Name: //meta[@property="article:tag"]/@content
Studio:
Name:
fixed: Fitting-Room
Image: //meta[@property="twitter:image"]/@content
URL: //meta[@property="twitter:url"]/@content
galleryScraper:
gallery:
Title:
selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content
Performers:
Name: //div[@class="info-model"]//p[@class="name"]
Tags:
Name: //meta[@property="article:tag"]/@content
Studio:
Name:
fixed: Fitting-Room
URL: //meta[@property="twitter:url"]/@content
# Last Updated October 23, 2020
name: "FittingRoom"
sceneByURL:
- action: scrapeXPath
url:
- fitting-room.com/videos/
scraper: sceneScraper
galleryByURL:
- action: scrapeXPath
url:
- fitting-room.com/albums/
scraper: galleryScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content
Performers:
Name: //div[@class="info-model"]//p[@class="name"]
Tags:
Name: //meta[@property="article:tag"]/@content
Studio:
Name:
fixed: Fitting-Room
Image: //meta[@property="twitter:image"]/@content
URL: //meta[@property="twitter:url"]/@content
galleryScraper:
gallery:
Title:
selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content
Performers:
Name: //div[@class="info-model"]//p[@class="name"]
Tags:
Name: //meta[@property="article:tag"]/@content
Studio:
Name:
fixed: Fitting-Room
URL: //meta[@property="twitter:url"]/@content
# Last Updated October 23, 2020

View File

@ -1,29 +1,29 @@
name: Mandyflores
sceneByURL:
- action: scrapeXPath
url:
- mandyflores.com/vod/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$updateDesc: //span[@class="update_description"]
$divCenter: /div[@align="center"]
scene:
Title: //span[@class="title_bar_hilite"]
Details:
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
concat: "\n\n"
Date:
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
postProcess:
- parseDate: 01/02/2006
Studio:
Name:
fixed: Mandyflores
Tags:
Name: //span[@class="update_tags"]/a
Performers:
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
# Last Updated September 16, 2020
name: Mandyflores
sceneByURL:
- action: scrapeXPath
url:
- mandyflores.com/vod/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$updateDesc: //span[@class="update_description"]
$divCenter: /div[@align="center"]
scene:
Title: //span[@class="title_bar_hilite"]
Details:
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
concat: "\n\n"
Date:
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
postProcess:
- parseDate: 01/02/2006
Studio:
Name:
fixed: Mandyflores
Tags:
Name: //span[@class="update_tags"]/a
Performers:
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
# Last Updated September 16, 2020

View File

@ -1,114 +1,114 @@
name: Modelhub
performerByName:
action: scrapeXPath
queryURL: https://modelhub.com/model/search?q={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- modelhub.com
scraper: performerScraper
sceneByURL:
- action: scrapeXPath
url:
- modelhub.com/video/
scraper: sceneScraper
xPathScrapers:
performerSearch:
performer:
Name: //div[@class="modelInfo"]/a/h3/text()
URL:
selector: //li[@class="modelBox"]/a/@href
postProcess:
- replace:
- regex: ^(.*)$
with: "https://www.modelhub.com$1/bio"
performerScraper:
common:
$infoPiece: //div[@class="modelContent"]
$modelAttributes: //ul[@class="modelAttributes"]/li
performer:
Name: //div[@class="infoSection"]/h1/text()
Birthdate:
selector: //span[@class='bday js_lazy_bkg']/text()
parseDate: January 2, 2006
Gender:
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
postProcess:
- replace:
- regex: "Couple"
with:
Ethnicity:
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
postProcess:
- map:
Latin: "hispanic"
Other: ""
Twitter:
selector: //a[@class='twitter js_lazy_bkg']/@href
postProcess:
- replace:
- regex: .+(twitter.com)/(.+)
with: $2
- regex: (.+)\?(.*)
with: $1
Instagram:
selector: //a[@class="insta js_lazy_bkg"]/@href
postProcess:
- replace:
- regex: .+(instagram.com/)(.+)
with: $2
Height:
selector: $modelAttributes/div[contains(.,"Height")]/span
postProcess:
- replace:
- regex: .*\((\d+)cm\)
with: $1
FakeTits:
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
postProcess:
- map:
Natural: "No"
Fake: "Yes"
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
URL: //meta[@name="twitter:url"]/@content
Image: //img[@class='profileImg lazy']/@data-src
Country:
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
postProcess:
- map:
"AR": "Argentina"
"AT": "Austria"
"AU": "Australia"
"BR": "Brazil"
"CA": "Canada"
"colombia, CO": "Colombia"
"CO": "Colombia"
"CZ": "Czech Republic"
"DE": "Germany"
"ES": "Spain"
"FR": "France"
"GB": "United Kingdom"
"HU": "Hungary"
"IT": "Italy"
"Peru, PE": "Peru"
"PE": "Peru"
"PH": "Philippines"
"RO": "Romania"
"RU": "Russia"
"US": "United States"
sceneScraper:
scene:
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
Tags:
Name: //div[@class='videoCategories']/div/a/text()
Image: //meta[@property="og:image"]/@content
Details: //p[@ class="videoDescription"]
Performers:
Name: //div[@class='videoAvatar']/div[1]/a[2]
# Last Updated Jul 25, 2020
name: Modelhub
performerByName:
action: scrapeXPath
queryURL: https://modelhub.com/model/search?q={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- modelhub.com
scraper: performerScraper
sceneByURL:
- action: scrapeXPath
url:
- modelhub.com/video/
scraper: sceneScraper
xPathScrapers:
performerSearch:
performer:
Name: //div[@class="modelInfo"]/a/h3/text()
URL:
selector: //li[@class="modelBox"]/a/@href
postProcess:
- replace:
- regex: ^(.*)$
with: "https://www.modelhub.com$1/bio"
performerScraper:
common:
$infoPiece: //div[@class="modelContent"]
$modelAttributes: //ul[@class="modelAttributes"]/li
performer:
Name: //div[@class="infoSection"]/h1/text()
Birthdate:
selector: //span[@class='bday js_lazy_bkg']/text()
parseDate: January 2, 2006
Gender:
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
postProcess:
- replace:
- regex: "Couple"
with:
Ethnicity:
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
postProcess:
- map:
Latin: "hispanic"
Other: ""
Twitter:
selector: //a[@class='twitter js_lazy_bkg']/@href
postProcess:
- replace:
- regex: .+(twitter.com)/(.+)
with: $2
- regex: (.+)\?(.*)
with: $1
Instagram:
selector: //a[@class="insta js_lazy_bkg"]/@href
postProcess:
- replace:
- regex: .+(instagram.com/)(.+)
with: $2
Height:
selector: $modelAttributes/div[contains(.,"Height")]/span
postProcess:
- replace:
- regex: .*\((\d+)cm\)
with: $1
FakeTits:
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
postProcess:
- map:
Natural: "No"
Fake: "Yes"
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
URL: //meta[@name="twitter:url"]/@content
Image: //img[@class='profileImg lazy']/@data-src
Country:
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
postProcess:
- map:
"AR": "Argentina"
"AT": "Austria"
"AU": "Australia"
"BR": "Brazil"
"CA": "Canada"
"colombia, CO": "Colombia"
"CO": "Colombia"
"CZ": "Czech Republic"
"DE": "Germany"
"ES": "Spain"
"FR": "France"
"GB": "United Kingdom"
"HU": "Hungary"
"IT": "Italy"
"Peru, PE": "Peru"
"PE": "Peru"
"PH": "Philippines"
"RO": "Romania"
"RU": "Russia"
"US": "United States"
sceneScraper:
scene:
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
Tags:
Name: //div[@class='videoCategories']/div/a/text()
Image: //meta[@property="og:image"]/@content
Details: //p[@ class="videoDescription"]
Performers:
Name: //div[@class='videoAvatar']/div[1]/a[2]
# Last Updated Jul 25, 2020

View File

@ -1,55 +1,55 @@
name: "PervCity"
sceneByURL:
- action: scrapeXPath
url:
- analoverdose.com/trailers/
- bangingbeauties.com/trailers/
- chocolatebjs.com/trailers/
- oraloverdose.com/trailers/
- pervcity.com/trailers/
- upherasshole.com/trailers/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$sceneinfo: //div[@class="videoInfo"]
$base: //head/base/@href
scene:
Title: $sceneinfo/div[@class="infoHeader"]
Performers:
Name: $sceneinfo//span[@class="tour_update_models"]/a
Details: $sceneinfo//p/text()
Image:
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
concat: "|"
postProcess:
- replace:
- regex: ([^|]+)\|(.*)/(content/.+)
with: $1$3
Date:
selector: $sceneinfo/div[@class="infoHeader"]
postProcess:
- replace:
- regex: \s
with: "_"
- regex: ^
with: "https://pervcity.com/search.php?query="
- subScraper:
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
postProcess:
- parseDate: 01-02-2006
Studio:
Name:
selector: //head/base/@href
postProcess:
- replace:
- regex: .+/([^\.]+)\.com/.*
with: $1
- map:
analoverdose: AnalOverdose
chocolatebjs: ChocolateBjs
bangingbeauties: BangingBeauties
oraloverdose: OralOverdose
pervcity: PervCity
upherasshole: UpHerAsshole
# Last Updated October 14, 2020
name: "PervCity"
sceneByURL:
- action: scrapeXPath
url:
- analoverdose.com/trailers/
- bangingbeauties.com/trailers/
- chocolatebjs.com/trailers/
- oraloverdose.com/trailers/
- pervcity.com/trailers/
- upherasshole.com/trailers/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$sceneinfo: //div[@class="videoInfo"]
$base: //head/base/@href
scene:
Title: $sceneinfo/div[@class="infoHeader"]
Performers:
Name: $sceneinfo//span[@class="tour_update_models"]/a
Details: $sceneinfo//p/text()
Image:
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
concat: "|"
postProcess:
- replace:
- regex: ([^|]+)\|(.*)/(content/.+)
with: $1$3
Date:
selector: $sceneinfo/div[@class="infoHeader"]
postProcess:
- replace:
- regex: \s
with: "_"
- regex: ^
with: "https://pervcity.com/search.php?query="
- subScraper:
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
postProcess:
- parseDate: 01-02-2006
Studio:
Name:
selector: //head/base/@href
postProcess:
- replace:
- regex: .+/([^\.]+)\.com/.*
with: $1
- map:
analoverdose: AnalOverdose
chocolatebjs: ChocolateBjs
bangingbeauties: BangingBeauties
oraloverdose: OralOverdose
pervcity: PervCity
upherasshole: UpHerAsshole
# Last Updated October 14, 2020

View File

@ -1,48 +1,48 @@
name: private
sceneByURL:
- action: scrapeXPath
url:
- private.com
scraper: sceneScraper
movieByURL:
- action: scrapeXPath
url:
- private.com
scraper: movieScraper
xPathScrapers:
sceneScraper:
common:
$content: //section[@class="video-description-and-tags clearfix"]
scene:
Title:
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
Date:
selector: //meta[@itemprop="uploadDate"]/@content
postProcess:
- parseDate: 01/02/2006
Details: $content//p[@id="description-section"]
Tags:
Name: $content//ul[@class="scene-tags"]/li/a/text()
Performers:
Name: $content//ul[@class="scene-models-list"]//a/text()
Studio:
Name: $content//span[@class="title-site"]/text()
#fixed: Private
Image: //meta[@property="og:image"]/@content
movieScraper:
movie:
Name: //div[@class="dvds-wrapper"]/h1/text()
Director: //p[@class="director"]/span[@itemprop="name"]/text()
Duration:
selector: //p[em[contains(text(), "Duration:")]]/text()
postProcess:
- replace:
- regex: ^(\d+).+$
with: 00:$1:00
Date: //span[@itemprop="datePublished"]/text()
Studio:
Name:
fixed: Private
Synopsis: //p[@class="sinopsys"]
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
# Last Updated August 18, 2020
name: private
sceneByURL:
- action: scrapeXPath
url:
- private.com
scraper: sceneScraper
movieByURL:
- action: scrapeXPath
url:
- private.com
scraper: movieScraper
xPathScrapers:
sceneScraper:
common:
$content: //section[@class="video-description-and-tags clearfix"]
scene:
Title:
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
Date:
selector: //meta[@itemprop="uploadDate"]/@content
postProcess:
- parseDate: 01/02/2006
Details: $content//p[@id="description-section"]
Tags:
Name: $content//ul[@class="scene-tags"]/li/a/text()
Performers:
Name: $content//ul[@class="scene-models-list"]//a/text()
Studio:
Name: $content//span[@class="title-site"]/text()
#fixed: Private
Image: //meta[@property="og:image"]/@content
movieScraper:
movie:
Name: //div[@class="dvds-wrapper"]/h1/text()
Director: //p[@class="director"]/span[@itemprop="name"]/text()
Duration:
selector: //p[em[contains(text(), "Duration:")]]/text()
postProcess:
- replace:
- regex: ^(\d+).+$
with: 00:$1:00
Date: //span[@itemprop="datePublished"]/text()
Studio:
Name:
fixed: Private
Synopsis: //p[@class="sinopsys"]
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
# Last Updated August 18, 2020

View File

@ -1,38 +1,38 @@
name: puffynetwork
sceneByURL:
- action: scrapeXPath
url:
- puffynetwork.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //dl/dd/a
scene:
Title:
selector: //h2[@class="title"]/span/text()
postProcess:
- replace:
- regex: ^(.*)[—]\s+
with:
Date:
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1]
Tags:
Name: //p[@class="tags"]/a/text()
Performers:
Name: $performer/text()
Studio:
Name:
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
postProcess:
- map:
Weliketosuck: We Like To Suck
Wetandpuffy: Wet And Puffy
Wetandpissy: Wet And Pissy
Eurobabefacials: Euro Babe Facials
Simplyanal: Simply Anal
Image: //video[@id="video"]/@poster
# Last Updated August 06, 2020
name: puffynetwork
sceneByURL:
- action: scrapeXPath
url:
- puffynetwork.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //dl/dd/a
scene:
Title:
selector: //h2[@class="title"]/span/text()
postProcess:
- replace:
- regex: ^(.*)[—]\s+
with:
Date:
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1]
Tags:
Name: //p[@class="tags"]/a/text()
Performers:
Name: $performer/text()
Studio:
Name:
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
postProcess:
- map:
Weliketosuck: We Like To Suck
Wetandpuffy: Wet And Puffy
Wetandpissy: Wet And Pissy
Eurobabefacials: Euro Babe Facials
Simplyanal: Simply Anal
Image: //video[@id="video"]/@poster
# Last Updated August 06, 2020

View File

@ -1,73 +1,73 @@
name: "Teencoreclub"
sceneByURL:
- action: scrapeXPath
url:
- analyzedgirls.com/
- assteenmouth.com/
- bangteenpussy.com/
- brutalinvasion.com/
- cumaholicteens.com/
- defiled18.com/
- doubleteamedteens.com/
- dreamteenshd.com/
- girlsgotcream.com/
- hardcoreyouth.com/
- littlehellcat.com/
- maketeengape.com/
- nylonsweeties.com/
- seductive18.com/
- teenanalcasting.com/
- teendrillers.com/
- teensnaturalway.com/
- teenstryblacks.com/
- spermantino.com/
- teachmyass.com/
- drilledchicks.com/
- analcheckups.com/
- fabsluts.com/
- jerk-offpass.com/
- nylonspunkjunkies.com/
- shegotsix.com/
- spearteenpussy.com/
- teencoreclub.com/
- teencorezine.com/
- teensgoporn.com/
- weneednewtalents.com/
- xcoreclub.com/
- youngthroats.com/
- tryteens.com/
- whiteteensblackcocks.com/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //div[@class="detail-hero-title"]//h1/text()
replace:
- regex: \t+
with: ' '
Details: //div[@class="detail-description"]/text()
Performers:
Name:
selector: //div[@class="detail-hero-title"]//h1/text()
replace:
- regex: \t+
with:
split: ','
Tags:
Name:
selector: //meta[@name="keywords"]/@content
split: ','
Image:
selector: //div[@class="detail-hero"]/@style
replace:
- regex: ^.*url.([^\)]+).*$
with: "$1"
Studio:
Name:
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
replace:
- regex: \.\w+
with:
# Last Updated June 22, 2020
name: "Teencoreclub"
sceneByURL:
- action: scrapeXPath
url:
- analyzedgirls.com/
- assteenmouth.com/
- bangteenpussy.com/
- brutalinvasion.com/
- cumaholicteens.com/
- defiled18.com/
- doubleteamedteens.com/
- dreamteenshd.com/
- girlsgotcream.com/
- hardcoreyouth.com/
- littlehellcat.com/
- maketeengape.com/
- nylonsweeties.com/
- seductive18.com/
- teenanalcasting.com/
- teendrillers.com/
- teensnaturalway.com/
- teenstryblacks.com/
- spermantino.com/
- teachmyass.com/
- drilledchicks.com/
- analcheckups.com/
- fabsluts.com/
- jerk-offpass.com/
- nylonspunkjunkies.com/
- shegotsix.com/
- spearteenpussy.com/
- teencoreclub.com/
- teencorezine.com/
- teensgoporn.com/
- weneednewtalents.com/
- xcoreclub.com/
- youngthroats.com/
- tryteens.com/
- whiteteensblackcocks.com/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //div[@class="detail-hero-title"]//h1/text()
replace:
- regex: \t+
with: ' '
Details: //div[@class="detail-description"]/text()
Performers:
Name:
selector: //div[@class="detail-hero-title"]//h1/text()
replace:
- regex: \t+
with:
split: ','
Tags:
Name:
selector: //meta[@name="keywords"]/@content
split: ','
Image:
selector: //div[@class="detail-hero"]/@style
replace:
- regex: ^.*url.([^\)]+).*$
with: "$1"
Studio:
Name:
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
replace:
- regex: \.\w+
with:
# Last Updated June 22, 2020

View File

@ -1,78 +1,78 @@
name: TheNude
performerByName:
action: scrapeXPath
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- https://www.thenude.com
scraper: performerScraper
xPathScrapers:
performerSearch:
performer:
# Name: //a[@class="model-name"]/text() Version to get only the name
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
Name:
selector: //figcaption/span
replace:
- regex: "^AKA:"
with: ""
URL: //a[@class="model-name"]/@href
performerScraper:
performer:
Name: (//meta[@itemprop="name"])[1]/@content
URL: (//meta[@itemprop="url"])[1]/@content
Twitter: //a[text()="TWITTER"]/@href
Instagram: //a[text()="INSTAGRAM"]/@href
Birthdate:
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
parseDate: 02-01-2006
Ethnicity:
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
replace:
- regex: Asian
with: "asian"
- regex: Caucasian
with: "white"
- regex: Black
with: "black"
- regex: Latin
with: "hispanic"
Country:
selector: //span[@itemprop="nationality"]/text()
replace:
- regex: "United States of America"
with: "United States"
#EyeColor: not listed
Height:
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
replace:
- regex: ^(\d+).+$
with: "$1 cm"
Measurements:
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
FakeTits:
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
replace:
- regex: ^[^\(]+\(([^\)]+)\).*$
with: "$1"
- regex: Fake
with: "Yes"
- regex: Real
with: "No"
CareerLength:
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
concat: "-"
Aliases:
selector: //meta[@itemprop="additionalName"]/@content
concat: ", "
Tattoos:
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
Piercings:
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
Image:
selector: (//meta[@itemprop="image"])[1]/@content
Gender:
selector: //meta[@itemprop="gender"]/@content
# Last updated July 02, 2020
name: TheNude
performerByName:
action: scrapeXPath
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
scraper: performerSearch
performerByURL:
- action: scrapeXPath
url:
- https://www.thenude.com
scraper: performerScraper
xPathScrapers:
performerSearch:
performer:
# Name: //a[@class="model-name"]/text() Version to get only the name
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
Name:
selector: //figcaption/span
replace:
- regex: "^AKA:"
with: ""
URL: //a[@class="model-name"]/@href
performerScraper:
performer:
Name: (//meta[@itemprop="name"])[1]/@content
URL: (//meta[@itemprop="url"])[1]/@content
Twitter: //a[text()="TWITTER"]/@href
Instagram: //a[text()="INSTAGRAM"]/@href
Birthdate:
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
parseDate: 02-01-2006
Ethnicity:
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
replace:
- regex: Asian
with: "asian"
- regex: Caucasian
with: "white"
- regex: Black
with: "black"
- regex: Latin
with: "hispanic"
Country:
selector: //span[@itemprop="nationality"]/text()
replace:
- regex: "United States of America"
with: "United States"
#EyeColor: not listed
Height:
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
replace:
- regex: ^(\d+).+$
with: "$1 cm"
Measurements:
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
FakeTits:
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
replace:
- regex: ^[^\(]+\(([^\)]+)\).*$
with: "$1"
- regex: Fake
with: "Yes"
- regex: Real
with: "No"
CareerLength:
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
concat: "-"
Aliases:
selector: //meta[@itemprop="additionalName"]/@content
concat: ", "
Tattoos:
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
Piercings:
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
Image:
selector: (//meta[@itemprop="image"])[1]/@content
Gender:
selector: //meta[@itemprop="gender"]/@content
# Last updated July 02, 2020

View File

@ -1,43 +1,43 @@
name: TheScoreGroup
sceneByURL:
- action: scrapeXPath
url:
- scoreland.com/
- 60plusmilfs.com/
- 50plusmilfs.com/
- 40somethingmag.com/
- pornmegaload.com/
- xlgirls.com/
- 18eighteen.com/
- legsex.com/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
Studio:
Name:
selector: //meta[@property="og:site_name"]/@content
Date:
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
postProcess:
- replace:
- regex: ..,
with:
- parseDate: January 2 2006
Details:
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
concat: "\n"
Tags:
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
Performers:
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
Image:
selector: //meta[@itemprop="image"]/@content
# Enable this post process if you want better image quality but sometimes it can fail
# postProcess:
# - replace:
# - regex: _lg
# with: _x_800
# Last Updated August 11, 2020
name: TheScoreGroup
sceneByURL:
- action: scrapeXPath
url:
- scoreland.com/
- 60plusmilfs.com/
- 50plusmilfs.com/
- 40somethingmag.com/
- pornmegaload.com/
- xlgirls.com/
- 18eighteen.com/
- legsex.com/
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
Studio:
Name:
selector: //meta[@property="og:site_name"]/@content
Date:
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
postProcess:
- replace:
- regex: ..,
with:
- parseDate: January 2 2006
Details:
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
concat: "\n"
Tags:
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
Performers:
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
Image:
selector: //meta[@itemprop="image"]/@content
# Enable this postProcess step if you want better image quality; note that it can sometimes fail
# postProcess:
# - replace:
# - regex: _lg
# with: _x_800
# Last Updated August 11, 2020

View File

@ -1,32 +1,32 @@
name: vipissy
sceneByURL:
- action: scrapeXPath
url:
- vipissy.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //dl/dd/a
scene:
Title:
selector: //section[@class="downloads"]/strong/text()
postProcess:
- replace:
- regex: ^(.*)[—]\s+
with:
Date:
# selector: //dl/dd[contains(text(),"Released on:")]/text()
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1]
Tags:
Name: //p[@class="tags"]/a/text()
Performers:
Name: $performer/text()
Studio:
Name:
fixed: Vipissy
Image: //div[@id="videoplayer"]/video/@poster
# Last Updated August 06, 2020
name: vipissy
sceneByURL:
- action: scrapeXPath
url:
- vipissy.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
common:
$performer: //dl/dd/a
scene:
Title:
selector: //section[@class="downloads"]/strong/text()
postProcess:
- replace:
- regex: ^(.*)[—]\s+
with:
Date:
# selector: //dl/dd[contains(text(),"Released on:")]/text()
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
postProcess:
- parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1]
Tags:
Name: //p[@class="tags"]/a/text()
Performers:
Name: $performer/text()
Studio:
Name:
fixed: Vipissy
Image: //div[@id="videoplayer"]/video/@poster
# Last Updated August 06, 2020

View File

@ -1,22 +1,22 @@
name: xnxx
sceneByURL:
- action: scrapeXPath
url:
- xnxx.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="clear-infobar"]/strong/text()
Tags:
Name: //div[@class="metadata-row video-tags"]/a/text()
Studio:
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
Details: //p[@class="metadata-row video-description"]
Image:
selector: //script[contains(text(), "setThumbUrl169")]/text()
postProcess:
- replace:
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
with: $1
# Last Updated August 06, 2020
name: xnxx
sceneByURL:
- action: scrapeXPath
url:
- xnxx.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title: //div[@class="clear-infobar"]/strong/text()
Tags:
Name: //div[@class="metadata-row video-tags"]/a/text()
Studio:
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
Details: //p[@class="metadata-row video-description"]
Image:
selector: //script[contains(text(), "setThumbUrl169")]/text()
postProcess:
- replace:
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
with: $1
# Last Updated August 06, 2020

View File

@ -1,28 +1,28 @@
# Scraper definition for xtube.com: scene metadata is scraped by URL with XPath selectors.
name: xtube
sceneByURL:
- action: scrapeXPath
url:
- xtube.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //form/h1/text()
Date:
# The date is read from the inline script that mentions "dimension10".
selector: //script[contains(text(), "dimension10")]/text()
postProcess:
- replace:
# Pick the quoted 8-digit value (presumably YYYYMMDD) and reorder its 4/2/2-digit
# groups into "$2 $3, $1" so it matches the parseDate layout below.
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
with: $2 $3, $1
- parseDate: 01 02, 2006
Studio:
Name:
# Studio name is hard-coded rather than scraped.
fixed: xtube
Details: //meta[@property="og:description"]/@content
Performers:
# Value of the comment form's contentOwnerId input, skipping values that contain "sponsor_".
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
Tags:
# Links inside both the "categories" and the "tags" divs are collected as tags.
Name: //div[@class="categories" or @class="tags"]//a/text()
Image: //meta[@property="og:image"]/@content
# Last Updated August 06, 2020
# Scraper definition for xtube.com: scene metadata is scraped by URL with XPath selectors.
name: xtube
sceneByURL:
- action: scrapeXPath
url:
- xtube.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
Title:
selector: //form/h1/text()
Date:
# The date is read from the inline script that mentions "dimension10".
selector: //script[contains(text(), "dimension10")]/text()
postProcess:
- replace:
# Pick the quoted 8-digit value (presumably YYYYMMDD) and reorder its 4/2/2-digit
# groups into "$2 $3, $1" so it matches the parseDate layout below.
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
with: $2 $3, $1
- parseDate: 01 02, 2006
Studio:
Name:
# Studio name is hard-coded rather than scraped.
fixed: xtube
Details: //meta[@property="og:description"]/@content
Performers:
# Value of the comment form's contentOwnerId input, skipping values that contain "sponsor_".
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
Tags:
# Links inside both the "categories" and the "tags" divs are collected as tags.
Name: //div[@class="categories" or @class="tags"]//a/text()
Image: //meta[@property="og:image"]/@content
# Last Updated August 06, 2020

View File

@ -1,23 +1,23 @@
# Scraper definition for xvideos.com: scene metadata is scraped by URL with XPath selectors.
name: xvideos
sceneByURL:
- action: scrapeXPath
url:
- xvideos.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
# Only the first text node of the page-title heading is used.
Title: //h2[@class="page-title"]/text()[1]
Tags:
# Only button links whose href contains "/tags/" count as tags.
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
Performers:
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
Studio:
# The name shown in the "uploader-tag" link is used as the studio.
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
Image:
# The image URL is read from the inline script that mentions setThumbUrl169.
selector: //script[contains(text(), "setThumbUrl169")]/text()
postProcess:
- replace:
# Reduce the script text to capture group 1: the .jpg URL following setThumbUrl169('.
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
with: $1
# Last Updated August 06, 2020
# Scraper definition for xvideos.com: scene metadata is scraped by URL with XPath selectors.
name: xvideos
sceneByURL:
- action: scrapeXPath
url:
- xvideos.com
scraper: sceneScraper
xPathScrapers:
sceneScraper:
scene:
# Only the first text node of the page-title heading is used.
Title: //h2[@class="page-title"]/text()[1]
Tags:
# Only button links whose href contains "/tags/" count as tags.
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
Performers:
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
Studio:
# The name shown in the "uploader-tag" link is used as the studio.
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
Image:
# The image URL is read from the inline script that mentions setThumbUrl169.
selector: //script[contains(text(), "setThumbUrl169")]/text()
postProcess:
- replace:
# Reduce the script text to capture group 1: the .jpg URL following setThumbUrl169('.
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
with: $1
# Last Updated August 06, 2020

View File

@ -1,59 +1,59 @@
import json
import sys
import sqlite3
from os import path
''' This script uses the sqlite database from xbvr (3d porn manager)
Copy main.db from your xbvr configuration and rename it to xbvr.db
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
This script needs python3 and sqlite3
'''
def lookup_scene(id):
    """Build the scraped-scene dictionary for one row of the ``scenes`` table.

    Uses the module-level ``conn`` sqlite connection.

    Args:
        id: Value matched against ``scenes.id`` (and against ``scene_id`` in
            the ``scene_tags`` / ``scene_cast`` link tables).

    Returns:
        A dict with the keys ``title``, ``details``, ``studio``, ``image``,
        ``url``, ``date``, ``tags`` and ``performers``; ``studio`` is
        ``{"name": ...}`` and ``tags`` / ``performers`` are lists of
        ``{"name": ...}`` dicts.  An empty dict is returned when no scene
        has the given id.
    """
    c = conn.cursor()
    c.execute('select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?', (id,))
    scene = c.fetchone()
    if scene is None:
        # fetchone() returns None for an unknown id; indexing it would raise
        # TypeError, so report "nothing found" as an empty result instead.
        return {}
    title, synopsis, site, cover_url, scene_url, release_date = scene
    res = {
        'title': title,
        'details': synopsis,
        'studio': {"name": site},
        'image': cover_url,
        'url': scene_url,
        'date': release_date,
    }
    c.execute("select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;", (id,))
    res['tags'] = [{"name": name} for (name,) in c.fetchall()]
    c.execute("select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;", (id,))
    res['performers'] = [{"name": name} for (name,) in c.fetchall()]
    return res
def find_scene_id(title):
    """Return the scene id matching *title*, or ``None`` if there is none.

    Uses the module-level ``conn`` sqlite connection.  *title* is first
    matched against ``files.filename``; if that yields nothing it is matched
    against ``scenes.title``.

    Args:
        title: File name or scene title to look up (exact match).

    Returns:
        The first column of the first matching row (``files.scene_id`` or
        ``scenes.id``), or ``None`` when neither query matches.
    """
    c = conn.cursor()
    c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
    row = c.fetchone()
    if row is None:
        c.execute('select id from scenes where title=?', (title,))
        row = c.fetchone()
    # fetchone() returns None when nothing matched; indexing it would raise
    # TypeError, so hand "not found" back to the caller as None.
    return row[0] if row is not None else None
# Script entry: refuse to run without the xbvr database, then answer a
# "query" request read as JSON from stdin with scene JSON on stdout.
# Diagnostics go to stderr so stdout carries only the result.
if not path.exists("xbvr.db"):
    print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
    print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
    print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python runs with -S).
    sys.exit(1)
conn = sqlite3.connect('xbvr.db',detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
try:
    # The slice is empty when no mode argument was passed, so a missing
    # argument is treated as "nothing to do" rather than an IndexError.
    if sys.argv[1:2] == ["query"]:
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(fragment),file=sys.stderr)
        scene_id = find_scene_id(fragment['title'])
        if not scene_id:
            print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
        else:
            print(f"Found scene id: {scene_id}",file=sys.stderr)
            result=lookup_scene(scene_id)
            print(json.dumps(result))
finally:
    # Close the connection even if the lookup above raised.
    conn.close()
import json
import sys
import sqlite3
from os import path
''' This script uses the sqlite database from xbvr (3d porn manager)
Copy main.db from your xbvr configuration and rename it to xbvr.db
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
This script needs python3 and sqlite3
'''
def lookup_scene(id):
    """Build the scraped-scene dictionary for one row of the ``scenes`` table.

    Uses the module-level ``conn`` sqlite connection.

    Args:
        id: Value matched against ``scenes.id`` (and against ``scene_id`` in
            the ``scene_tags`` / ``scene_cast`` link tables).

    Returns:
        A dict with the keys ``title``, ``details``, ``studio``, ``image``,
        ``url``, ``date``, ``tags`` and ``performers``; ``studio`` is
        ``{"name": ...}`` and ``tags`` / ``performers`` are lists of
        ``{"name": ...}`` dicts.  An empty dict is returned when no scene
        has the given id.
    """
    c = conn.cursor()
    c.execute('select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?', (id,))
    scene = c.fetchone()
    if scene is None:
        # fetchone() returns None for an unknown id; indexing it would raise
        # TypeError, so report "nothing found" as an empty result instead.
        return {}
    title, synopsis, site, cover_url, scene_url, release_date = scene
    res = {
        'title': title,
        'details': synopsis,
        'studio': {"name": site},
        'image': cover_url,
        'url': scene_url,
        'date': release_date,
    }
    c.execute("select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;", (id,))
    res['tags'] = [{"name": name} for (name,) in c.fetchall()]
    c.execute("select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;", (id,))
    res['performers'] = [{"name": name} for (name,) in c.fetchall()]
    return res
def find_scene_id(title):
    """Return the scene id matching *title*, or ``None`` if there is none.

    Uses the module-level ``conn`` sqlite connection.  *title* is first
    matched against ``files.filename``; if that yields nothing it is matched
    against ``scenes.title``.

    Args:
        title: File name or scene title to look up (exact match).

    Returns:
        The first column of the first matching row (``files.scene_id`` or
        ``scenes.id``), or ``None`` when neither query matches.
    """
    c = conn.cursor()
    c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
    row = c.fetchone()
    if row is None:
        c.execute('select id from scenes where title=?', (title,))
        row = c.fetchone()
    # fetchone() returns None when nothing matched; indexing it would raise
    # TypeError, so hand "not found" back to the caller as None.
    return row[0] if row is not None else None
# Script entry: refuse to run without the xbvr database, then answer a
# "query" request read as JSON from stdin with scene JSON on stdout.
# Diagnostics go to stderr so stdout carries only the result.
if not path.exists("xbvr.db"):
    print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
    print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
    print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python runs with -S).
    sys.exit(1)
conn = sqlite3.connect('xbvr.db',detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
try:
    # The slice is empty when no mode argument was passed, so a missing
    # argument is treated as "nothing to do" rather than an IndexError.
    if sys.argv[1:2] == ["query"]:
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(fragment),file=sys.stderr)
        scene_id = find_scene_id(fragment['title'])
        if not scene_id:
            print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
        else:
            print(f"Found scene id: {scene_id}",file=sys.stderr)
            result=lookup_scene(scene_id)
            print(json.dumps(result))
finally:
    # Close the connection even if the lookup above raised.
    conn.close()