Create .gitattributes (#265)

This commit is contained in:
peolic 2020-11-08 21:09:03 +02:00 committed by GitHub
parent a9603e2a60
commit 6e013d1064
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 781 additions and 776 deletions

5
.gitattributes vendored Normal file
View File

@ -0,0 +1,5 @@
* text=auto
*.yml eol=lf diff=yaml linguist-detectable
*.py eol=lf diff=python
*.md eol=lf diff=markdown

View File

@ -1,34 +1,34 @@
name: "FemDomEmpire" name: "FemDomEmpire"
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- femdomempire.com/tour/trailers/ - femdomempire.com/tour/trailers/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: //div[@class="videoDetails clear"]/h3 Title: //div[@class="videoDetails clear"]/h3
Date: Date:
selector: //p[span[contains(.,"Date Added")]] selector: //p[span[contains(.,"Date Added")]]
postProcess: postProcess:
- replace: - replace:
- regex: "Date Added:" - regex: "Date Added:"
with: with:
- parseDate: January 2, 2006 - parseDate: January 2, 2006
Details: //div[@class="videoDetails clear"]/p Details: //div[@class="videoDetails clear"]/p
Performers: Performers:
Name: //li[@class="update_models"]/a Name: //li[@class="update_models"]/a
Tags: Tags:
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
Studio: Studio:
Name: Name:
fixed: FemDomEmpire fixed: FemDomEmpire
Image: Image:
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
postProcess: postProcess:
- replace: - replace:
- regex: .*/tour/content// - regex: .*/tour/content//
with: "https://femdomempire.com/tour/content/" with: "https://femdomempire.com/tour/content/"
# Last Updated October 10, 2020 # Last Updated October 10, 2020
# Note: The "Date Added" value on older scenes seems to be incorrect on the FemDomEmpire site. Newer scenes have the correct date. # Note: The "Date Added" value on older scenes seems to be incorrect on the FemDomEmpire site. Newer scenes have the correct date.

View File

@ -1,34 +1,34 @@
name: firstanalquest name: firstanalquest
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- firstanalquest.com - firstanalquest.com
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a $performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
$title: //div[@class="badge-content"]/preceding-sibling::span/text() $title: //div[@class="badge-content"]/preceding-sibling::span/text()
scene: scene:
Title: $title Title: $title
Details: Details:
selector: //div[@class="text-desc"] selector: //div[@class="text-desc"]
Tags: Tags:
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text() Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
Performers: Performers:
Name: $performer/text() Name: $performer/text()
Image: //img[@class="player-preview"]/@src Image: //img[@class="player-preview"]/@src
Studio: Studio:
Name: Name:
fixed: First Anal Quest fixed: First Anal Quest
Date: Date:
selector: $title selector: $title
postProcess: postProcess:
- replace: - replace:
- regex: \s - regex: \s
with: + with: +
- regex: ^([^-]+-?).+ - regex: ^([^-]+-?).+
with: http://www.firstanalquest.com/search/?q=$1 with: http://www.firstanalquest.com/search/?q=$1
- subScraper: //span[@class="thumb-added"]/text() - subScraper: //span[@class="thumb-added"]/text()
- parseDate: Jan 2, 2006 - parseDate: Jan 2, 2006
# Last Updated August 06, 2020 # Last Updated August 06, 2020

View File

@ -1,25 +1,25 @@
name: fistertwister name: fistertwister
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- fistertwister.com - fistertwister.com
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: //div[@class="jumbotron"]/h2/text()[1] Title: //div[@class="jumbotron"]/h2/text()[1]
Date: Date:
selector: //li[contains(text(),"Released on:")]/strong/text() selector: //li[contains(text(),"Released on:")]/strong/text()
postProcess: postProcess:
- parseDate: Jan 2, 2006 - parseDate: Jan 2, 2006
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text() Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
Tags: Tags:
Name: //a[@class="btn btn-default"]/text() Name: //a[@class="btn btn-default"]/text()
Performers: Performers:
Name: //li[contains(text(),"Featuring:")]//a/text() Name: //li[contains(text(),"Featuring:")]//a/text()
Studio: Studio:
Name: Name:
fixed: Fister Twister fixed: Fister Twister
#Image: //video[@id="video"]/@poster #Image: //video[@id="video"]/@poster
Image: //meta[@property="og:image"]/@content Image: //meta[@property="og:image"]/@content
# Last Updated August 06, 2020 # Last Updated August 06, 2020

View File

@ -1,42 +1,42 @@
name: "FittingRoom" name: "FittingRoom"
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- fitting-room.com/videos/ - fitting-room.com/videos/
scraper: sceneScraper scraper: sceneScraper
galleryByURL: galleryByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- fitting-room.com/albums/ - fitting-room.com/albums/
scraper: galleryScraper scraper: galleryScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: Title:
selector: //head/title/text() selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content Details: //meta[@property="twitter:description"]/@content
Performers: Performers:
Name: //div[@class="info-model"]//p[@class="name"] Name: //div[@class="info-model"]//p[@class="name"]
Tags: Tags:
Name: //meta[@property="article:tag"]/@content Name: //meta[@property="article:tag"]/@content
Studio: Studio:
Name: Name:
fixed: Fitting-Room fixed: Fitting-Room
Image: //meta[@property="twitter:image"]/@content Image: //meta[@property="twitter:image"]/@content
URL: //meta[@property="twitter:url"]/@content URL: //meta[@property="twitter:url"]/@content
galleryScraper: galleryScraper:
gallery: gallery:
Title: Title:
selector: //head/title/text() selector: //head/title/text()
Details: //meta[@property="twitter:description"]/@content Details: //meta[@property="twitter:description"]/@content
Performers: Performers:
Name: //div[@class="info-model"]//p[@class="name"] Name: //div[@class="info-model"]//p[@class="name"]
Tags: Tags:
Name: //meta[@property="article:tag"]/@content Name: //meta[@property="article:tag"]/@content
Studio: Studio:
Name: Name:
fixed: Fitting-Room fixed: Fitting-Room
URL: //meta[@property="twitter:url"]/@content URL: //meta[@property="twitter:url"]/@content
# Last Updated October 23, 2020 # Last Updated October 23, 2020

View File

@ -1,29 +1,29 @@
name: Mandyflores name: Mandyflores
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- mandyflores.com/vod/ - mandyflores.com/vod/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$updateDesc: //span[@class="update_description"] $updateDesc: //span[@class="update_description"]
$divCenter: /div[@align="center"] $divCenter: /div[@align="center"]
scene: scene:
Title: //span[@class="title_bar_hilite"] Title: //span[@class="title_bar_hilite"]
Details: Details:
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1] selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
concat: "\n\n" concat: "\n\n"
Date: Date:
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1] selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
postProcess: postProcess:
- parseDate: 01/02/2006 - parseDate: 01/02/2006
Studio: Studio:
Name: Name:
fixed: Mandyflores fixed: Mandyflores
Tags: Tags:
Name: //span[@class="update_tags"]/a Name: //span[@class="update_tags"]/a
Performers: Performers:
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
# Last Updated September 16, 2020 # Last Updated September 16, 2020

View File

@ -1,114 +1,114 @@
name: Modelhub name: Modelhub
performerByName: performerByName:
action: scrapeXPath action: scrapeXPath
queryURL: https://modelhub.com/model/search?q={} queryURL: https://modelhub.com/model/search?q={}
scraper: performerSearch scraper: performerSearch
performerByURL: performerByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- modelhub.com - modelhub.com
scraper: performerScraper scraper: performerScraper
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- modelhub.com/video/ - modelhub.com/video/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
performerSearch: performerSearch:
performer: performer:
Name: //div[@class="modelInfo"]/a/h3/text() Name: //div[@class="modelInfo"]/a/h3/text()
URL: URL:
selector: //li[@class="modelBox"]/a/@href selector: //li[@class="modelBox"]/a/@href
postProcess: postProcess:
- replace: - replace:
- regex: ^(.*)$ - regex: ^(.*)$
with: "https://www.modelhub.com$1/bio" with: "https://www.modelhub.com$1/bio"
performerScraper: performerScraper:
common: common:
$infoPiece: //div[@class="modelContent"] $infoPiece: //div[@class="modelContent"]
$modelAttributes: //ul[@class="modelAttributes"]/li $modelAttributes: //ul[@class="modelAttributes"]/li
performer: performer:
Name: //div[@class="infoSection"]/h1/text() Name: //div[@class="infoSection"]/h1/text()
Birthdate: Birthdate:
selector: //span[@class='bday js_lazy_bkg']/text() selector: //span[@class='bday js_lazy_bkg']/text()
parseDate: January 2, 2006 parseDate: January 2, 2006
Gender: Gender:
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
postProcess: postProcess:
- replace: - replace:
- regex: "Couple" - regex: "Couple"
with: with:
Ethnicity: Ethnicity:
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
postProcess: postProcess:
- map: - map:
Latin: "hispanic" Latin: "hispanic"
Other: "" Other: ""
Twitter: Twitter:
selector: //a[@class='twitter js_lazy_bkg']/@href selector: //a[@class='twitter js_lazy_bkg']/@href
postProcess: postProcess:
- replace: - replace:
- regex: .+(twitter.com)/(.+) - regex: .+(twitter.com)/(.+)
with: $2 with: $2
- regex: (.+)\?(.*) - regex: (.+)\?(.*)
with: $1 with: $1
Instagram: Instagram:
selector: //a[@class="insta js_lazy_bkg"]/@href selector: //a[@class="insta js_lazy_bkg"]/@href
postProcess: postProcess:
- replace: - replace:
- regex: .+(instagram.com/)(.+) - regex: .+(instagram.com/)(.+)
with: $2 with: $2
Height: Height:
selector: $modelAttributes/div[contains(.,"Height")]/span selector: $modelAttributes/div[contains(.,"Height")]/span
postProcess: postProcess:
- replace: - replace:
- regex: .*\((\d+)cm\) - regex: .*\((\d+)cm\)
with: $1 with: $1
FakeTits: FakeTits:
selector: $modelAttributes/div[contains(.,"Breast Type")]/span selector: $modelAttributes/div[contains(.,"Breast Type")]/span
postProcess: postProcess:
- map: - map:
Natural: "No" Natural: "No"
Fake: "Yes" Fake: "Yes"
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
URL: //meta[@name="twitter:url"]/@content URL: //meta[@name="twitter:url"]/@content
Image: //img[@class='profileImg lazy']/@data-src Image: //img[@class='profileImg lazy']/@data-src
Country: Country:
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text() selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
postProcess: postProcess:
- map: - map:
"AR": "Argentina" "AR": "Argentina"
"AT": "Austria" "AT": "Austria"
"AU": "Australia" "AU": "Australia"
"BR": "Brazil" "BR": "Brazil"
"CA": "Canada" "CA": "Canada"
"colombia, CO": "Colombia" "colombia, CO": "Colombia"
"CO": "Colombia" "CO": "Colombia"
"CZ": "Czech Republic" "CZ": "Czech Republic"
"DE": "Germany" "DE": "Germany"
"ES": "Spain" "ES": "Spain"
"FR": "France" "FR": "France"
"GB": "United Kingdom" "GB": "United Kingdom"
"HU": "Hungary" "HU": "Hungary"
"IT": "Italy" "IT": "Italy"
"Peru, PE": "Peru" "Peru, PE": "Peru"
"PE": "Peru" "PE": "Peru"
"PH": "Philippines" "PH": "Philippines"
"RO": "Romania" "RO": "Romania"
"RU": "Russia" "RU": "Russia"
"US": "United States" "US": "United States"
sceneScraper: sceneScraper:
scene: scene:
Title: //div[@class='videoShortInfo']/div[1]/h1/text() Title: //div[@class='videoShortInfo']/div[1]/h1/text()
Tags: Tags:
Name: //div[@class='videoCategories']/div/a/text() Name: //div[@class='videoCategories']/div/a/text()
Image: //meta[@property="og:image"]/@content Image: //meta[@property="og:image"]/@content
Details: //p[@ class="videoDescription"] Details: //p[@ class="videoDescription"]
Performers: Performers:
Name: //div[@class='videoAvatar']/div[1]/a[2] Name: //div[@class='videoAvatar']/div[1]/a[2]
# Last Updated Jul 25, 2020 # Last Updated Jul 25, 2020

View File

@ -1,55 +1,55 @@
name: "PervCity" name: "PervCity"
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- analoverdose.com/trailers/ - analoverdose.com/trailers/
- bangingbeauties.com/trailers/ - bangingbeauties.com/trailers/
- chocolatebjs.com/trailers/ - chocolatebjs.com/trailers/
- oraloverdose.com/trailers/ - oraloverdose.com/trailers/
- pervcity.com/trailers/ - pervcity.com/trailers/
- upherasshole.com/trailers/ - upherasshole.com/trailers/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$sceneinfo: //div[@class="videoInfo"] $sceneinfo: //div[@class="videoInfo"]
$base: //head/base/@href $base: //head/base/@href
scene: scene:
Title: $sceneinfo/div[@class="infoHeader"] Title: $sceneinfo/div[@class="infoHeader"]
Performers: Performers:
Name: $sceneinfo//span[@class="tour_update_models"]/a Name: $sceneinfo//span[@class="tour_update_models"]/a
Details: $sceneinfo//p/text() Details: $sceneinfo//p/text()
Image: Image:
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
concat: "|" concat: "|"
postProcess: postProcess:
- replace: - replace:
- regex: ([^|]+)\|(.*)/(content/.+) - regex: ([^|]+)\|(.*)/(content/.+)
with: $1$3 with: $1$3
Date: Date:
selector: $sceneinfo/div[@class="infoHeader"] selector: $sceneinfo/div[@class="infoHeader"]
postProcess: postProcess:
- replace: - replace:
- regex: \s - regex: \s
with: "_" with: "_"
- regex: ^ - regex: ^
with: "https://pervcity.com/search.php?query=" with: "https://pervcity.com/search.php?query="
- subScraper: - subScraper:
selector: //div[@class="category_listing_block"]//div[@class="date"]/text() selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
postProcess: postProcess:
- parseDate: 01-02-2006 - parseDate: 01-02-2006
Studio: Studio:
Name: Name:
selector: //head/base/@href selector: //head/base/@href
postProcess: postProcess:
- replace: - replace:
- regex: .+/([^\.]+)\.com/.* - regex: .+/([^\.]+)\.com/.*
with: $1 with: $1
- map: - map:
analoverdose: AnalOverdose analoverdose: AnalOverdose
chocolatebjs: ChocolateBjs chocolatebjs: ChocolateBjs
bangingbeauties: BangingBeauties bangingbeauties: BangingBeauties
oraloverdose: OralOverdose oraloverdose: OralOverdose
pervcity: PervCity pervcity: PervCity
upherasshole: UpHerAsshole upherasshole: UpHerAsshole
# Last Updated October 14, 2020 # Last Updated October 14, 2020

View File

@ -1,48 +1,48 @@
name: private name: private
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- private.com - private.com
scraper: sceneScraper scraper: sceneScraper
movieByURL: movieByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- private.com - private.com
scraper: movieScraper scraper: movieScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$content: //section[@class="video-description-and-tags clearfix"] $content: //section[@class="video-description-and-tags clearfix"]
scene: scene:
Title: Title:
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text() selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
Date: Date:
selector: //meta[@itemprop="uploadDate"]/@content selector: //meta[@itemprop="uploadDate"]/@content
postProcess: postProcess:
- parseDate: 01/02/2006 - parseDate: 01/02/2006
Details: $content//p[@id="description-section"] Details: $content//p[@id="description-section"]
Tags: Tags:
Name: $content//ul[@class="scene-tags"]/li/a/text() Name: $content//ul[@class="scene-tags"]/li/a/text()
Performers: Performers:
Name: $content//ul[@class="scene-models-list"]//a/text() Name: $content//ul[@class="scene-models-list"]//a/text()
Studio: Studio:
Name: $content//span[@class="title-site"]/text() Name: $content//span[@class="title-site"]/text()
#fixed: Private #fixed: Private
Image: //meta[@property="og:image"]/@content Image: //meta[@property="og:image"]/@content
movieScraper: movieScraper:
movie: movie:
Name: //div[@class="dvds-wrapper"]/h1/text() Name: //div[@class="dvds-wrapper"]/h1/text()
Director: //p[@class="director"]/span[@itemprop="name"]/text() Director: //p[@class="director"]/span[@itemprop="name"]/text()
Duration: Duration:
selector: //p[em[contains(text(), "Duration:")]]/text() selector: //p[em[contains(text(), "Duration:")]]/text()
postProcess: postProcess:
- replace: - replace:
- regex: ^(\d+).+$ - regex: ^(\d+).+$
with: 00:$1:00 with: 00:$1:00
Date: //span[@itemprop="datePublished"]/text() Date: //span[@itemprop="datePublished"]/text()
Studio: Studio:
Name: Name:
fixed: Private fixed: Private
Synopsis: //p[@class="sinopsys"] Synopsis: //p[@class="sinopsys"]
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
# Last Updated August 18, 2020 # Last Updated August 18, 2020

View File

@ -1,38 +1,38 @@
name: puffynetwork name: puffynetwork
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- puffynetwork.com - puffynetwork.com
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$performer: //dl/dd/a $performer: //dl/dd/a
scene: scene:
Title: Title:
selector: //h2[@class="title"]/span/text() selector: //h2[@class="title"]/span/text()
postProcess: postProcess:
- replace: - replace:
- regex: ^(.*)[—]\s+ - regex: ^(.*)[—]\s+
with: with:
Date: Date:
selector: //dl/dt[contains(text(),"Released on:")]/span/text() selector: //dl/dt[contains(text(),"Released on:")]/span/text()
postProcess: postProcess:
- parseDate: Jan 2, 2006 - parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1] Details: //div[@class="show_more"]/text()[1]
Tags: Tags:
Name: //p[@class="tags"]/a/text() Name: //p[@class="tags"]/a/text()
Performers: Performers:
Name: $performer/text() Name: $performer/text()
Studio: Studio:
Name: Name:
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text() selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
postProcess: postProcess:
- map: - map:
Weliketosuck: We Like To Suck Weliketosuck: We Like To Suck
Wetandpuffy: Wet And Puffy Wetandpuffy: Wet And Puffy
Wetandpissy: Wet And Pissy Wetandpissy: Wet And Pissy
Eurobabefacials: Euro Babe Facials Eurobabefacials: Euro Babe Facials
Simplyanal: Simply Anal Simplyanal: Simply Anal
Image: //video[@id="video"]/@poster Image: //video[@id="video"]/@poster
# Last Updated August 06, 2020 # Last Updated August 06, 2020

View File

@ -1,73 +1,73 @@
name: "Teencoreclub" name: "Teencoreclub"
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- analyzedgirls.com/ - analyzedgirls.com/
- assteenmouth.com/ - assteenmouth.com/
- bangteenpussy.com/ - bangteenpussy.com/
- brutalinvasion.com/ - brutalinvasion.com/
- cumaholicteens.com/ - cumaholicteens.com/
- defiled18.com/ - defiled18.com/
- doubleteamedteens.com/ - doubleteamedteens.com/
- dreamteenshd.com/ - dreamteenshd.com/
- girlsgotcream.com/ - girlsgotcream.com/
- hardcoreyouth.com/ - hardcoreyouth.com/
- littlehellcat.com/ - littlehellcat.com/
- maketeengape.com/ - maketeengape.com/
- nylonsweeties.com/ - nylonsweeties.com/
- seductive18.com/ - seductive18.com/
- teenanalcasting.com/ - teenanalcasting.com/
- teendrillers.com/ - teendrillers.com/
- teensnaturalway.com/ - teensnaturalway.com/
- teenstryblacks.com/ - teenstryblacks.com/
- spermantino.com/ - spermantino.com/
- teachmyass.com/ - teachmyass.com/
- drilledchicks.com/ - drilledchicks.com/
- analcheckups.com/ - analcheckups.com/
- fabsluts.com/ - fabsluts.com/
- jerk-offpass.com/ - jerk-offpass.com/
- nylonspunkjunkies.com/ - nylonspunkjunkies.com/
- shegotsix.com/ - shegotsix.com/
- spearteenpussy.com/ - spearteenpussy.com/
- teencoreclub.com/ - teencoreclub.com/
- teencorezine.com/ - teencorezine.com/
- teensgoporn.com/ - teensgoporn.com/
- weneednewtalents.com/ - weneednewtalents.com/
- xcoreclub.com/ - xcoreclub.com/
- youngthroats.com/ - youngthroats.com/
- tryteens.com/ - tryteens.com/
- whiteteensblackcocks.com/ - whiteteensblackcocks.com/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: Title:
selector: //div[@class="detail-hero-title"]//h1/text() selector: //div[@class="detail-hero-title"]//h1/text()
replace: replace:
- regex: \t+ - regex: \t+
with: ' ' with: ' '
Details: //div[@class="detail-description"]/text() Details: //div[@class="detail-description"]/text()
Performers: Performers:
Name: Name:
selector: //div[@class="detail-hero-title"]//h1/text() selector: //div[@class="detail-hero-title"]//h1/text()
replace: replace:
- regex: \t+ - regex: \t+
with: with:
split: ',' split: ','
Tags: Tags:
Name: Name:
selector: //meta[@name="keywords"]/@content selector: //meta[@name="keywords"]/@content
split: ',' split: ','
Image: Image:
selector: //div[@class="detail-hero"]/@style selector: //div[@class="detail-hero"]/@style
replace: replace:
- regex: ^.*url.([^\)]+).*$ - regex: ^.*url.([^\)]+).*$
with: "$1" with: "$1"
Studio: Studio:
Name: Name:
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
replace: replace:
- regex: \.\w+ - regex: \.\w+
with: with:
# Last Updated June 22, 2020 # Last Updated June 22, 2020

View File

@ -1,78 +1,78 @@
name: TheNude name: TheNude
performerByName: performerByName:
action: scrapeXPath action: scrapeXPath
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={} queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
scraper: performerSearch scraper: performerSearch
performerByURL: performerByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- https://www.thenude.com - https://www.thenude.com
scraper: performerScraper scraper: performerScraper
xPathScrapers: xPathScrapers:
performerSearch: performerSearch:
performer: performer:
# Name: //a[@class="model-name"]/text() Version to get only the name # Name: //a[@class="model-name"]/text() Version to get only the name
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well # Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
Name: Name:
selector: //figcaption/span selector: //figcaption/span
replace: replace:
- regex: "^AKA:" - regex: "^AKA:"
with: "" with: ""
URL: //a[@class="model-name"]/@href URL: //a[@class="model-name"]/@href
performerScraper: performerScraper:
performer: performer:
Name: (//meta[@itemprop="name"])[1]/@content Name: (//meta[@itemprop="name"])[1]/@content
URL: (//meta[@itemprop="url"])[1]/@content URL: (//meta[@itemprop="url"])[1]/@content
Twitter: //a[text()="TWITTER"]/@href Twitter: //a[text()="TWITTER"]/@href
Instagram: //a[text()="INSTAGRAM"]/@href Instagram: //a[text()="INSTAGRAM"]/@href
Birthdate: Birthdate:
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
parseDate: 02-01-2006 parseDate: 02-01-2006
Ethnicity: Ethnicity:
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
replace: replace:
- regex: Asian - regex: Asian
with: "asian" with: "asian"
- regex: Caucasian - regex: Caucasian
with: "white" with: "white"
- regex: Black - regex: Black
with: "black" with: "black"
- regex: Latin - regex: Latin
with: "hispanic" with: "hispanic"
Country: Country:
selector: //span[@itemprop="nationality"]/text() selector: //span[@itemprop="nationality"]/text()
replace: replace:
- regex: "United States of America" - regex: "United States of America"
with: "United States" with: "United States"
#EyeColor: not listed #EyeColor: not listed
Height: Height:
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
replace: replace:
- regex: ^(\d+).+$ - regex: ^(\d+).+$
with: "$1 cm" with: "$1 cm"
Measurements: Measurements:
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
FakeTits: FakeTits:
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
replace: replace:
- regex: ^[^\(]+\(([^\)]+)\).*$ - regex: ^[^\(]+\(([^\)]+)\).*$
with: "$1" with: "$1"
- regex: Fake - regex: Fake
with: "Yes" with: "Yes"
- regex: Real - regex: Real
with: "No" with: "No"
CareerLength: CareerLength:
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
concat: "-" concat: "-"
Aliases: Aliases:
selector: //meta[@itemprop="additionalName"]/@content selector: //meta[@itemprop="additionalName"]/@content
concat: ", " concat: ", "
Tattoos: Tattoos:
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
Piercings: Piercings:
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text() selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
Image: Image:
selector: (//meta[@itemprop="image"])[1]/@content selector: (//meta[@itemprop="image"])[1]/@content
Gender: Gender:
selector: //meta[@itemprop="gender"]/@content selector: //meta[@itemprop="gender"]/@content
# Last updated July 02, 2020 # Last updated July 02, 2020

View File

@ -1,43 +1,43 @@
name: TheScoreGroup name: TheScoreGroup
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- scoreland.com/ - scoreland.com/
- 60plusmilfs.com/ - 60plusmilfs.com/
- 50plusmilfs.com/ - 50plusmilfs.com/
- 40somethingmag.com/ - 40somethingmag.com/
- pornmegaload.com/ - pornmegaload.com/
- xlgirls.com/ - xlgirls.com/
- 18eighteen.com/ - 18eighteen.com/
- legsex.com/ - legsex.com/
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text() Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
Studio: Studio:
Name: Name:
selector: //meta[@property="og:site_name"]/@content selector: //meta[@property="og:site_name"]/@content
Date: Date:
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text() selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
postProcess: postProcess:
- replace: - replace:
- regex: .., - regex: ..,
with: with:
- parseDate: January 2 2006 - parseDate: January 2 2006
Details: Details:
selector: //div[@class="row"]/div/div[@class="p-desc"]/text() selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
concat: "\n" concat: "\n"
Tags: Tags:
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text() Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
Performers: Performers:
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text() Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
Image: Image:
selector: //meta[@itemprop="image"]/@content selector: //meta[@itemprop="image"]/@content
# Enable this postProcess step if you want better image quality; note that it can sometimes fail # Enable this postProcess step if you want better image quality; note that it can sometimes fail
# postProcess: # postProcess:
# - replace: # - replace:
# - regex: _lg # - regex: _lg
# with: _x_800 # with: _x_800
# Last Updated August 11, 2020 # Last Updated August 11, 2020

View File

@ -1,32 +1,32 @@
name: vipissy name: vipissy
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- vipissy.com - vipissy.com
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
common: common:
$performer: //dl/dd/a $performer: //dl/dd/a
scene: scene:
Title: Title:
selector: //section[@class="downloads"]/strong/text() selector: //section[@class="downloads"]/strong/text()
postProcess: postProcess:
- replace: - replace:
- regex: ^(.*)[—]\s+ - regex: ^(.*)[—]\s+
with: with:
Date: Date:
# selector: //dl/dd[contains(text(),"Released on:")]/text() # selector: //dl/dd[contains(text(),"Released on:")]/text()
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text() selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
postProcess: postProcess:
- parseDate: Jan 2, 2006 - parseDate: Jan 2, 2006
Details: //div[@class="show_more"]/text()[1] Details: //div[@class="show_more"]/text()[1]
Tags: Tags:
Name: //p[@class="tags"]/a/text() Name: //p[@class="tags"]/a/text()
Performers: Performers:
Name: $performer/text() Name: $performer/text()
Studio: Studio:
Name: Name:
fixed: Vipissy fixed: Vipissy
Image: //div[@id="videoplayer"]/video/@poster Image: //div[@id="videoplayer"]/video/@poster
# Last Updated August 06, 2020 # Last Updated August 06, 2020

View File

@ -1,22 +1,22 @@
name: xnxx name: xnxx
sceneByURL: sceneByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- xnxx.com - xnxx.com
scraper: sceneScraper scraper: sceneScraper
xPathScrapers: xPathScrapers:
sceneScraper: sceneScraper:
scene: scene:
Title: //div[@class="clear-infobar"]/strong/text() Title: //div[@class="clear-infobar"]/strong/text()
Tags: Tags:
Name: //div[@class="metadata-row video-tags"]/a/text() Name: //div[@class="metadata-row video-tags"]/a/text()
Studio: Studio:
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text() Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
Details: //p[@class="metadata-row video-description"] Details: //p[@class="metadata-row video-description"]
Image: Image:
selector: //script[contains(text(), "setThumbUrl169")]/text() selector: //script[contains(text(), "setThumbUrl169")]/text()
postProcess: postProcess:
- replace: - replace:
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$ - regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
with: $1 with: $1
# Last Updated August 06, 2020 # Last Updated August 06, 2020

View File

@ -1,28 +1,28 @@
# XPath scene scraper for xtube.com video pages.
name: xtube
sceneByURL:
  - action: scrapeXPath
    url:
      - xtube.com
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title:
        selector: '//form/h1/text()'
      Date:
        # The date is read from an inline script that mentions "dimension10";
        # the regex expects a quoted 8-digit YYYYMMDD value in it.
        selector: '//script[contains(text(), "dimension10")]/text()'
        postProcess:
          - replace:
              # Rearrange YYYYMMDD into "MM DD, YYYY" so parseDate can read it.
              - regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
                with: $2 $3, $1
          - parseDate: 01 02, 2006
      Studio:
        Name:
          fixed: xtube
      Details: '//meta[@property="og:description"]/@content'
      Performers:
        # Content owner id from the comment form, skipping values that contain "sponsor_".
        Name: '//form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]'
      Tags:
        Name: '//div[@class="categories" or @class="tags"]//a/text()'
      Image: '//meta[@property="og:image"]/@content'
# Last Updated August 06, 2020

View File

@ -1,23 +1,23 @@
# XPath scene scraper for xvideos.com video pages.
name: xvideos
sceneByURL:
  - action: scrapeXPath
    url:
      - xvideos.com
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      # Only the first text node of the heading is used as the title.
      Title: '//h2[@class="page-title"]/text()[1]'
      Tags:
        # Restrict the generic button links to those pointing at /tags/.
        Name: '//li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()'
      Performers:
        Name: '//a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()'
      Studio:
        Name: '//a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()'
      Image:
        # The thumbnail URL only appears inside an inline script, as the
        # argument of a setThumbUrl169('...') call.
        selector: '//script[contains(text(), "setThumbUrl169")]/text()'
        postProcess:
          - replace:
              # Keep only the captured .jpg URL from the script text.
              - regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
                with: $1
# Last Updated August 06, 2020

View File

@ -1,59 +1,59 @@
import json import json
import sys import sys
import sqlite3 import sqlite3
from os import path from os import path
''' This script uses the sqlite database from xbvr (3d porn manager)
Copy main.db from your xbvr configuration and rename it to xbvr.db
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
This script needs python3 and sqlite3
'''
def lookup_scene(id):
    """Build the scraper result for a single scene.

    Reads the scene row, its tags and its cast through the module-level
    ``conn`` SQLite connection.

    Args:
        id: Value matched against ``scenes.id`` (and ``scene_id`` in the
            ``scene_tags`` / ``scene_cast`` link tables).

    Returns:
        A dict with the keys ``title``, ``details``, ``studio``, ``image``,
        ``url``, ``date``, ``tags`` and ``performers``, or ``None`` when no
        scene with that id exists.
    """
    c = conn.cursor()
    c.execute('select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?', (id,))
    row = c.fetchone()
    if row is None:
        # Unknown id: report "not found" instead of failing with an opaque
        # TypeError when subscripting None.
        return None

    title, synopsis, site, cover_url, scene_url, release_date = row
    res = {
        'title': title,
        'details': synopsis,
        'studio': {"name": site},
        'image': cover_url,
        'url': scene_url,
        'date': release_date,
    }

    c.execute("select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;", (id,))
    res['tags'] = [{"name": name} for (name,) in c.fetchall()]

    c.execute("select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;", (id,))
    res['performers'] = [{"name": name} for (name,) in c.fetchall()]
    return res
def find_scene_id(title):
    """Resolve a scene id from a file name or, failing that, a scene title.

    Uses the module-level ``conn`` SQLite connection. The ``files`` table is
    tried first (``filename`` match); if that yields nothing, the ``scenes``
    table is tried (``title`` match).

    Args:
        title: Text compared against ``files.filename`` and then
            ``scenes.title``.

    Returns:
        The matching scene id, or ``None`` when neither lookup finds a row.
    """
    c = conn.cursor()
    c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
    row = c.fetchone()
    if row is None:
        c.execute('select id from scenes where title=?', (title,))
        row = c.fetchone()
    # Both lookups may miss; return None so the caller's "not found" branch
    # is reachable instead of raising TypeError on None[0].
    return None if row is None else row[0]
# Script entry point: verify the database is present, then answer a "query"
# request read as JSON from stdin with the scraped scene as JSON on stdout.
# All diagnostics go to stderr so stdout carries only the result.
if not path.exists("xbvr.db"):
    print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.", file=sys.stderr)
    print("Copy this database from the docker container and give it the name xbvr.db", file=sys.stderr)
    print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db", file=sys.stderr)
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python runs with -S).
    sys.exit(1)

conn = sqlite3.connect('xbvr.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
try:
    # Guard the argv access so running without arguments does not raise IndexError.
    if len(sys.argv) > 1 and sys.argv[1] == "query":
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(fragment), file=sys.stderr)
        scene_id = find_scene_id(fragment['title'])
        if not scene_id:
            print(f"Could not determine scene id in title: `{fragment['title']}`", file=sys.stderr)
        else:
            print(f"Found scene id: {scene_id}", file=sys.stderr)
            result = lookup_scene(scene_id)
            print(json.dumps(result))
finally:
    # Release the database handle even if the lookup above raises.
    conn.close()