mirror of
https://github.com/stashapp/CommunityScrapers.git
synced 2025-12-10 00:41:16 -06:00
Create .gitattributes (#265)
This commit is contained in:
parent
a9603e2a60
commit
6e013d1064
5
.gitattributes
vendored
Normal file
5
.gitattributes
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
* text=auto
|
||||
|
||||
*.yml eol=lf diff=yaml linguist-detectable
|
||||
*.py eol=lf diff=python
|
||||
*.md eol=lf diff=markdown
|
||||
@ -1,34 +1,34 @@
|
||||
name: "FemDomEmpire"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- femdomempire.com/tour/trailers/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="videoDetails clear"]/h3
|
||||
Date:
|
||||
selector: //p[span[contains(.,"Date Added")]]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: "Date Added:"
|
||||
with:
|
||||
- parseDate: January 2, 2006
|
||||
Details: //div[@class="videoDetails clear"]/p
|
||||
Performers:
|
||||
Name: //li[@class="update_models"]/a
|
||||
Tags:
|
||||
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
|
||||
Studio:
|
||||
Name:
|
||||
fixed: FemDomEmpire
|
||||
Image:
|
||||
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*/tour/content//
|
||||
with: "https://femdomempire.com/tour/content/"
|
||||
|
||||
# Last Updated October 10, 2020
|
||||
name: "FemDomEmpire"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- femdomempire.com/tour/trailers/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="videoDetails clear"]/h3
|
||||
Date:
|
||||
selector: //p[span[contains(.,"Date Added")]]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: "Date Added:"
|
||||
with:
|
||||
- parseDate: January 2, 2006
|
||||
Details: //div[@class="videoDetails clear"]/p
|
||||
Performers:
|
||||
Name: //li[@class="update_models"]/a
|
||||
Tags:
|
||||
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
|
||||
Studio:
|
||||
Name:
|
||||
fixed: FemDomEmpire
|
||||
Image:
|
||||
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*/tour/content//
|
||||
with: "https://femdomempire.com/tour/content/"
|
||||
|
||||
# Last Updated October 10, 2020
|
||||
# Note: Date Added on older scenes seem to be incorrect on FemDomEmpire site. Newer scenes have correct date.
|
||||
@ -1,34 +1,34 @@
|
||||
name: firstanalquest
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- firstanalquest.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
|
||||
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
|
||||
scene:
|
||||
Title: $title
|
||||
Details:
|
||||
selector: //div[@class="text-desc"]
|
||||
Tags:
|
||||
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Image: //img[@class="player-preview"]/@src
|
||||
Studio:
|
||||
Name:
|
||||
fixed: First Anal Quest
|
||||
Date:
|
||||
selector: $title
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s
|
||||
with: +
|
||||
- regex: ^([^-]+-?).+
|
||||
with: http://www.firstanalquest.com/search/?q=$1
|
||||
- subScraper: //span[@class="thumb-added"]/text()
|
||||
- parseDate: Jan 2, 2006
|
||||
# Last Updated August 06, 2020
|
||||
name: firstanalquest
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- firstanalquest.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
|
||||
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
|
||||
scene:
|
||||
Title: $title
|
||||
Details:
|
||||
selector: //div[@class="text-desc"]
|
||||
Tags:
|
||||
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Image: //img[@class="player-preview"]/@src
|
||||
Studio:
|
||||
Name:
|
||||
fixed: First Anal Quest
|
||||
Date:
|
||||
selector: $title
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s
|
||||
with: +
|
||||
- regex: ^([^-]+-?).+
|
||||
with: http://www.firstanalquest.com/search/?q=$1
|
||||
- subScraper: //span[@class="thumb-added"]/text()
|
||||
- parseDate: Jan 2, 2006
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,25 +1,25 @@
|
||||
name: fistertwister
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fistertwister.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="jumbotron"]/h2/text()[1]
|
||||
Date:
|
||||
selector: //li[contains(text(),"Released on:")]/strong/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
|
||||
Tags:
|
||||
Name: //a[@class="btn btn-default"]/text()
|
||||
Performers:
|
||||
Name: //li[contains(text(),"Featuring:")]//a/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fister Twister
|
||||
#Image: //video[@id="video"]/@poster
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
# Last Updated August 06, 2020
|
||||
name: fistertwister
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fistertwister.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="jumbotron"]/h2/text()[1]
|
||||
Date:
|
||||
selector: //li[contains(text(),"Released on:")]/strong/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
|
||||
Tags:
|
||||
Name: //a[@class="btn btn-default"]/text()
|
||||
Performers:
|
||||
Name: //li[contains(text(),"Featuring:")]//a/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fister Twister
|
||||
#Image: //video[@id="video"]/@poster
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,42 +1,42 @@
|
||||
name: "FittingRoom"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fitting-room.com/videos/
|
||||
scraper: sceneScraper
|
||||
galleryByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fitting-room.com/albums/
|
||||
scraper: galleryScraper
|
||||
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //head/title/text()
|
||||
Details: //meta[@property="twitter:description"]/@content
|
||||
Performers:
|
||||
Name: //div[@class="info-model"]//p[@class="name"]
|
||||
Tags:
|
||||
Name: //meta[@property="article:tag"]/@content
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fitting-Room
|
||||
Image: //meta[@property="twitter:image"]/@content
|
||||
URL: //meta[@property="twitter:url"]/@content
|
||||
galleryScraper:
|
||||
gallery:
|
||||
Title:
|
||||
selector: //head/title/text()
|
||||
Details: //meta[@property="twitter:description"]/@content
|
||||
Performers:
|
||||
Name: //div[@class="info-model"]//p[@class="name"]
|
||||
Tags:
|
||||
Name: //meta[@property="article:tag"]/@content
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fitting-Room
|
||||
URL: //meta[@property="twitter:url"]/@content
|
||||
|
||||
# Last Updated October 23, 2020
|
||||
name: "FittingRoom"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fitting-room.com/videos/
|
||||
scraper: sceneScraper
|
||||
galleryByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- fitting-room.com/albums/
|
||||
scraper: galleryScraper
|
||||
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //head/title/text()
|
||||
Details: //meta[@property="twitter:description"]/@content
|
||||
Performers:
|
||||
Name: //div[@class="info-model"]//p[@class="name"]
|
||||
Tags:
|
||||
Name: //meta[@property="article:tag"]/@content
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fitting-Room
|
||||
Image: //meta[@property="twitter:image"]/@content
|
||||
URL: //meta[@property="twitter:url"]/@content
|
||||
galleryScraper:
|
||||
gallery:
|
||||
Title:
|
||||
selector: //head/title/text()
|
||||
Details: //meta[@property="twitter:description"]/@content
|
||||
Performers:
|
||||
Name: //div[@class="info-model"]//p[@class="name"]
|
||||
Tags:
|
||||
Name: //meta[@property="article:tag"]/@content
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Fitting-Room
|
||||
URL: //meta[@property="twitter:url"]/@content
|
||||
|
||||
# Last Updated October 23, 2020
|
||||
|
||||
@ -1,29 +1,29 @@
|
||||
name: Mandyflores
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- mandyflores.com/vod/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$updateDesc: //span[@class="update_description"]
|
||||
$divCenter: /div[@align="center"]
|
||||
scene:
|
||||
Title: //span[@class="title_bar_hilite"]
|
||||
Details:
|
||||
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
|
||||
concat: "\n\n"
|
||||
Date:
|
||||
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
|
||||
postProcess:
|
||||
- parseDate: 01/02/2006
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Mandyflores
|
||||
Tags:
|
||||
Name: //span[@class="update_tags"]/a
|
||||
Performers:
|
||||
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
|
||||
|
||||
# Last Updated September 16, 2020
|
||||
name: Mandyflores
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- mandyflores.com/vod/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$updateDesc: //span[@class="update_description"]
|
||||
$divCenter: /div[@align="center"]
|
||||
scene:
|
||||
Title: //span[@class="title_bar_hilite"]
|
||||
Details:
|
||||
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
|
||||
concat: "\n\n"
|
||||
Date:
|
||||
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
|
||||
postProcess:
|
||||
- parseDate: 01/02/2006
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Mandyflores
|
||||
Tags:
|
||||
Name: //span[@class="update_tags"]/a
|
||||
Performers:
|
||||
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
|
||||
|
||||
# Last Updated September 16, 2020
|
||||
|
||||
@ -1,114 +1,114 @@
|
||||
name: Modelhub
|
||||
performerByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://modelhub.com/model/search?q={}
|
||||
scraper: performerSearch
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- modelhub.com
|
||||
scraper: performerScraper
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- modelhub.com/video/
|
||||
scraper: sceneScraper
|
||||
|
||||
xPathScrapers:
|
||||
performerSearch:
|
||||
performer:
|
||||
Name: //div[@class="modelInfo"]/a/h3/text()
|
||||
URL:
|
||||
selector: //li[@class="modelBox"]/a/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)$
|
||||
with: "https://www.modelhub.com$1/bio"
|
||||
|
||||
performerScraper:
|
||||
common:
|
||||
$infoPiece: //div[@class="modelContent"]
|
||||
$modelAttributes: //ul[@class="modelAttributes"]/li
|
||||
performer:
|
||||
Name: //div[@class="infoSection"]/h1/text()
|
||||
Birthdate:
|
||||
selector: //span[@class='bday js_lazy_bkg']/text()
|
||||
parseDate: January 2, 2006
|
||||
Gender:
|
||||
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: "Couple"
|
||||
with:
|
||||
Ethnicity:
|
||||
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
|
||||
postProcess:
|
||||
- map:
|
||||
Latin: "hispanic"
|
||||
Other: ""
|
||||
Twitter:
|
||||
selector: //a[@class='twitter js_lazy_bkg']/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+(twitter.com)/(.+)
|
||||
with: $2
|
||||
- regex: (.+)\?(.*)
|
||||
with: $1
|
||||
Instagram:
|
||||
selector: //a[@class="insta js_lazy_bkg"]/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+(instagram.com/)(.+)
|
||||
with: $2
|
||||
Height:
|
||||
selector: $modelAttributes/div[contains(.,"Height")]/span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\((\d+)cm\)
|
||||
with: $1
|
||||
FakeTits:
|
||||
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
|
||||
postProcess:
|
||||
- map:
|
||||
Natural: "No"
|
||||
Fake: "Yes"
|
||||
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
|
||||
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
|
||||
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
|
||||
URL: //meta[@name="twitter:url"]/@content
|
||||
Image: //img[@class='profileImg lazy']/@data-src
|
||||
Country:
|
||||
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
|
||||
postProcess:
|
||||
- map:
|
||||
"AR": "Argentina"
|
||||
"AT": "Austria"
|
||||
"AU": "Australia"
|
||||
"BR": "Brazil"
|
||||
"CA": "Canada"
|
||||
"colombia, CO": "Colombia"
|
||||
"CO": "Colombia"
|
||||
"CZ": "Czech Republic"
|
||||
"DE": "Germany"
|
||||
"ES": "Spain"
|
||||
"FR": "France"
|
||||
"GB": "United Kingdom"
|
||||
"HU": "Hungary"
|
||||
"IT": "Italy"
|
||||
"Peru, PE": "Peru"
|
||||
"PE": "Peru"
|
||||
"PH": "Philippines"
|
||||
"RO": "Romania"
|
||||
"RU": "Russia"
|
||||
"US": "United States"
|
||||
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
|
||||
Tags:
|
||||
Name: //div[@class='videoCategories']/div/a/text()
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
Details: //p[@ class="videoDescription"]
|
||||
Performers:
|
||||
Name: //div[@class='videoAvatar']/div[1]/a[2]
|
||||
# Last Updated Jul 25, 2020
|
||||
name: Modelhub
|
||||
performerByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://modelhub.com/model/search?q={}
|
||||
scraper: performerSearch
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- modelhub.com
|
||||
scraper: performerScraper
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- modelhub.com/video/
|
||||
scraper: sceneScraper
|
||||
|
||||
xPathScrapers:
|
||||
performerSearch:
|
||||
performer:
|
||||
Name: //div[@class="modelInfo"]/a/h3/text()
|
||||
URL:
|
||||
selector: //li[@class="modelBox"]/a/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)$
|
||||
with: "https://www.modelhub.com$1/bio"
|
||||
|
||||
performerScraper:
|
||||
common:
|
||||
$infoPiece: //div[@class="modelContent"]
|
||||
$modelAttributes: //ul[@class="modelAttributes"]/li
|
||||
performer:
|
||||
Name: //div[@class="infoSection"]/h1/text()
|
||||
Birthdate:
|
||||
selector: //span[@class='bday js_lazy_bkg']/text()
|
||||
parseDate: January 2, 2006
|
||||
Gender:
|
||||
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: "Couple"
|
||||
with:
|
||||
Ethnicity:
|
||||
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
|
||||
postProcess:
|
||||
- map:
|
||||
Latin: "hispanic"
|
||||
Other: ""
|
||||
Twitter:
|
||||
selector: //a[@class='twitter js_lazy_bkg']/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+(twitter.com)/(.+)
|
||||
with: $2
|
||||
- regex: (.+)\?(.*)
|
||||
with: $1
|
||||
Instagram:
|
||||
selector: //a[@class="insta js_lazy_bkg"]/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+(instagram.com/)(.+)
|
||||
with: $2
|
||||
Height:
|
||||
selector: $modelAttributes/div[contains(.,"Height")]/span
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .*\((\d+)cm\)
|
||||
with: $1
|
||||
FakeTits:
|
||||
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
|
||||
postProcess:
|
||||
- map:
|
||||
Natural: "No"
|
||||
Fake: "Yes"
|
||||
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
|
||||
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
|
||||
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
|
||||
URL: //meta[@name="twitter:url"]/@content
|
||||
Image: //img[@class='profileImg lazy']/@data-src
|
||||
Country:
|
||||
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
|
||||
postProcess:
|
||||
- map:
|
||||
"AR": "Argentina"
|
||||
"AT": "Austria"
|
||||
"AU": "Australia"
|
||||
"BR": "Brazil"
|
||||
"CA": "Canada"
|
||||
"colombia, CO": "Colombia"
|
||||
"CO": "Colombia"
|
||||
"CZ": "Czech Republic"
|
||||
"DE": "Germany"
|
||||
"ES": "Spain"
|
||||
"FR": "France"
|
||||
"GB": "United Kingdom"
|
||||
"HU": "Hungary"
|
||||
"IT": "Italy"
|
||||
"Peru, PE": "Peru"
|
||||
"PE": "Peru"
|
||||
"PH": "Philippines"
|
||||
"RO": "Romania"
|
||||
"RU": "Russia"
|
||||
"US": "United States"
|
||||
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
|
||||
Tags:
|
||||
Name: //div[@class='videoCategories']/div/a/text()
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
Details: //p[@ class="videoDescription"]
|
||||
Performers:
|
||||
Name: //div[@class='videoAvatar']/div[1]/a[2]
|
||||
# Last Updated Jul 25, 2020
|
||||
|
||||
@ -1,55 +1,55 @@
|
||||
name: "PervCity"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- analoverdose.com/trailers/
|
||||
- bangingbeauties.com/trailers/
|
||||
- chocolatebjs.com/trailers/
|
||||
- oraloverdose.com/trailers/
|
||||
- pervcity.com/trailers/
|
||||
- upherasshole.com/trailers/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$sceneinfo: //div[@class="videoInfo"]
|
||||
$base: //head/base/@href
|
||||
scene:
|
||||
Title: $sceneinfo/div[@class="infoHeader"]
|
||||
Performers:
|
||||
Name: $sceneinfo//span[@class="tour_update_models"]/a
|
||||
Details: $sceneinfo//p/text()
|
||||
Image:
|
||||
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
|
||||
concat: "|"
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ([^|]+)\|(.*)/(content/.+)
|
||||
with: $1$3
|
||||
Date:
|
||||
selector: $sceneinfo/div[@class="infoHeader"]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s
|
||||
with: "_"
|
||||
- regex: ^
|
||||
with: "https://pervcity.com/search.php?query="
|
||||
- subScraper:
|
||||
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
|
||||
postProcess:
|
||||
- parseDate: 01-02-2006
|
||||
Studio:
|
||||
Name:
|
||||
selector: //head/base/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+/([^\.]+)\.com/.*
|
||||
with: $1
|
||||
- map:
|
||||
analoverdose: AnalOverdose
|
||||
chocolatebjs: ChocolateBjs
|
||||
bangingbeauties: BangingBeauties
|
||||
oraloverdose: OralOverdose
|
||||
pervcity: PervCity
|
||||
upherasshole: UpHerAsshole
|
||||
# Last Updated October 14, 2020
|
||||
name: "PervCity"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- analoverdose.com/trailers/
|
||||
- bangingbeauties.com/trailers/
|
||||
- chocolatebjs.com/trailers/
|
||||
- oraloverdose.com/trailers/
|
||||
- pervcity.com/trailers/
|
||||
- upherasshole.com/trailers/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$sceneinfo: //div[@class="videoInfo"]
|
||||
$base: //head/base/@href
|
||||
scene:
|
||||
Title: $sceneinfo/div[@class="infoHeader"]
|
||||
Performers:
|
||||
Name: $sceneinfo//span[@class="tour_update_models"]/a
|
||||
Details: $sceneinfo//p/text()
|
||||
Image:
|
||||
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
|
||||
concat: "|"
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ([^|]+)\|(.*)/(content/.+)
|
||||
with: $1$3
|
||||
Date:
|
||||
selector: $sceneinfo/div[@class="infoHeader"]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s
|
||||
with: "_"
|
||||
- regex: ^
|
||||
with: "https://pervcity.com/search.php?query="
|
||||
- subScraper:
|
||||
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
|
||||
postProcess:
|
||||
- parseDate: 01-02-2006
|
||||
Studio:
|
||||
Name:
|
||||
selector: //head/base/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: .+/([^\.]+)\.com/.*
|
||||
with: $1
|
||||
- map:
|
||||
analoverdose: AnalOverdose
|
||||
chocolatebjs: ChocolateBjs
|
||||
bangingbeauties: BangingBeauties
|
||||
oraloverdose: OralOverdose
|
||||
pervcity: PervCity
|
||||
upherasshole: UpHerAsshole
|
||||
# Last Updated October 14, 2020
|
||||
|
||||
@ -1,48 +1,48 @@
|
||||
name: private
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- private.com
|
||||
scraper: sceneScraper
|
||||
movieByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- private.com
|
||||
scraper: movieScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$content: //section[@class="video-description-and-tags clearfix"]
|
||||
scene:
|
||||
Title:
|
||||
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
|
||||
Date:
|
||||
selector: //meta[@itemprop="uploadDate"]/@content
|
||||
postProcess:
|
||||
- parseDate: 01/02/2006
|
||||
Details: $content//p[@id="description-section"]
|
||||
Tags:
|
||||
Name: $content//ul[@class="scene-tags"]/li/a/text()
|
||||
Performers:
|
||||
Name: $content//ul[@class="scene-models-list"]//a/text()
|
||||
Studio:
|
||||
Name: $content//span[@class="title-site"]/text()
|
||||
#fixed: Private
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
movieScraper:
|
||||
movie:
|
||||
Name: //div[@class="dvds-wrapper"]/h1/text()
|
||||
Director: //p[@class="director"]/span[@itemprop="name"]/text()
|
||||
Duration:
|
||||
selector: //p[em[contains(text(), "Duration:")]]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(\d+).+$
|
||||
with: 00:$1:00
|
||||
Date: //span[@itemprop="datePublished"]/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Private
|
||||
Synopsis: //p[@class="sinopsys"]
|
||||
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
|
||||
# Last Updated August 18, 2020
|
||||
name: private
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- private.com
|
||||
scraper: sceneScraper
|
||||
movieByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- private.com
|
||||
scraper: movieScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$content: //section[@class="video-description-and-tags clearfix"]
|
||||
scene:
|
||||
Title:
|
||||
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
|
||||
Date:
|
||||
selector: //meta[@itemprop="uploadDate"]/@content
|
||||
postProcess:
|
||||
- parseDate: 01/02/2006
|
||||
Details: $content//p[@id="description-section"]
|
||||
Tags:
|
||||
Name: $content//ul[@class="scene-tags"]/li/a/text()
|
||||
Performers:
|
||||
Name: $content//ul[@class="scene-models-list"]//a/text()
|
||||
Studio:
|
||||
Name: $content//span[@class="title-site"]/text()
|
||||
#fixed: Private
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
movieScraper:
|
||||
movie:
|
||||
Name: //div[@class="dvds-wrapper"]/h1/text()
|
||||
Director: //p[@class="director"]/span[@itemprop="name"]/text()
|
||||
Duration:
|
||||
selector: //p[em[contains(text(), "Duration:")]]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(\d+).+$
|
||||
with: 00:$1:00
|
||||
Date: //span[@itemprop="datePublished"]/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Private
|
||||
Synopsis: //p[@class="sinopsys"]
|
||||
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
|
||||
# Last Updated August 18, 2020
|
||||
|
||||
@ -1,38 +1,38 @@
|
||||
name: puffynetwork
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- puffynetwork.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //dl/dd/a
|
||||
scene:
|
||||
Title:
|
||||
selector: //h2[@class="title"]/span/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)[—]\s+
|
||||
with:
|
||||
Date:
|
||||
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="show_more"]/text()[1]
|
||||
Tags:
|
||||
Name: //p[@class="tags"]/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Studio:
|
||||
Name:
|
||||
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
|
||||
postProcess:
|
||||
- map:
|
||||
Weliketosuck: We Like To Suck
|
||||
Wetandpuffy: Wet And Puffy
|
||||
Wetandpissy: Wet And Pissy
|
||||
Eurobabefacials: Euro Babe Facials
|
||||
Simplyanal: Simply Anal
|
||||
Image: //video[@id="video"]/@poster
|
||||
# Last Updated August 06, 2020
|
||||
name: puffynetwork
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- puffynetwork.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //dl/dd/a
|
||||
scene:
|
||||
Title:
|
||||
selector: //h2[@class="title"]/span/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)[—]\s+
|
||||
with:
|
||||
Date:
|
||||
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="show_more"]/text()[1]
|
||||
Tags:
|
||||
Name: //p[@class="tags"]/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Studio:
|
||||
Name:
|
||||
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
|
||||
postProcess:
|
||||
- map:
|
||||
Weliketosuck: We Like To Suck
|
||||
Wetandpuffy: Wet And Puffy
|
||||
Wetandpissy: Wet And Pissy
|
||||
Eurobabefacials: Euro Babe Facials
|
||||
Simplyanal: Simply Anal
|
||||
Image: //video[@id="video"]/@poster
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,73 +1,73 @@
|
||||
name: "Teencoreclub"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- analyzedgirls.com/
|
||||
- assteenmouth.com/
|
||||
- bangteenpussy.com/
|
||||
- brutalinvasion.com/
|
||||
- cumaholicteens.com/
|
||||
- defiled18.com/
|
||||
- doubleteamedteens.com/
|
||||
- dreamteenshd.com/
|
||||
- girlsgotcream.com/
|
||||
- hardcoreyouth.com/
|
||||
- littlehellcat.com/
|
||||
- maketeengape.com/
|
||||
- nylonsweeties.com/
|
||||
- seductive18.com/
|
||||
- teenanalcasting.com/
|
||||
- teendrillers.com/
|
||||
- teensnaturalway.com/
|
||||
- teenstryblacks.com/
|
||||
- spermantino.com/
|
||||
- teachmyass.com/
|
||||
- drilledchicks.com/
|
||||
- analcheckups.com/
|
||||
- fabsluts.com/
|
||||
- jerk-offpass.com/
|
||||
- nylonspunkjunkies.com/
|
||||
- shegotsix.com/
|
||||
- spearteenpussy.com/
|
||||
- teencoreclub.com/
|
||||
- teencorezine.com/
|
||||
- teensgoporn.com/
|
||||
- weneednewtalents.com/
|
||||
- xcoreclub.com/
|
||||
- youngthroats.com/
|
||||
- tryteens.com/
|
||||
- whiteteensblackcocks.com/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||
replace:
|
||||
- regex: \t+
|
||||
with: ' '
|
||||
Details: //div[@class="detail-description"]/text()
|
||||
Performers:
|
||||
Name:
|
||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||
replace:
|
||||
- regex: \t+
|
||||
with:
|
||||
split: ','
|
||||
Tags:
|
||||
Name:
|
||||
selector: //meta[@name="keywords"]/@content
|
||||
split: ','
|
||||
Image:
|
||||
selector: //div[@class="detail-hero"]/@style
|
||||
replace:
|
||||
- regex: ^.*url.([^\)]+).*$
|
||||
with: "$1"
|
||||
Studio:
|
||||
Name:
|
||||
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
|
||||
replace:
|
||||
- regex: \.\w+
|
||||
with:
|
||||
|
||||
# Last Updated June 22, 2020
|
||||
name: "Teencoreclub"
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- analyzedgirls.com/
|
||||
- assteenmouth.com/
|
||||
- bangteenpussy.com/
|
||||
- brutalinvasion.com/
|
||||
- cumaholicteens.com/
|
||||
- defiled18.com/
|
||||
- doubleteamedteens.com/
|
||||
- dreamteenshd.com/
|
||||
- girlsgotcream.com/
|
||||
- hardcoreyouth.com/
|
||||
- littlehellcat.com/
|
||||
- maketeengape.com/
|
||||
- nylonsweeties.com/
|
||||
- seductive18.com/
|
||||
- teenanalcasting.com/
|
||||
- teendrillers.com/
|
||||
- teensnaturalway.com/
|
||||
- teenstryblacks.com/
|
||||
- spermantino.com/
|
||||
- teachmyass.com/
|
||||
- drilledchicks.com/
|
||||
- analcheckups.com/
|
||||
- fabsluts.com/
|
||||
- jerk-offpass.com/
|
||||
- nylonspunkjunkies.com/
|
||||
- shegotsix.com/
|
||||
- spearteenpussy.com/
|
||||
- teencoreclub.com/
|
||||
- teencorezine.com/
|
||||
- teensgoporn.com/
|
||||
- weneednewtalents.com/
|
||||
- xcoreclub.com/
|
||||
- youngthroats.com/
|
||||
- tryteens.com/
|
||||
- whiteteensblackcocks.com/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||
replace:
|
||||
- regex: \t+
|
||||
with: ' '
|
||||
Details: //div[@class="detail-description"]/text()
|
||||
Performers:
|
||||
Name:
|
||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||
replace:
|
||||
- regex: \t+
|
||||
with:
|
||||
split: ','
|
||||
Tags:
|
||||
Name:
|
||||
selector: //meta[@name="keywords"]/@content
|
||||
split: ','
|
||||
Image:
|
||||
selector: //div[@class="detail-hero"]/@style
|
||||
replace:
|
||||
- regex: ^.*url.([^\)]+).*$
|
||||
with: "$1"
|
||||
Studio:
|
||||
Name:
|
||||
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
|
||||
replace:
|
||||
- regex: \.\w+
|
||||
with:
|
||||
|
||||
# Last Updated June 22, 2020
|
||||
|
||||
@ -1,78 +1,78 @@
|
||||
name: TheNude
|
||||
performerByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
|
||||
scraper: performerSearch
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- https://www.thenude.com
|
||||
scraper: performerScraper
|
||||
xPathScrapers:
|
||||
performerSearch:
|
||||
performer:
|
||||
# Name: //a[@class="model-name"]/text() Version to get only the name
|
||||
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
|
||||
Name:
|
||||
selector: //figcaption/span
|
||||
replace:
|
||||
- regex: "^AKA:"
|
||||
with: ""
|
||||
URL: //a[@class="model-name"]/@href
|
||||
performerScraper:
|
||||
performer:
|
||||
Name: (//meta[@itemprop="name"])[1]/@content
|
||||
URL: (//meta[@itemprop="url"])[1]/@content
|
||||
Twitter: //a[text()="TWITTER"]/@href
|
||||
Instagram: //a[text()="INSTAGRAM"]/@href
|
||||
Birthdate:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
|
||||
parseDate: 02-01-2006
|
||||
Ethnicity:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
|
||||
replace:
|
||||
- regex: Asian
|
||||
with: "asian"
|
||||
- regex: Caucasian
|
||||
with: "white"
|
||||
- regex: Black
|
||||
with: "black"
|
||||
- regex: Latin
|
||||
with: "hispanic"
|
||||
Country:
|
||||
selector: //span[@itemprop="nationality"]/text()
|
||||
replace:
|
||||
- regex: "United States of America"
|
||||
with: "United States"
|
||||
#EyeColor: not listed
|
||||
Height:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
|
||||
replace:
|
||||
- regex: ^(\d+).+$
|
||||
with: "$1 cm"
|
||||
Measurements:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
|
||||
FakeTits:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
|
||||
replace:
|
||||
- regex: ^[^\(]+\(([^\)]+)\).*$
|
||||
with: "$1"
|
||||
- regex: Fake
|
||||
with: "Yes"
|
||||
- regex: Real
|
||||
with: "No"
|
||||
CareerLength:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
|
||||
concat: "-"
|
||||
Aliases:
|
||||
selector: //meta[@itemprop="additionalName"]/@content
|
||||
concat: ", "
|
||||
Tattoos:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
|
||||
Piercings:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
|
||||
Image:
|
||||
selector: (//meta[@itemprop="image"])[1]/@content
|
||||
Gender:
|
||||
selector: //meta[@itemprop="gender"]/@content
|
||||
# Last updated July 02, 2020
|
||||
name: TheNude
|
||||
performerByName:
|
||||
action: scrapeXPath
|
||||
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
|
||||
scraper: performerSearch
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- https://www.thenude.com
|
||||
scraper: performerScraper
|
||||
xPathScrapers:
|
||||
performerSearch:
|
||||
performer:
|
||||
# Name: //a[@class="model-name"]/text() Version to get only the name
|
||||
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
|
||||
Name:
|
||||
selector: //figcaption/span
|
||||
replace:
|
||||
- regex: "^AKA:"
|
||||
with: ""
|
||||
URL: //a[@class="model-name"]/@href
|
||||
performerScraper:
|
||||
performer:
|
||||
Name: (//meta[@itemprop="name"])[1]/@content
|
||||
URL: (//meta[@itemprop="url"])[1]/@content
|
||||
Twitter: //a[text()="TWITTER"]/@href
|
||||
Instagram: //a[text()="INSTAGRAM"]/@href
|
||||
Birthdate:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
|
||||
parseDate: 02-01-2006
|
||||
Ethnicity:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
|
||||
replace:
|
||||
- regex: Asian
|
||||
with: "asian"
|
||||
- regex: Caucasian
|
||||
with: "white"
|
||||
- regex: Black
|
||||
with: "black"
|
||||
- regex: Latin
|
||||
with: "hispanic"
|
||||
Country:
|
||||
selector: //span[@itemprop="nationality"]/text()
|
||||
replace:
|
||||
- regex: "United States of America"
|
||||
with: "United States"
|
||||
#EyeColor: not listed
|
||||
Height:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
|
||||
replace:
|
||||
- regex: ^(\d+).+$
|
||||
with: "$1 cm"
|
||||
Measurements:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
|
||||
FakeTits:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
|
||||
replace:
|
||||
- regex: ^[^\(]+\(([^\)]+)\).*$
|
||||
with: "$1"
|
||||
- regex: Fake
|
||||
with: "Yes"
|
||||
- regex: Real
|
||||
with: "No"
|
||||
CareerLength:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
|
||||
concat: "-"
|
||||
Aliases:
|
||||
selector: //meta[@itemprop="additionalName"]/@content
|
||||
concat: ", "
|
||||
Tattoos:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
|
||||
Piercings:
|
||||
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
|
||||
Image:
|
||||
selector: (//meta[@itemprop="image"])[1]/@content
|
||||
Gender:
|
||||
selector: //meta[@itemprop="gender"]/@content
|
||||
# Last updated July 02, 2020
|
||||
|
||||
@ -1,43 +1,43 @@
|
||||
name: TheScoreGroup
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- scoreland.com/
|
||||
- 60plusmilfs.com/
|
||||
- 50plusmilfs.com/
|
||||
- 40somethingmag.com/
|
||||
- pornmegaload.com/
|
||||
- xlgirls.com/
|
||||
- 18eighteen.com/
|
||||
- legsex.com/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
|
||||
Studio:
|
||||
Name:
|
||||
selector: //meta[@property="og:site_name"]/@content
|
||||
Date:
|
||||
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ..,
|
||||
with:
|
||||
- parseDate: January 2 2006
|
||||
Details:
|
||||
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
|
||||
concat: "\n"
|
||||
Tags:
|
||||
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
|
||||
Performers:
|
||||
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
|
||||
Image:
|
||||
selector: //meta[@itemprop="image"]/@content
|
||||
# Enable this post process if you want better image quality but sometimes it can fail
|
||||
# postProcess:
|
||||
# - replace:
|
||||
# - regex: _lg
|
||||
# with: _x_800
|
||||
|
||||
# Last Updated August 11, 2020
|
||||
name: TheScoreGroup
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- scoreland.com/
|
||||
- 60plusmilfs.com/
|
||||
- 50plusmilfs.com/
|
||||
- 40somethingmag.com/
|
||||
- pornmegaload.com/
|
||||
- xlgirls.com/
|
||||
- 18eighteen.com/
|
||||
- legsex.com/
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
|
||||
Studio:
|
||||
Name:
|
||||
selector: //meta[@property="og:site_name"]/@content
|
||||
Date:
|
||||
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ..,
|
||||
with:
|
||||
- parseDate: January 2 2006
|
||||
Details:
|
||||
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
|
||||
concat: "\n"
|
||||
Tags:
|
||||
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
|
||||
Performers:
|
||||
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
|
||||
Image:
|
||||
selector: //meta[@itemprop="image"]/@content
|
||||
# Enable this post process if you want better image quality but sometimes it can fail
|
||||
# postProcess:
|
||||
# - replace:
|
||||
# - regex: _lg
|
||||
# with: _x_800
|
||||
|
||||
# Last Updated August 11, 2020
|
||||
|
||||
@ -1,32 +1,32 @@
|
||||
name: vipissy
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- vipissy.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //dl/dd/a
|
||||
scene:
|
||||
Title:
|
||||
selector: //section[@class="downloads"]/strong/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)[—]\s+
|
||||
with:
|
||||
Date:
|
||||
# selector: //dl/dd[contains(text(),"Released on:")]/text()
|
||||
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="show_more"]/text()[1]
|
||||
Tags:
|
||||
Name: //p[@class="tags"]/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Vipissy
|
||||
Image: //div[@id="videoplayer"]/video/@poster
|
||||
# Last Updated August 06, 2020
|
||||
name: vipissy
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- vipissy.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
common:
|
||||
$performer: //dl/dd/a
|
||||
scene:
|
||||
Title:
|
||||
selector: //section[@class="downloads"]/strong/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^(.*)[—]\s+
|
||||
with:
|
||||
Date:
|
||||
# selector: //dl/dd[contains(text(),"Released on:")]/text()
|
||||
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
|
||||
postProcess:
|
||||
- parseDate: Jan 2, 2006
|
||||
Details: //div[@class="show_more"]/text()[1]
|
||||
Tags:
|
||||
Name: //p[@class="tags"]/a/text()
|
||||
Performers:
|
||||
Name: $performer/text()
|
||||
Studio:
|
||||
Name:
|
||||
fixed: Vipissy
|
||||
Image: //div[@id="videoplayer"]/video/@poster
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,22 +1,22 @@
|
||||
name: xnxx
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xnxx.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="clear-infobar"]/strong/text()
|
||||
Tags:
|
||||
Name: //div[@class="metadata-row video-tags"]/a/text()
|
||||
Studio:
|
||||
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
|
||||
Details: //p[@class="metadata-row video-description"]
|
||||
Image:
|
||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||
with: $1
|
||||
# Last Updated August 06, 2020
|
||||
name: xnxx
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xnxx.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //div[@class="clear-infobar"]/strong/text()
|
||||
Tags:
|
||||
Name: //div[@class="metadata-row video-tags"]/a/text()
|
||||
Studio:
|
||||
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
|
||||
Details: //p[@class="metadata-row video-description"]
|
||||
Image:
|
||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||
with: $1
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,28 +1,28 @@
|
||||
name: xtube
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xtube.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //form/h1/text()
|
||||
Date:
|
||||
selector: //script[contains(text(), "dimension10")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
|
||||
with: $2 $3, $1
|
||||
- parseDate: 01 02, 2006
|
||||
Studio:
|
||||
Name:
|
||||
fixed: xtube
|
||||
Details: //meta[@property="og:description"]/@content
|
||||
Performers:
|
||||
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
|
||||
Tags:
|
||||
Name: //div[@class="categories" or @class="tags"]//a/text()
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
# Last Updated August 06, 2020
|
||||
name: xtube
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xtube.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title:
|
||||
selector: //form/h1/text()
|
||||
Date:
|
||||
selector: //script[contains(text(), "dimension10")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
|
||||
with: $2 $3, $1
|
||||
- parseDate: 01 02, 2006
|
||||
Studio:
|
||||
Name:
|
||||
fixed: xtube
|
||||
Details: //meta[@property="og:description"]/@content
|
||||
Performers:
|
||||
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
|
||||
Tags:
|
||||
Name: //div[@class="categories" or @class="tags"]//a/text()
|
||||
Image: //meta[@property="og:image"]/@content
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,23 +1,23 @@
|
||||
name: xvideos
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xvideos.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //h2[@class="page-title"]/text()[1]
|
||||
Tags:
|
||||
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
|
||||
Performers:
|
||||
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
|
||||
Studio:
|
||||
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
|
||||
Image:
|
||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||
with: $1
|
||||
# Last Updated August 06, 2020
|
||||
name: xvideos
|
||||
sceneByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- xvideos.com
|
||||
scraper: sceneScraper
|
||||
xPathScrapers:
|
||||
sceneScraper:
|
||||
scene:
|
||||
Title: //h2[@class="page-title"]/text()[1]
|
||||
Tags:
|
||||
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
|
||||
Performers:
|
||||
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
|
||||
Studio:
|
||||
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
|
||||
Image:
|
||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||
with: $1
|
||||
# Last Updated August 06, 2020
|
||||
|
||||
@ -1,59 +1,59 @@
|
||||
import json
|
||||
import sys
|
||||
import sqlite3
|
||||
from os import path
|
||||
|
||||
''' This script uses the sqlite database from xbvr (3d porn manager)
|
||||
Copy main.db from your xbvr configuration and rename it to xbvr.db
|
||||
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
|
||||
This script needs python3 and sqlite3
|
||||
'''
|
||||
def lookup_scene(id):
    """Build the result dictionary for one row of the ``scenes`` table.

    Uses the module-level ``conn`` sqlite3 connection.

    Args:
        id: Value matched against ``scenes.id``. The name shadows the
            builtin but is kept so existing callers are unaffected.

    Returns:
        A dict with the keys ``title``, ``details``, ``studio``, ``image``,
        ``url``, ``date``, ``tags`` and ``performers``. ``studio`` is a
        ``{"name": ...}`` dict; ``tags`` and ``performers`` are lists of
        ``{"name": ...}`` dicts. An empty dict is returned when no scene
        row has the given id.
    """
    c = conn.cursor()
    c.execute(
        'select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?',
        (id,),
    )
    scene = c.fetchone()
    if scene is None:
        # fetchone() yields None when nothing matched; indexing it would
        # raise TypeError, so report "nothing found" as an empty result.
        return {}

    title, synopsis, site, cover_url, scene_url, release_date = scene
    res = {
        'title': title,
        'details': synopsis,
        'studio': {"name": site},
        'image': cover_url,
        'url': scene_url,
        'date': release_date,
    }

    c.execute(
        "select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;",
        (id,),
    )
    res['tags'] = [{"name": tag_row[0]} for tag_row in c.fetchall()]

    c.execute(
        "select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;",
        (id,),
    )
    res['performers'] = [{"name": actor_row[0]} for actor_row in c.fetchall()]
    return res
|
||||
|
||||
def find_scene_id(title):
    """Return the scene id that matches *title*, or None when nothing matches.

    Uses the module-level ``conn`` sqlite3 connection. The value is first
    looked up as ``files.filename``; if no file row matches, it is looked
    up as ``scenes.title``.

    Args:
        title: File name or scene title to search for.

    Returns:
        The first column of the matching row (``files.scene_id`` or
        ``scenes.id``), or ``None`` when neither query returns a row.
    """
    c = conn.cursor()
    c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
    match = c.fetchone()
    if match is None:
        # No file with that name: fall back to an exact scene-title match.
        c.execute('select id from scenes where title=?', (title,))
        match = c.fetchone()
    # fetchone() yields None for "no row"; indexing it would raise TypeError,
    # so hand the caller None and let it test the result for truthiness.
    if match is None:
        return None
    return match[0]
|
||||
|
||||
# The script cannot do anything without its database copy: print setup
# instructions on stderr and stop with a non-zero exit status.
if not path.exists("xbvr.db"):
    print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
    print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
    print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
    # sys.exit is used instead of the interactive helper exit(), which is
    # only injected by the site module.
    sys.exit(1)

# Module-level connection shared by lookup_scene() and find_scene_id().
conn = sqlite3.connect('xbvr.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)

try:
    # Only the "query" action is handled; a missing argument is ignored
    # rather than raising IndexError.
    if len(sys.argv) > 1 and sys.argv[1] == "query":
        # The JSON fragment arrives on stdin and is echoed to stderr.
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(fragment),file=sys.stderr)
        scene_id = find_scene_id(fragment['title'])
        if not scene_id:
            print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
        else:
            print(f"Found scene id: {scene_id}",file=sys.stderr)
            result = lookup_scene(scene_id)
            # The result dictionary is the only thing written to stdout.
            print(json.dumps(result))
finally:
    # Release the database handle even when a lookup raises.
    conn.close()
|
||||
import json
|
||||
import sys
|
||||
import sqlite3
|
||||
from os import path
|
||||
|
||||
''' This script uses the sqlite database from xbvr (3d porn manager)
|
||||
Copy main.db from your xbvr configuration and rename it to xbvr.db
|
||||
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
|
||||
This script needs python3 and sqlite3
|
||||
'''
|
||||
def lookup_scene(id):
    """Build the result dictionary for one row of the ``scenes`` table.

    Uses the module-level ``conn`` sqlite3 connection.

    Args:
        id: Value matched against ``scenes.id``. The name shadows the
            builtin but is kept so existing callers are unaffected.

    Returns:
        A dict with the keys ``title``, ``details``, ``studio``, ``image``,
        ``url``, ``date``, ``tags`` and ``performers``. ``studio`` is a
        ``{"name": ...}`` dict; ``tags`` and ``performers`` are lists of
        ``{"name": ...}`` dicts. An empty dict is returned when no scene
        row has the given id.
    """
    c = conn.cursor()
    c.execute(
        'select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?',
        (id,),
    )
    scene = c.fetchone()
    if scene is None:
        # fetchone() yields None when nothing matched; indexing it would
        # raise TypeError, so report "nothing found" as an empty result.
        return {}

    title, synopsis, site, cover_url, scene_url, release_date = scene
    res = {
        'title': title,
        'details': synopsis,
        'studio': {"name": site},
        'image': cover_url,
        'url': scene_url,
        'date': release_date,
    }

    c.execute(
        "select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;",
        (id,),
    )
    res['tags'] = [{"name": tag_row[0]} for tag_row in c.fetchall()]

    c.execute(
        "select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;",
        (id,),
    )
    res['performers'] = [{"name": actor_row[0]} for actor_row in c.fetchall()]
    return res
|
||||
|
||||
def find_scene_id(title):
    """Return the scene id that matches *title*, or None when nothing matches.

    Uses the module-level ``conn`` sqlite3 connection. The value is first
    looked up as ``files.filename``; if no file row matches, it is looked
    up as ``scenes.title``.

    Args:
        title: File name or scene title to search for.

    Returns:
        The first column of the matching row (``files.scene_id`` or
        ``scenes.id``), or ``None`` when neither query returns a row.
    """
    c = conn.cursor()
    c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
    match = c.fetchone()
    if match is None:
        # No file with that name: fall back to an exact scene-title match.
        c.execute('select id from scenes where title=?', (title,))
        match = c.fetchone()
    # fetchone() yields None for "no row"; indexing it would raise TypeError,
    # so hand the caller None and let it test the result for truthiness.
    if match is None:
        return None
    return match[0]
|
||||
|
||||
# The script cannot do anything without its database copy: print setup
# instructions on stderr and stop with a non-zero exit status.
if not path.exists("xbvr.db"):
    print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
    print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
    print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
    # sys.exit is used instead of the interactive helper exit(), which is
    # only injected by the site module.
    sys.exit(1)

# Module-level connection shared by lookup_scene() and find_scene_id().
conn = sqlite3.connect('xbvr.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)

try:
    # Only the "query" action is handled; a missing argument is ignored
    # rather than raising IndexError.
    if len(sys.argv) > 1 and sys.argv[1] == "query":
        # The JSON fragment arrives on stdin and is echoed to stderr.
        fragment = json.loads(sys.stdin.read())
        print(json.dumps(fragment),file=sys.stderr)
        scene_id = find_scene_id(fragment['title'])
        if not scene_id:
            print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
        else:
            print(f"Found scene id: {scene_id}",file=sys.stderr)
            result = lookup_scene(scene_id)
            # The result dictionary is the only thing written to stdout.
            print(json.dumps(result))
finally:
    # Release the database handle even when a lookup raises.
    conn.close()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user