mirror of
https://github.com/stashapp/CommunityScrapers.git
synced 2025-12-11 11:18:56 -06:00
Create .gitattributes (#265)
This commit is contained in:
parent
a9603e2a60
commit
6e013d1064
5
.gitattributes
vendored
Normal file
5
.gitattributes
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
* text=auto
|
||||||
|
|
||||||
|
*.yml eol=lf diff=yaml linguist-detectable
|
||||||
|
*.py eol=lf diff=python
|
||||||
|
*.md eol=lf diff=markdown
|
||||||
@ -1,34 +1,34 @@
|
|||||||
name: "FemDomEmpire"
|
name: "FemDomEmpire"
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- femdomempire.com/tour/trailers/
|
- femdomempire.com/tour/trailers/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //div[@class="videoDetails clear"]/h3
|
Title: //div[@class="videoDetails clear"]/h3
|
||||||
Date:
|
Date:
|
||||||
selector: //p[span[contains(.,"Date Added")]]
|
selector: //p[span[contains(.,"Date Added")]]
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: "Date Added:"
|
- regex: "Date Added:"
|
||||||
with:
|
with:
|
||||||
- parseDate: January 2, 2006
|
- parseDate: January 2, 2006
|
||||||
Details: //div[@class="videoDetails clear"]/p
|
Details: //div[@class="videoDetails clear"]/p
|
||||||
Performers:
|
Performers:
|
||||||
Name: //li[@class="update_models"]/a
|
Name: //li[@class="update_models"]/a
|
||||||
Tags:
|
Tags:
|
||||||
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
|
Name: //ul/li[@class="label" and contains(.,"Categories:")]/..//a
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: FemDomEmpire
|
fixed: FemDomEmpire
|
||||||
Image:
|
Image:
|
||||||
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
|
selector: //img[@class="update_thumb thumbs stdimage"]/@src0_1x
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: .*/tour/content//
|
- regex: .*/tour/content//
|
||||||
with: "https://femdomempire.com/tour/content/"
|
with: "https://femdomempire.com/tour/content/"
|
||||||
|
|
||||||
# Last Updated October 10, 2020
|
# Last Updated October 10, 2020
|
||||||
# Note: Date Added on older scenes seem to be incorrect on FemDomEmpire site. Newer scenes have correct date.
|
# Note: Date Added on older scenes seem to be incorrect on FemDomEmpire site. Newer scenes have correct date.
|
||||||
@ -1,34 +1,34 @@
|
|||||||
name: firstanalquest
|
name: firstanalquest
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- firstanalquest.com
|
- firstanalquest.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
|
$performer: //ul[@class="list-inline"][contains(text(),"Models:")]/li/a
|
||||||
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
|
$title: //div[@class="badge-content"]/preceding-sibling::span/text()
|
||||||
scene:
|
scene:
|
||||||
Title: $title
|
Title: $title
|
||||||
Details:
|
Details:
|
||||||
selector: //div[@class="text-desc"]
|
selector: //div[@class="text-desc"]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
|
Name: //li[strong[text()="Tags:"]]/following-sibling::li/a/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: $performer/text()
|
Name: $performer/text()
|
||||||
Image: //img[@class="player-preview"]/@src
|
Image: //img[@class="player-preview"]/@src
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: First Anal Quest
|
fixed: First Anal Quest
|
||||||
Date:
|
Date:
|
||||||
selector: $title
|
selector: $title
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: \s
|
- regex: \s
|
||||||
with: +
|
with: +
|
||||||
- regex: ^([^-]+-?).+
|
- regex: ^([^-]+-?).+
|
||||||
with: http://www.firstanalquest.com/search/?q=$1
|
with: http://www.firstanalquest.com/search/?q=$1
|
||||||
- subScraper: //span[@class="thumb-added"]/text()
|
- subScraper: //span[@class="thumb-added"]/text()
|
||||||
- parseDate: Jan 2, 2006
|
- parseDate: Jan 2, 2006
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,25 +1,25 @@
|
|||||||
name: fistertwister
|
name: fistertwister
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- fistertwister.com
|
- fistertwister.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //div[@class="jumbotron"]/h2/text()[1]
|
Title: //div[@class="jumbotron"]/h2/text()[1]
|
||||||
Date:
|
Date:
|
||||||
selector: //li[contains(text(),"Released on:")]/strong/text()
|
selector: //li[contains(text(),"Released on:")]/strong/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: Jan 2, 2006
|
- parseDate: Jan 2, 2006
|
||||||
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
|
Details: //div[@class="jumbotron video-info"]/p[not(@class) and not(a)]/text()
|
||||||
Tags:
|
Tags:
|
||||||
Name: //a[@class="btn btn-default"]/text()
|
Name: //a[@class="btn btn-default"]/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: //li[contains(text(),"Featuring:")]//a/text()
|
Name: //li[contains(text(),"Featuring:")]//a/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Fister Twister
|
fixed: Fister Twister
|
||||||
#Image: //video[@id="video"]/@poster
|
#Image: //video[@id="video"]/@poster
|
||||||
Image: //meta[@property="og:image"]/@content
|
Image: //meta[@property="og:image"]/@content
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,42 +1,42 @@
|
|||||||
name: "FittingRoom"
|
name: "FittingRoom"
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- fitting-room.com/videos/
|
- fitting-room.com/videos/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
galleryByURL:
|
galleryByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- fitting-room.com/albums/
|
- fitting-room.com/albums/
|
||||||
scraper: galleryScraper
|
scraper: galleryScraper
|
||||||
|
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: //head/title/text()
|
selector: //head/title/text()
|
||||||
Details: //meta[@property="twitter:description"]/@content
|
Details: //meta[@property="twitter:description"]/@content
|
||||||
Performers:
|
Performers:
|
||||||
Name: //div[@class="info-model"]//p[@class="name"]
|
Name: //div[@class="info-model"]//p[@class="name"]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //meta[@property="article:tag"]/@content
|
Name: //meta[@property="article:tag"]/@content
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Fitting-Room
|
fixed: Fitting-Room
|
||||||
Image: //meta[@property="twitter:image"]/@content
|
Image: //meta[@property="twitter:image"]/@content
|
||||||
URL: //meta[@property="twitter:url"]/@content
|
URL: //meta[@property="twitter:url"]/@content
|
||||||
galleryScraper:
|
galleryScraper:
|
||||||
gallery:
|
gallery:
|
||||||
Title:
|
Title:
|
||||||
selector: //head/title/text()
|
selector: //head/title/text()
|
||||||
Details: //meta[@property="twitter:description"]/@content
|
Details: //meta[@property="twitter:description"]/@content
|
||||||
Performers:
|
Performers:
|
||||||
Name: //div[@class="info-model"]//p[@class="name"]
|
Name: //div[@class="info-model"]//p[@class="name"]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //meta[@property="article:tag"]/@content
|
Name: //meta[@property="article:tag"]/@content
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Fitting-Room
|
fixed: Fitting-Room
|
||||||
URL: //meta[@property="twitter:url"]/@content
|
URL: //meta[@property="twitter:url"]/@content
|
||||||
|
|
||||||
# Last Updated October 23, 2020
|
# Last Updated October 23, 2020
|
||||||
|
|||||||
@ -1,29 +1,29 @@
|
|||||||
name: Mandyflores
|
name: Mandyflores
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- mandyflores.com/vod/
|
- mandyflores.com/vod/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$updateDesc: //span[@class="update_description"]
|
$updateDesc: //span[@class="update_description"]
|
||||||
$divCenter: /div[@align="center"]
|
$divCenter: /div[@align="center"]
|
||||||
scene:
|
scene:
|
||||||
Title: //span[@class="title_bar_hilite"]
|
Title: //span[@class="title_bar_hilite"]
|
||||||
Details:
|
Details:
|
||||||
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
|
selector: $updateDesc$divCenter/span/span[@style]/text()[normalize-space(.)]|($updateDesc | $updateDesc/p)/text()[normalize-space(.)]|($updateDesc$divCenter/text())[1]|($updateDesc/text())[1]
|
||||||
concat: "\n\n"
|
concat: "\n\n"
|
||||||
Date:
|
Date:
|
||||||
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
|
selector: //div[@class="cell update_date"][not(ancestor::span[@class="update_description"])]/text()[1]
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: 01/02/2006
|
- parseDate: 01/02/2006
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Mandyflores
|
fixed: Mandyflores
|
||||||
Tags:
|
Tags:
|
||||||
Name: //span[@class="update_tags"]/a
|
Name: //span[@class="update_tags"]/a
|
||||||
Performers:
|
Performers:
|
||||||
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
|
Name: //span[@class="update_models"][following-sibling::span[@class="update_tags"]]/a
|
||||||
|
|
||||||
# Last Updated September 16, 2020
|
# Last Updated September 16, 2020
|
||||||
|
|||||||
@ -1,114 +1,114 @@
|
|||||||
name: Modelhub
|
name: Modelhub
|
||||||
performerByName:
|
performerByName:
|
||||||
action: scrapeXPath
|
action: scrapeXPath
|
||||||
queryURL: https://modelhub.com/model/search?q={}
|
queryURL: https://modelhub.com/model/search?q={}
|
||||||
scraper: performerSearch
|
scraper: performerSearch
|
||||||
performerByURL:
|
performerByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- modelhub.com
|
- modelhub.com
|
||||||
scraper: performerScraper
|
scraper: performerScraper
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- modelhub.com/video/
|
- modelhub.com/video/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
|
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
performerSearch:
|
performerSearch:
|
||||||
performer:
|
performer:
|
||||||
Name: //div[@class="modelInfo"]/a/h3/text()
|
Name: //div[@class="modelInfo"]/a/h3/text()
|
||||||
URL:
|
URL:
|
||||||
selector: //li[@class="modelBox"]/a/@href
|
selector: //li[@class="modelBox"]/a/@href
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^(.*)$
|
- regex: ^(.*)$
|
||||||
with: "https://www.modelhub.com$1/bio"
|
with: "https://www.modelhub.com$1/bio"
|
||||||
|
|
||||||
performerScraper:
|
performerScraper:
|
||||||
common:
|
common:
|
||||||
$infoPiece: //div[@class="modelContent"]
|
$infoPiece: //div[@class="modelContent"]
|
||||||
$modelAttributes: //ul[@class="modelAttributes"]/li
|
$modelAttributes: //ul[@class="modelAttributes"]/li
|
||||||
performer:
|
performer:
|
||||||
Name: //div[@class="infoSection"]/h1/text()
|
Name: //div[@class="infoSection"]/h1/text()
|
||||||
Birthdate:
|
Birthdate:
|
||||||
selector: //span[@class='bday js_lazy_bkg']/text()
|
selector: //span[@class='bday js_lazy_bkg']/text()
|
||||||
parseDate: January 2, 2006
|
parseDate: January 2, 2006
|
||||||
Gender:
|
Gender:
|
||||||
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
|
selector: //ul[@class="allStats"]/li[contains(.,"Gender")]/span
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: "Couple"
|
- regex: "Couple"
|
||||||
with:
|
with:
|
||||||
Ethnicity:
|
Ethnicity:
|
||||||
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
|
selector: $modelAttributes/div[contains(.,"Ethnicity")]/span
|
||||||
postProcess:
|
postProcess:
|
||||||
- map:
|
- map:
|
||||||
Latin: "hispanic"
|
Latin: "hispanic"
|
||||||
Other: ""
|
Other: ""
|
||||||
Twitter:
|
Twitter:
|
||||||
selector: //a[@class='twitter js_lazy_bkg']/@href
|
selector: //a[@class='twitter js_lazy_bkg']/@href
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: .+(twitter.com)/(.+)
|
- regex: .+(twitter.com)/(.+)
|
||||||
with: $2
|
with: $2
|
||||||
- regex: (.+)\?(.*)
|
- regex: (.+)\?(.*)
|
||||||
with: $1
|
with: $1
|
||||||
Instagram:
|
Instagram:
|
||||||
selector: //a[@class="insta js_lazy_bkg"]/@href
|
selector: //a[@class="insta js_lazy_bkg"]/@href
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: .+(instagram.com/)(.+)
|
- regex: .+(instagram.com/)(.+)
|
||||||
with: $2
|
with: $2
|
||||||
Height:
|
Height:
|
||||||
selector: $modelAttributes/div[contains(.,"Height")]/span
|
selector: $modelAttributes/div[contains(.,"Height")]/span
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: .*\((\d+)cm\)
|
- regex: .*\((\d+)cm\)
|
||||||
with: $1
|
with: $1
|
||||||
FakeTits:
|
FakeTits:
|
||||||
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
|
selector: $modelAttributes/div[contains(.,"Breast Type")]/span
|
||||||
postProcess:
|
postProcess:
|
||||||
- map:
|
- map:
|
||||||
Natural: "No"
|
Natural: "No"
|
||||||
Fake: "Yes"
|
Fake: "Yes"
|
||||||
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
|
Piercings: $modelAttributes/div[contains(.,"Piercing")]/span
|
||||||
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
|
EyeColor: $modelAttributes/div[contains(.,"Eye Color")]/span
|
||||||
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
|
Tattoos: $modelAttributes/div[contains(.,"Tattoo")]/span
|
||||||
URL: //meta[@name="twitter:url"]/@content
|
URL: //meta[@name="twitter:url"]/@content
|
||||||
Image: //img[@class='profileImg lazy']/@data-src
|
Image: //img[@class='profileImg lazy']/@data-src
|
||||||
Country:
|
Country:
|
||||||
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
|
selector: //ul[@class="info"]/li/span[@class="location js_lazy_bkg"]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- map:
|
- map:
|
||||||
"AR": "Argentina"
|
"AR": "Argentina"
|
||||||
"AT": "Austria"
|
"AT": "Austria"
|
||||||
"AU": "Australia"
|
"AU": "Australia"
|
||||||
"BR": "Brazil"
|
"BR": "Brazil"
|
||||||
"CA": "Canada"
|
"CA": "Canada"
|
||||||
"colombia, CO": "Colombia"
|
"colombia, CO": "Colombia"
|
||||||
"CO": "Colombia"
|
"CO": "Colombia"
|
||||||
"CZ": "Czech Republic"
|
"CZ": "Czech Republic"
|
||||||
"DE": "Germany"
|
"DE": "Germany"
|
||||||
"ES": "Spain"
|
"ES": "Spain"
|
||||||
"FR": "France"
|
"FR": "France"
|
||||||
"GB": "United Kingdom"
|
"GB": "United Kingdom"
|
||||||
"HU": "Hungary"
|
"HU": "Hungary"
|
||||||
"IT": "Italy"
|
"IT": "Italy"
|
||||||
"Peru, PE": "Peru"
|
"Peru, PE": "Peru"
|
||||||
"PE": "Peru"
|
"PE": "Peru"
|
||||||
"PH": "Philippines"
|
"PH": "Philippines"
|
||||||
"RO": "Romania"
|
"RO": "Romania"
|
||||||
"RU": "Russia"
|
"RU": "Russia"
|
||||||
"US": "United States"
|
"US": "United States"
|
||||||
|
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
|
Title: //div[@class='videoShortInfo']/div[1]/h1/text()
|
||||||
Tags:
|
Tags:
|
||||||
Name: //div[@class='videoCategories']/div/a/text()
|
Name: //div[@class='videoCategories']/div/a/text()
|
||||||
Image: //meta[@property="og:image"]/@content
|
Image: //meta[@property="og:image"]/@content
|
||||||
Details: //p[@ class="videoDescription"]
|
Details: //p[@ class="videoDescription"]
|
||||||
Performers:
|
Performers:
|
||||||
Name: //div[@class='videoAvatar']/div[1]/a[2]
|
Name: //div[@class='videoAvatar']/div[1]/a[2]
|
||||||
# Last Updated Jul 25, 2020
|
# Last Updated Jul 25, 2020
|
||||||
|
|||||||
@ -1,55 +1,55 @@
|
|||||||
name: "PervCity"
|
name: "PervCity"
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- analoverdose.com/trailers/
|
- analoverdose.com/trailers/
|
||||||
- bangingbeauties.com/trailers/
|
- bangingbeauties.com/trailers/
|
||||||
- chocolatebjs.com/trailers/
|
- chocolatebjs.com/trailers/
|
||||||
- oraloverdose.com/trailers/
|
- oraloverdose.com/trailers/
|
||||||
- pervcity.com/trailers/
|
- pervcity.com/trailers/
|
||||||
- upherasshole.com/trailers/
|
- upherasshole.com/trailers/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$sceneinfo: //div[@class="videoInfo"]
|
$sceneinfo: //div[@class="videoInfo"]
|
||||||
$base: //head/base/@href
|
$base: //head/base/@href
|
||||||
scene:
|
scene:
|
||||||
Title: $sceneinfo/div[@class="infoHeader"]
|
Title: $sceneinfo/div[@class="infoHeader"]
|
||||||
Performers:
|
Performers:
|
||||||
Name: $sceneinfo//span[@class="tour_update_models"]/a
|
Name: $sceneinfo//span[@class="tour_update_models"]/a
|
||||||
Details: $sceneinfo//p/text()
|
Details: $sceneinfo//p/text()
|
||||||
Image:
|
Image:
|
||||||
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
|
selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src
|
||||||
concat: "|"
|
concat: "|"
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ([^|]+)\|(.*)/(content/.+)
|
- regex: ([^|]+)\|(.*)/(content/.+)
|
||||||
with: $1$3
|
with: $1$3
|
||||||
Date:
|
Date:
|
||||||
selector: $sceneinfo/div[@class="infoHeader"]
|
selector: $sceneinfo/div[@class="infoHeader"]
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: \s
|
- regex: \s
|
||||||
with: "_"
|
with: "_"
|
||||||
- regex: ^
|
- regex: ^
|
||||||
with: "https://pervcity.com/search.php?query="
|
with: "https://pervcity.com/search.php?query="
|
||||||
- subScraper:
|
- subScraper:
|
||||||
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
|
selector: //div[@class="category_listing_block"]//div[@class="date"]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: 01-02-2006
|
- parseDate: 01-02-2006
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
selector: //head/base/@href
|
selector: //head/base/@href
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: .+/([^\.]+)\.com/.*
|
- regex: .+/([^\.]+)\.com/.*
|
||||||
with: $1
|
with: $1
|
||||||
- map:
|
- map:
|
||||||
analoverdose: AnalOverdose
|
analoverdose: AnalOverdose
|
||||||
chocolatebjs: ChocolateBjs
|
chocolatebjs: ChocolateBjs
|
||||||
bangingbeauties: BangingBeauties
|
bangingbeauties: BangingBeauties
|
||||||
oraloverdose: OralOverdose
|
oraloverdose: OralOverdose
|
||||||
pervcity: PervCity
|
pervcity: PervCity
|
||||||
upherasshole: UpHerAsshole
|
upherasshole: UpHerAsshole
|
||||||
# Last Updated October 14, 2020
|
# Last Updated October 14, 2020
|
||||||
|
|||||||
@ -1,48 +1,48 @@
|
|||||||
name: private
|
name: private
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- private.com
|
- private.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
movieByURL:
|
movieByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- private.com
|
- private.com
|
||||||
scraper: movieScraper
|
scraper: movieScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$content: //section[@class="video-description-and-tags clearfix"]
|
$content: //section[@class="video-description-and-tags clearfix"]
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
|
selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text()
|
||||||
Date:
|
Date:
|
||||||
selector: //meta[@itemprop="uploadDate"]/@content
|
selector: //meta[@itemprop="uploadDate"]/@content
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: 01/02/2006
|
- parseDate: 01/02/2006
|
||||||
Details: $content//p[@id="description-section"]
|
Details: $content//p[@id="description-section"]
|
||||||
Tags:
|
Tags:
|
||||||
Name: $content//ul[@class="scene-tags"]/li/a/text()
|
Name: $content//ul[@class="scene-tags"]/li/a/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: $content//ul[@class="scene-models-list"]//a/text()
|
Name: $content//ul[@class="scene-models-list"]//a/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name: $content//span[@class="title-site"]/text()
|
Name: $content//span[@class="title-site"]/text()
|
||||||
#fixed: Private
|
#fixed: Private
|
||||||
Image: //meta[@property="og:image"]/@content
|
Image: //meta[@property="og:image"]/@content
|
||||||
movieScraper:
|
movieScraper:
|
||||||
movie:
|
movie:
|
||||||
Name: //div[@class="dvds-wrapper"]/h1/text()
|
Name: //div[@class="dvds-wrapper"]/h1/text()
|
||||||
Director: //p[@class="director"]/span[@itemprop="name"]/text()
|
Director: //p[@class="director"]/span[@itemprop="name"]/text()
|
||||||
Duration:
|
Duration:
|
||||||
selector: //p[em[contains(text(), "Duration:")]]/text()
|
selector: //p[em[contains(text(), "Duration:")]]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^(\d+).+$
|
- regex: ^(\d+).+$
|
||||||
with: 00:$1:00
|
with: 00:$1:00
|
||||||
Date: //span[@itemprop="datePublished"]/text()
|
Date: //span[@itemprop="datePublished"]/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Private
|
fixed: Private
|
||||||
Synopsis: //p[@class="sinopsys"]
|
Synopsis: //p[@class="sinopsys"]
|
||||||
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
|
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
|
||||||
# Last Updated August 18, 2020
|
# Last Updated August 18, 2020
|
||||||
|
|||||||
@ -1,38 +1,38 @@
|
|||||||
name: puffynetwork
|
name: puffynetwork
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- puffynetwork.com
|
- puffynetwork.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$performer: //dl/dd/a
|
$performer: //dl/dd/a
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: //h2[@class="title"]/span/text()
|
selector: //h2[@class="title"]/span/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^(.*)[—]\s+
|
- regex: ^(.*)[—]\s+
|
||||||
with:
|
with:
|
||||||
Date:
|
Date:
|
||||||
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
|
selector: //dl/dt[contains(text(),"Released on:")]/span/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: Jan 2, 2006
|
- parseDate: Jan 2, 2006
|
||||||
Details: //div[@class="show_more"]/text()[1]
|
Details: //div[@class="show_more"]/text()[1]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //p[@class="tags"]/a/text()
|
Name: //p[@class="tags"]/a/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: $performer/text()
|
Name: $performer/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
|
selector: //h2[@class="title"]//div[contains(text(),"Site:")]/a/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- map:
|
- map:
|
||||||
Weliketosuck: We Like To Suck
|
Weliketosuck: We Like To Suck
|
||||||
Wetandpuffy: Wet And Puffy
|
Wetandpuffy: Wet And Puffy
|
||||||
Wetandpissy: Wet And Pissy
|
Wetandpissy: Wet And Pissy
|
||||||
Eurobabefacials: Euro Babe Facials
|
Eurobabefacials: Euro Babe Facials
|
||||||
Simplyanal: Simply Anal
|
Simplyanal: Simply Anal
|
||||||
Image: //video[@id="video"]/@poster
|
Image: //video[@id="video"]/@poster
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,73 +1,73 @@
|
|||||||
name: "Teencoreclub"
|
name: "Teencoreclub"
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- analyzedgirls.com/
|
- analyzedgirls.com/
|
||||||
- assteenmouth.com/
|
- assteenmouth.com/
|
||||||
- bangteenpussy.com/
|
- bangteenpussy.com/
|
||||||
- brutalinvasion.com/
|
- brutalinvasion.com/
|
||||||
- cumaholicteens.com/
|
- cumaholicteens.com/
|
||||||
- defiled18.com/
|
- defiled18.com/
|
||||||
- doubleteamedteens.com/
|
- doubleteamedteens.com/
|
||||||
- dreamteenshd.com/
|
- dreamteenshd.com/
|
||||||
- girlsgotcream.com/
|
- girlsgotcream.com/
|
||||||
- hardcoreyouth.com/
|
- hardcoreyouth.com/
|
||||||
- littlehellcat.com/
|
- littlehellcat.com/
|
||||||
- maketeengape.com/
|
- maketeengape.com/
|
||||||
- nylonsweeties.com/
|
- nylonsweeties.com/
|
||||||
- seductive18.com/
|
- seductive18.com/
|
||||||
- teenanalcasting.com/
|
- teenanalcasting.com/
|
||||||
- teendrillers.com/
|
- teendrillers.com/
|
||||||
- teensnaturalway.com/
|
- teensnaturalway.com/
|
||||||
- teenstryblacks.com/
|
- teenstryblacks.com/
|
||||||
- spermantino.com/
|
- spermantino.com/
|
||||||
- teachmyass.com/
|
- teachmyass.com/
|
||||||
- drilledchicks.com/
|
- drilledchicks.com/
|
||||||
- analcheckups.com/
|
- analcheckups.com/
|
||||||
- fabsluts.com/
|
- fabsluts.com/
|
||||||
- jerk-offpass.com/
|
- jerk-offpass.com/
|
||||||
- nylonspunkjunkies.com/
|
- nylonspunkjunkies.com/
|
||||||
- shegotsix.com/
|
- shegotsix.com/
|
||||||
- spearteenpussy.com/
|
- spearteenpussy.com/
|
||||||
- teencoreclub.com/
|
- teencoreclub.com/
|
||||||
- teencorezine.com/
|
- teencorezine.com/
|
||||||
- teensgoporn.com/
|
- teensgoporn.com/
|
||||||
- weneednewtalents.com/
|
- weneednewtalents.com/
|
||||||
- xcoreclub.com/
|
- xcoreclub.com/
|
||||||
- youngthroats.com/
|
- youngthroats.com/
|
||||||
- tryteens.com/
|
- tryteens.com/
|
||||||
- whiteteensblackcocks.com/
|
- whiteteensblackcocks.com/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||||
replace:
|
replace:
|
||||||
- regex: \t+
|
- regex: \t+
|
||||||
with: ' '
|
with: ' '
|
||||||
Details: //div[@class="detail-description"]/text()
|
Details: //div[@class="detail-description"]/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name:
|
Name:
|
||||||
selector: //div[@class="detail-hero-title"]//h1/text()
|
selector: //div[@class="detail-hero-title"]//h1/text()
|
||||||
replace:
|
replace:
|
||||||
- regex: \t+
|
- regex: \t+
|
||||||
with:
|
with:
|
||||||
split: ','
|
split: ','
|
||||||
Tags:
|
Tags:
|
||||||
Name:
|
Name:
|
||||||
selector: //meta[@name="keywords"]/@content
|
selector: //meta[@name="keywords"]/@content
|
||||||
split: ','
|
split: ','
|
||||||
Image:
|
Image:
|
||||||
selector: //div[@class="detail-hero"]/@style
|
selector: //div[@class="detail-hero"]/@style
|
||||||
replace:
|
replace:
|
||||||
- regex: ^.*url.([^\)]+).*$
|
- regex: ^.*url.([^\)]+).*$
|
||||||
with: "$1"
|
with: "$1"
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
|
selector: //div[@class="flex-shrink-0 flex items-center"]//img[1]/@alt
|
||||||
replace:
|
replace:
|
||||||
- regex: \.\w+
|
- regex: \.\w+
|
||||||
with:
|
with:
|
||||||
|
|
||||||
# Last Updated June 22, 2020
|
# Last Updated June 22, 2020
|
||||||
|
|||||||
@ -1,78 +1,78 @@
|
|||||||
name: TheNude
|
name: TheNude
|
||||||
performerByName:
|
performerByName:
|
||||||
action: scrapeXPath
|
action: scrapeXPath
|
||||||
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
|
queryURL: https://www.thenude.com/index.php?page=search&action=searchModels&__form_name=navbar-search&m_aka=on&m_name={}
|
||||||
scraper: performerSearch
|
scraper: performerSearch
|
||||||
performerByURL:
|
performerByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- https://www.thenude.com
|
- https://www.thenude.com
|
||||||
scraper: performerScraper
|
scraper: performerScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
performerSearch:
|
performerSearch:
|
||||||
performer:
|
performer:
|
||||||
# Name: //a[@class="model-name"]/text() Version to get only the name
|
# Name: //a[@class="model-name"]/text() Version to get only the name
|
||||||
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
|
# Name: //a[@class="model-name"]/../../a/@title Version to get a little info on label/studio as well
|
||||||
Name:
|
Name:
|
||||||
selector: //figcaption/span
|
selector: //figcaption/span
|
||||||
replace:
|
replace:
|
||||||
- regex: "^AKA:"
|
- regex: "^AKA:"
|
||||||
with: ""
|
with: ""
|
||||||
URL: //a[@class="model-name"]/@href
|
URL: //a[@class="model-name"]/@href
|
||||||
performerScraper:
|
performerScraper:
|
||||||
performer:
|
performer:
|
||||||
Name: (//meta[@itemprop="name"])[1]/@content
|
Name: (//meta[@itemprop="name"])[1]/@content
|
||||||
URL: (//meta[@itemprop="url"])[1]/@content
|
URL: (//meta[@itemprop="url"])[1]/@content
|
||||||
Twitter: //a[text()="TWITTER"]/@href
|
Twitter: //a[text()="TWITTER"]/@href
|
||||||
Instagram: //a[text()="INSTAGRAM"]/@href
|
Instagram: //a[text()="INSTAGRAM"]/@href
|
||||||
Birthdate:
|
Birthdate:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Born')]/../text()
|
||||||
parseDate: 02-01-2006
|
parseDate: 02-01-2006
|
||||||
Ethnicity:
|
Ethnicity:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Ethnicity')]/../text()
|
||||||
replace:
|
replace:
|
||||||
- regex: Asian
|
- regex: Asian
|
||||||
with: "asian"
|
with: "asian"
|
||||||
- regex: Caucasian
|
- regex: Caucasian
|
||||||
with: "white"
|
with: "white"
|
||||||
- regex: Black
|
- regex: Black
|
||||||
with: "black"
|
with: "black"
|
||||||
- regex: Latin
|
- regex: Latin
|
||||||
with: "hispanic"
|
with: "hispanic"
|
||||||
Country:
|
Country:
|
||||||
selector: //span[@itemprop="nationality"]/text()
|
selector: //span[@itemprop="nationality"]/text()
|
||||||
replace:
|
replace:
|
||||||
- regex: "United States of America"
|
- regex: "United States of America"
|
||||||
with: "United States"
|
with: "United States"
|
||||||
#EyeColor: not listed
|
#EyeColor: not listed
|
||||||
Height:
|
Height:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Height')]/../text()
|
||||||
replace:
|
replace:
|
||||||
- regex: ^(\d+).+$
|
- regex: ^(\d+).+$
|
||||||
with: "$1 cm"
|
with: "$1 cm"
|
||||||
Measurements:
|
Measurements:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Measurements')]/../text()
|
||||||
FakeTits:
|
FakeTits:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Breasts')]/../text()
|
||||||
replace:
|
replace:
|
||||||
- regex: ^[^\(]+\(([^\)]+)\).*$
|
- regex: ^[^\(]+\(([^\)]+)\).*$
|
||||||
with: "$1"
|
with: "$1"
|
||||||
- regex: Fake
|
- regex: Fake
|
||||||
with: "Yes"
|
with: "Yes"
|
||||||
- regex: Real
|
- regex: Real
|
||||||
with: "No"
|
with: "No"
|
||||||
CareerLength:
|
CareerLength:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Seen')]/../text()
|
||||||
concat: "-"
|
concat: "-"
|
||||||
Aliases:
|
Aliases:
|
||||||
selector: //meta[@itemprop="additionalName"]/@content
|
selector: //meta[@itemprop="additionalName"]/@content
|
||||||
concat: ", "
|
concat: ", "
|
||||||
Tattoos:
|
Tattoos:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Tattoos')]/../text()
|
||||||
Piercings:
|
Piercings:
|
||||||
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
|
selector: //li/span[@class="list-quest"][contains(text(),'Piercings')]/../text()
|
||||||
Image:
|
Image:
|
||||||
selector: (//meta[@itemprop="image"])[1]/@content
|
selector: (//meta[@itemprop="image"])[1]/@content
|
||||||
Gender:
|
Gender:
|
||||||
selector: //meta[@itemprop="gender"]/@content
|
selector: //meta[@itemprop="gender"]/@content
|
||||||
# Last updated July 02, 2020
|
# Last updated July 02, 2020
|
||||||
|
|||||||
@ -1,43 +1,43 @@
|
|||||||
name: TheScoreGroup
|
name: TheScoreGroup
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- scoreland.com/
|
- scoreland.com/
|
||||||
- 60plusmilfs.com/
|
- 60plusmilfs.com/
|
||||||
- 50plusmilfs.com/
|
- 50plusmilfs.com/
|
||||||
- 40somethingmag.com/
|
- 40somethingmag.com/
|
||||||
- pornmegaload.com/
|
- pornmegaload.com/
|
||||||
- xlgirls.com/
|
- xlgirls.com/
|
||||||
- 18eighteen.com/
|
- 18eighteen.com/
|
||||||
- legsex.com/
|
- legsex.com/
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
|
Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
selector: //meta[@property="og:site_name"]/@content
|
selector: //meta[@property="og:site_name"]/@content
|
||||||
Date:
|
Date:
|
||||||
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
|
selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ..,
|
- regex: ..,
|
||||||
with:
|
with:
|
||||||
- parseDate: January 2 2006
|
- parseDate: January 2 2006
|
||||||
Details:
|
Details:
|
||||||
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
|
selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
|
||||||
concat: "\n"
|
concat: "\n"
|
||||||
Tags:
|
Tags:
|
||||||
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
|
Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
|
Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
|
||||||
Image:
|
Image:
|
||||||
selector: //meta[@itemprop="image"]/@content
|
selector: //meta[@itemprop="image"]/@content
|
||||||
# Enable this post process if you want better image quality but sometimes it can fail
|
# Enable this post process if you want better image quality but sometimes it can fail
|
||||||
# postProcess:
|
# postProcess:
|
||||||
# - replace:
|
# - replace:
|
||||||
# - regex: _lg
|
# - regex: _lg
|
||||||
# with: _x_800
|
# with: _x_800
|
||||||
|
|
||||||
# Last Updated August 11, 2020
|
# Last Updated August 11, 2020
|
||||||
|
|||||||
@ -1,32 +1,32 @@
|
|||||||
name: vipissy
|
name: vipissy
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- vipissy.com
|
- vipissy.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
common:
|
common:
|
||||||
$performer: //dl/dd/a
|
$performer: //dl/dd/a
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: //section[@class="downloads"]/strong/text()
|
selector: //section[@class="downloads"]/strong/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^(.*)[—]\s+
|
- regex: ^(.*)[—]\s+
|
||||||
with:
|
with:
|
||||||
Date:
|
Date:
|
||||||
# selector: //dl/dd[contains(text(),"Released on:")]/text()
|
# selector: //dl/dd[contains(text(),"Released on:")]/text()
|
||||||
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
|
selector: //dt[i[@class="glyphicon glyphicon-calendar"]]/following-sibling::dd[1]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- parseDate: Jan 2, 2006
|
- parseDate: Jan 2, 2006
|
||||||
Details: //div[@class="show_more"]/text()[1]
|
Details: //div[@class="show_more"]/text()[1]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //p[@class="tags"]/a/text()
|
Name: //p[@class="tags"]/a/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: $performer/text()
|
Name: $performer/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: Vipissy
|
fixed: Vipissy
|
||||||
Image: //div[@id="videoplayer"]/video/@poster
|
Image: //div[@id="videoplayer"]/video/@poster
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,22 +1,22 @@
|
|||||||
name: xnxx
|
name: xnxx
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- xnxx.com
|
- xnxx.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //div[@class="clear-infobar"]/strong/text()
|
Title: //div[@class="clear-infobar"]/strong/text()
|
||||||
Tags:
|
Tags:
|
||||||
Name: //div[@class="metadata-row video-tags"]/a/text()
|
Name: //div[@class="metadata-row video-tags"]/a/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
|
Name: //span[@class="metadata"]/a[@class="gold-plate" or @class="free-plate"]/text()
|
||||||
Details: //p[@class="metadata-row video-description"]
|
Details: //p[@class="metadata-row video-description"]
|
||||||
Image:
|
Image:
|
||||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||||
with: $1
|
with: $1
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,28 +1,28 @@
|
|||||||
name: xtube
|
name: xtube
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- xtube.com
|
- xtube.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title:
|
Title:
|
||||||
selector: //form/h1/text()
|
selector: //form/h1/text()
|
||||||
Date:
|
Date:
|
||||||
selector: //script[contains(text(), "dimension10")]/text()
|
selector: //script[contains(text(), "dimension10")]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
|
- regex: ^.+'(\d{4})(\d{2})(\d{2})'.+$
|
||||||
with: $2 $3, $1
|
with: $2 $3, $1
|
||||||
- parseDate: 01 02, 2006
|
- parseDate: 01 02, 2006
|
||||||
Studio:
|
Studio:
|
||||||
Name:
|
Name:
|
||||||
fixed: xtube
|
fixed: xtube
|
||||||
Details: //meta[@property="og:description"]/@content
|
Details: //meta[@property="og:description"]/@content
|
||||||
Performers:
|
Performers:
|
||||||
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
|
Name: //form[@id="postCommentForm"]/input[@name="contentOwnerId"]/@value[not (contains(.,"sponsor_"))]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //div[@class="categories" or @class="tags"]//a/text()
|
Name: //div[@class="categories" or @class="tags"]//a/text()
|
||||||
Image: //meta[@property="og:image"]/@content
|
Image: //meta[@property="og:image"]/@content
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,23 +1,23 @@
|
|||||||
name: xvideos
|
name: xvideos
|
||||||
sceneByURL:
|
sceneByURL:
|
||||||
- action: scrapeXPath
|
- action: scrapeXPath
|
||||||
url:
|
url:
|
||||||
- xvideos.com
|
- xvideos.com
|
||||||
scraper: sceneScraper
|
scraper: sceneScraper
|
||||||
xPathScrapers:
|
xPathScrapers:
|
||||||
sceneScraper:
|
sceneScraper:
|
||||||
scene:
|
scene:
|
||||||
Title: //h2[@class="page-title"]/text()[1]
|
Title: //h2[@class="page-title"]/text()[1]
|
||||||
Tags:
|
Tags:
|
||||||
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
|
Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text()
|
||||||
Performers:
|
Performers:
|
||||||
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
|
Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text()
|
||||||
Studio:
|
Studio:
|
||||||
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
|
Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text()
|
||||||
Image:
|
Image:
|
||||||
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
selector: //script[contains(text(), "setThumbUrl169")]/text()
|
||||||
postProcess:
|
postProcess:
|
||||||
- replace:
|
- replace:
|
||||||
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
- regex: ^.+setThumbUrl169\('(.+\.jpg).+setRelated.+$
|
||||||
with: $1
|
with: $1
|
||||||
# Last Updated August 06, 2020
|
# Last Updated August 06, 2020
|
||||||
|
|||||||
@ -1,59 +1,59 @@
|
|||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
''' This script uses the sqlite database from xbvr (3d porn manager)
|
''' This script uses the sqlite database from xbvr (3d porn manager)
|
||||||
Copy main.db from yout xbvr configuration and rename this to xbvr.db
|
Copy main.db from yout xbvr configuration and rename this to xbvr.db
|
||||||
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
|
docker cp xbvr:/root/.config/xbvr/main.db xbvr.db
|
||||||
This script needs python3 and sqlite3
|
This script needs python3 and sqlite3
|
||||||
'''
|
'''
|
||||||
def lookup_scene(id):
|
def lookup_scene(id):
|
||||||
c=conn.cursor()
|
c=conn.cursor()
|
||||||
c.execute('select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?',(id,))
|
c.execute('select title,synopsis,site,cover_url,scene_url,date(release_date) from scenes where id=?',(id,))
|
||||||
row=c.fetchone()
|
row=c.fetchone()
|
||||||
res={}
|
res={}
|
||||||
res['title']=row[0]
|
res['title']=row[0]
|
||||||
res['details']=row[1]
|
res['details']=row[1]
|
||||||
res['studio']={"name":row[2]}
|
res['studio']={"name":row[2]}
|
||||||
res['image']=row[3]
|
res['image']=row[3]
|
||||||
res['url']=row[4]
|
res['url']=row[4]
|
||||||
res['date']=row[5]
|
res['date']=row[5]
|
||||||
c.execute("select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;",(id,))
|
c.execute("select tags.name from scene_tags,tags where scene_tags.tag_id=tags.id and scene_tags.scene_id=? ;",(id,))
|
||||||
row = c.fetchall()
|
row = c.fetchall()
|
||||||
res['tags']=[{"name":x[0]} for x in row]
|
res['tags']=[{"name":x[0]} for x in row]
|
||||||
c.execute("select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;",(id,))
|
c.execute("select actors.name from scene_cast,actors where actors.id=scene_cast.actor_id and scene_cast.scene_id=? ;",(id,))
|
||||||
row = c.fetchall()
|
row = c.fetchall()
|
||||||
res['performers']=[{"name":x[0]} for x in row]
|
res['performers']=[{"name":x[0]} for x in row]
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def find_scene_id(title):
|
def find_scene_id(title):
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
|
c.execute('SELECT scene_id FROM files WHERE filename=?', (title,))
|
||||||
id=c.fetchone()
|
id=c.fetchone()
|
||||||
if id == None:
|
if id == None:
|
||||||
c.execute('select id from scenes where title=?',(title,))
|
c.execute('select id from scenes where title=?',(title,))
|
||||||
id=c.fetchone()
|
id=c.fetchone()
|
||||||
return id[0]
|
return id[0]
|
||||||
return id[0]
|
return id[0]
|
||||||
|
|
||||||
if not path.exists("xbvr.db"):
|
if not path.exists("xbvr.db"):
|
||||||
print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
|
print("Error, the sqlite database xbvr.db does not exist in the scrapers directory.",file=sys.stderr)
|
||||||
print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
|
print("Copy this database from the docker container and give it the name xbvr.db",file=sys.stderr)
|
||||||
print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
|
print("docker cp xbvr:/root/.config/xbvr/main.db xbvr.db",file=sys.stderr)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
conn = sqlite3.connect('xbvr.db',detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
|
conn = sqlite3.connect('xbvr.db',detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
|
||||||
|
|
||||||
if sys.argv[1] == "query":
|
if sys.argv[1] == "query":
|
||||||
fragment = json.loads(sys.stdin.read())
|
fragment = json.loads(sys.stdin.read())
|
||||||
print(json.dumps(fragment),file=sys.stderr)
|
print(json.dumps(fragment),file=sys.stderr)
|
||||||
scene_id = find_scene_id(fragment['title'])
|
scene_id = find_scene_id(fragment['title'])
|
||||||
if not scene_id:
|
if not scene_id:
|
||||||
print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
|
print(f"Could not determine scene id in title: `{fragment['title']}`",file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
print(f"Found scene id: {scene_id}",file=sys.stderr)
|
print(f"Found scene id: {scene_id}",file=sys.stderr)
|
||||||
result=lookup_scene(scene_id)
|
result=lookup_scene(scene_id)
|
||||||
print(json.dumps(result))
|
print(json.dumps(result))
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user