Scrape entire movie as part of scraping scenes from Private

This commit is contained in:
Maista6969 2024-01-06 22:39:10 +01:00
parent 700c3986be
commit c796c4a33f

View File

@ -19,6 +19,27 @@ performerByURL:
- private.com
scraper: performerScraper
xPathScrapers:
# XPath scraper for a movie page. Most selectors below carry a YAML anchor
# (&movieName, &movieDirector, ...) so that the same XPath string can be reused
# through an alias (*movieName, *movieDirector, ...) by the subScraper steps
# that appear later in this listing.
movieScraper:
movie:
Name: &movieName //div[@class="dvds-wrapper"]/h1/text()
Director: &movieDirector //p[@class="director"]/span[@itemprop="name"]/text()
Duration:
# Text of the <p> whose <em> child contains "Duration:". The anchor is attached
# to this XPath string only; the postProcess step below is not part of it.
selector: &movieDuration //p[em[contains(text(), "Duration:")]]/text()
postProcess:
- replace:
# Rewrites a value that starts with digits as 00:<digits>:00
# (presumably a minute count turned into HH:MM:SS - TODO confirm).
- regex: ^(\d+).+$
with: 00:$1:00
# NOTE(review): no *movieDate alias is visible in this listing - confirm
# whether this anchor is referenced elsewhere.
Date: &movieDate //span[@itemprop="datePublished"]/text()
Studio:
Name: &movieStudioName //div[@class="dvds-wrapper"]/p[@class="line-dvd"]
Synopsis: &movieSynopsis //p[@class="sinopsys"]
FrontImage: &movieFrontImage //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
# XPath scraper for performer search results.
performerSearch:
common:
# $searchData is reused as a prefix by the performer selectors below.
$searchData: //a[@data-track="PORNSTAR_NAME"]
performer:
# Name comes from the matched <a> node itself, URL from its href attribute.
Name: $searchData
URL: $searchData/@href
sceneScraper:
common:
$content: //section[@class="video-description-and-tags clearfix"]
@ -28,8 +49,8 @@ xPathScrapers:
selector: //meta[@property="og:video"]/@content
postProcess:
- replace:
- regex: .+\/(.+)\/trailers\/.+
with: $1
- regex: .+\/(.+)\/trailers\/.+
with: $1
Date:
selector: //meta[@itemprop="uploadDate"]/@content
postProcess:
@ -46,38 +67,41 @@ xPathScrapers:
Name:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: //div[@class="dvds-wrapper"]/h1/text()
- subScraper: *movieName
Director:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: *movieDirector
Duration:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: *movieDuration
- replace:
- regex: ^(\d+).+$
with: 00:$1:00
URL: //strong[contains(.,'Full Movie')]/../@href
Studio:
Name:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: *movieStudioName
Synopsis:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: *movieSynopsis
FrontImage:
selector: //strong[contains(.,'Full Movie')]/../@href
postProcess:
- subScraper: *movieFrontImage
Studio:
Name:
selector: //div[@class="title-zone"]//li/a/span[@class="title-site"]/text()|/html/@lang
postProcess:
- replace:
- regex: ^en$
with: Private
- regex: ^en$
with: Private
Image: //meta[@property="og:image"]/@content
# XPath scraper for a movie page, written with plain (un-anchored) selectors
# and a fixed studio name.
# NOTE(review): a `movieScraper` key also appears earlier in this listing with
# anchored selectors - this looks like the pre-change side of the diff; confirm
# that only one definition remains in the resulting file.
movieScraper:
movie:
Name: //div[@class="dvds-wrapper"]/h1/text()
Director: //p[@class="director"]/span[@itemprop="name"]/text()
Duration:
# Text of the <p> whose <em> child contains "Duration:".
selector: //p[em[contains(text(), "Duration:")]]/text()
postProcess:
- replace:
# Rewrites a value that starts with digits as 00:<digits>:00
# (presumably a minute count turned into HH:MM:SS - TODO confirm).
- regex: ^(\d+).+$
with: 00:$1:00
Date: //span[@itemprop="datePublished"]/text()
Studio:
Name:
# Studio name is a constant here rather than scraped from the page.
fixed: Private
Synopsis: //p[@class="sinopsys"]
FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src
# XPath scraper for performer search results.
# NOTE(review): a `performerSearch` key with the same selectors also appears
# earlier in this listing - this looks like the pre-change side of the diff;
# confirm that only one definition remains in the resulting file.
performerSearch:
common:
# $searchData is reused as a prefix by the performer selectors below.
$searchData: //a[@data-track="PORNSTAR_NAME"]
performer:
# Name comes from the matched <a> node itself, URL from its href attribute.
Name: $searchData
URL: $searchData/@href
performerScraper:
common:
$performerData: //div[contains(concat(' ',normalize-space(@class),' '),' pornstar-wrapper ')]
@ -91,16 +115,16 @@ xPathScrapers:
selector: $performerData//em[text()="Height:"]/../text()
postProcess:
- replace:
- regex: (\d+)cm.+
with: $1
- regex: (\d+)cm.+
with: $1
- map:
-: ""
Weight:
selector: $performerData//em[text()="Weight:"]/../text()
postProcess:
- replace:
- regex: (\d+)kg.+
with: $1
- regex: (\d+)kg.+
with: $1
- map:
-: ""
Country: $performerData//em[text()="Birth place:"]/../text()