LaraLarsen: add scene scraper (#1855)

Signed-off-by: Roman Ondráček <mail@romanondracek.cz>
This commit is contained in:
Roman Ondráček 2024-05-24 00:39:09 +02:00 committed by GitHub
parent 76544bb9a9
commit 36aee4f148
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 36 additions and 0 deletions

View File

@@ -1226,6 +1226,7 @@ ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
ladysublime.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lanesisters.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
laralarsen.com|LaraLarsen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish
lasublimexxx.com|Lasublime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lasvegasamateurs.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
latexlolanoir.com|MyMemberSite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-

35
scrapers/LaraLarsen.yml Normal file
View File

@@ -0,0 +1,35 @@
# XPath scene scraper for laralarsen.com.
# Scenes are matched by URL and every field is read from the page markup.
name: Lara Larsen
sceneByURL:
  - action: scrapeXPath
    url:
      - laralarsen.com/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Title: //h1[@itemprop='headline']
      Details:
        selector: //div[@itemprop='articleBody']/p
        # Double quotes are required here: YAML only interprets the \n escape
        # inside double-quoted scalars. A single-quoted '\n\n' would join the
        # paragraphs with the literal characters backslash-n backslash-n.
        concat: "\n\n"
      Code:
        selector: //link[@rel='canonical']/@href
        postProcess:
          # Reduce the canonical URL to its final path segment. The dots in
          # the host name are escaped so they only match a literal '.'.
          - replace:
              - regex: ^https://www\.laralarsen\.com/[a-z]+/([^/]+)$
                with: $1
      URL: //link[@rel='canonical']/@href
      Date:
        selector: //time[@itemprop='datePublished']/@content
        postProcess:
          # Go reference-time layout, i.e. a YYYY-MM-DD date.
          - parseDate: '2006-01-02'
      Image: //div[@itemprop='video']/video/@poster
      Studio:
        Name:
          fixed: Lara Larsen
      Tags:
        Name: //ol[@class='tags']/li/a[@rel='tag']
      Performers:
        Name: //span[@itemprop='author']
# Last Updated May 24, 2024