mirror of
https://github.com/stashapp/CommunityScripts.git
synced 2025-12-14 04:56:05 -06:00
137 lines
5.9 KiB
Python
137 lines
5.9 KiB
Python
import os
|
|
import re
|
|
import json
|
|
from datetime import datetime
|
|
import log
|
|
from abstractParser import AbstractParser
|
|
|
|
|
|
class RegExParser(AbstractParser):
|
|
|
|
def __init__(self, scene_path, defaults=None):
|
|
self._defaults = defaults or [self.empty_default]
|
|
self._scene_path = scene_path
|
|
self._re_config_file = self._find_in_parents(
|
|
os.path.dirname(scene_path),
|
|
"nfoSceneParser.json")
|
|
self._groups = {}
|
|
if self._re_config_file:
|
|
try:
|
|
# Config found => load it
|
|
with open(self._re_config_file, mode="r", encoding="utf-8") as f:
|
|
config = json.load(f)
|
|
# TODO: support stash patterns and build a regex out of it...
|
|
self._regex = config["regex"]
|
|
self._splitter = config.get("splitter")
|
|
self._scope = config.get("scope")
|
|
# Scope defaults to the full path. Change to filename if so configured
|
|
if self._scope is not None and self._scope.lower() == "filename":
|
|
self._name = os.path.split(self._scene_path)[1]
|
|
else:
|
|
self._name = self._scene_path
|
|
log.LogDebug(f"Using regex config file {self._re_config_file}")
|
|
except Exception as e:
|
|
log.LogInfo(
|
|
f"Could not load regex config file '{self._re_config_file}': {repr(e)}")
|
|
else:
|
|
log.LogDebug(f"No re config found for {self._scene_path}")
|
|
|
|
def __format_date(self, re_findall, date_format):
|
|
date_text = "-".join(re_findall[0] if re_findall else ())
|
|
date = datetime.strptime(date_text, date_format) if date_text else None
|
|
return date.isoformat()[:10] if date else None
|
|
|
|
def __find_date(self, text):
|
|
if not text:
|
|
return
|
|
# For proper boundary detection in regex, switch _ to -
|
|
safe_text = text.replace("_", "-")
|
|
# Finds dates in various formats
|
|
re_yyyymmdd = re.findall(
|
|
r"(\b(?:19|20)\d\d)[- /.](\b1[012]|0[1-9])[- /.](\b3[01]|[12]\d|0[1-9])", safe_text)
|
|
re_ddmmyyyy = re.findall(
|
|
r"(\b3[01]|[12]\d|0[1-9])[- /.](\b1[012]|0[1-9])[- /.](\b(?:19|20)\d\d)", safe_text)
|
|
re_yymmdd = re.findall(
|
|
r"(\b\d\d)[- /.](\b1[012]|0[1-9])[- /.](\b3[01]|[12]\d|0[1-9])", safe_text)
|
|
re_ddmmyy = re.findall(
|
|
r"(\b3[01]|[12]\d|0[1-9])[- /.](\b1[012]|0[1-9])[- /.](\b\d\d)", safe_text)
|
|
re_yyyymm = re.findall(
|
|
r"\b((?:19|20)\d\d)[- /.](\b1[012]|0[1-9])", safe_text)
|
|
re_mmyyyy = re.findall(
|
|
r"(\b1[012]|0[1-9])[- /.](\b(?:19|20)\d\d)", safe_text)
|
|
re_yyyy = re.findall(r"(\b(?:19|20)\d\d)", safe_text)
|
|
# Builds iso formatted dates
|
|
yyyymmdd = self.__format_date(re_yyyymmdd, "%Y-%m-%d")
|
|
ddmmyyyy = self.__format_date(re_ddmmyyyy, "%d-%m-%Y")
|
|
yymmdd = self.__format_date(re_yymmdd, "%y-%m-%d")
|
|
ddmmyy = self.__format_date(re_ddmmyy, "%d-%m-%y")
|
|
yyyymm = self.__format_date(re_yyyymm, "%Y-%m")
|
|
mmyyyy = self.__format_date(re_mmyyyy, "%m-%Y")
|
|
yyyy = datetime.strptime(re_yyyy[0], "%Y").isoformat()[
|
|
:10] if re_yyyy else None
|
|
# return in order of preference
|
|
return yyyymmdd or ddmmyyyy or yymmdd or ddmmyy or yyyymm or mmyyyy or yyyy
|
|
|
|
def __extract_re_date(self):
|
|
date_raw = self._groups.get("date") or self._name
|
|
file_date = self.__find_date(date_raw)
|
|
return file_date
|
|
|
|
def __extract_re_actors(self):
|
|
file_actors = []
|
|
if self._groups.get("performers"):
|
|
if self._splitter is not None:
|
|
# re split supports multiple delimiters patterns.
|
|
actors = re.split(
|
|
self._splitter, self._groups.get("performers"))
|
|
# strip() accommodates any number of spaces before/after each delimiter...
|
|
file_actors = list(map(lambda a: a.strip(), actors))
|
|
else:
|
|
file_actors = [self._groups.get("performers")]
|
|
return file_actors
|
|
|
|
def __extract_re_tags(self):
|
|
file_tags = []
|
|
if self._groups.get("tags"):
|
|
if self._splitter is not None:
|
|
file_tags = self._groups.get("tags").split(self._splitter)
|
|
else:
|
|
file_tags = [self._groups.get("tags")]
|
|
return file_tags
|
|
|
|
def __extract_re_rating(self):
|
|
rating = round(float(self._groups.get("rating") or 0))
|
|
if rating > 0:
|
|
return rating
|
|
return 0
|
|
|
|
def parse(self):
|
|
if not self._re_config_file:
|
|
return {}
|
|
log.LogDebug(f"Parsing with {self._regex}")
|
|
# Match the regex against the file name
|
|
matches = re.match(self._regex, self._name)
|
|
self._groups = matches.groupdict() if matches else {}
|
|
if not self._groups:
|
|
log.LogInfo(
|
|
f"Regex found in {self._re_config_file}, is NOT matching '{self._name}'")
|
|
file_data = {
|
|
"file": self._re_config_file,
|
|
"source": "re",
|
|
"title": self._groups.get("title"),
|
|
"director": self._groups.get("director") or self._get_default("director"),
|
|
"details": self._get_default("details"),
|
|
"studio": self._groups.get("studio") or self._get_default("studio"),
|
|
"movie": self._groups.get("movie") or self._get_default("title"),
|
|
"scene_index": self._groups.get("index") or self._get_default("scene_index"),
|
|
"date": self.__extract_re_date() or self._get_default("date"),
|
|
"actors": self.__extract_re_actors() or self._get_default("actors"),
|
|
# tags are merged with defaults
|
|
"tags": list(set(self.__extract_re_tags() + self._get_default("tags"))),
|
|
"rating": self.__extract_re_rating() or self._get_default("rating"),
|
|
"cover_image": None,
|
|
"other_image": None,
|
|
"urls": None,
|
|
}
|
|
return file_data
|