ShokoApi Scraper / Fixes + Refactoring (#968)

This commit is contained in:
MrX292 2022-08-10 23:10:17 +02:00 committed by GitHub
parent 41861de3ed
commit 5f0ffb46c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 203 additions and 163 deletions

View File

@ -1,162 +0,0 @@
from urllib.request import Request, urlopen
import sys
import json
import re
import urllib.request, urllib.error
try:
import requests
from requests.structures import CaseInsensitiveDict
except ModuleNotFoundError:
print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr)
print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr)
sys.exit()
# Shoko API key; when left empty it is requested through get_apikey()
Apikey = ""
# API key of your Stash instance
StashAPIKEY = ""
# GraphQL endpoint of your Stash instance
Stashurl = "http://localhost:9999/graphql"
# Base URL of your Shoko server
Shokourl = "http://localhost:8111"
# Shoko server credentials (username / password)
Shoko_user = ""
Shoko_pass = ""
def debug(q):
    """Write ``q`` followed by a newline to stderr, keeping stdout free for results."""
    print(str(q), file=sys.stderr)
def get_filename(scene_id):
    """Return the URL-quoted filename of the Stash scene ``scene_id``.

    The scene's path is requested from the Stash GraphQL endpoint
    (``Stashurl``); everything up to the last ``/`` or backslash is stripped
    and the remaining filename is percent-quoted for use in a URL.
    """
    debug(scene_id)
    headers = CaseInsensitiveDict()
    headers["ApiKey"] = StashAPIKEY
    headers["Content-Type"] = "application/json"
    # Let json.dumps produce the request body so it is always valid JSON,
    # instead of concatenating the document by hand.
    graphql = " query { findScene (id: " + str(scene_id) + " ) {path , id} }"
    data = json.dumps({"query": graphql})
    resp = requests.post(url=Stashurl, headers=headers, data=data)
    debug(resp.status_code)
    output = resp.json()
    path = output['data']['findScene']['path']
    debug(str(path))
    # Greedy match removes the whole directory part, whatever the separator.
    filename = re.sub(r"^.+[\\/]", "", str(path))
    return requests.utils.quote(filename)
def find_scene_id(scene_id):
    """Resolve a Stash scene id to the matching Shoko episode.

    Returns ``(scene_id, epnumber, apikey, date)`` where the Shoko episode id,
    the episode label and the air date have been converted to strings.
    """
    # Only ask Shoko for a key when none is configured.
    apikey = get_apikey() if Apikey == "" else Apikey
    filename = get_filename(scene_id)
    debug(filename)
    shoko_id, shoko_epnumber, shoko_date = find_scene(apikey, filename)
    return str(shoko_id), str(shoko_epnumber), apikey, str(shoko_date)
def lookup_scene(scene_id, epnumber, apikey, date):
    """Build the scraper result for the Shoko episode ``scene_id``.

    ``epnumber`` is the episode label ("<number> <name>"). The returned dict
    has the keys ``title``, ``details``, ``image``, ``date`` and ``tags``.
    """
    debug(epnumber)
    title, details, cover, tags = get_series(apikey, scene_id)
    tags = tags + ["ShokoAPI"] + ["Hentai"]
    # Pad the episode number to two digits. A plain "0" prefix is only right
    # for single-digit episodes (12 would become 012), so it is kept solely
    # as the fallback for labels that do not start with a number.
    number, separator, rest = epnumber.partition(" ")
    if number.isdigit():
        episode_label = number.zfill(2) + separator + rest
    else:
        episode_label = "0" + epnumber
    res = {
        'title': title + " " + episode_label,
        'details': details,
        'image': cover,
        'date': date,
        'tags': [{"name": i} for i in tags],
    }
    debug(tags)
    debug(res)
    return res
def get_apikey():
    """Log in to Shoko with ``Shoko_user``/``Shoko_pass`` and return the API key."""
    headers = CaseInsensitiveDict()
    headers["Content-Type"] = "application/json"
    # json.dumps escapes quotes and backslashes in the credentials; the former
    # hand-built string produced invalid JSON for such passwords.
    values = json.dumps({
        "user": Shoko_user,
        "pass": Shoko_pass,
        "device": "Stash Scan",
    })
    resp = requests.post(Shokourl + '/api/auth', data=values, headers=headers)
    return str(resp.json()['apikey'])
def find_scene(apikey, filename):
    """Ask Shoko which episode belongs to ``filename``.

    ``filename`` is inserted into the request URL as-is, so it has to be
    URL-quoted by the caller. Returns ``(scene_id, epnumber, date)`` where
    ``epnumber`` is the episode number and name joined by a space.

    A 404 answer is reported through ``not_found`` (which exits the script);
    any other HTTP or connection error is logged and re-raised.
    """
    headers = CaseInsensitiveDict()
    headers["apikey"] = apikey
    request = Request(Shokourl + '/api/ep/getbyfilename?filename=' + filename, headers=headers)
    try:
        # The context manager closes the connection once the body is read.
        with urlopen(request) as response:
            response_body = response.read()
    except urllib.error.HTTPError as e:
        if e.code == 404:
            debug("the file: " + filename + " is not matched on shoko")
            not_found(["Shoko_not_found"])
        debug('HTTPError: {}'.format(e.code))
        # There is no body to parse: re-raise the real error instead of
        # falling through to an unrelated unbound-variable error.
        raise
    except urllib.error.URLError as e:
        # Not an HTTP-specific error (e.g. connection refused)
        debug('URLError: {}'.format(e.reason))
        raise
    debug('good')
    JSON_object = json.loads(response_body.decode('utf-8'))
    debug("found scene\t" + str(JSON_object))
    scene_id = JSON_object['id']
    epnumber = str(JSON_object['epnumber']) + ' ' + str(JSON_object['name'])
    date = JSON_object['air']
    return scene_id, epnumber, date
def not_found(error):
    """Print a tags-only result built from ``error`` plus "Shoko_error", then exit.

    ``error`` is a list of tag names.
    """
    tag_names = error + ["Shoko_error"]
    payload = {'tags': [{"name": tag} for tag in tag_names]}
    print(json.dumps(payload))
    error_exit()
def error_exit():
    """Terminate the script by raising ``SystemExit`` without an exit code."""
    raise SystemExit
def get_series(apikey, scene_id):
    """Fetch the series that the Shoko episode ``scene_id`` belongs to.

    Returns ``(title, details, cover, tags)``; ``cover`` is ``Shokourl``
    followed by the url of the first thumbnail in the response.
    """
    headers = CaseInsensitiveDict()
    headers["apikey"] = apikey
    request = Request(Shokourl + '/api/serie/fromep?id=' + scene_id, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with urlopen(request) as response:
        response_body = response.read()
    JSON_object = json.loads(response_body.decode('utf-8'))
    debug("got series:\t" + str(JSON_object))
    title = JSON_object['name']
    details = JSON_object['summary']
    debug("number of episodes " + str(JSON_object['local_sizes']['Episodes']))
    cover = Shokourl + JSON_object['art']['thumb'][0]['url']
    tags = JSON_object['tags']
    return title, details, cover, tags
# Script entry point: in "query" mode a scene fragment (JSON) is read from
# stdin and the scraped scene is printed as JSON on stdout; diagnostics go
# to stderr.
if sys.argv[1] == "query":
    fragment = json.loads(sys.stdin.read())
    print(json.dumps(fragment), file=sys.stderr)
    fscene_id, fepnumber, fapikey, fdate = find_scene_id(fragment['id'])
    scene_id = str(fscene_id)
    epnumber = str(fepnumber)
    apikey = str(fapikey)
    date = str(fdate)
    if scene_id:
        print(f"Found scene id: {scene_id}", file=sys.stderr)
        result = lookup_scene(scene_id, epnumber, apikey, date)
        print(json.dumps(result))
    else:
        print(f"Could not determine scene id in filename: `{fragment['id']}`", file=sys.stderr)

View File

@ -0,0 +1,202 @@
from urllib.request import Request, urlopen
import sys
import os
import json
import re
import urllib.error
import urllib.request
# to import from a parent directory we need to add that directory to the system path
csd = os.path.dirname(
os.path.realpath(__file__)) # get current script directory
parent = os.path.dirname(csd) # parent directory (should be the scrapers one)
sys.path.append(
parent
) # add parent dir to sys path so that we can import py_common from there
try:
import requests
from requests.utils import requote_uri
from requests.structures import CaseInsensitiveDict
except ModuleNotFoundError:
print(
"You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)",
file=sys.stderr)
print(
"If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests",
file=sys.stderr)
sys.exit()
try:
from py_common import log
except ModuleNotFoundError:
print(
"You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
file=sys.stderr)
sys.exit()
# user inputs start
# Shoko API key; leave empty to have it requested with the Shoko username
# and password below
SHOKO_API_KEY = ""
# API key of your Stash instance
STASH_API_KEY = ""
# GraphQL URL of your Stash instance
STASH_URL = "http://localhost:9999/graphql"
# URL of your Shoko server
SHOKO_URL = "http://localhost:8111"
# Shoko server username and password
SHOKO_USER = ""
SHOKO_PASS = ""
# user inputs end
def validate_user_inputs() -> bool:
    """Check the configured Shoko and Stash URLs and log every malformed one.

    Both URLs are always checked, so a single run reports all problems.
    Returns True only when both have the expected form.
    """
    shoko_ok = re.search(r"^(http|https)://.+:\d+$", SHOKO_URL) is not None
    if not shoko_ok:
        log.error("Shoko Url needs to be hostname:port and is currently " + SHOKO_URL)

    stash_ok = re.match(r"^(http|https)://.+:\d+/graphql$", STASH_URL) is not None
    if not stash_ok:
        log.error("Stash Url needs to be hostname:port/graphql and is currently " + STASH_URL)

    return shoko_ok and stash_ok
def get_filename(scene_id: str) -> "str | None":
    """Return the bare filename of the Stash scene ``scene_id``.

    The scene's path is requested from the Stash GraphQL endpoint
    (``STASH_URL``) and everything up to the last ``/`` or backslash is
    stripped.

    Returns ``None`` when Stash does not answer with status 200 or the
    response contains no scene.
    """
    log.debug(f"stash sceneid: {scene_id}")
    headers = CaseInsensitiveDict()
    headers["ApiKey"] = STASH_API_KEY
    headers["Content-Type"] = "application/json"
    # Let json.dumps produce the request body so it is always valid JSON,
    # instead of concatenating the document by hand.
    graphql = " query { findScene (id: " + str(scene_id) + " ) {path , id} }"
    data = json.dumps({"query": graphql})
    resp = requests.post(url=STASH_URL, headers=headers, data=data)
    if resp.status_code != 200:
        log.error("response from stash was not successful stash resp_code: " + str(resp.status_code))
        return None
    log.debug("Stash response was successful resp_code: " + str(resp.status_code))
    output = resp.json()
    # "data" or "findScene" can be null (presumably for an unknown scene id or
    # a GraphQL error); report that instead of failing on a None subscript.
    scene = (output.get('data') or {}).get('findScene')
    if scene is None:
        log.error(f"stash returned no scene for id: {scene_id}")
        return None
    path = scene['path']
    log.debug("scene path in stash: " + str(path))
    # Greedy match removes the whole directory part, whatever the separator.
    filename = re.sub(r"^.+[\\/]", "", str(path))
    log.debug(f"encoded filename: {filename}")
    return filename
def find_scene_id(scene_id: str) -> (str, str):
    """Return ``(filename, apikey)`` for the Stash scene ``scene_id``.

    The configured ``SHOKO_API_KEY`` is used when set; otherwise a key is
    requested through ``get_apikey``.
    """
    apikey = get_apikey() if SHOKO_API_KEY == "" else SHOKO_API_KEY
    return get_filename(scene_id), apikey
def lookup_scene(scene_id: str, epnumber: str, apikey: str, date: str) -> dict:
    """Build the scraper result for the Shoko episode ``scene_id``.

    ``epnumber`` is the episode label ("<number> <name>"). The returned dict
    has the keys ``title``, ``details``, ``image``, ``date`` and ``tags``.
    """
    log.debug(epnumber)
    title, details, cover, tags = get_series(apikey, scene_id)
    tags = tags + ["ShokoAPI"] + ["Hentai"]
    # Pad the episode number to two digits. A plain "0" prefix is only right
    # for single-digit episodes (12 would become 012), so it is kept solely
    # as the fallback for labels that do not start with a number.
    number, separator, rest = epnumber.partition(" ")
    if number.isdigit():
        episode_label = number.zfill(2) + separator + rest
    else:
        episode_label = "0" + epnumber
    res = {
        'title': title + " " + episode_label,
        'details': details,
        'image': cover,
        'date': date,
        'tags': [{"name": i} for i in tags],
    }
    log.debug("sceneinfo from Shoko: " + str(res))
    return res
def get_apikey() -> "str | None":
    """Log in to Shoko with ``SHOKO_USER``/``SHOKO_PASS`` and return an API key.

    Returns ``None`` (after logging the reason) when Shoko does not answer
    with status 200.
    """
    headers = CaseInsensitiveDict()
    headers["Content-Type"] = "application/json"
    # json.dumps escapes quotes and backslashes in the credentials; the former
    # hand-built string produced invalid JSON for such passwords.
    values = json.dumps({
        "user": SHOKO_USER,
        "pass": SHOKO_PASS,
        "device": "Stash Scan",
    })
    resp = requests.post(SHOKO_URL + '/api/auth', data=values, headers=headers)
    if resp.status_code == 200:
        log.debug("got Shoko's apikey: ")
        return str(resp.json()['apikey'])
    if resp.status_code == 401:
        log.error("check if your shoko server username/password is correct")
    else:
        # This is Shoko's status code; the old message labelled it "stash".
        log.error("response from Shoko was not successful shoko resp_code: " + str(resp.status_code))
    return None
def find_scene(apikey: str, filename: str):
    """Ask Shoko which episode belongs to ``filename``.

    Returns ``(scene_id, epnumber, date)`` where ``epnumber`` is the episode
    number and name joined by a space, or ``None`` when Shoko has no match or
    the request fails (the reason is logged).
    """
    headers = CaseInsensitiveDict()
    headers["apikey"] = apikey
    # NOTE(review): requote_uri leaves reserved characters such as "&" and "#"
    # untouched, so filenames containing them may be cut short by the server.
    url_call = requote_uri(SHOKO_URL + '/api/ep/getbyfilename?filename=' + filename)
    log.debug(f"using url: {url_call}")
    request = Request(url_call, headers=headers)
    try:
        # The context manager closes the connection once the body is read.
        with urlopen(request) as response:
            response_body = response.read()
    except urllib.error.HTTPError as http_error:
        if http_error.code == 404:
            log.info(f"the file: {filename} is not matched on shoko")
        else:
            log.error(f"HTTPError: {http_error.code}")
        # Return explicitly; falling through would hit unbound variables.
        return None
    except urllib.error.URLError as url_error:
        # Not an HTTP-specific error (e.g. connection refused)
        log.error(f'URLError: {url_error.reason}')
        return None
    log.info(f"the file: {filename} is matched on shoko")
    json_object = json.loads(response_body.decode('utf-8'))
    log.debug("found scene\t" + str(json_object))
    scene_id = json_object['id']
    epnumber = str(json_object['epnumber']) + ' ' + str(json_object['name'])
    date = json_object['air']
    return scene_id, epnumber, date
def get_series(apikey: str, scene_id: str):
    """Fetch the series that the Shoko episode ``scene_id`` belongs to.

    Returns ``(title, details, cover, tags)``; ``cover`` is ``SHOKO_URL``
    followed by the url of the first thumbnail in the response.
    """
    headers = CaseInsensitiveDict()
    headers["apikey"] = apikey
    request = Request(SHOKO_URL + '/api/serie/fromep?id=' + scene_id, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with urlopen(request) as response:
        response_body = response.read()
    json_object = json.loads(response_body.decode('utf-8'))
    log.debug("got series:\t" + str(json_object))
    title = json_object['name']
    details = json_object['summary']
    log.debug("number of episodes " + str(json_object['local_sizes']['Episodes']))
    cover = SHOKO_URL + json_object['art']['thumb'][0]['url']
    tags = json_object['tags']
    return title, details, cover, tags
def query(fragment: dict) -> "dict | None":
    """Scrape the scene described by ``fragment`` (a dict with an ``id`` key).

    Returns the scene dict built by ``lookup_scene``, or ``None`` when the
    filename or API key could not be obtained or Shoko has no matching
    episode.
    """
    filename, apikey = find_scene_id(fragment['id'])
    if filename is None or apikey is None:
        log.error("cannot query shoko without a filename and an api key")
        return None
    try:
        found = find_scene(apikey, filename)
    except Exception as error:
        # Still best-effort (any lookup failure means "no result"), but no
        # longer swallows SystemExit/KeyboardInterrupt and leaves a trace.
        log.debug(f"find_scene failed: {error!r}")
        return None
    if found is None:
        return None
    findscene_scene_id, findscene_epnumber, find_date = found
    scene_id = str(findscene_scene_id)
    epnumber = str(findscene_epnumber)
    date = str(find_date)
    apikey = str(apikey)
    log.debug(f"Found scene id: {scene_id}")
    return lookup_scene(scene_id, epnumber, apikey, date)
def main():
    """Read a scene fragment (JSON) from stdin and print the scrape result.

    The mode comes from the first command-line argument. ``null`` is printed
    when the configured URLs are invalid, the mode is not ``query`` or
    nothing was found.
    """
    mode = sys.argv[1]
    fragment = json.loads(sys.stdin.read())
    log.debug(str(fragment))
    data = None
    # Validation always runs (and logs), whatever the mode is.
    inputs_ok = validate_user_inputs()
    if inputs_ok is True and mode == 'query':
        data = query(fragment)
    print(json.dumps(data))


if __name__ == '__main__':
    main()

View File

@ -6,4 +6,4 @@ sceneByFragment:
- ShokoAPI.py
- query
# Last Updated July 04, 2021
# Last Updated April 03, 2022