2020-03-19 16:45:31 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
""" AUTH API """
|
|
|
|
|
|
|
|
from __future__ import absolute_import, division, unicode_literals
|
|
|
|
|
2021-02-17 07:42:24 +01:00
|
|
|
import hashlib
|
2020-03-19 16:45:31 +01:00
|
|
|
import json
|
|
|
|
import logging
|
2020-04-01 11:01:22 +02:00
|
|
|
import os
|
2020-03-19 16:45:31 +01:00
|
|
|
import re
|
2020-04-01 11:01:22 +02:00
|
|
|
import time
|
2020-03-19 16:45:31 +01:00
|
|
|
from datetime import datetime
|
|
|
|
|
2020-03-22 10:30:23 +01:00
|
|
|
import requests
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2022-02-02 17:49:48 +01:00
|
|
|
from resources.lib.kodiutils import STREAM_DASH, STREAM_HLS, html_to_kodi
|
2021-02-01 08:53:13 +01:00
|
|
|
from resources.lib.viervijfzes import ResolvedStream
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2020-11-30 10:15:52 +01:00
|
|
|
try: # Python 3
|
|
|
|
from html import unescape
|
|
|
|
except ImportError: # Python 2
|
|
|
|
from HTMLParser import HTMLParser
|
|
|
|
|
|
|
|
unescape = HTMLParser().unescape
|
|
|
|
|
2020-10-26 10:25:57 +01:00
|
|
|
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)

# Cache modes, passed as the `cache` argument throughout ContentApi.
CACHE_AUTO = 1  # Allow to use the cache, and query the API if no cache is available
CACHE_ONLY = 2  # Only use the cache, don't use the API
CACHE_PREVENT = 3  # Don't use the cache
|
|
|
|
|
2020-03-19 16:45:31 +01:00
|
|
|
|
|
|
|
class UnavailableException(Exception):
    """ Is thrown when an item is unavailable (e.g. the API returns no data for it). """
|
|
|
|
|
|
|
|
|
|
|
|
class NoContentException(Exception):
    """ Is thrown when no items are available. """
|
|
|
|
|
|
|
|
|
|
|
|
class GeoblockedException(Exception):
    """ Is thrown when a geoblocked item is played. """
|
|
|
|
|
|
|
|
|
|
|
|
class Program:
    """ Defines a Program. """

    def __init__(self, uuid=None, path=None, channel=None, title=None, description=None, aired=None, poster=None, thumb=None, fanart=None, seasons=None,
                 episodes=None, clips=None, my_list=False):
        """ Create a Program data object.
        :type uuid: str
        :type path: str
        :type channel: str
        :type title: str
        :type description: str
        :type aired: datetime
        :type poster: str
        :type thumb: str
        :type fanart: str
        :type seasons: list[Season]
        :type episodes: list[Episode]
        :type clips: list[Episode]
        :type my_list: bool
        """
        # Identification
        self.uuid = uuid
        self.path = path
        self.channel = channel
        # Metadata
        self.title = title
        self.description = description
        self.aired = aired
        # Artwork
        self.poster = poster
        self.thumb = thumb
        self.fanart = fanart
        # Content
        self.seasons = seasons
        self.episodes = episodes
        self.clips = clips
        # Whether this program is on the user's "My List"
        self.my_list = my_list

    def __repr__(self):
        # Show all attributes for easy debugging.
        return repr(self.__dict__)
|
|
|
|
|
|
|
|
|
|
|
|
class Season:
    """ Defines a Season. """

    def __init__(self, uuid=None, path=None, channel=None, title=None, description=None, number=None):
        """ Create a Season data object.
        :type uuid: str
        :type path: str
        :type channel: str
        :type title: str
        :type description: str
        :type number: int
        """
        # Identification
        self.uuid = uuid
        self.path = path
        self.channel = channel
        # Metadata
        self.title = title
        self.description = description
        # Season number within the program
        self.number = number

    def __repr__(self):
        # Show all attributes for easy debugging.
        return repr(self.__dict__)
|
|
|
|
|
|
|
|
|
|
|
|
class Episode:
    """ Defines an Episode. """

    def __init__(self, uuid=None, nodeid=None, path=None, channel=None, program_title=None, title=None, description=None, thumb=None, duration=None,
                 season=None, season_uuid=None, number=None, rating=None, aired=None, expiry=None, stream=None):
        """ Create an Episode data object.
        :type uuid: str
        :type nodeid: str
        :type path: str
        :type channel: str
        :type program_title: str
        :type title: str
        :type description: str
        :type thumb: str
        :type duration: int
        :type season: int
        :type season_uuid: str
        :type number: int
        :type rating: str
        :type aired: datetime
        :type expiry: datetime
        :type stream: str
        """
        # Identification
        self.uuid = uuid
        self.nodeid = nodeid
        self.path = path
        self.channel = channel
        # Metadata
        self.program_title = program_title
        self.title = title
        self.description = description
        self.thumb = thumb
        self.duration = duration
        # Position within the program
        self.season = season
        self.season_uuid = season_uuid
        self.number = number
        # Availability
        self.rating = rating
        self.aired = aired
        self.expiry = expiry
        # Stream path, when already known
        self.stream = stream

    def __repr__(self):
        # Show all attributes for easy debugging.
        return repr(self.__dict__)
|
|
|
|
|
|
|
|
|
|
|
|
class Category:
    """ Defines a Category. """

    def __init__(self, uuid=None, channel=None, title=None, programs=None, episodes=None):
        """ Create a Category data object.
        :type uuid: str
        :type channel: str
        :type title: str
        :type programs: List[Program]
        :type episodes: List[Episode]
        """
        # Identification
        self.uuid = uuid
        self.channel = channel
        self.title = title
        # Contained items
        self.programs = programs
        self.episodes = episodes

    def __repr__(self):
        # Show all attributes for easy debugging.
        return repr(self.__dict__)
|
|
|
|
|
|
|
|
|
|
|
|
class ContentApi:
    """ GoPlay Content API"""

    # Base URL of the GoPlay website, used both for scraping pages and for the site's /api endpoints.
    SITE_URL = 'https://www.goplay.be'

    # Legacy API host (pre-rebrand name), still used to resolve streams.
    API_VIERVIJFZES = 'https://api.viervijfzes.be'

    # Current GoPlay API host, used for the DRM authentication XML.
    API_GOPLAY = 'https://api.goplay.be'
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2020-04-01 11:01:22 +02:00
|
|
|
    def __init__(self, auth=None, cache_path=None):
        """ Initialise object
        :type auth: object  # assumed to be the add-on's AuthApi (must expose get_token()) — TODO confirm
        :type cache_path: str  # directory where JSON cache files are stored
        """
        self._session = requests.session()
        self._auth = auth
        self._cache_path = cache_path
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2021-02-01 08:53:13 +01:00
|
|
|
def get_programs(self, channel=None, cache=CACHE_AUTO):
|
2020-03-19 16:45:31 +01:00
|
|
|
""" Get a list of all programs of the specified channel.
|
2021-02-09 20:54:40 +01:00
|
|
|
:type channel: str
|
2020-04-01 11:01:22 +02:00
|
|
|
:type cache: str
|
2020-03-19 16:45:31 +01:00
|
|
|
:rtype list[Program]
|
|
|
|
"""
|
|
|
|
|
2020-04-01 11:01:22 +02:00
|
|
|
def update():
|
|
|
|
""" Fetch the program listing by scraping """
|
|
|
|
# Load webpage
|
2021-02-01 08:53:13 +01:00
|
|
|
raw_html = self._get_url(self.SITE_URL + '/programmas')
|
2020-04-01 11:01:22 +02:00
|
|
|
|
|
|
|
# Parse programs
|
2021-02-01 08:53:13 +01:00
|
|
|
regex_programs = re.compile(r'data-program="(?P<json>[^"]+)"', re.DOTALL)
|
|
|
|
|
|
|
|
data = [
|
|
|
|
json.loads(unescape(item.group('json')))
|
2020-04-01 11:01:22 +02:00
|
|
|
for item in regex_programs.finditer(raw_html)
|
2021-02-01 08:53:13 +01:00
|
|
|
]
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2020-04-01 11:01:22 +02:00
|
|
|
if not data:
|
2021-02-01 08:53:13 +01:00
|
|
|
raise Exception('No programs found')
|
2020-03-19 16:45:31 +01:00
|
|
|
|
2020-04-01 11:01:22 +02:00
|
|
|
return data
|
|
|
|
|
|
|
|
# Fetch listing from cache or update if needed
|
2021-02-17 07:42:24 +01:00
|
|
|
data = self._handle_cache(key=['programs'], cache_mode=cache, update=update, ttl=30 * 60) # 30 minutes
|
2020-04-01 11:01:22 +02:00
|
|
|
if not data:
|
|
|
|
return []
|
2020-03-22 15:37:15 +01:00
|
|
|
|
2021-02-01 08:53:13 +01:00
|
|
|
if channel:
|
|
|
|
programs = [
|
|
|
|
self._parse_program_data(record) for record in data if record['pageInfo']['brand'] == channel
|
|
|
|
]
|
|
|
|
else:
|
|
|
|
programs = [
|
|
|
|
self._parse_program_data(record) for record in data
|
|
|
|
]
|
|
|
|
|
2020-03-19 16:45:31 +01:00
|
|
|
return programs
|
|
|
|
|
2021-02-01 08:53:13 +01:00
|
|
|
    def get_program(self, path, extract_clips=False, cache=CACHE_AUTO):
        """ Get a Program object from the specified page.
        :type path: str
        :type extract_clips: bool
        :type cache: int
        :rtype Program
        """
        # We want to use the html to extract clips
        # This is the worst hack, since Python 2.7 doesn't support nonlocal
        # (mutating a single-element list lets update() pass the page back to this scope)
        raw_html = [None]

        def update():
            """ Fetch the program metadata by scraping """
            # Fetch webpage
            page = self._get_url(self.SITE_URL + '/' + path)

            # Store a copy in the parent's raw_html var.
            raw_html[0] = page

            # Extract JSON embedded in the data-hero attribute
            # NOTE: raises AttributeError if data-hero is missing; _handle_cache will
            # then log a warning and fall back to expired cache values.
            regex_program = re.compile(r'data-hero="([^"]+)', re.DOTALL)
            json_data = unescape(regex_program.search(page).group(1))
            data = json.loads(json_data)['data']

            return data

        # Fetch listing from cache or update if needed
        data = self._handle_cache(key=['program', path], cache_mode=cache, update=update)
        if not data:
            return None

        program = self._parse_program_data(data)

        # Also extract clips if we did a real HTTP call
        # (raw_html[0] stays None when the data came from cache)
        if extract_clips and raw_html[0]:
            clips = self._extract_videos(raw_html[0])
            program.clips = clips

        return program
|
|
|
|
|
2021-02-09 20:54:40 +01:00
|
|
|
def get_program_by_uuid(self, uuid, cache=CACHE_AUTO):
|
|
|
|
""" Get a Program object with the specified uuid.
|
|
|
|
:type uuid: str
|
|
|
|
:type cache: str
|
|
|
|
:rtype Program
|
|
|
|
"""
|
|
|
|
if not uuid:
|
|
|
|
return None
|
|
|
|
|
|
|
|
def update():
|
|
|
|
""" Fetch the program metadata """
|
|
|
|
# Fetch webpage
|
|
|
|
result = self._get_url(self.SITE_URL + '/api/program/%s' % uuid)
|
|
|
|
data = json.loads(result)
|
|
|
|
return data
|
|
|
|
|
|
|
|
# Fetch listing from cache or update if needed
|
|
|
|
data = self._handle_cache(key=['program', uuid], cache_mode=cache, update=update)
|
|
|
|
if not data:
|
|
|
|
return None
|
|
|
|
|
|
|
|
program = self._parse_program_data(data)
|
|
|
|
|
|
|
|
return program
|
|
|
|
|
2021-02-01 08:53:13 +01:00
|
|
|
    def get_episode(self, path, cache=CACHE_AUTO):
        """ Get a Episode object from the specified page.
        :type path: str
        :type cache: str
        :rtype Episode
        """

        def update():
            """ Fetch the program metadata by scraping """
            # Load webpage
            page = self._get_url(self.SITE_URL + '/' + path)

            program_json = None
            episode_json = None

            # Extract video JSON by looking for a data-video tag
            # This is not present on every page
            regex_video_data = re.compile(r'data-video="([^"]+)"', re.DOTALL)
            result = regex_video_data.search(page)
            if result:
                # The tag only carries the id; fetch the full video JSON from the API
                video_id = json.loads(unescape(result.group(1)))['id']
                video_json_data = self._get_url('%s/api/video/%s' % (self.SITE_URL, video_id))
                video_json = json.loads(video_json_data)
                return dict(video=video_json)

            # Extract program JSON (fallback when no data-video tag was found)
            regex_program = re.compile(r'data-hero="([^"]+)', re.DOTALL)
            result = regex_program.search(page)
            if result:
                program_json_data = unescape(result.group(1))
                program_json = json.loads(program_json_data)['data']

            # Extract episode JSON (Drupal settings blob identifying the current node)
            regex_episode = re.compile(r'<script type="application/json" data-drupal-selector="drupal-settings-json">(.*?)</script>', re.DOTALL)
            result = regex_episode.search(page)
            if result:
                episode_json_data = unescape(result.group(1))
                episode_json = json.loads(episode_json_data)

            return dict(program=program_json, episode=episode_json)

        # Fetch listing from cache or update if needed
        data = self._handle_cache(key=['episode', path], cache_mode=cache, update=update)
        if not data:
            return None

        if 'video' in data and data['video']:
            # We have found detailed episode information
            episode = self._parse_episode_data(data['video'])
            return episode

        if 'program' in data and 'episode' in data and data['program'] and data['episode']:
            # We don't have detailed episode information
            # We need to lookup the episode in the program JSON by its node id
            program = self._parse_program_data(data['program'])
            for episode in program.episodes:
                if episode.nodeid == data['episode']['pageInfo']['nodeId']:
                    return episode

        # No usable data found on this page
        return None
|
|
|
|
|
2020-03-26 11:31:28 +01:00
|
|
|
    def get_stream_by_uuid(self, uuid):
        """ Get the stream URL to use for this video.
        :type uuid: str
        :rtype ResolvedStream
        :raises UnavailableException: when the API returns no data for this uuid
        """
        response = self._get_url(self.API_VIERVIJFZES + '/content/%s' % uuid, authentication=True)
        data = json.loads(response)

        if not data:
            raise UnavailableException

        # NOTE(review): values are wrapped in {'S': ...} envelopes — presumably
        # DynamoDB-style attribute encoding; confirm against the API.
        if 'videoDash' in data:
            # DRM protected stream
            # See https://docs.unified-streaming.com/documentation/drm/buydrm.html#setting-up-the-client
            drm_key = data['drmKey']['S']

            _LOGGER.debug('Fetching Authentication XML with drm_key %s', drm_key)
            response_drm = self._get_url(self.API_GOPLAY + '/video/xml/%s' % drm_key, authentication=True)
            data_drm = json.loads(response_drm)

            return ResolvedStream(
                uuid=uuid,
                url=data['videoDash']['S'],
                stream_type=STREAM_DASH,
                license_url='https://wv-keyos.licensekeyserver.com/',
                auth=data_drm.get('auth'),
            )

        # Normal HLS stream
        return ResolvedStream(
            uuid=uuid,
            url=data['video']['S'],
            stream_type=STREAM_HLS,
        )
|
2020-03-26 11:31:28 +01:00
|
|
|
|
2021-02-17 07:42:24 +01:00
|
|
|
def get_program_tree(self, cache=CACHE_AUTO):
|
|
|
|
""" Get a content tree with information about all the programs.
|
|
|
|
:type cache: str
|
|
|
|
:rtype dict
|
|
|
|
"""
|
|
|
|
|
|
|
|
def update():
|
|
|
|
""" Fetch the content tree """
|
|
|
|
response = self._get_url(self.SITE_URL + '/api/content_tree')
|
|
|
|
return json.loads(response)
|
|
|
|
|
|
|
|
# Fetch listing from cache or update if needed
|
|
|
|
data = self._handle_cache(key=['content_tree'], cache_mode=cache, update=update, ttl=5 * 60) # 5 minutes
|
|
|
|
|
|
|
|
return data
|
|
|
|
|
|
|
|
def get_popular_programs(self, brand=None):
|
|
|
|
""" Get a list of popular programs.
|
|
|
|
:rtype list[Program]
|
|
|
|
"""
|
|
|
|
if brand:
|
|
|
|
response = self._get_url(self.SITE_URL + '/api/programs/popular/%s' % brand)
|
|
|
|
else:
|
|
|
|
response = self._get_url(self.SITE_URL + '/api/programs/popular')
|
|
|
|
data = json.loads(response)
|
|
|
|
|
|
|
|
programs = []
|
|
|
|
for program in data:
|
|
|
|
programs.append(self._parse_program_data(program))
|
|
|
|
|
|
|
|
return programs
|
|
|
|
|
|
|
|
def get_categories(self):
|
|
|
|
""" Return a list of categories.
|
|
|
|
:rtype list[Category]
|
|
|
|
"""
|
|
|
|
content_tree = self.get_program_tree()
|
|
|
|
|
|
|
|
categories = []
|
|
|
|
for category_id, category_name in content_tree.get('categories').items():
|
|
|
|
categories.append(Category(uuid=category_id,
|
|
|
|
title=category_name))
|
|
|
|
|
|
|
|
return categories
|
|
|
|
|
|
|
|
def get_category_content(self, category_id):
|
|
|
|
""" Return a category.
|
|
|
|
:type category_id: int
|
|
|
|
:rtype list[Program]
|
|
|
|
"""
|
|
|
|
content_tree = self.get_program_tree()
|
|
|
|
|
|
|
|
# Find out all the program_id's of the requested category
|
|
|
|
program_ids = [key for key, value in content_tree.get('programs').items() if value.get('category') == category_id]
|
|
|
|
|
|
|
|
# Filter out the list of all programs to only keep the one of the requested category
|
|
|
|
return [program for program in self.get_programs() if program.uuid in program_ids]
|
|
|
|
|
|
|
|
    def get_recommendation_categories(self):
        """ Get a list of all categories.
        :rtype list[Category]
        """
        # Load all programs (so scraped teasers can be enriched with full metadata)
        all_programs = self.get_programs()

        # Load webpage
        raw_html = self._get_url(self.SITE_URL)

        # Categories regexes
        regex_articles = re.compile(r'<article[^>]+>(.*?)</article>', re.DOTALL)
        # group(1) = category heading, optional group(2) = extra (visually hidden) label
        regex_category = re.compile(r'<h2.*?>(.*?)</h2>(?:.*?<div class="visually-hidden">(.*?)</div>)?', re.DOTALL)

        categories = []
        for result in regex_articles.finditer(raw_html):
            article_html = result.group(1)

            match_category = regex_category.search(article_html)
            category_title = None
            if match_category:
                category_title = match_category.group(1).strip()
                # Append the hidden label in bold ([B]...[/B] is Kodi markup)
                if match_category.group(2):
                    category_title += ' [B]%s[/B]' % match_category.group(2).strip()

            if category_title:
                # Extract programs and lookup in all_programs so we have more metadata
                programs = []
                for program in self._extract_programs(article_html):
                    try:
                        rich_program = next(rich_program for rich_program in all_programs if rich_program.path == program.path)
                        programs.append(rich_program)
                    except StopIteration:
                        # Not in the full listing; keep the sparse scraped version
                        programs.append(program)

                episodes = self._extract_videos(article_html)

                # The category has no stable id on the page, so derive one from its title
                categories.append(
                    Category(uuid=hashlib.md5(category_title.encode('utf-8')).hexdigest(), title=category_title, programs=programs, episodes=episodes))

        return categories
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _extract_programs(html):
|
|
|
|
""" Extract Programs from HTML code
|
|
|
|
:type html: str
|
|
|
|
:rtype list[Program]
|
|
|
|
"""
|
|
|
|
# Item regexes
|
|
|
|
regex_item = re.compile(r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>'
|
|
|
|
r'.*?<h3 class="poster-teaser__title">(?P<title>[^<]*)</h3>.*?data-background-image="(?P<image>.*?)".*?'
|
|
|
|
r'</a>', re.DOTALL)
|
|
|
|
|
|
|
|
# Extract items
|
|
|
|
programs = []
|
|
|
|
for item in regex_item.finditer(html):
|
|
|
|
path = item.group('path')
|
|
|
|
if path.startswith('/video'):
|
|
|
|
continue
|
|
|
|
|
|
|
|
# Program
|
|
|
|
programs.append(Program(
|
|
|
|
path=path.lstrip('/'),
|
|
|
|
title=unescape(item.group('title')),
|
2021-03-19 16:45:26 +01:00
|
|
|
poster=unescape(item.group('image')),
|
2021-02-17 07:42:24 +01:00
|
|
|
))
|
|
|
|
|
|
|
|
return programs
|
2020-04-20 08:59:10 +02:00
|
|
|
|
|
|
|
    @staticmethod
    def _extract_videos(html):
        """ Extract videos from HTML code
        :type html: str
        :rtype list[Episode]
        """
        # Item regexes
        regex_item = re.compile(r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>.*?</a>', re.DOTALL)

        # Per-field regexes, applied to each anchor's inner HTML
        regex_episode_program = re.compile(r'<h3 class="episode-teaser__subtitle">([^<]*)</h3>')
        regex_episode_title = re.compile(r'<(?:div|h3) class="(?:poster|card|image|episode)-teaser__title">(?:<span>)?([^<]*)(?:</span>)?</(?:div|h3)>')
        regex_episode_duration = re.compile(r'data-duration="([^"]*)"')
        regex_episode_video_id = re.compile(r'data-video-id="([^"]*)"')
        regex_episode_image = re.compile(r'data-background-image="([^"]*)"')
        regex_episode_badge = re.compile(r'<div class="(?:poster|card|image|episode)-teaser__badge badge">([^<]*)</div>')

        # Extract items
        episodes = []
        for item in regex_item.finditer(html):
            item_html = item.group(0)
            path = item.group('path')

            # Extract title (no title means this anchor is not an episode teaser)
            try:
                title = unescape(regex_episode_title.search(item_html).group(1))
            except AttributeError:
                continue

            # This is not a video
            if not path.startswith('/video'):
                continue

            # Each field below is optional: a failed regex search returns None,
            # and .group() on None raises AttributeError.
            try:
                episode_program = regex_episode_program.search(item_html).group(1)
            except AttributeError:
                _LOGGER.warning('Found no episode_program for %s', title)
                episode_program = None

            try:
                episode_duration = int(regex_episode_duration.search(item_html).group(1))
            except AttributeError:
                _LOGGER.warning('Found no episode_duration for %s', title)
                episode_duration = None

            try:
                episode_video_id = regex_episode_video_id.search(item_html).group(1)
            except AttributeError:
                _LOGGER.warning('Found no episode_video_id for %s', title)
                episode_video_id = None

            try:
                episode_image = unescape(regex_episode_image.search(item_html).group(1))
            except AttributeError:
                _LOGGER.warning('Found no episode_image for %s', title)
                episode_image = None

            try:
                episode_badge = unescape(regex_episode_badge.search(item_html).group(1))
            except AttributeError:
                episode_badge = None

            # Append the badge to the description in bold ([B]...[/B] is Kodi markup)
            description = title
            if episode_badge:
                description += "\n\n[B]%s[/B]" % episode_badge

            # Episode
            episodes.append(Episode(
                path=path.lstrip('/'),
                channel='',  # TODO
                title=title,
                description=html_to_kodi(description),
                duration=episode_duration,
                uuid=episode_video_id,
                thumb=episode_image,
                program_title=episode_program,
            ))

        return episodes
|
|
|
|
|
2020-03-19 16:45:31 +01:00
|
|
|
@staticmethod
|
|
|
|
def _parse_program_data(data):
|
|
|
|
""" Parse the Program JSON.
|
|
|
|
:type data: dict
|
|
|
|
:rtype Program
|
|
|
|
"""
|
|
|
|
# Create Program info
|
|
|
|
program = Program(
|
2021-09-15 16:18:53 +02:00
|
|
|
uuid=data.get('id'),
|
|
|
|
path=data.get('link').lstrip('/'),
|
|
|
|
channel=data.get('pageInfo').get('brand'),
|
|
|
|
title=data.get('title'),
|
|
|
|
description=html_to_kodi(data.get('description')),
|
|
|
|
aired=datetime.fromtimestamp(data.get('pageInfo', {}).get('publishDate', 0.0)),
|
|
|
|
poster=data.get('images').get('poster'),
|
|
|
|
thumb=data.get('images').get('teaser'),
|
|
|
|
fanart=data.get('images').get('teaser'),
|
2020-03-19 16:45:31 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
# Create Season info
|
|
|
|
program.seasons = {
|
2020-03-22 15:37:15 +01:00
|
|
|
key: Season(
|
2021-09-15 16:18:53 +02:00
|
|
|
uuid=playlist.get('id'),
|
|
|
|
path=playlist.get('link').lstrip('/'),
|
|
|
|
channel=playlist.get('pageInfo').get('brand'),
|
|
|
|
title=playlist.get('title'),
|
2021-02-16 16:57:52 +01:00
|
|
|
description=html_to_kodi(playlist.get('description')),
|
2021-09-15 16:18:53 +02:00
|
|
|
number=playlist.get('episodes')[0].get('seasonNumber'), # You did not see this
|
2020-03-19 16:45:31 +01:00
|
|
|
)
|
2021-09-15 16:18:53 +02:00
|
|
|
for key, playlist in enumerate(data.get('playlists', [])) if playlist.get('episodes')
|
2020-03-19 16:45:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
# Create Episodes info
|
|
|
|
program.episodes = [
|
2021-09-15 16:18:53 +02:00
|
|
|
ContentApi._parse_episode_data(episode, playlist.get('id'))
|
|
|
|
for playlist in data.get('playlists', [])
|
|
|
|
for episode in playlist.get('episodes')
|
2020-03-19 16:45:31 +01:00
|
|
|
]
|
|
|
|
|
|
|
|
return program
|
|
|
|
|
|
|
|
@staticmethod
|
2020-04-20 08:59:10 +02:00
|
|
|
def _parse_episode_data(data, season_uuid=None):
|
2020-03-19 16:45:31 +01:00
|
|
|
""" Parse the Episode JSON.
|
|
|
|
:type data: dict
|
2020-03-22 15:37:15 +01:00
|
|
|
:type season_uuid: str
|
2020-03-19 16:45:31 +01:00
|
|
|
:rtype Episode
|
|
|
|
"""
|
|
|
|
|
|
|
|
if data.get('episodeNumber'):
|
|
|
|
episode_number = data.get('episodeNumber')
|
|
|
|
else:
|
|
|
|
# The episodeNumber can be absent
|
|
|
|
match = re.compile(r'\d+$').search(data.get('title'))
|
|
|
|
if match:
|
|
|
|
episode_number = match.group(0)
|
|
|
|
else:
|
|
|
|
episode_number = None
|
|
|
|
|
|
|
|
episode = Episode(
|
|
|
|
uuid=data.get('videoUuid'),
|
|
|
|
nodeid=data.get('pageInfo', {}).get('nodeId'),
|
|
|
|
path=data.get('link').lstrip('/'),
|
|
|
|
channel=data.get('pageInfo', {}).get('site'),
|
2020-10-26 09:57:23 +01:00
|
|
|
program_title=data.get('program', {}).get('title') if data.get('program') else data.get('title'),
|
2020-03-22 15:37:15 +01:00
|
|
|
title=data.get('title'),
|
2021-02-16 16:57:52 +01:00
|
|
|
description=html_to_kodi(data.get('description')),
|
2021-03-19 16:45:26 +01:00
|
|
|
thumb=data.get('image'),
|
2020-03-19 16:45:31 +01:00
|
|
|
duration=data.get('duration'),
|
|
|
|
season=data.get('seasonNumber'),
|
2020-03-22 15:37:15 +01:00
|
|
|
season_uuid=season_uuid,
|
2020-03-19 16:45:31 +01:00
|
|
|
number=episode_number,
|
|
|
|
aired=datetime.fromtimestamp(data.get('createdDate')),
|
|
|
|
expiry=datetime.fromtimestamp(data.get('unpublishDate')) if data.get('unpublishDate') else None,
|
2020-04-20 08:59:10 +02:00
|
|
|
rating=data.get('parentalRating'),
|
|
|
|
stream=data.get('path'),
|
2020-03-19 16:45:31 +01:00
|
|
|
)
|
|
|
|
return episode
|
|
|
|
|
2020-03-22 15:37:15 +01:00
|
|
|
def _get_url(self, url, params=None, authentication=False):
|
2020-03-19 16:45:31 +01:00
|
|
|
""" Makes a GET request for the specified URL.
|
|
|
|
:type url: str
|
|
|
|
:rtype str
|
|
|
|
"""
|
2020-03-22 15:37:15 +01:00
|
|
|
if authentication:
|
|
|
|
if not self._auth:
|
|
|
|
raise Exception('Requested to authenticate, but not auth object passed')
|
2020-03-21 20:34:07 +01:00
|
|
|
response = self._session.get(url, params=params, headers={
|
2020-03-22 15:37:15 +01:00
|
|
|
'authorization': self._auth.get_token(),
|
2020-03-21 20:34:07 +01:00
|
|
|
})
|
|
|
|
else:
|
|
|
|
response = self._session.get(url, params=params)
|
2020-03-19 16:45:31 +01:00
|
|
|
|
|
|
|
if response.status_code != 200:
|
2020-03-22 15:37:15 +01:00
|
|
|
_LOGGER.error(response.text)
|
2020-03-19 16:45:31 +01:00
|
|
|
raise Exception('Could not fetch data')
|
|
|
|
|
|
|
|
return response.text
|
2020-04-01 11:01:22 +02:00
|
|
|
|
|
|
|
    def _handle_cache(self, key, cache_mode, update, ttl=30 * 24 * 60 * 60):
        """ Fetch something from the cache, and update if needed
        :type key: list[str]  # path components of the cache entry
        :type cache_mode: int  # one of CACHE_AUTO, CACHE_ONLY, CACHE_PREVENT
        :type update: callable  # fetches fresh data; its result is cached when truthy
        :type ttl: int  # time-to-live in seconds, defaults to 30 days
        """
        if cache_mode in [CACHE_AUTO, CACHE_ONLY]:
            # Try to fetch from cache
            data = self._get_cache(key)
            if data is None and cache_mode == CACHE_ONLY:
                return None
        else:
            # CACHE_PREVENT: skip the cache and always refresh
            data = None

        if data is None:
            try:
                # Fetch fresh data
                _LOGGER.debug('Fetching fresh data for key %s', '.'.join(key))
                data = update()
                if data:
                    # Store fresh response in cache
                    self._set_cache(key, data, ttl)
            except Exception as exc:  # pylint: disable=broad-except
                # Best-effort fallback: serve stale data rather than failing outright
                _LOGGER.warning('Something went wrong when refreshing live data: %s. Using expired cached values.', exc)
                data = self._get_cache(key, allow_expired=True)

        return data
|
|
|
|
|
|
|
|
def _get_cache(self, key, allow_expired=False):
|
|
|
|
""" Get an item from the cache """
|
2020-04-20 08:59:10 +02:00
|
|
|
filename = ('.'.join(key) + '.json').replace('/', '_')
|
2020-09-10 21:06:02 +02:00
|
|
|
fullpath = os.path.join(self._cache_path, filename)
|
2020-04-01 11:01:22 +02:00
|
|
|
|
|
|
|
if not os.path.exists(fullpath):
|
|
|
|
return None
|
|
|
|
|
|
|
|
if not allow_expired and os.stat(fullpath).st_mtime < time.time():
|
|
|
|
return None
|
|
|
|
|
|
|
|
with open(fullpath, 'r') as fdesc:
|
|
|
|
try:
|
|
|
|
_LOGGER.debug('Fetching %s from cache', filename)
|
|
|
|
value = json.load(fdesc)
|
|
|
|
return value
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
return None
|
|
|
|
|
|
|
|
def _set_cache(self, key, data, ttl):
|
|
|
|
""" Store an item in the cache """
|
2020-04-20 08:59:10 +02:00
|
|
|
filename = ('.'.join(key) + '.json').replace('/', '_')
|
2020-07-09 10:16:45 +02:00
|
|
|
fullpath = os.path.join(self._cache_path, filename)
|
2020-04-01 11:01:22 +02:00
|
|
|
|
|
|
|
if not os.path.exists(self._cache_path):
|
2020-07-09 10:16:45 +02:00
|
|
|
os.makedirs(self._cache_path)
|
2020-04-01 11:01:22 +02:00
|
|
|
|
|
|
|
with open(fullpath, 'w') as fdesc:
|
|
|
|
_LOGGER.debug('Storing to cache as %s', filename)
|
|
|
|
json.dump(data, fdesc)
|
|
|
|
|
|
|
|
# Set TTL by modifying modification date
|
|
|
|
deadline = int(time.time()) + ttl
|
|
|
|
os.utime(fullpath, (deadline, deadline))
|