plugin.video.viervijfzes/resources/lib/viervijfzes/content.py

639 lines
22 KiB
Python
Raw Normal View History

2020-03-19 16:45:31 +01:00
# -*- coding: utf-8 -*-
""" CONTENT API """
from __future__ import absolute_import, division, unicode_literals
import json
import logging
import os
2020-03-19 16:45:31 +01:00
import re
import time
2020-03-19 16:45:31 +01:00
from datetime import datetime
2020-03-22 10:30:23 +01:00
import requests
2020-03-19 16:45:31 +01:00
from resources.lib.kodiutils import STREAM_DASH, STREAM_HLS
from resources.lib.viervijfzes import ResolvedStream
2020-03-19 16:45:31 +01:00
2020-11-30 10:15:52 +01:00
# `unescape` decodes HTML entities; its location differs between Python 3 and Python 2.
try:  # Python 3
    from html import unescape
except ImportError:  # Python 2
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

# Module-level logger, named after this module
_LOGGER = logging.getLogger(__name__)

# Cache modes accepted by the `cache` / `cache_mode` arguments of ContentApi
CACHE_AUTO = 1  # Allow to use the cache, and query the API if no cache is available
CACHE_ONLY = 2  # Only use the cache, don't use the API
CACHE_PREVENT = 3  # Don't use the cache
2020-03-19 16:45:31 +01:00
class UnavailableException(Exception):
    """ Raised when a requested item is unavailable. """
class NoContentException(Exception):
    """ Is thrown when no items are available. """
class GeoblockedException(Exception):
    """ Raised when playback of a geoblocked item is attempted. """
class Program:
    """ Defines a Program. """

    def __init__(self, uuid=None, path=None, channel=None, title=None, description=None, aired=None, cover=None, background=None, seasons=None, episodes=None,
                 clips=None):
        """ Store the metadata of a program. Every field is optional and defaults to None.

        :type uuid: str
        :type path: str
        :type channel: str
        :type title: str
        :type description: str
        :type aired: datetime
        :type cover: str
        :type background: str
        :type seasons: dict[int, Season]
        :type episodes: list[Episode]
        :type clips: list[Episode]
        """
        self.uuid = uuid
        self.path = path
        self.channel = channel
        self.title = title
        self.description = description
        self.aired = aired
        self.cover = cover
        self.background = background
        # ContentApi._parse_program_data fills this with a dict keyed by the playlist index, not a list.
        self.seasons = seasons
        self.episodes = episodes
        self.clips = clips

    def __repr__(self):
        """ Return the repr of the attribute dict, which is handy when debugging. """
        return "%r" % self.__dict__
class Season:
    """ Defines a Season. """

    def __init__(self, uuid=None, path=None, channel=None, title=None, description=None, cover=None, number=None):
        """ Keep the metadata describing one season. All fields default to None.

        :type uuid: str
        :type path: str
        :type channel: str
        :type title: str
        :type description: str
        :type cover: str
        :type number: int
        """
        # Assignments stay in declaration order so that __dict__ (and thus repr) keeps its ordering.
        self.uuid, self.path, self.channel = uuid, path, channel
        self.title, self.description = title, description
        self.cover, self.number = cover, number

    def __repr__(self):
        """ Represent the season by its attribute dict. """
        return repr(self.__dict__)
class Episode:
    """ Defines an Episode. """

    def __init__(self, uuid=None, nodeid=None, path=None, channel=None, program_title=None, title=None, description=None, cover=None, background=None,
                 duration=None, season=None, season_uuid=None, number=None, rating=None, aired=None, expiry=None, stream=None):
        """ Keep the metadata describing one episode (or clip). All fields default to None.

        :type uuid: str
        :type nodeid: str
        :type path: str
        :type channel: str
        :type program_title: str
        :type title: str
        :type description: str
        :type cover: str
        :type background: str
        :type duration: int
        :type season: int
        :type season_uuid: str
        :type number: int
        :type rating: str
        :type aired: datetime
        :type expiry: datetime
        :type stream: str
        """
        # Assignments stay in declaration order so that __dict__ (and thus repr) keeps its ordering.
        self.uuid, self.nodeid, self.path, self.channel = uuid, nodeid, path, channel
        self.program_title, self.title, self.description = program_title, title, description
        self.cover, self.background = cover, background
        self.duration, self.season, self.season_uuid = duration, season, season_uuid
        self.number, self.rating = number, rating
        self.aired, self.expiry = aired, expiry
        self.stream = stream

    def __repr__(self):
        """ Represent the episode by its attribute dict. """
        return repr(self.__dict__)
class Category:
    """ Defines a Category. """

    def __init__(self, uuid=None, channel=None, title=None, programs=None, episodes=None):
        """ Keep the metadata describing one category. All fields default to None.

        :type uuid: str
        :type channel: str
        :type title: str
        :type programs: list[Program]
        :type episodes: list[Episode]
        """
        # Assignments stay in declaration order so that __dict__ (and thus repr) keeps its ordering.
        self.uuid, self.channel, self.title = uuid, channel, title
        self.programs, self.episodes = programs, episodes

    def __repr__(self):
        """ Represent the category by its attribute dict. """
        return repr(self.__dict__)
class ContentApi:
    """ GoPlay Content API"""
    SITE_URL = 'https://www.goplay.be'
    API_VIERVIJFZES = 'https://api.viervijfzes.be'
    API_GOPLAY = 'https://api.goplay.be'

    def __init__(self, auth=None, cache_path=None):
        """ Initialise object

        :param auth: Object exposing ``get_token()``; only required for authenticated requests.
        :param str cache_path: Folder used for the file cache. When empty, nothing is cached.
        """
        self._session = requests.session()
        self._auth = auth
        self._cache_path = cache_path

    def get_programs(self, channel=None, cache=CACHE_AUTO):
        """ Get a list of all programs of the specified channel.

        :type channel: str
        :type cache: int
        :rtype list[Program]
        """

        def update():
            """ Fetch the program listing by scraping """
            # Load webpage
            raw_html = self._get_url(self.SITE_URL + '/programmas')

            # Parse programs
            regex_programs = re.compile(r'data-program="(?P<json>[^"]+)"', re.DOTALL)
            data = [
                json.loads(unescape(item.group('json')))
                for item in regex_programs.finditer(raw_html)
            ]

            if not data:
                raise Exception('No programs found')

            return data

        # Fetch listing from cache or update if needed
        # NOTE(review): this ttl evaluates to 150 seconds; confirm that is the intended lifetime.
        data = self._handle_cache(key=['programs'], cache_mode=cache, update=update, ttl=30 * 5)
        if not data:
            return []

        # Without a channel every program is returned, otherwise only those of the matching brand
        return [
            self._parse_program_data(record)
            for record in data
            if not channel or record['pageInfo']['brand'] == channel
        ]

    def get_program(self, path, extract_clips=False, cache=CACHE_AUTO):
        """ Get a Program object from the specified page.

        :type path: str
        :type extract_clips: bool
        :type cache: int
        :rtype Program
        """
        # We want to use the html to extract clips
        # This is the worst hack, since Python 2.7 doesn't support nonlocal
        raw_html = [None]

        def update():
            """ Fetch the program metadata by scraping """
            # Fetch webpage
            page = self._get_url(self.SITE_URL + '/' + path)

            # Store a copy in the parent's raw_html var.
            raw_html[0] = page

            # Extract JSON
            regex_program = re.compile(r'data-hero="([^"]+)', re.DOTALL)
            result = regex_program.search(page)
            if not result:
                # Raise a descriptive error; _handle_cache catches it and falls back to the cache.
                raise Exception('No program data found on %s' % path)

            json_data = unescape(result.group(1))
            data = json.loads(json_data)['data']

            return data

        # Fetch listing from cache or update if needed
        data = self._handle_cache(key=['program', path], cache_mode=cache, update=update)
        if not data:
            return None

        program = self._parse_program_data(data)

        # Also extract clips if we did a real HTTP call
        if extract_clips and raw_html[0]:
            program.clips = self._extract_videos(raw_html[0])

        return program

    def get_episode(self, path, cache=CACHE_AUTO):
        """ Get a Episode object from the specified page.

        :type path: str
        :type cache: int
        :rtype Episode
        """

        def update():
            """ Fetch the episode metadata by scraping """
            # Load webpage
            page = self._get_url(self.SITE_URL + '/' + path)

            program_json = None
            episode_json = None

            # Extract video JSON by looking for a data-video tag
            # This is not present on every page
            regex_video_data = re.compile(r'data-video="([^"]+)"', re.DOTALL)
            result = regex_video_data.search(page)
            if result:
                video_id = json.loads(unescape(result.group(1)))['id']
                video_json_data = self._get_url('%s/api/video/%s' % (self.SITE_URL, video_id))
                video_json = json.loads(video_json_data)
                return dict(video=video_json)

            # Extract program JSON
            regex_program = re.compile(r'data-hero="([^"]+)', re.DOTALL)
            result = regex_program.search(page)
            if result:
                program_json_data = unescape(result.group(1))
                program_json = json.loads(program_json_data)['data']

            # Extract episode JSON
            regex_episode = re.compile(r'<script type="application/json" data-drupal-selector="drupal-settings-json">(.*?)</script>', re.DOTALL)
            result = regex_episode.search(page)
            if result:
                episode_json_data = unescape(result.group(1))
                episode_json = json.loads(episode_json_data)

            return dict(program=program_json, episode=episode_json)

        # Fetch listing from cache or update if needed
        data = self._handle_cache(key=['episode', path], cache_mode=cache, update=update)
        if not data:
            return None

        if data.get('video'):
            # We have found detailed episode information
            return self._parse_episode_data(data['video'])

        if data.get('program') and data.get('episode'):
            # We don't have detailed episode information
            # We need to lookup the episode in the program JSON
            program = self._parse_program_data(data['program'])
            for episode in program.episodes:
                if episode.nodeid == data['episode']['pageInfo']['nodeId']:
                    return episode

        return None

    def get_stream_by_uuid(self, uuid):
        """ Get the stream to use for this video.

        :type uuid: str
        :rtype ResolvedStream
        """
        response = self._get_url(self.API_VIERVIJFZES + '/content/%s' % uuid, authentication=True)
        data = json.loads(response)

        if 'videoDash' in data:
            # DRM protected stream
            # See https://docs.unified-streaming.com/documentation/drm/buydrm.html#setting-up-the-client
            drm_key = data['drmKey']['S']

            _LOGGER.debug('Fetching Authentication XML with drm_key %s', drm_key)
            response_drm = self._get_url(self.API_GOPLAY + '/restricted/decode/%s' % drm_key, authentication=True)
            data_drm = json.loads(response_drm)

            return ResolvedStream(
                uuid=uuid,
                url=data['videoDash']['S'],
                stream_type=STREAM_DASH,
                license_url='https://wv-keyos.licensekeyserver.com/',
                auth=data_drm.get('auth'),
            )

        # Normal HLS stream
        return ResolvedStream(
            uuid=uuid,
            url=data['video']['S'],
            stream_type=STREAM_HLS,
        )

    # NOTE: the category scraping below is disabled and kept for reference only.
    # def get_categories(self):
    #     """ Get a list of all categories.
    #     :rtype list[Category]
    #     """
    #     # Load webpage
    #     raw_html = self._get_url(self.SITE_URL)
    #
    #     # Categories regexes
    #     regex_articles = re.compile(r'<article([^>]+)>(.*?)</article>', re.DOTALL)
    #     regex_submenu_id = re.compile(r'data-submenu-id="([^"]*)"')  # splitted since the order might change
    #     regex_submenu_title = re.compile(r'data-submenu-title="([^"]*)"')
    #
    #     categories = []
    #     for result in regex_articles.finditer(raw_html):
    #         article_info_html = result.group(1)
    #         article_html = result.group(2)
    #         category_title = regex_submenu_title.search(article_info_html).group(1)
    #         category_id = regex_submenu_id.search(article_info_html).group(1)
    #
    #         # Skip empty categories or 'All programs'
    #         if not category_id or category_id == 'programmas':
    #             continue
    #
    #         # Extract items
    #         programs = self._extract_programs(article_html, channel)
    #         episodes = self._extract_videos(article_html)
    #         categories.append(Category(uuid=category_id, channel=channel, title=category_title, programs=programs, episodes=episodes))
    #
    #     return categories

    # @staticmethod
    # def _extract_programs(html, channel):
    #     """ Extract Programs from HTML code """
    #     # Item regexes
    #     regex_item = re.compile(r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>'
    #                             r'.*?<h3 class="poster-teaser__title"><span>(?P<title>[^<]*)</span></h3>.*?'
    #                             r'</a>', re.DOTALL)
    #
    #     # Extract items
    #     programs = []
    #     for item in regex_item.finditer(html):
    #         path = item.group('path')
    #         if path.startswith('/video'):
    #             continue
    #
    #         title = unescape(item.group('title'))
    #
    #         # Program
    #         programs.append(Program(
    #             path=path.lstrip('/'),
    #             channel=channel,
    #             title=title,
    #         ))
    #
    #     return programs

    @staticmethod
    def _extract_videos(html):
        """ Extract videos from HTML code

        :type html: str
        :rtype list[Episode]
        """
        # Item regexes
        regex_item = re.compile(r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>.*?</a>', re.DOTALL)

        # Episode regexes
        regex_episode_title = re.compile(r'<h3 class="(?:poster|card|image)-teaser__title">(?:<span>)?([^<]*)(?:</span>)?</h3>')
        regex_episode_program = re.compile(r'<div class="card-teaser__label">([^<]*)</div>')
        regex_episode_duration = re.compile(r'data-duration="([^"]*)"')
        regex_episode_video_id = re.compile(r'data-videoid="([^"]*)"')
        regex_episode_image = re.compile(r'data-background-image="([^"]*)"')
        regex_episode_timestamp = re.compile(r'data-timestamp="([^"]*)"')

        def find(regex, item_html, label, title, convert=None):
            """ Return the (optionally converted) first group of regex in item_html, or None with a warning. """
            match = regex.search(item_html)
            try:
                value = match.group(1)
                return convert(value) if convert else value
            except (AttributeError, ValueError):
                # AttributeError: attribute missing. ValueError: attribute present but empty or
                # non-numeric (the regexes accept empty values, and int('') raises).
                _LOGGER.warning('Found no %s for %s', label, title)
                return None

        # Extract items
        episodes = []
        for item in regex_item.finditer(html):
            item_html = item.group(0)
            path = item.group('path')

            # This is not a episode
            if not path.startswith('/video'):
                continue

            # Extract title, links without a teaser title are skipped
            title_match = regex_episode_title.search(item_html)
            if not title_match:
                continue
            title = unescape(title_match.group(1))

            episode_program = find(regex_episode_program, item_html, 'episode_program', title)
            episode_duration = find(regex_episode_duration, item_html, 'episode_duration', title, convert=int)
            episode_video_id = find(regex_episode_video_id, item_html, 'episode_video_id', title)
            episode_image = find(regex_episode_image, item_html, 'episode_image', title, convert=unescape)
            episode_timestamp = find(regex_episode_timestamp, item_html, 'episode_timestamp', title, convert=int)

            # Episode
            episodes.append(Episode(
                path=path.lstrip('/'),
                channel='',  # TODO
                title=title,
                duration=episode_duration,
                uuid=episode_video_id,
                aired=datetime.fromtimestamp(episode_timestamp) if episode_timestamp else None,
                cover=episode_image,
                program_title=episode_program,
            ))

        return episodes

    @staticmethod
    def _parse_program_data(data):
        """ Parse the Program JSON.

        :type data: dict
        :rtype Program
        """
        page_info = data['pageInfo']
        publish_date = page_info.get('publishDate')
        playlists = data['playlists']

        # Create Program info
        program = Program(
            uuid=data['id'],
            path=data['link'].lstrip('/'),
            channel=page_info['brand'],
            title=data['title'],
            description=data['description'],
            # publishDate can be absent; fromtimestamp(None) would raise a TypeError
            aired=datetime.fromtimestamp(publish_date) if publish_date is not None else None,
            cover=data['images']['poster'],
            background=data['images']['hero'],
        )

        # Create Season info, keyed by the index of the playlist. Empty playlists are skipped.
        program.seasons = {
            key: Season(
                uuid=playlist['id'],
                path=playlist['link'].lstrip('/'),
                channel=playlist['pageInfo']['brand'],
                title=playlist['title'],
                description=playlist['pageInfo']['description'],
                # The season number is taken from the first episode of the playlist
                number=playlist['episodes'][0]['seasonNumber'],  # You did not see this
            )
            for key, playlist in enumerate(playlists) if playlist['episodes']
        }

        # Create Episodes info
        program.episodes = [
            ContentApi._parse_episode_data(episode, playlist['id'])
            for playlist in playlists
            for episode in playlist['episodes']
        ]

        return program

    @staticmethod
    def _parse_episode_data(data, season_uuid=None):
        """ Parse the Episode JSON.

        :type data: dict
        :type season_uuid: str
        :rtype Episode
        """
        title = data.get('title')

        episode_number = data.get('episodeNumber')
        if not episode_number:
            # The episodeNumber can be absent, fall back to a trailing number in the title.
            # It is converted to int so the type matches the episodeNumber from the JSON.
            match = re.search(r'\d+$', title or '')
            episode_number = int(match.group(0)) if match else None

        page_info = data.get('pageInfo', {})
        created_date = data.get('createdDate')
        unpublish_date = data.get('unpublishDate')

        episode = Episode(
            uuid=data.get('videoUuid'),
            nodeid=page_info.get('nodeId'),
            path=data.get('link').lstrip('/'),
            channel=page_info.get('site'),
            program_title=data.get('program', {}).get('title') if data.get('program') else title,
            title=title,
            description=page_info.get('description'),
            cover=data.get('image'),
            background=data.get('image'),
            duration=data.get('duration'),
            season=data.get('seasonNumber'),
            season_uuid=season_uuid,
            number=episode_number,
            # Guarded like expiry: fromtimestamp(None) would raise a TypeError
            aired=datetime.fromtimestamp(created_date) if created_date else None,
            expiry=datetime.fromtimestamp(unpublish_date) if unpublish_date else None,
            rating=data.get('parentalRating'),
            stream=data.get('path'),
        )
        return episode

    def _get_url(self, url, params=None, authentication=False):
        """ Makes a GET request for the specified URL.

        :type url: str
        :type params: dict
        :type authentication: bool
        :rtype str
        :raises Exception: When authentication is requested without an auth object, or the status code isn't 200.
        """
        headers = None
        if authentication:
            if not self._auth:
                raise Exception('Requested to authenticate, but not auth object passed')
            headers = {
                'authorization': self._auth.get_token(),
            }

        response = self._session.get(url, params=params, headers=headers)

        if response.status_code != 200:
            _LOGGER.error(response.text)
            raise Exception('Could not fetch data')

        return response.text

    def _handle_cache(self, key, cache_mode, update, ttl=30 * 24 * 60 * 60):
        """ Fetch something from the cache, and update if needed

        :param list[str] key: Parts of the cache key.
        :param int cache_mode: One of CACHE_AUTO, CACHE_ONLY or CACHE_PREVENT.
        :param update: Callable without arguments that returns fresh data.
        :param int ttl: Lifetime in seconds of a freshly stored value.
        :return: The cached or fresh data, or None when nothing is available.
        """
        data = None
        if cache_mode in [CACHE_AUTO, CACHE_ONLY]:
            # Try to fetch from cache
            data = self._get_cache(key)
            if data is None and cache_mode == CACHE_ONLY:
                return None

        if data is None:
            try:
                # Fetch fresh data
                _LOGGER.debug('Fetching fresh data for key %s', '.'.join(key))
                data = update()
                if data:
                    # Store fresh response in cache
                    self._set_cache(key, data, ttl)
            except Exception as exc:  # pylint: disable=broad-except
                # Deliberately best-effort: an expired value is preferred over failing
                _LOGGER.warning('Something went wrong when refreshing live data: %s. Using expired cached values.', exc)
                data = self._get_cache(key, allow_expired=True)

        return data

    @staticmethod
    def _get_cache_filename(key):
        """ Build the flat filename used to store the specified cache key. """
        return ('.'.join(key) + '.json').replace('/', '_')

    def _get_cache(self, key, allow_expired=False):
        """ Get an item from the cache

        :param list[str] key: Parts of the cache key.
        :param bool allow_expired: Also return values whose deadline has passed.
        :return: The decoded value, or None when missing, expired or unreadable.
        """
        if not self._cache_path:
            # No cache folder configured (the constructor default), so nothing can be cached
            return None

        filename = self._get_cache_filename(key)
        fullpath = os.path.join(self._cache_path, filename)

        if not os.path.exists(fullpath):
            return None

        # The modification time of the file holds the expiry deadline (see _set_cache)
        if not allow_expired and os.stat(fullpath).st_mtime < time.time():
            return None

        with open(fullpath, 'r') as fdesc:
            try:
                _LOGGER.debug('Fetching %s from cache', filename)
                value = json.load(fdesc)
                return value
            except (ValueError, TypeError):
                return None

    def _set_cache(self, key, data, ttl):
        """ Store an item in the cache

        :param list[str] key: Parts of the cache key.
        :param data: JSON serialisable value to store.
        :param int ttl: Lifetime in seconds.
        """
        if not self._cache_path:
            # No cache folder configured (the constructor default), so skip storing
            return

        filename = self._get_cache_filename(key)
        fullpath = os.path.join(self._cache_path, filename)

        if not os.path.exists(self._cache_path):
            os.makedirs(self._cache_path)

        with open(fullpath, 'w') as fdesc:
            _LOGGER.debug('Storing to cache as %s', filename)
            json.dump(data, fdesc)

        # Set TTL by modifying modification date
        deadline = int(time.time()) + ttl
        os.utime(fullpath, (deadline, deadline))