Skip to content
Snippets Groups Projects
utils.py 166 KiB
Newer Older
#!/usr/bin/env python3
Déstin Reed's avatar
Déstin Reed committed
# coding: utf-8
Philipp Hagemeister's avatar
Philipp Hagemeister committed
from __future__ import unicode_literals

Yen Chi Hsuan's avatar
Yen Chi Hsuan committed
import base64
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import functools
import hashlib
import hmac
import importlib.util
import subprocess
import time
import xml.etree.ElementTree
import mimetypes
try:
    import dateutil.parser
    HAVE_DATEUTIL = True
except (ImportError, SyntaxError):
    # dateutil is optional
    HAVE_DATEUTIL = False
from .compat import (
pukkandan's avatar
pukkandan committed
    compat_HTTPError,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
    compat_basestring,
    compat_ctypes_WINFUNCTYPE,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
Sergey M․'s avatar
Sergey M․ committed
    compat_integer_types,
    compat_numeric_types,
    compat_os_name,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
Sergey M․'s avatar
Sergey M․ committed
    compat_xpath,
from .socks import (
    ProxyType,
    sockssocket,
)
from .chrome_versions import versions as _CHROME_VERSIONS
def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT %s; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _WINDOWS_VERSIONS = (
        '6.1',  # 7
        '6.2',  # 8
        '6.3',  # 8.1
        '10.0',
    return _USER_AGENT_TPL % (random.choice(_WINDOWS_VERSIONS), random.choice(_CHROME_VERSIONS))
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
Jouke Waleson's avatar
Jouke Waleson committed

USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

Yen Chi Hsuan's avatar
Yen Chi Hsuan committed
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M:%S',
nao20010128nao's avatar
nao20010128nao committed
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
Yuan Chao's avatar
Yuan Chao committed
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
    """Get preferred encoding.
    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
Sergey M․'s avatar
Sergey M․ committed
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
Jouke Waleson's avatar
Jouke Waleson committed


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
Sergey M․'s avatar
Sergey M․ committed
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break
Sergey M․'s avatar
Sergey M․ committed
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
Sergey M․'s avatar
Sergey M․ committed
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    value = re.escape(value) if escape_value else value

         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]
Sergey M․'s avatar
Sergey M․ committed

class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
Sergey M․'s avatar
Sergey M․ committed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)

Sergey M․'s avatar
Sergey M․ committed

class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
Sergey M․'s avatar
Sergey M․ committed

def parse_list(webpage):
    """Given a string for an series of HTML <li> elements,
    return a dictionary of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # In case of error, try to remove win32 forbidden chars
Sergey M․'s avatar
Sergey M․ committed
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
Sergey M․'s avatar
Sergey M․ committed
            stream = open(encodeFilename(alt_filename), open_mode)
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
Jouke Waleson's avatar
Jouke Waleson committed

def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
Jouke Waleson's avatar
Jouke Waleson committed

def sanitize_path(s, force=False):
Sergey M․'s avatar
Sergey M․ committed
    """Sanitizes and normalizes path on Windows"""
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
Sergey M․'s avatar
Sergey M․ committed
        return s
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
Sergey M․'s avatar
Sergey M․ committed
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
Sergey M․'s avatar
Sergey M․ committed
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
Sergey M․'s avatar
Sergey M․ committed
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
Sergey M․'s avatar
Sergey M․ committed
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
Sergey M․'s avatar
Sergey M․ committed
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
Sergey M․'s avatar
Sergey M․ committed
    return '&%s;' % entity
    if s is None:
        return None
    assert type(s) == compat_str
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
        self.textmode = bool(
            kwargs.get('universal_newlines')
            or kwargs.get('encoding')
            or kwargs.get('errors')
            or kwargs.get('text')
            or isinstance(self.stdout, io.TextIOWrapper)
            or isinstance(self.stderr, io.TextIOWrapper)
            or isinstance(self.stdin, io.TextIOWrapper))

    def communicate_or_kill(self, *args, **kwargs):
        if self.textmode and args and isinstance(args[0], bytes):
            if isinstance(args, tuple):
                args = list(args)
            args[0] = args[0].decode()
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """
    assert type(s) == compat_str
    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s
    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')

def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
Philipp Hagemeister's avatar
Philipp Hagemeister committed
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
Jouke Waleson's avatar
Jouke Waleson committed

_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:

def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    opts_modern_tls_cipher = params.get('use_modern_tls_cipher')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    if opts_modern_tls_cipher:
        context.set_ciphers('ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:AES256-SHA')
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
def bug_reports_message(before=';'):
        update_cmd = 'type  yt-dlp -U  to update'
        update_cmd = 'see  https://github.com/yt-dlp/yt-dlp  on how to update'
    msg = 'please report this issue on  https://github.com/yt-dlp/yt-dlp .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""
Jouke Waleson's avatar
Jouke Waleson committed

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        if sys.exc_info()[0] in network_exceptions:
pukkandan's avatar
pukkandan committed
        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
Philipp Hagemeister's avatar
Philipp Hagemeister committed
        self.cause = cause
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
pukkandan's avatar
pukkandan committed
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))
    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super(GeoRestrictedError, self).__init__(msg, **kwargs)
class DownloadError(YoutubeDLError):
    """Download Error exception.
    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
Jouke Waleson's avatar
Jouke Waleson committed

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.