Newer
Older
import datetime
import email.utils
import email.header

Philipp Hagemeister
committed
import errno

Adam Thalhammer
committed
import itertools
import locale
import operator
import platform
import socket

Adam Thalhammer
committed
import ssl
import xml.etree.ElementTree
try:
import dateutil.parser
HAVE_DATEUTIL = True
except (ImportError, SyntaxError):
# dateutil is optional
HAVE_DATEUTIL = False
from .compat import (
compat_HTMLParseError,
compat_HTMLParser,
compat_cookiejar,
compat_ctypes_WINFUNCTYPE,

Jaime Marquínez Ferrándiz
committed
compat_etree_fromstring,
compat_html_entities,
compat_kwargs,
compat_shlex_quote,
compat_struct_pack,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_urllib_parse_urlunparse,
compat_urllib_parse_quote,
compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
from .socks import (
ProxyType,
sockssocket,
)
from .chrome_versions import versions as _CHROME_VERSIONS
def register_socks_protocols():
# "Register" SOCKS protocols
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
if scheme not in compat_urlparse.uses_netloc:
compat_urlparse.uses_netloc.append(scheme)
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

Sergey M․
committed
def random_user_agent():
_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT %s; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
_WINDOWS_VERSIONS = (
'6.1', # 7
'6.2', # 8
'6.3', # 8.1
'10.0',

Sergey M․
committed
)
return _USER_AGENT_TPL % (random.choice(_WINDOWS_VERSIONS), random.choice(_CHROME_VERSIONS))

Sergey M․
committed

Sergey M․
committed
'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us,en;q=0.5',
USER_AGENTS = {
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
MONTH_NAMES = {
'en': ENGLISH_MONTH_NAMES,
'fr': [
'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
'mkv', 'mka', 'mk3d',
'avi', 'divx',
'mov',
'asf', 'wmv', 'wma',
'3gp', '3g2',
'mp3',
'flac',
'ape',
'wav',
'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%B %drd %Y',
'%b %drd %Y',
'%b %drd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
'%b %d %Y at %H:%M',
'%b %d %Y at %H:%M:%S',
'%B %d %Y at %H:%M',
'%B %d %Y at %H:%M:%S',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding():
Returns the best encoding scheme for the system, based on
locale.getpreferredencoding() and some further tweaks.
"""
try:
pref = locale.getpreferredencoding()
def write_json_file(obj, fn):
""" Encode obj as JSON and write it to fn, atomically if possible """
fn = encodeFilename(fn)
if sys.version_info < (3, 0) and sys.platform != 'win32':

Jaime Marquínez Ferrándiz
committed
encoding = get_filesystem_encoding()
# os.path.basename returns a bytes object, but NamedTemporaryFile
# will fail if the filename contains non ascii characters unless we
# use a unicode object
path_basename = lambda f: os.path.basename(fn).decode(encoding)
# the same for os.path.dirname
path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
else:
path_basename = os.path.basename
path_dirname = os.path.dirname
args = {
'suffix': '.tmp',

Jaime Marquínez Ferrándiz
committed
'prefix': path_basename(fn) + '.',
'dir': path_dirname(fn),
# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3, 0):
args.update({
'mode': 'w',
'encoding': 'utf-8',
})
tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
try:
with tf:
json.dump(obj, tf, ensure_ascii=False)
if sys.platform == 'win32':
# Need to remove existing file on Windows, else os.rename raises
# WindowsError or FileExistsError.
try:
os.unlink(fn)
except OSError:
pass
try:
mask = os.umask(0)
os.umask(mask)
os.chmod(tf.name, 0o666 & ~mask)
except OSError:
pass
os.rename(tf.name, fn)
try:
os.remove(tf.name)
except OSError:
pass
raise
if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
return node.find(expr)
else:
def find_xpath_attr(node, xpath, key, val=None):
if key not in f.attrib:
continue
if val is None or f.attrib.get(key) == val:
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
components = [c.split(':') for c in path.split('/')]
replaced = []
for c in components:
if len(c) == 1:
replaced.append(c[0])
else:
ns, tag = c
replaced.append('{%s}%s' % (ns_map[ns], tag))
return '/'.join(replaced)
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
if isinstance(xpath, (str, compat_str)):
n = _find_xpath(xpath)
else:
for xp in xpath:
n = _find_xpath(xp)
if n is not None:
break
if default is not NO_DEFAULT:
return default
elif fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element %s' % name)
else:
return None
return n
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
n = xpath_element(node, xpath, name, fatal=fatal, default=default)
if n is None or n == default:
return n
if n.text is None:
if default is not NO_DEFAULT:
return default
elif fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element\'s text %s' % name)
else:
return None
return n.text
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
n = find_xpath_attr(node, xpath, key)
if n is None:
if default is not NO_DEFAULT:
return default
elif fatal:
name = '%s[@%s]' % (xpath, key) if name is None else name
raise ExtractorError('Could not find XML attribute %s' % name)
else:
return None
return n.attrib[key]
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):

Thomas Christlieb
committed
"""Return the content of the first tag with the specified class in the passed HTML document"""
retval = get_elements_by_class(class_name, html)
return retval[0] if retval else None
def get_element_by_attribute(attribute, value, html, escape_value=True):
retval = get_elements_by_attribute(attribute, value, html, escape_value)
return retval[0] if retval else None
def get_elements_by_class(class_name, html):
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
return get_elements_by_attribute(
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
html, escape_value=False)

Thomas Christlieb
committed
def get_elements_by_attribute(attribute, value, html, escape_value=True):
"""Return the content of the tag with the specified attribute in the passed HTML document"""
value = re.escape(value) if escape_value else value

Thomas Christlieb
committed
retlist = []
for m in re.finditer(r'''(?xs)

Philipp Hagemeister
committed
<([a-zA-Z0-9:._-]+)
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?

Philipp Hagemeister
committed
\s+%s=['"]?%s['"]?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?

Philipp Hagemeister
committed
\s*>
(?P<content>.*?)
</\1>

Thomas Christlieb
committed
''' % (re.escape(attribute), value), html):
res = m.group('content')

Philipp Hagemeister
committed

Thomas Christlieb
committed
if res.startswith('"') or res.startswith("'"):
res = res[1:-1]

Philipp Hagemeister
committed

Thomas Christlieb
committed
retlist.append(unescapeHTML(res))

Thomas Christlieb
committed
return retlist
class HTMLAttributeParser(compat_HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
compat_HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
class HTMLListAttrsParser(compat_HTMLParser):
"""HTML parser to gather the attributes for the elements of a list"""
def __init__(self):
compat_HTMLParser.__init__(self)
self.items = []
self._level = 0
def handle_starttag(self, tag, attrs):
if tag == 'li' and self._level == 0:
self.items.append(dict(attrs))
self._level += 1
def handle_endtag(self, tag):
self._level -= 1
def extract_attributes(html_element):
"""Given a string for an HTML element such as
<el
a="foo" B="bar" c="&98;az" d=boz
empty= noval entity="&"
sq='"' dq="'"
>
Decode and return a dictionary of attributes.
{
'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
'empty': '', 'noval': None, 'entity': '&',
'sq': '"', 'dq': '\''
}.
NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
try:
parser.feed(html_element)
parser.close()
# Older Python may throw HTMLParseError in case of malformed HTML
except compat_HTMLParseError:
pass
return parser.attrs
def parse_list(webpage):
"""Given a string for an series of HTML <li> elements,
return a dictionary of their attributes"""
parser = HTMLListAttrsParser()
parser.feed(webpage)
parser.close()
return parser.items
def clean_html(html):
"""Clean an HTML snippet into a readable string"""
if html is None: # Convenience for sanitizing descriptions etc.
return html
# Newline vs <br />
html = html.replace('\n', ' ')
html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
# Strip html tags
html = re.sub('<.*?>', '', html)
# Replace html entities
html = unescapeHTML(html)
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
Attempts to open the given filename. If this fails, it tries to change
the filename slightly, step by step, until it's either able to open it
or it fails and raises a final exception, like the standard open()
function.
It returns the tuple (stream, definitive_file_name).
"""
try:
if sys.platform == 'win32':
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
except (IOError, OSError) as err:

Philipp Hagemeister
committed
if err.errno in (errno.EACCES,):
raise

Philipp Hagemeister
committed
# In case of error, try to remove win32 forbidden chars

Philipp Hagemeister
committed
if alt_filename == filename:
raise
else:
# An exception here should be caught in the caller
stream = open(encodeFilename(alt_filename), open_mode)

Philipp Hagemeister
committed
return (stream, alt_filename)
def timeconvert(timestr):
"""Convert RFC 2822 defined time string into system timestamp"""
timestamp = None
timetuple = email.utils.parsedate_tz(timestr)
if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
Set is_id if this is not an arbitrary string, but an ID that should be kept
if possible.
"""
def replace_insane(char):
if restricted and char in ACCENT_CHARS:
return ACCENT_CHARS[char]
elif not restricted and char == '\n':
return ' '
elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
return '' if restricted else '\''
elif char == ':':
return '_-' if restricted else ' -'
elif char in '\\/|*<>':
return '_'
if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
return '_'
if restricted and ord(char) > 127:
return '_'
return char
# Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
if not is_id:
while '__' in result:
result = result.replace('__', '_')
result = result.strip('_')
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
if result.startswith('-'):
result = '_' + result[len('-'):]
if not result:
result = '_'
def sanitize_path(s, force=False):
if sys.platform == 'win32':
drive_or_unc, _ = os.path.splitdrive(s)
if sys.version_info < (2, 7) and not drive_or_unc:
drive_or_unc, _ = os.path.splitunc(s)
elif force:
drive_or_unc = ''
else:
norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
if drive_or_unc:
path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
elif force and s[0] == os.path.sep:
sanitized_path.insert(0, os.path.sep)
# Prepend protocol-less URLs with `http:` scheme in order to mitigate
# the number of unwanted failures due to missing protocol
if url.startswith('//'):
return 'http:%s' % url
# Fix some common typos seen so far
COMMON_TYPOS = (
# https://github.com/ytdl-org/youtube-dl/issues/15649
(r'^httpss://', r'https://'),
# https://bx1.be/lives/direct-tv/
(r'^rmtp([es]?)://', r'rtmp\1://'),
)
for mistake, fixup in COMMON_TYPOS:
if re.match(mistake, url):
return re.sub(mistake, fixup, url)
def extract_basic_auth(url):
parts = compat_urlparse.urlsplit(url)
if parts.username is None:
return url, None
url = compat_urlparse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
return url, 'Basic ' + auth_payload.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
if auth_header is not None:
headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
headers['Authorization'] = auth_header
return compat_urllib_request.Request(url, *args, **kwargs)
def expand_path(s):
"""Expand shell variables and ~"""
return os.path.expandvars(compat_expanduser(s))
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
for el in iterable:
if el not in res:
res.append(el)
return res
def _htmlentity_transform(entity_with_semicolon):
"""Transforms an HTML entity to a character."""
# Known non-numeric HTML entity
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
# TODO: HTML5 allows entities without a semicolon. For example,
# 'Éric' should be decoded as 'Éric'.
if entity_with_semicolon in compat_html_entities_html5:
return compat_html_entities_html5[entity_with_semicolon]
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
# See https://github.com/ytdl-org/youtube-dl/issues/7518
try:
return compat_chr(int(numstr, base))
except ValueError:
pass
# Unknown entity in name, return its literal representation
def unescapeHTML(s):
if s is None:
return None
assert type(s) == compat_str
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def escapeHTML(text):
return (
text
.replace('&', '&')
.replace('<', '<')
.replace('>', '>')
.replace('"', '"')
.replace("'", ''')
)

pukkandan
committed
def process_communicate_or_kill(p, *args, **kwargs):
try:
return p.communicate(*args, **kwargs)
except BaseException: # Including KeyboardInterrupt
p.kill()
p.wait()
raise
class Popen(subprocess.Popen):
if sys.platform == 'win32':
_startupinfo = subprocess.STARTUPINFO()
_startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
else:
_startupinfo = None
def __init__(self, *args, **kwargs):
super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
self.textmode = bool(
kwargs.get('universal_newlines')
or kwargs.get('encoding')
or kwargs.get('errors')
or kwargs.get('text')
or isinstance(self.stdout, io.TextIOWrapper)
or isinstance(self.stderr, io.TextIOWrapper)
or isinstance(self.stdin, io.TextIOWrapper))
def communicate_or_kill(self, *args, **kwargs):
if self.textmode and args and isinstance(args[0], bytes):
if isinstance(args, tuple):
args = list(args)
args[0] = args[0].decode()
return process_communicate_or_kill(self, *args, **kwargs)
def get_subprocess_encoding():
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
# For subprocess calls, encode with locale encoding
# Refer to http://stackoverflow.com/a/9951851/35070
encoding = preferredencoding()
else:
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return encoding
def encodeFilename(s, for_subprocess=False):
"""
@param s The name of the file
"""
assert type(s) == compat_str
# Python 3 has a Unicode API
if sys.version_info >= (3, 0):
return s
# Pass '' directly to use Unicode APIs on Windows 2000 and up
# (Detecting Windows NT 4 is tricky because 'major >= 4' would
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
return s
# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
if sys.platform.startswith('java'):
return s
return s.encode(get_subprocess_encoding(), 'ignore')
def decodeFilename(b, for_subprocess=False):
if sys.version_info >= (3, 0):
return b
if not isinstance(b, bytes):
return b
return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
if not isinstance(s, compat_str):
# Legacy code that uses byte strings
# Uncomment the following line after fixing all post processors
# assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
s = s.decode('ascii')
return encodeFilename(s, True)
def decodeArgument(b):
return decodeFilename(b, True)
def decodeOption(optval):
if optval is None:
return optval
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
assert isinstance(optval, compat_str)
return optval
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
def timetuple_from_msec(msec):
secs, msec = divmod(msec, 1000)
mins, secs = divmod(secs, 60)
hrs, mins = divmod(mins, 60)
return _timetuple(hrs, mins, secs, msec)
def formatSeconds(secs, delim=':', msec=False):
time = timetuple_from_msec(secs * 1000)
if time.hours:
ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
elif time.minutes:
ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
ret = '%d' % time.seconds
return '%s.%03d' % (ret, time.milliseconds) if msec else ret
def _ssl_load_windows_store_certs(ssl_context, storename):
# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
try:
certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
if encoding == 'x509_asn' and (
trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
except PermissionError:
return
for cert in certs:
ssl_context.load_verify_locations(cadata=cert)
except ssl.SSLError:
def make_HTTPS_handler(params, **kwargs):
opts_check_certificate = not params.get('nocheckcertificate')
opts_modern_tls_cipher = params.get('use_modern_tls_cipher')
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
if opts_modern_tls_cipher:
context.set_ciphers('ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:AES256-SHA')
context.check_hostname = opts_check_certificate
context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
if opts_check_certificate:
try:
context.load_default_certs()
# Work around the issue in load_default_certs when there are bad certificates. See:
# https://github.com/yt-dlp/yt-dlp/issues/1060,
# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
except ssl.SSLError:
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
# Create a new context to discard any certificates that were already loaded
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
for storename in ('CA', 'ROOT'):
_ssl_load_windows_store_certs(context, storename)
context.set_default_verify_paths()
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)

Jaime Marquínez Ferrándiz
committed
if ytdl_is_updateable():
update_cmd = 'type yt-dlp -U to update'

Jaime Marquínez Ferrándiz
committed
else:
update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'

Jaime Marquínez Ferrándiz
committed
msg += ' Make sure you are using the latest version; %s.' % update_cmd
msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
before = before.rstrip()
if not before or before.endswith(('.', '!', '?')):
msg = msg[0].title() + msg[1:]
return (before + ' ' if before else '') + msg

Jaime Marquínez Ferrándiz
committed
class YoutubeDLError(Exception):
"""Base exception for YoutubeDL errors."""
msg = None
def __init__(self, msg=None):
if msg is not None:
self.msg = msg
elif self.msg is None:
self.msg = type(self).__name__
super().__init__(self.msg)
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
class ExtractorError(YoutubeDLError):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):

Philipp Hagemeister
committed
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

Philipp Hagemeister
committed
"""
if sys.exc_info()[0] in network_exceptions:

Philipp Hagemeister
committed
expected = True
self.expected = expected
self.video_id = video_id
self.ie = ie
self.exc_info = sys.exc_info() # preserve original exception
super(ExtractorError, self).__init__(''.join((
format_field(ie, template='[%s] '),
format_field(video_id, template='%s: '),
format_field(cause, template=' (caused by %r)'),
'' if expected else bug_reports_message())))
def format_traceback(self):
if self.traceback is None:
return None
return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
def __init__(self, url):
super(UnsupportedError, self).__init__(
'Unsupported URL: %s' % url, expected=True)
self.url = url
class RegexNotFoundError(ExtractorError):
"""Error when a regex didn't match"""
pass
class GeoRestrictedError(ExtractorError):
"""Geographic restriction Error exception.
This exception may be thrown when a video is not available from your
geographic location due to geographic restrictions imposed by a website.
"""
def __init__(self, msg, countries=None, **kwargs):
kwargs['expected'] = True
super(GeoRestrictedError, self).__init__(msg, **kwargs)
self.countries = countries
class DownloadError(YoutubeDLError):
This exception may be thrown by FileDownloader objects if they are not
configured to continue on errors. They will contain the appropriate
error message.
"""

Filippo Valsorda
committed
def __init__(self, msg, exc_info=None):
""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
super(DownloadError, self).__init__(msg)
self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
"""Entry not in playlist exception.