Initial commit

2025-10-29 14:37:08 +08:00
commit 034509181f
4131 changed files with 555736 additions and 0 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,17 @@
#!/usr/bin/env python3
# Execute with
# $ python3 -m yt_dlp
import sys
if __package__ is None and not getattr(sys, 'frozen', False):
# direct call of __main__.py
import os.path
path = os.path.realpath(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
import yt_dlp
if __name__ == '__main__':
yt_dlp.main()


@@ -0,0 +1,5 @@
import os
def get_hook_dirs():
return [os.path.dirname(__file__)]


@@ -0,0 +1,36 @@
import sys
from PyInstaller.utils.hooks import collect_submodules, collect_data_files
def pycryptodome_module():
try:
import Cryptodome # noqa: F401
except ImportError:
try:
import Crypto # noqa: F401
print('WARNING: Using Crypto since Cryptodome is not available. '
'Install with: python3 -m pip install pycryptodomex', file=sys.stderr)
return 'Crypto'
except ImportError:
pass
return 'Cryptodome'
def get_hidden_imports():
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
yield pycryptodome_module()
# Only `websockets` is required, others are collected just in case
for module in ('websockets', 'requests', 'urllib3'):
yield from collect_submodules(module)
# These are auto-detected, but explicitly add them just in case
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])


@@ -0,0 +1,552 @@
import base64
from math import ceil
from .compat import compat_ord
from .dependencies import Cryptodome
if Cryptodome.AES:
def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using pycryptodome """
return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data)
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using pycryptodome """
return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
else:
def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16
def unpad_pkcs7(data):
return data[:-compat_ord(data[-1])]
def pkcs7_padding(data):
"""
PKCS#7 padding
@param {int[]} data cleartext
@returns {int[]} padded data
"""
remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
return data + [remaining_length] * remaining_length
def pad_block(block, padding_mode):
"""
Pad a block with the given padding mode
@param {int[]} block block to pad
@param {str} padding_mode padding mode to use ('pkcs7', 'iso7816', 'whitespace' or 'zero')
@returns {int[]} padded block
"""
padding_size = BLOCK_SIZE_BYTES - len(block)
PADDING_BYTE = {
'pkcs7': padding_size,
'iso7816': 0x0,
'whitespace': 0x20,
'zero': 0x0,
}
if padding_size < 0:
raise ValueError('Block size exceeded')
elif padding_mode not in PADDING_BYTE:
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
if padding_mode == 'iso7816' and padding_size:
block = [*block, 0x80] # NB: += mutates list
padding_size -= 1
return block + [PADDING_BYTE[padding_mode]] * padding_size
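# Example (editor's sketch, not part of the original file): pkcs7_padding
# always pads up to the next 16-byte boundary and unpad_pkcs7 reverses it;
# iso7816 marks the boundary with a single 0x80 byte before the zero fill.
assert pkcs7_padding(list(b'yt-dlp')) == list(b'yt-dlp') + [10] * 10
assert unpad_pkcs7(bytes(pkcs7_padding(list(b'yt-dlp')))) == b'yt-dlp'
assert pad_block(list(b'yt-dlp'), 'iso7816') == list(b'yt-dlp') + [0x80] + [0] * 9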
def aes_ecb_encrypt(data, key, iv=None):
"""
Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv Unused for this mode
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
return encrypted_data
def aes_ecb_decrypt(data, key, iv=None):
"""
Decrypt with aes in ECB mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv Unused for this mode
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
decrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
decrypted_data += aes_decrypt(block, expanded_key)
return decrypted_data[:len(data)]
def aes_ctr_decrypt(data, key, iv):
"""
Decrypt with aes in counter mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte initialization vector
@returns {int[]} decrypted data
"""
return aes_ctr_encrypt(data, key, iv)
def aes_ctr_encrypt(data, key, iv):
"""
Encrypt with aes in counter mode
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte initialization vector
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
counter = iter_vector(iv)
encrypted_data = []
for i in range(block_count):
counter_block = next(counter)
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
block += [0] * (BLOCK_SIZE_BYTES - len(block))
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
encrypted_data += xor(block, cipher_counter_block)
return encrypted_data[:len(data)]
def aes_cbc_decrypt(data, key, iv):
"""
Decrypt with aes in CBC mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
decrypted_data = []
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
block += [0] * (BLOCK_SIZE_BYTES - len(block))
decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block
return decrypted_data[:len(data)]
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
"""
Encrypt with aes in CBC mode
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@param padding_mode Padding mode to use
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = []
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
block = pad_block(block, padding_mode)
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
encrypted_data += encrypted_block
previous_cipher_block = encrypted_block
return encrypted_data
def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
"""
Decrypt with aes in GCM mode and check authenticity using the tag
@param {int[]} data cipher
@param {int[]} key 16-Byte cipher key
@param {int[]} tag authentication tag
@param {int[]} nonce IV (recommended 12-Byte)
@returns {int[]} decrypted data
"""
# XXX: check aes, gcm param
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
if len(nonce) == 12:
j0 = [*nonce, 0, 0, 0, 1]
else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
j0 = ghash(hash_subkey, ghash_in)
# TODO: add nonce support to aes_ctr_decrypt
# nonce_ctr = j0[:12]
iv_ctr = inc(j0)
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
s_tag = ghash(
hash_subkey,
data
+ [0] * pad_len # pad
+ list((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
)
if tag != aes_ctr_encrypt(s_tag, key, j0):
raise ValueError('Mismatching authentication tag')
return decrypted_data
def aes_encrypt(data, expanded_key):
"""
Encrypt one block with aes
@param {int[]} data 16-Byte state
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds + 1):
data = sub_bytes(data)
data = shift_rows(data)
if i != rounds:
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX))
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
return data
def aes_decrypt(data, expanded_key):
"""
Decrypt one block with aes
@param {int[]} data 16-Byte cipher
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte state
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
for i in range(rounds, 0, -1):
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
if i != rounds:
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
def aes_decrypt_text(data, password, key_size_bytes):
"""
Decrypt text
- The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
- The cipher key is retrieved by encrypting the first 16 Byte of 'password'
with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
- Mode of operation is 'counter'
@param {str} data Base64 encoded string
@param {str,unicode} password Password (will be encoded with utf-8)
@param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
@returns {bytes} Decrypted data
"""
NONCE_LENGTH_BYTES = 8
data = list(base64.b64decode(data))
password = list(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
nonce = data[:NONCE_LENGTH_BYTES]
cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
return bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
(0x1, 0x2, 0x3, 0x1),
(0x1, 0x1, 0x2, 0x3),
(0x3, 0x1, 0x1, 0x2))
MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
(0x9, 0xE, 0xB, 0xD),
(0xD, 0x9, 0xE, 0xB),
(0xB, 0xD, 0x9, 0xE))
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def key_expansion(data):
"""
Generate key schedule
@param {int[]} data 16/24/32-Byte cipher key
@returns {int[]} 176/208/240-Byte expanded key
"""
data = data[:] # copy
rcon_iteration = 1
key_size_bytes = len(data)
expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
while len(data) < expanded_key_size_bytes:
temp = data[-4:]
temp = key_schedule_core(temp, rcon_iteration)
rcon_iteration += 1
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
for _ in range(3):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
if key_size_bytes == 32:
temp = data[-4:]
temp = sub_bytes(temp)
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
return data[:expanded_key_size_bytes]
def iter_vector(iv):
while True:
yield iv
iv = inc(iv)
def sub_bytes(data):
return [SBOX[x] for x in data]
def sub_bytes_inv(data):
return [SBOX_INV[x] for x in data]
def rotate(data):
return [*data[1:], data[0]]
def key_schedule_core(data, rcon_iteration):
data = rotate(data)
data = sub_bytes(data)
data[0] = data[0] ^ RCON[rcon_iteration]
return data
def xor(data1, data2):
return [x ^ y for x, y in zip(data1, data2, strict=False)]
def iter_mix_columns(data, matrix):
for i in (0, 4, 8, 12):
for row in matrix:
mixed = 0
for j in range(4):
# xor is (+) and (-)
mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else
RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF])
yield mixed
def shift_rows(data):
return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
def shift_rows_inv(data):
return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
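# Example (editor's sketch): shift_rows and shift_rows_inv are inverse
# permutations of the 16-byte state, so composing them is the identity.
assert shift_rows_inv(shift_rows(list(range(16)))) == list(range(16))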
def shift_block(data):
data_shifted = []
bit = 0
for n in data:
if bit:
n |= 0x100
bit = n & 1
n >>= 1
data_shifted.append(n)
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data) - 1, -1, -1):
if data[i] == 255:
data[i] = 0
else:
data[i] = data[i] + 1
break
return data
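# Example (editor's sketch): inc() is a big-endian increment with byte carry,
# as used for the CTR counter blocks above.
assert inc([0, 0, 255]) == [0, 1, 0]
assert inc([255, 255]) == [0, 0]  # wraps around on all-0xFF input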
def block_product(block_x, block_y):
# NIST SP 800-38D, Algorithm 1
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
block_v = block_y[:]
block_z = [0] * BLOCK_SIZE_BYTES
for i in block_x:
for bit in range(7, -1, -1):
if i & (1 << bit):
block_z = xor(block_z, block_v)
do_xor = block_v[-1] & 1
block_v = shift_block(block_v)
if do_xor:
block_v = xor(block_v, block_r)
return block_z
def ghash(subkey, data):
# NIST SP 800-38D, Algorithm 2
if len(data) % BLOCK_SIZE_BYTES:
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES):
block = data[i: i + BLOCK_SIZE_BYTES]
last_y = block_product(xor(last_y, block), subkey)
return last_y
__all__ = [
'aes_cbc_decrypt',
'aes_cbc_decrypt_bytes',
'aes_cbc_encrypt',
'aes_cbc_encrypt_bytes',
'aes_ctr_decrypt',
'aes_ctr_encrypt',
'aes_decrypt',
'aes_decrypt_text',
'aes_ecb_decrypt',
'aes_ecb_encrypt',
'aes_encrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'key_expansion',
'pad_block',
'pkcs7_padding',
'unpad_pkcs7',
]
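# Example (editor's sketch, not part of the original file): round-tripping the
# pure-Python CBC implementation. The message length is deliberately not a
# multiple of 16 so the PKCS#7 padding path is exercised.
_key = list(b'0123456789abcdef')
_iv = [0] * 16
_msg = list(b'Example plaintext for AES-CBC')
assert unpad_pkcs7(bytes(aes_cbc_decrypt(aes_cbc_encrypt(_msg, _key, _iv), _key, _iv))) == bytes(_msg)
# GCM check against NIST test case 1 (zero key, zero 96-bit nonce, empty
# plaintext); the expected tag is quoted from the published test vectors.
_tag = list(bytes.fromhex('58e2fccefa7e3061367f1d57a4e7455a'))
assert aes_gcm_decrypt_and_verify([], [0] * 16, _tag, [0] * 12) == []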


@@ -0,0 +1,91 @@
import contextlib
import json
import os
import re
import shutil
import traceback
import urllib.parse
from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__
class Cache:
def __init__(self, ydl):
self._ydl = ydl
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'yt-dlp')
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters
return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
@property
def enabled(self):
return self._ydl.params.get('cachedir') is not False
def store(self, section, key, data, dtype='json'):
assert dtype in ('json',)
if not self.enabled:
return
fn = self._get_cache_fn(section, key, dtype)
try:
os.makedirs(os.path.dirname(fn), exist_ok=True)
self._ydl.write_debug(f'Saving {section}.{key} to cache')
write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
def _validate(self, data, min_ver):
version = traverse_obj(data, 'yt-dlp_version')
if not version: # Backward compatibility
data, version = {'data': data}, '2022.08.19'
if not min_ver or version_tuple(version) >= version_tuple(min_ver):
return data['data']
self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
def load(self, section, key, dtype='json', default=None, *, min_ver=None):
assert dtype in ('json',)
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
with contextlib.suppress(OSError):
try:
with open(cache_fn, encoding='utf-8') as cachef:
self._ydl.write_debug(f'Loading {section}.{key} from cache')
return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError):
try:
file_size = os.path.getsize(cache_fn)
except OSError as oe:
file_size = str(oe)
self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')
return default
def remove(self):
if not self.enabled:
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
return
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
self._ydl.to_screen(
f'Removing cache dir {cachedir} .', skip_eol=True)
if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)
self._ydl.to_screen('.')
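# Example (editor's sketch, not part of the original file): exercising the
# cache API with a minimal hypothetical stand-in for the YoutubeDL object;
# the stand-in only provides the attributes Cache actually touches.
import tempfile
import types
_fake_ydl = types.SimpleNamespace(
params={'cachedir': tempfile.mkdtemp(suffix='-yt-dlp-cache')},
write_debug=print,
report_warning=lambda msg: print('WARNING:', msg),
to_screen=lambda msg, skip_eol=False: print(msg),
)
_cache = Cache(_fake_ydl)
_cache.store('demo', 'some key', {'value': 1})
assert _cache.load('demo', 'some key') == {'value': 1}
_cache.remove()  # the directory name contains 'cache', so removal is allowed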


@@ -0,0 +1,64 @@
import datetime as dt
import os
import xml.etree.ElementTree as etree
from .compat_utils import passthrough_module
passthrough_module(__name__, '._deprecated')
del passthrough_module
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introduce a dummy exception for Python >=3.5 to allow
# compatible and uniform cross-version exception handling
class compat_HTMLParseError(ValueError):
pass
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
def compat_ord(c):
return c if isinstance(c, int) else ord(c)
def compat_datetime_from_timestamp(timestamp):
# Calling dt.datetime.fromtimestamp with negative timestamps throws an error on Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/81708,
# https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
return (dt.datetime.fromtimestamp(0, dt.timezone.utc) + dt.timedelta(seconds=timestamp))
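# Example (editor's sketch): pre-epoch (negative) timestamps resolve the same
# on all platforms, unlike a direct dt.datetime.fromtimestamp call on Windows.
assert compat_datetime_from_timestamp(-1) == dt.datetime(1969, 12, 31, 23, 59, 59, tzinfo=dt.timezone.utc)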
# Python 3.8+ does not honor %HOME% on Windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
if os.name in ('nt', 'ce'):
def compat_expanduser(path):
HOME = os.environ.get('HOME')
if not HOME:
return os.path.expanduser(path)
elif not path.startswith('~'):
return path
i = path.replace('\\', '/', 1).find('/') # ~user
if i < 0:
i = len(path)
userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME
return userhome + path[i:]
else:
compat_expanduser = os.path.expanduser
def urllib_req_to_req(urllib_request):
"""Convert urllib Request to a networking Request"""
from ..networking import Request
from ..utils.networking import HTTPHeaderDict
return Request(
urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)


@@ -0,0 +1,21 @@
"""Deprecated - New code should avoid these"""
import warnings
from .compat_utils import passthrough_module
# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module
import functools # noqa: F401
import os
compat_os_name = os.name
compat_realpath = os.path.realpath
def compat_shlex_quote(s):
from ..utils import shell_quote
return shell_quote(s)


@@ -0,0 +1,115 @@
""" Do not use! """
import base64
import collections
import ctypes
import getpass
import html.entities
import html.parser
import http.client
import http.cookiejar
import http.cookies
import http.server
import itertools
import os
import shlex
import shutil
import socket
import struct
import subprocess
import tokenize
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as etree
# isort: split
import asyncio # noqa: F401
import re # noqa: F401
from asyncio import run as compat_asyncio_run # noqa: F401
from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401
from . import compat_expanduser, compat_HTMLParseError
from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
# will not work since ctypes.WINFUNCTYPE does not exist on UNIX machines
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
def compat_setenv(key, value, env=os.environ):
env[key] = value
compat_base64_b64decode = base64.b64decode
compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr
compat_collections_abc = collections.abc
compat_cookiejar = compat_http_cookiejar = http.cookiejar
compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
compat_cookies = compat_http_cookies = http.cookies
compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
compat_filter = filter
compat_get_terminal_size = shutil.get_terminal_size
compat_getenv = os.getenv
compat_getpass = compat_getpass_getpass = getpass.getpass
compat_html_entities = html.entities
compat_html_entities_html5 = html.entities.html5
compat_html_parser_HTMLParseError = compat_HTMLParseError
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = os.path.realpath
compat_print = print
compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection
compat_Struct = struct.Struct
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = compat_HTTPError
compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
compat_urllib_request = urllib.request
compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_response = urllib.response
compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
compat_xpath = lambda xpath: xpath
compat_zip = zip
workaround_optparse_bug9161 = lambda: None
compat_str = str
compat_b64decode = base64.b64decode
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
legacy = []


@@ -0,0 +1,83 @@
import collections
import contextlib
import functools
import importlib
import sys
import types
_NO_ATTRIBUTE = object()
_Package = collections.namedtuple('Package', ('name', 'version'))
def get_package_info(module):
return _Package(
name=getattr(module, '_yt_dlp__identifier', module.__name__),
version=str(next(filter(None, (
getattr(module, attr, None)
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
)), None)))
def _is_package(module):
return '__path__' in vars(module)
def _is_dunder(name):
return name.startswith('__') and name.endswith('__')
class EnhancedModule(types.ModuleType):
def __bool__(self):
return vars(self).get('__bool__', lambda: True)()
def __getattribute__(self, attr):
try:
ret = super().__getattribute__(attr)
except AttributeError:
if _is_dunder(attr):
raise
getter = getattr(self, '__getattr__', None)
if not getter:
raise
ret = getter(attr)
return ret.fget() if isinstance(ret, property) else ret
def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None):
"""Passthrough parent module into a child module, creating the parent if necessary"""
def __getattr__(attr):
if _is_package(parent):
with contextlib.suppress(ModuleNotFoundError):
return importlib.import_module(f'.{attr}', parent.__name__)
ret = from_child(attr)
if ret is _NO_ATTRIBUTE:
raise AttributeError(f'module {parent.__name__} has no attribute {attr}')
callback(attr)
return ret
@functools.cache
def from_child(attr):
nonlocal child
if attr not in allowed_attributes:
if ... not in allowed_attributes or _is_dunder(attr):
return _NO_ATTRIBUTE
if isinstance(child, str):
child = importlib.import_module(child, parent.__name__)
if _is_package(child):
with contextlib.suppress(ImportError):
return passthrough_module(f'{parent.__name__}.{attr}',
importlib.import_module(f'.{attr}', child.__name__))
with contextlib.suppress(AttributeError):
return getattr(child, attr)
return _NO_ATTRIBUTE
parent = sys.modules.get(parent, types.ModuleType(parent))
parent.__class__ = EnhancedModule
parent.__getattr__ = __getattr__
return parent
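# Example (editor's sketch, not part of the original file): forward unknown
# attribute lookups on a synthetic module to the stdlib `math` module.
import math
_demo = passthrough_module('_passthrough_demo', math)
assert _demo.pi == math.pi  # resolved through EnhancedModule.__getattribute__
assert bool(_demo) is True  # EnhancedModule.__bool__ defaults to True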


@@ -0,0 +1,22 @@
def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
"""
if h is None:
with open(file, 'rb') as f:
h = f.read(12)
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
return 'webp'
if h.startswith(b'\x89PNG'):
return 'png'
if h.startswith(b'\xFF\xD8\xFF'):
return 'jpeg'
if h.startswith(b'GIF'):
return 'gif'
return None
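# Example (editor's sketch): sniffing magic bytes directly, no file needed
assert what(h=b'\x89PNG\r\n\x1a\n\x00\x00\x00\r') == 'png'
assert what(h=b'RIFF\x00\x00\x00\x00WEBPVP8 ') == 'webp'
assert what(h=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00') == 'jpeg'
assert what(h=b'GIF89a') == 'gif'
assert what(h=b'\x00' * 12) is None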


@@ -0,0 +1,30 @@
# flake8: noqa: F405
from shutil import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'shutil')
del passthrough_module
import sys
if sys.platform.startswith('freebsd'):
import errno
import os
import shutil
# Workaround for PermissionError when using restricted ACL mode on FreeBSD
def copy2(src, dst, *args, **kwargs):
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
shutil.copyfile(src, dst, *args, **kwargs)
try:
shutil.copystat(src, dst, *args, **kwargs)
except PermissionError as e:
if e.errno != getattr(errno, 'EPERM', None):
raise
return dst
def move(*args, copy_function=copy2, **kwargs):
return shutil.move(*args, copy_function=copy_function, **kwargs)


@@ -0,0 +1,10 @@
# flake8: noqa: F405
from urllib import * # noqa: F403
del request # noqa: F821
from . import request # noqa: F401
from ..compat_utils import passthrough_module
passthrough_module(__name__, 'urllib')
del passthrough_module


@@ -0,0 +1,36 @@
# flake8: noqa: F405
from urllib.request import * # noqa: F403
from ..compat_utils import passthrough_module
passthrough_module(__name__, 'urllib.request')
del passthrough_module
import os
if os.name == 'nt':
# On older Python versions, proxies are extracted from Windows registry erroneously. [1]
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
# it to http on these older Python versions to avoid issues
# This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
# 1: https://github.com/python/cpython/issues/86793
# 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
import sys
from urllib.request import getproxies_environment, getproxies_registry
def getproxies_registry_patched():
proxies = getproxies_registry()
if sys.version_info < (3, 10, 5): # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
for scheme in ('https', 'ftp'):
if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
return proxies
def getproxies():
return getproxies_environment() or getproxies_registry_patched()
del os

File diff suppressed because it is too large


@@ -0,0 +1,38 @@
from ..compat.compat_utils import passthrough_module
try:
import Cryptodome as _parent
except ImportError:
try:
import Crypto as _parent
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
_parent = passthrough_module(__name__, 'no_Cryptodome')
__bool__ = lambda: False
del passthrough_module
__version__ = ''
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
try:
if _parent.__name__ == 'Cryptodome':
from Cryptodome import __version__
from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5
from Cryptodome.Hash import CMAC, SHA1
from Cryptodome.PublicKey import RSA
elif _parent.__name__ == 'Crypto':
from Crypto import __version__
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401
except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip()
_yt_dlp__identifier = _parent.__name__
if AES and _yt_dlp__identifier == 'Crypto':
try:
# In pycrypto, mode defaults to ECB. See:
# https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
AES.new(b'abcdefghijklmnop')
except TypeError:
_yt_dlp__identifier = 'pycrypto'


@@ -0,0 +1,96 @@
# flake8: noqa: F401
"""Imports all optional dependencies for the project.
An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace"""
try:
import brotlicffi as brotli
except ImportError:
try:
import brotli
except ImportError:
brotli = None
try:
import certifi
except ImportError:
certifi = None
else:
from os.path import exists as _path_exists
# The certificate may not be bundled in the executable
if not _path_exists(certifi.where()):
certifi = None
try:
import mutagen
except ImportError:
mutagen = None
secretstorage = None
try:
import secretstorage
_SECRETSTORAGE_UNAVAILABLE_REASON = None
except ImportError:
_SECRETSTORAGE_UNAVAILABLE_REASON = (
'as the `secretstorage` module is not installed. '
'Please install by running `python3 -m pip install secretstorage`')
except Exception as _err:
_SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
try:
import sqlite3
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
sqlite3._yt_dlp__version = sqlite3.sqlite_version
except ImportError:
# although sqlite3 is part of the standard library, it is possible to compile Python without
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
sqlite3 = None
try:
import websockets
except ImportError:
websockets = None
try:
import urllib3
except ImportError:
urllib3 = None
try:
import requests
except ImportError:
requests = None
try:
import xattr # xattr or pyxattr
except ImportError:
xattr = None
else:
if hasattr(xattr, 'set'): # pyxattr
xattr._yt_dlp__identifier = 'pyxattr'
try:
import curl_cffi
except ImportError:
curl_cffi = None
from . import Cryptodome
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
available_dependencies = {k: v for k, v in all_dependencies.items() if v}
# Deprecated
Cryptodome_AES = Cryptodome.AES
__all__ = [
'all_dependencies',
'available_dependencies',
*all_dependencies.keys(),
]
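# Example (editor's sketch, not part of the original file):
# `available_dependencies` is exactly the truthy subset of `all_dependencies`.
assert set(available_dependencies) <= set(all_dependencies)
assert all(all_dependencies[k] for k in available_dependencies)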


@@ -0,0 +1,131 @@
from ..utils import NO_DEFAULT, determine_protocol
def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
info_dict['protocol'] = determine_protocol(info_dict)
info_copy = info_dict.copy()
info_copy['to_stdout'] = to_stdout
protocols = (protocol or info_copy['protocol']).split('+')
downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols]
if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params):
return FFmpegFD
elif (set(downloaders) == {DashSegmentsFD}
and not (to_stdout and len(protocols) > 1)
and set(protocols) == {'http_dash_segments_generator'}):
return DashSegmentsFD
elif len(downloaders) == 1:
return downloaders[0]
return None
# Some of these require get_suitable_downloader
from .common import FileDownloader
from .dash import DashSegmentsFD
from .external import FFmpegFD, get_external_downloader
from .f4m import F4mFD
from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD
from .bunnycdn import BunnyCdnFD
PROTOCOL_MAP = {
'rtmp': RtmpFD,
'rtmpe': RtmpFD,
'rtmp_ffmpeg': FFmpegFD,
'm3u8_native': HlsFD,
'm3u8': FFmpegFD,
'mms': RtspFD,
'rtsp': RtspFD,
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'http_dash_segments_generator': DashSegmentsFD,
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_live': NiconicoLiveFD,
'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD,
'bunnycdn': BunnyCdnFD,
}
def shorten_protocol_name(proto, simplify=False):
short_protocol_names = {
'm3u8_native': 'm3u8',
'm3u8': 'm3u8F',
'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
'http_dash_segments_generator': 'dashG',
'websocket_frag': 'WSfrag',
}
if simplify:
short_protocol_names.update({
'https': 'http',
'ftps': 'ftp',
'm3u8': 'm3u8', # Reverse above m3u8 mapping
'm3u8_native': 'm3u8',
'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
'm3u8_frag_urls': 'm3u8',
'dash_frag_urls': 'dash',
})
return short_protocol_names.get(proto, proto)
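# Example (editor's sketch): full protocol names abbreviate for display, and
# simplify=True collapses variants onto their base protocol.
assert shorten_protocol_name('http_dash_segments') == 'dash'
assert shorten_protocol_name('m3u8') == 'm3u8F'
assert shorten_protocol_name('m3u8', simplify=True) == 'm3u8'
assert shorten_protocol_name('https', simplify=True) == 'http'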
def _get_suitable_downloader(info_dict, protocol, params, default):
"""Get the downloader class that can handle the info dict."""
if default is NO_DEFAULT:
default = HttpFD
if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
return FFmpegFD
info_dict['protocol'] = protocol
downloaders = params.get('external_downloader')
external_downloader = (
downloaders if isinstance(downloaders, str) or downloaders is None
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
if external_downloader is None:
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
return FFmpegFD
elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None:
ed = get_external_downloader(external_downloader)
if ed.can_download(info_dict, external_downloader):
return ed
if protocol == 'http_dash_segments':
if info_dict.get('is_live') and (external_downloader or '').lower() != 'native':
return FFmpegFD
if protocol in ('m3u8', 'm3u8_native'):
if info_dict.get('is_live'):
return FFmpegFD
elif (external_downloader or '').lower() == 'native':
return HlsFD
elif protocol == 'm3u8_native' and get_suitable_downloader(
info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
return HlsFD
elif params.get('hls_prefer_native') is True:
return HlsFD
elif params.get('hls_prefer_native') is False:
return FFmpegFD
return PROTOCOL_MAP.get(protocol, default)
__all__ = [
'FileDownloader',
'get_suitable_downloader',
'shorten_protocol_name',
]


@@ -0,0 +1,50 @@
import hashlib
import random
import threading
from .common import FileDownloader
from . import HlsFD
from ..networking import Request
from ..networking.exceptions import network_exceptions
class BunnyCdnFD(FileDownloader):
"""
Downloads from BunnyCDN with required pings
Note, this is not a part of public API, and will be removed without notice.
DO NOT USE
"""
def real_download(self, filename, info_dict):
self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')
fd = HlsFD(self.ydl, self.params)
stop_event = threading.Event()
ping_thread = threading.Thread(target=self.ping_thread, args=(stop_event,), kwargs=info_dict['_bunnycdn_ping_data'])
ping_thread.start()
try:
return fd.real_download(filename, info_dict)
finally:
stop_event.set()
def ping_thread(self, stop_event, url, headers, secret, context_id):
# Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
ping_interval = 2
# Hard coded resolution as it doesn't seem to matter
res = 1080
paused = 'false'
current_time = 0
while not stop_event.wait(ping_interval):
current_time += ping_interval
time = current_time + round(random.random(), 6)
md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest()
ping_url = f'{url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}'
try:
self.ydl.urlopen(Request(ping_url, headers=headers)).read()
except network_exceptions as e:
self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}')


@@ -0,0 +1,519 @@
import contextlib
import errno
import functools
import os
import random
import re
import threading
import time
from ..minicurses import (
BreaklineStatusPrinter,
MultilineLogger,
MultilinePrinter,
QuietMultilinePrinter,
)
from ..utils import (
IDENTITY,
NO_DEFAULT,
LockingUnsupportedError,
Namespace,
RetryManager,
classproperty,
deprecation_warning,
format_bytes,
join_nonempty,
parse_bytes,
remove_start,
sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
try_call,
)
from ..utils._utils import _ProgressState
class FileDownloader:
"""File Downloader class.
File downloader objects are the ones responsible for downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for expected network errors.
Default is 0 for API, but 10 for CLI
file_access_retries: Number of times to retry on file access error (default: 3)
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
progress_delta: The minimum time between progress output, in seconds
external_downloader_args: A dictionary of downloader keys (in lower case)
and a list of additional command-line arguments for the
executable. Use 'default' as the name for arguments to be
passed to all downloaders. For compatibility with youtube-dl,
a single list of args can also be used
hls_use_mpegts: Use the mpegts container for HLS videos.
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
progress_template: See YoutubeDL.py
retry_sleep_functions: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
_TEST_FILE_SIZE = 10241
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self._set_ydl(ydl)
self._progress_hooks = []
self.params = params
self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
if self.params.get('progress_delta'):
self._progress_delta_lock = threading.Lock()
self._progress_delta_time = time.monotonic()
def _set_ydl(self, ydl):
self.ydl = ydl
for func in (
'deprecation_warning',
'deprecated_feature',
'report_error',
'report_file_already_downloaded',
'report_warning',
'to_console_title',
'to_stderr',
'trouble',
'write_debug',
):
if not hasattr(self, func):
setattr(self, func, getattr(ydl, func))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
__to_screen = to_screen
@classproperty
def FD_NAME(cls):
return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
@staticmethod
def format_seconds(seconds):
if seconds is None:
return ' Unknown'
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
return '%02d:%02d:%02d' % time[:-1]
@classmethod
def format_eta(cls, seconds):
return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@classmethod
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
if total is NO_DEFAULT:
rate, remaining = start_or_rate, now_or_remaining
if None in (rate, remaining):
return None
return int(float(remaining) / rate)
start, now = start_or_rate, now_or_remaining
if total is None:
return None
if now is None:
now = time.time()
rate = cls.calc_speed(start, now, current)
return rate and int((float(total) - float(current)) / rate)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
@staticmethod
def format_retries(retries):
return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def filesize_or_none(unencoded_filename):
if os.path.isfile(unencoded_filename):
return os.path.getsize(unencoded_filename)
return 0
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
return parse_bytes(bytestr)
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit')
if rate_limit is None or byte_counter == 0:
return
if now is None:
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
sleep_time = float(byte_counter) / rate_limit - elapsed
if sleep_time > 0:
time.sleep(sleep_time)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == '-' or \
(os.path.exists(filename) and not os.path.isfile(filename)):
return filename
return filename + '.part'
def undo_temp_name(self, filename):
if filename.endswith('.part'):
return filename[:-len('.part')]
return filename
def ytdl_filename(self, filename):
return filename + '.ytdl'
def wrap_file_access(action, *, fatal=False):
def error_callback(err, count, retries, *, fd):
return RetryManager.report_retry(
err, count, retries, info=fd.__to_screen,
warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
def wrapper(self, func, *args, **kwargs):
for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
try:
return func(self, *args, **kwargs)
except OSError as err:
if err.errno in (errno.EACCES, errno.EINVAL):
retry.error = err
continue
retry.error_callback(err, 1, 0)
return functools.partial(functools.partialmethod, wrapper)
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
f, filename = sanitize_open(filename, open_mode)
if not getattr(f, 'locked', None):
self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
return f, filename
@wrap_file_access('remove')
def try_remove(self, filename):
if os.path.isfile(filename):
os.remove(filename)
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
if old_filename == new_filename:
return
os.replace(old_filename, new_filename)
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
return
if not os.path.isfile(filename):
return
timestr = last_modified_hdr
if timestr is None:
return
filetime = timeconvert(timestr)
if filetime is None:
return filetime
# Ignore obviously invalid dates
if filetime == 0:
return
with contextlib.suppress(Exception):
os.utime(filename, (time.time(), filetime))
return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen('[download] Destination: ' + filename)
def _prepare_multiline_status(self, lines=1):
if self.params.get('noprogress'):
self._multiline = QuietMultilinePrinter()
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out
def _finish_multiline_status(self):
self._multiline.end()
ProgressStyles = Namespace(
downloaded_bytes='light blue',
percent='light blue',
eta='yellow',
speed='green',
elapsed='bold white',
total_bytes='',
total_bytes_estimate='',
)
def _report_progress_status(self, s, default_template):
for name, style in self.ProgressStyles.items_:
name = f'_{name}_str'
if name not in s:
continue
s[name] = self._format_progress(s[name], style)
s['_default_template'] = default_template % s
progress_dict = s.copy()
progress_dict.pop('info_dict')
progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
progress_template = self.params.get('progress_template', {})
self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
progress_template.get('download') or '[download] %(progress._default_template)s',
progress_dict), s.get('progress_idx') or 0)
self.to_console_title(self.ydl.evaluate_outtmpl(
progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
def _format_progress(self, *args, **kwargs):
return self.ydl._format_text(
self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
def report_progress(self, s):
def with_fields(*tups, default=''):
for *fields, tmpl in tups:
if all(s.get(f) is not None for f in fields):
return tmpl
return default
_format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
s.update({
'speed': speed,
'_speed_str': self.format_speed(speed).strip(),
'_total_bytes_str': _format_bytes('total_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
'_percent': 100.0,
'_percent_str': self.format_percent(100),
})
self._report_progress_status(s, join_nonempty(
'100%%',
with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
with_fields(('elapsed', 'in %(_elapsed_str)s')),
with_fields(('speed', 'at %(_speed_str)s')),
delim=' '))
if s['status'] != 'downloading':
return
if update_delta := self.params.get('progress_delta'):
with self._progress_delta_lock:
if time.monotonic() < self._progress_delta_time:
return
self._progress_delta_time += update_delta
progress = try_call(
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)
s.update({
'_eta_str': self.format_eta(s.get('eta')).strip(),
'_speed_str': self.format_speed(s.get('speed')),
'_percent': progress,
'_percent_str': self.format_percent(progress),
'_total_bytes_str': _format_bytes('total_bytes'),
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
})
msg_template = with_fields(
('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
msg_template += with_fields(
('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
('fragment_index', ' (frag %(fragment_index)s)'))
self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(f'[download] Resuming download at byte {resume_len}')
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
"""Report retry"""
is_frag = False if frag_index is NO_DEFAULT else 'fragment'
RetryManager.report_retry(
err, count, retries, info=self.__to_screen,
warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
@staticmethod
def supports_manifest(manifest):
""" Whether the downloader can download the fragments from the manifest.
Redefine in subclasses if needed. """
pass
def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
nooverwrites_and_exists = (
not self.params.get('overwrites', True)
and os.path.exists(filename)
)
if not hasattr(filename, 'write'):
continuedl_and_exists = (
self.params.get('continuedl', True)
and os.path.isfile(filename)
and not self.params.get('nopart', False)
)
            # Check whether the file is already present
if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(filename),
}, info_dict)
self._finish_multiline_status()
return True, False
sleep_note = ''
if subtitle:
sleep_interval = self.params.get('sleep_interval_subtitles') or 0
else:
min_sleep_interval = self.params.get('sleep_interval') or 0
max_sleep_interval = self.params.get('max_sleep_interval') or 0
if available_at := info_dict.get('available_at'):
forced_sleep_interval = available_at - int(time.time())
if forced_sleep_interval > min_sleep_interval:
sleep_note = 'as required by the site'
min_sleep_interval = forced_sleep_interval
if forced_sleep_interval > max_sleep_interval:
max_sleep_interval = forced_sleep_interval
sleep_interval = random.uniform(
min_sleep_interval, max_sleep_interval or min_sleep_interval)
if sleep_interval > 0:
self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds {sleep_note}...')
time.sleep(sleep_interval)
ret = self.real_download(filename, info_dict)
self._finish_multiline_status()
return ret, True
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict):
# Ideally we want to make a copy of the dict, but that is too slow
status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third-party scripts seem to rely on this,
        # so keep this behavior if possible
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
        # See YoutubeDL.py (search for progress_hooks) for a description of
        # this interface
self._progress_hooks.append(ph)
def _debug_cmd(self, args, exe=None):
if not self.params.get('verbose', False):
return
if exe is None:
exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(args)}')
def _get_impersonate_target(self, info_dict):
impersonate = info_dict.get('impersonate')
if impersonate is None:
return None
available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate)
if available_target:
return available_target
elif requested_targets:
self.report_warning(self.ydl._unavailable_targets_message(requested_targets))
return None
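# A minimal sketch of the progress-hook interface implemented by
# _hook_progress/add_progress_hook above. Each hook is called with the
# mutable status dict on every update; 'example_hook' is a hypothetical
# name, and hooks are normally registered via the 'progress_hooks' key
# of the YoutubeDL params rather than defined here.
def example_hook(status):
    if status['status'] == 'downloading':
        print(status.get('_percent_str'), 'at', status.get('_speed_str'))
    elif status['status'] == 'finished':
        print('Done downloading', status['filename'])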

View File

@@ -0,0 +1,95 @@
import time
import urllib.parse
from . import get_suitable_downloader
from .fragment import FragmentFD
from ..utils import ReExtractInfo, update_url_query, urljoin
class DashSegmentsFD(FragmentFD):
"""
Download segments in a DASH manifest. External downloaders can take over
the fragment downloads by supporting the 'dash_frag_urls' protocol
"""
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
real_downloader = None # No external FD can support --live-from-start
else:
if info_dict.get('is_live'):
self.report_error('Live DASH videos are not supported')
real_downloader = get_suitable_downloader(
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
real_start = time.time()
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
args = []
for fmt in requested_formats or [info_dict]:
# Re-extract if --load-info-json is used and 'fragments' was originally a generator
# See https://github.com/yt-dlp/yt-dlp/issues/13906
if isinstance(fmt['fragments'], str):
raise ReExtractInfo('the stream needs to be re-extracted', expected=True)
try:
fragment_count = 1 if self.params.get('test') else len(fmt['fragments'])
except TypeError:
fragment_count = None
ctx = {
'filename': fmt.get('filepath') or filename,
'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'),
'total_frags': fragment_count,
}
if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx, fmt)
ctx['start'] = real_start
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
if real_downloader:
self.to_screen(
f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
info_dict['fragments'] = list(fragments_to_download)
fd = real_downloader(self.ydl, self.params)
return fd.real_download(filename, info_dict)
args.append([ctx, fragments_to_download, fmt])
return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
def _resolve_fragments(self, fragments, ctx):
fragments = fragments(ctx) if callable(fragments) else fragments
return [next(iter(fragments))] if self.params.get('test') else fragments
def _get_fragments(self, fmt, ctx, extra_query):
fragment_base_url = fmt.get('fragment_base_url')
fragments = self._resolve_fragments(fmt['fragments'], ctx)
frag_index = 0
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
if extra_query:
fragment_url = update_url_query(fragment_url, extra_query)
yield {
'frag_index': frag_index,
'fragment_count': fragment.get('fragment_count'),
'index': i,
'url': fragment_url,
}
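# Sketch of how extra_param_to_segment_url is applied above, assuming a
# hypothetical 'token=abc' value: urllib.parse.parse_qs('token=abc')
# yields {'token': ['abc']}, and update_url_query then merges it into
# each fragment URL:
# update_url_query('https://example.com/seg-1.m4s', {'token': ['abc']})
# -> 'https://example.com/seg-1.m4s?token=abc'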

View File

@@ -0,0 +1,673 @@
import enum
import functools
import json
import os
import re
import subprocess
import sys
import tempfile
import time
import uuid
from .fragment import FragmentFD
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
Popen,
RetryManager,
_configuration_args,
check_executable,
classproperty,
cli_bool_option,
cli_option,
cli_valueless_option,
determine_ext,
encodeArgument,
find_available_port,
remove_end,
traverse_obj,
)
class Features(enum.Enum):
TO_STDOUT = enum.auto()
MULTIPLE_FORMATS = enum.auto()
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
SUPPORTED_FEATURES = ()
_CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
self._cookies_tempfile = None
try:
started = time.time()
retval = self._call_downloader(tmpfilename, info_dict)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
raise
            # Cancelling a live stream download should be considered correct
            # and expected termination, so all postprocessing should
            # take place
retval = 0
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
finally:
if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile)
if retval == 0:
status = {
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
}
if filename != '-':
fsize = os.path.getsize(tmpfilename)
self.try_rename(tmpfilename, filename)
status.update({
'downloaded_bytes': fsize,
'total_bytes': fsize,
})
self._hook_progress(status, info_dict)
return True
else:
self.to_stderr('\n')
self.report_error('%s exited with code %d' % (
self.get_basename(), retval))
return False
@classmethod
def get_basename(cls):
return cls.__name__[:-2].lower()
@classproperty
def EXE_NAME(cls):
return cls.get_basename()
@functools.cached_property
def exe(self):
return self.EXE_NAME
@classmethod
def available(cls, path=None):
path = check_executable(
cls.EXE_NAME if path in (None, cls.get_basename()) else path,
[cls.AVAILABLE_OPT])
if not path:
return False
cls.exe = path
return path
@classmethod
def supports(cls, info_dict):
return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
))
@classmethod
def can_download(cls, info_dict, path=None):
return cls.available(path) and cls.supports(info_dict)
def _option(self, command_option, param):
return cli_option(self.params, command_option, param)
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
def _valueless_option(self, command_option, param, expected_value=True):
return cli_valueless_option(self.params, command_option, param, expected_value)
def _configuration_args(self, keys=None, *args, **kwargs):
return _configuration_args(
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs)
def _write_cookies(self):
if not self.ydl.cookiejar.filename:
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
tmp_cookies.close()
self._cookies_tempfile = tmp_cookies.name
self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
# real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
self.ydl.cookiejar.save(self._cookies_tempfile)
return self.ydl.cookiejar.filename or self._cookies_tempfile
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
_, stderr, returncode = self._call_process(cmd, info_dict)
if returncode and stderr:
self.to_stderr(stderr)
return returncode
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=None, fatal=not skip_unavailable_fragments)
for retry in retry_manager:
_, stderr, returncode = self._call_process(cmd, info_dict)
if not returncode:
break
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
if stderr:
self.to_stderr(stderr)
retry.error = Exception()
continue
if not skip_unavailable_fragments and retry_manager.error:
return -1
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
except OSError as err:
if skip_unavailable_fragments and frag_index > 1:
self.report_skip_fragment(frag_index, err)
continue
self.report_error(f'Unable to open fragment {frag_index}; {err}')
return -1
dest.write(decrypt_fragment(fragment, src.read()))
src.close()
if not self.params.get('keep_fragments', False):
self.try_remove(fragment_filename)
dest.close()
self.try_remove(f'{tmpfilename}.frag.urls')
return 0
def _call_process(self, cmd, info_dict):
return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
_CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['--cookie', cookie_header]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--retry', 'retries')
if len(retry) == 2:
if retry[1] in ('inf', 'infinite'):
retry[1] = '2147483647'
cmd += retry
cmd += self._option('--max-filesize', 'max_filesize')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
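# Sketch of how the _option/_bool_option helpers above expand params into
# argv fragments, assuming cli_option returns [] when the param is unset
# (values are illustrative):
# cli_option({'ratelimit': 1000}, '--limit-rate', 'ratelimit')
# -> ['--limit-rate', '1000']
# cli_option({}, '--limit-rate', 'ratelimit')
# -> []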
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['-H', f'{key}: {val}']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += ['--load-cookies', self._write_cookies()]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
if retry[1] in ('inf', 'infinite'):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
proxy = self.params.get('proxy')
if proxy:
for var in ('http_proxy', 'https_proxy'):
cmd += ['--execute', f'{var}={proxy}']
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v'
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
@staticmethod
def supports_manifest(manifest):
UNSUPPORTED_FEATURES = [
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
]
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
return all(check_results)
@staticmethod
def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
def _call_downloader(self, tmpfilename, info_dict):
# FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
info_dict['__rpc'] = {
'port': find_available_port() or 19190,
'secret': str(uuid.uuid4()),
}
return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c', '--no-conf',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else:
cmd += ['--min-split-size', '1M']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += [f'--load-cookies={self._write_cookies()}']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}']
cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
if '__rpc' in info_dict:
cmd += [
'--enable-rpc',
f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
f'--rpc-secret={info_dict["__rpc"]["secret"]}']
# aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path.
# See: https://github.com/yt-dlp/yt-dlp/issues/276
# https://github.com/ytdl-org/youtube-dl/issues/20312
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict:
cmd += ['--uri-selector=inorder']
url_list_file = f'{tmpfilename}.frag.urls'
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode())
stream.close()
cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']]
return cmd
def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
        # Does not actually need to be a UUID, just unique
sanitycheck = str(uuid.uuid4())
d = json.dumps({
'jsonrpc': '2.0',
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode()
request = Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
'Content-Type': 'application/json',
'Content-Length': f'{len(d)}',
}, proxies={'all': None})
with self.ydl.urlopen(request) as r:
resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
return resp['result']
def _call_process(self, cmd, info_dict):
if '__rpc' not in info_dict:
return super()._call_process(cmd, info_dict)
send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
started = time.time()
fragmented = 'fragments' in info_dict
frag_count = len(info_dict['fragments']) if fragmented else 1
status = {
'filename': info_dict.get('_filename'),
'status': 'downloading',
'elapsed': 0,
'downloaded_bytes': 0,
'fragment_count': frag_count if fragmented else None,
'fragment_index': 0 if fragmented else None,
}
self._hook_progress(status, info_dict)
def get_stat(key, *obj, average=False):
val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
return sum(val) / (len(val) if average else 1)
with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
            # Add a small sleep so that the RPC client can receive the response,
            # otherwise the connection stalls indefinitely
time.sleep(0.2)
retval = p.poll()
while retval is None:
# We don't use tellStatus as we won't know the GID without reading stdout
# Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
active = send_rpc('aria2.tellActive')
completed = send_rpc('aria2.tellStopped', [0, frag_count])
downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
speed = get_stat('downloadSpeed', active)
total = frag_count * get_stat('totalLength', active, completed, average=True)
if total < downloaded:
total = None
status.update({
'downloaded_bytes': int(downloaded),
'speed': speed,
'total_bytes': None if fragmented else total,
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started,
})
self._hook_progress(status, info_dict)
if not active and len(completed) >= frag_count:
send_rpc('aria2.shutdown')
retval = p.wait()
break
time.sleep(0.1)
retval = p.poll()
return '', p.stderr.read(), retval
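# Sketch of the JSON-RPC exchange driven by aria2c_rpc above; the secret
# token is passed as the first positional param, per aria2's RPC
# convention (values are illustrative):
# -> {"jsonrpc": "2.0", "id": "<uuid>", "method": "aria2.tellActive",
#    "params": ["token:<secret>"]}
# <- {"jsonrpc": "2.0", "id": "<uuid>", "result": [{"completedLength": "1024",
#    "totalLength": "4096", "downloadSpeed": "512"}]}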
class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version'
EXE_NAME = 'http'
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += [f'{key}:{val}']
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
# 2: https://httpie.io/docs/cli/sessions
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += [f'Cookie:{cookie_header}']
return cmd
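# The command built above uses httpie's 'Name:value' positional syntax for
# headers and cookies; with an illustrative URL and values, the resulting
# invocation would look like:
# http --download --output out.mp4 https://example.com/v.mp4 \
#     'User-Agent:yt-dlp' 'Cookie:sid=abc'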
class FFmpegFD(ExternalFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
@classmethod
def available(cls, path=None):
return FFmpegPostProcessor().available
def on_process_started(self, proc, stdin):
""" Override this in subclasses """
pass
@classmethod
def can_merge_formats(cls, info_dict, params):
return (
info_dict.get('requested_formats')
and info_dict.get('protocol')
and not params.get('allow_unplayable_formats')
and 'no-direct-merge' not in params.get('compat_opts', [])
and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict):
ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
return False
ffpp.check_version()
args = [ffpp.executable, '-y']
for log_level in ('quiet', 'verbose'):
if self.params.get(log_level, False):
args += ['-loglevel', log_level]
break
if not self.params.get('verbose'):
args += ['-hide_banner']
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
        # These exist only for compatibility. Extractors should use
        # info_dict['downloader_options']['ffmpeg_args'] instead
args += info_dict.get('_ffmpeg_args') or []
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
            # supports seeking (by adding the header `Range: bytes=0-`), which
# can cause problems in some cases
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
env = None
proxy = self.params.get('proxy')
if proxy:
if not re.match(r'[\da-zA-Z]+://', proxy):
proxy = f'http://{proxy}'
if proxy.startswith('socks'):
self.report_warning(
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.')
# Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
env['HTTP_PROXY'] = proxy
env['http_proxy'] = proxy
protocol = info_dict.get('protocol')
if protocol == 'rtmp':
player_url = info_dict.get('player_url')
page_url = info_dict.get('page_url')
app = info_dict.get('app')
play_path = info_dict.get('play_path')
tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn')
if player_url is not None:
args += ['-rtmp_swfverify', player_url]
if page_url is not None:
args += ['-rtmp_pageurl', page_url]
if app is not None:
args += ['-rtmp_app', app]
if play_path is not None:
args += ['-rtmp_playpath', play_path]
if tc_url is not None:
args += ['-rtmp_tcurl', tc_url]
if flash_version is not None:
args += ['-rtmp_flashver', flash_version]
if live:
args += ['-rtmp_live', 'live']
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
elif isinstance(conn, str):
args += ['-rtmp_conn', conn]
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
is_http = re.match(r'https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies:
args.extend(['-cookies', ''.join(
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
for cookie in cookies)])
if fmt.get('http_headers') and is_http:
                # Trailing \r\n after each HTTP header is important to prevent a warning from ffmpeg:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
if start_time:
args += ['-ss', str(start_time)]
if end_time:
args += ['-t', str(end_time - start_time)]
url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
# so only local segments can be read unless we also include 'http,https,tcp,tls'
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
# ffmpeg incorrectly handles 'file:' URLs by only removing the
# 'file:' prefix and treating the rest as if it's a normal filepath.
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
# - On *nix, replace 'file://localhost/' with 'file:/'
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for i, fmt in enumerate(selected_formats):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
args += ['-fs', str(self._TEST_FILE_SIZE)]
ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
if use_mpegts is None:
use_mpegts = info_dict.get('is_live')
if use_mpegts:
args += ['-f', 'mpegts']
else:
args += ['-f', 'mp4']
if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp':
args += ['-f', 'flv']
elif ext == 'mp4' and tmpfilename == '-':
args += ['-f', 'mpegts']
elif ext == 'unknown_video':
ext = determine_ext(remove_end(tmpfilename, '.part'))
if ext == 'unknown_video':
self.report_warning(
'The video format is unknown and cannot be downloaded by ffmpeg. '
'Explicitly set the extension in the filename to attempt download in that format')
else:
self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
else:
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args]
args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
if piped:
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
except BaseException as e:
                # subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped:
proc.communicate_or_kill(b'q')
else:
proc.kill(timeout=None)
raise
return retval
_BY_NAME = {
klass.get_basename(): klass
for name, klass in globals().items()
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}
def list_external_downloaders():
return sorted(_BY_NAME.keys())
def get_external_downloader(external_downloader):
""" Given the name of the executable, see whether we support the given downloader """
bn = os.path.splitext(os.path.basename(external_downloader))[0]
return _BY_NAME.get(bn) or next((
klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
), None)
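# Usage sketch for the registry above: basenames are derived from the class
# names by stripping the 'FD' suffix, so for the classes in this file:
# list_external_downloaders() -> ['aria2c', 'axel', 'curl', 'ffmpeg', 'httpie', 'wget']
# get_external_downloader('aria2c')        -> Aria2cFD
# get_external_downloader('/usr/bin/wget') -> WgetFD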

View File

@@ -0,0 +1,427 @@
import base64
import io
import itertools
import struct
import time
import urllib.parse
from .fragment import FragmentFD
from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text
class DataTruncatedError(Exception):
pass
class FlvReader(io.BytesIO):
"""
    Reader for FLV files
The file format is documented in https://www.adobe.com/devnet/f4v.html
"""
def read_bytes(self, n):
data = self.read(n)
if len(data) < n:
raise DataTruncatedError(
                'FlvReader error: need %d bytes but only got %d bytes' % (
n, len(data)))
return data
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
return struct.unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
return struct.unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
return struct.unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
while True:
char = self.read_bytes(1)
if char == b'\x00':
break
res += char
return res
def read_box_info(self):
"""
Read a box and return the info as a tuple: (box_size, box_type, box_data)
"""
real_size = size = self.read_unsigned_int()
box_type = self.read_bytes(4)
header_end = 8
if size == 1:
real_size = self.read_unsigned_long_long()
header_end = 16
return real_size, box_type, self.read_bytes(real_size - header_end)
def read_asrt(self):
# version
self.read_unsigned_char()
# flags
self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for _ in range(quality_entry_count):
self.read_string()
segment_run_count = self.read_unsigned_int()
segments = []
for _ in range(segment_run_count):
first_segment = self.read_unsigned_int()
fragments_per_segment = self.read_unsigned_int()
segments.append((first_segment, fragments_per_segment))
return {
'segment_run': segments,
}
def read_afrt(self):
# version
self.read_unsigned_char()
# flags
self.read_bytes(3)
# time scale
self.read_unsigned_int()
quality_entry_count = self.read_unsigned_char()
# QualitySegmentUrlModifiers
for _ in range(quality_entry_count):
self.read_string()
fragments_count = self.read_unsigned_int()
fragments = []
for _ in range(fragments_count):
first = self.read_unsigned_int()
first_ts = self.read_unsigned_long_long()
duration = self.read_unsigned_int()
if duration == 0:
discontinuity_indicator = self.read_unsigned_char()
else:
discontinuity_indicator = None
fragments.append({
'first': first,
'ts': first_ts,
'duration': duration,
'discontinuity_indicator': discontinuity_indicator,
})
return {
'fragments': fragments,
}
def read_abst(self):
# version
self.read_unsigned_char()
# flags
self.read_bytes(3)
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
flags = self.read_unsigned_char()
live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
self.read_unsigned_long_long()
# SmpteTimeCodeOffset
self.read_unsigned_long_long()
self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char()
# ServerEntryTable
for _ in range(server_count):
self.read_string()
quality_count = self.read_unsigned_char()
# QualityEntryTable
for _ in range(quality_count):
self.read_string()
# DrmData
self.read_string()
# MetaData
self.read_string()
segments_count = self.read_unsigned_char()
segments = []
for _ in range(segments_count):
_box_size, box_type, box_data = self.read_box_info()
assert box_type == b'asrt'
segment = FlvReader(box_data).read_asrt()
segments.append(segment)
fragments_run_count = self.read_unsigned_char()
fragments = []
for _ in range(fragments_run_count):
_box_size, box_type, box_data = self.read_box_info()
assert box_type == b'afrt'
fragments.append(FlvReader(box_data).read_afrt())
return {
'segments': segments,
'fragments': fragments,
'live': live,
}
def read_bootstrap_info(self):
_, box_type, box_data = self.read_box_info()
assert box_type == b'abst'
return FlvReader(box_data).read_abst()
def read_bootstrap_info(bootstrap_bytes):
return FlvReader(bootstrap_bytes).read_bootstrap_info()
def build_fragments_list(boot_info):
""" Return a list of (segment, fragment) for each fragment in the video """
res = []
segment_run_table = boot_info['segments'][0]
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
        # In some live HDS streams (e.g. Rai), `fragments_count` is
        # abnormal, causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
fragments_count = 2
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
if boot_info['live']:
res = res[-2:]
return res
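# Worked example for build_fragments_list, assuming a minimal boot_info:
# boot_info = {'live': False,
#              'segments': [{'segment_run': [(1, 3)]}],
#              'fragments': [{'fragments': [{'first': 1}]}]}
# build_fragments_list(boot_info) -> [(1, 1), (1, 2), (1, 3)]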
def write_unsigned_int(stream, val):
stream.write(struct.pack('!I', val))
def write_unsigned_int_24(stream, val):
stream.write(struct.pack('!I', val)[1:])
def write_flv_header(stream):
"""Writes the FLV header to stream"""
# FLV header
stream.write(b'FLV\x01')
stream.write(b'\x05')
stream.write(b'\x00\x00\x00\x09')
stream.write(b'\x00\x00\x00\x00')
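# The header written above, byte for byte: 'FLV' signature, version 1,
# flags 0x05 (audio and video present), 9-byte header size, then a zero
# PreviousTagSize0 field:
# b'FLV\x01' + b'\x05' + b'\x00\x00\x00\x09' + b'\x00\x00\x00\x00'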
def write_metadata_tag(stream, metadata):
"""Writes optional metadata tag to stream"""
SCRIPT_TAG = b'\x12'
FLV_TAG_HEADER_LEN = 11
if metadata:
stream.write(SCRIPT_TAG)
write_unsigned_int_24(stream, len(metadata))
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
stream.write(metadata)
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
def remove_encrypted_media(media):
return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
and 'drmAdditionalHeaderSetId' not in e.attrib,
media))
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
def get_base_url(manifest):
base_url = xpath_text(
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
return base_url
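# Example of the namespace helper above, using ElementTree's Clark notation
# for namespaced tags:
# _add_ns('media')      -> '{http://ns.adobe.com/f4m/1.0}media'
# _add_ns('baseURL', 2) -> '{http://ns.adobe.com/f4m/2.0}baseURL'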
class F4mFD(FragmentFD):
"""
A downloader for f4m manifests or AdobeHDS.
"""
def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
if not self.params.get('allow_unplayable_formats'):
for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ doc.findall(_add_ns('drmAdditionalHeaderSet'))):
                # If the id attribute is missing, the DRM header applies to all
                # media nodes that lack a drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
self.report_error('Missing ID in f4m DRM')
media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media
def _get_bootstrap_from_url(self, bootstrap_url):
bootstrap = self.ydl.urlopen(bootstrap_url).read()
return read_bootstrap_info(bootstrap)
def _update_live_fragments(self, bootstrap_url, latest_fragment):
fragments_list = []
retries = 30
while (not fragments_list) and (retries > 0):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
fragments_list = build_fragments_list(boot_info)
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
if not fragments_list:
# Retry after a while
time.sleep(5.0)
retries -= 1
if not fragments_list:
self.report_error('Failed to update fragments')
return fragments_list
def _parse_bootstrap_node(self, node, base_url):
        # Sometimes non-empty inline bootstrap info can be specified along
        # with a bootstrap URL attribute (e.g. dummy inline bootstrap info
        # contains whitespace characters in [1]). We will prefer the bootstrap
        # URL over inline bootstrap info when present.
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)]
if requested_bitrate is None or len(formats) == 1:
# get the best format
formats = sorted(formats, key=lambda f: f[0])
_, media = formats[-1]
else:
_, media = next(filter(
lambda f: int(f[0]) == requested_bitrate, formats))
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
fragments_list = build_fragments_list(boot_info)
test = self.params.get('test', False)
if test:
# We only download the first fragment
fragments_list = fragments_list[:1]
total_frags = len(fragments_list)
        # For some Akamai manifests we'll need to add a query to the fragment URL
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
ctx = {
'filename': filename,
'total_frags': total_frags,
'live': bool(live),
}
self._prepare_frag_download(ctx)
dest_stream = ctx['dest_stream']
if ctx['complete_frags_downloaded_bytes'] == 0:
write_flv_header(dest_stream)
if not live:
write_metadata_tag(dest_stream, metadata)
base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)
frag_index = 0
while fragments_list:
seg_i, frag_i = fragments_list.pop(0)
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
query = []
if base_url_parsed.query:
query.append(base_url_parsed.query)
if akamai_pv:
query.append(akamai_pv.strip(';'))
if info_dict.get('extra_param_to_segment_url'):
query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
try:
success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success:
return False
down_data = self._read_fragment(ctx)
reader = FlvReader(down_data)
while True:
try:
_, box_type, box_data = reader.read_box_info()
except DataTruncatedError:
if test:
# In tests, segments may be truncated, and thus
# FlvReader may not be able to parse the whole
# chunk. If so, write the segment as is
# See https://github.com/ytdl-org/youtube-dl/issues/9214
dest_stream.write(down_data)
break
raise
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
except HTTPError as err:
if live and (err.status == 404 or err.status == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i
self.report_warning(msg)
fragments_list = []
else:
raise
if not fragments_list and not test and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
return self._finish_frag_download(ctx, info_dict)

View File

@@ -0,0 +1,46 @@
import threading
from .common import FileDownloader
from .external import FFmpegFD
class FC2LiveFD(FileDownloader):
"""
    Downloads FC2 live without being stopped.
    Note: this is not part of the public API and will be removed without notice.
DO NOT USE
"""
def real_download(self, filename, info_dict):
ws = info_dict['ws']
heartbeat_lock = threading.Lock()
heartbeat_state = [None, 1]
def heartbeat():
if heartbeat_state[1] < 0:
return
try:
heartbeat_state[1] += 1
ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
except Exception:
self.to_screen('[fc2:live] Heartbeat failed')
with heartbeat_lock:
heartbeat_state[0] = threading.Timer(30, heartbeat)
                heartbeat_state[0].daemon = True
heartbeat_state[0].start()
heartbeat()
new_info_dict = info_dict.copy()
new_info_dict.update({
'ws': None,
'protocol': 'live_ffmpeg',
})
try:
return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
finally:
# stop heartbeating
heartbeat_state[1] = -1
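# The self-rescheduling heartbeat above, reduced to a standalone sketch;
# 'send_heartbeat' and the 30-second interval stand in for the websocket
# send and the timer used by the downloader:
# def heartbeat():
#     send_heartbeat()
#     timer = threading.Timer(30, heartbeat)
#     timer.daemon = True
#     timer.start()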

View File

@@ -0,0 +1,526 @@
import concurrent.futures
import contextlib
import json
import math
import os
import struct
import time
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, traverse_obj
from ..utils.networking import HTTPHeaderDict
from ..utils.progress import ProgressCalculator
class HttpQuietDownloader(HttpFD):
    def to_screen(self, *args, **kwargs):
pass
to_console_title = to_screen
class FragmentFD(FileDownloader):
"""
A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
Available options:
fragment_retries: Number of times to retry a fragment for HTTP error
(DASH and hlsnative only). Default is 0 for API, but 10 for CLI
skip_unavailable_fragments:
Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
finished
concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads
_no_ytdl_file: Don't use .ytdl file
    For each incomplete fragment download, yt-dlp keeps on disk a special
    bookkeeping file with download state and metadata (in the future such files
    will be used for any incomplete download handled by yt-dlp). This file is
    used to properly handle resuming, check download file consistency and detect
    potential errors. The file has a .ytdl extension and represents a standard
JSON file of the following format:
extractor:
Dictionary of extractor related data. TBD.
downloader:
Dictionary of downloader related data. May contain following data:
current_fragment:
Dictionary with current (being downloaded) fragment data:
index: 0-based index of current fragment among all fragments
fragment_count:
Total count of fragments
    This feature is experimental and the file format may change in the future.
"""
def report_retry_fragment(self, err, frag_index, count, retries):
self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. '
'Use yt_dlp.downloader.FileDownloader.report_retry instead')
return self.report_retry(err, count, retries, frag_index)
def report_skip_fragment(self, frag_index, err=None):
err = f' {err};' if err else ''
self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...')
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx, info_dict):
self._prepare_frag_download(ctx)
self._start_frag_download(ctx, info_dict)
def __do_ytdl_file(self, ctx):
return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file')
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
try:
ytdl_data = json.loads(stream.read())
ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
if 'extra_state' in ytdl_data['downloader']:
ctx['extra_state'] = ytdl_data['downloader']['extra_state']
except Exception:
ctx['ytdl_corrupt'] = True
finally:
stream.close()
def _write_ytdl_file(self, ctx):
frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
try:
downloader = {
'current_fragment': {
'index': ctx['fragment_index'],
},
}
if 'extra_state' in ctx:
downloader['extra_state'] = ctx['extra_state']
if ctx.get('fragment_count') is not None:
downloader['fragment_count'] = ctx['fragment_count']
frag_index_stream.write(json.dumps({'downloader': downloader}))
finally:
frag_index_stream.close()
def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
frag_resume_len = 0
if ctx['dl'].params.get('continuedl', True):
frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
ctx['fragment_filename_sanitized'] = fragment_filename
return True
def _read_fragment(self, ctx):
if not ctx.get('fragment_filename_sanitized'):
return None
try:
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
except FileNotFoundError:
if ctx.get('live'):
return None
raise
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
return frag_content
def _append_fragment(self, ctx, frag_content):
try:
ctx['dest_stream'].write(frag_content)
ctx['dest_stream'].flush()
finally:
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False):
self.try_remove(ctx['fragment_filename_sanitized'])
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
if not ctx.setdefault('live', False):
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(self.ydl, {
**self.params,
'noprogress': True,
'test': False,
'sleep_interval': 0,
'max_sleep_interval': 0,
'sleep_interval_subtitles': 0,
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
# Establish possible resume length
resume_len = self.filesize_or_none(tmpfilename)
if resume_len > 0:
open_mode = 'ab'
# Should be initialized before ytdl file check
ctx.update({
'tmpfilename': tmpfilename,
'fragment_index': 0,
})
if self.__do_ytdl_file(ctx):
ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx)
is_corrupt = ctx.get('ytdl_corrupt') is True
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
if is_corrupt or is_inconsistent:
message = (
'.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download')
self.report_warning(
f'{message}. Restarting from the beginning ...')
ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx)
else:
if not continuedl:
if ytdl_file_exists:
self._read_ytdl_file(ctx)
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode)
ctx.update({
'dl': dl,
'dest_stream': dest_stream,
'tmpfilename': tmpfilename,
# Total complete fragments downloaded so far in bytes
'complete_frags_downloaded_bytes': resume_len,
})
def _start_frag_download(self, ctx, info_dict):
resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
ctx_id = ctx.get('ctx_id')
# Stores the download progress, updated by the progress hook
state = {
'status': 'downloading',
'downloaded_bytes': resume_len,
'fragment_index': ctx['fragment_index'],
'fragment_count': total_frags,
'filename': ctx['filename'],
'tmpfilename': ctx['tmpfilename'],
}
ctx['started'] = time.time()
progress = ProgressCalculator(resume_len)
def frag_progress_hook(s):
if s['status'] not in ('downloading', 'finished'):
return
if not total_frags and ctx.get('fragment_count'):
state['fragment_count'] = ctx['fragment_count']
if ctx_id is not None and s.get('ctx_id') != ctx_id:
return
state['max_progress'] = ctx.get('max_progress')
state['progress_idx'] = ctx.get('progress_idx')
state['elapsed'] = progress.elapsed
frag_total_bytes = s.get('total_bytes') or 0
s['fragment_info_dict'] = s.pop('info_dict', {})
# XXX: Fragment resume is not accounted for here
if not ctx['live']:
estimated_size = (
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
/ (state['fragment_index'] + 1) * total_frags)
progress.total = estimated_size
progress.update(s.get('downloaded_bytes'))
state['total_bytes_estimate'] = progress.total
else:
progress.update(s.get('downloaded_bytes'))
if s['status'] == 'finished':
state['fragment_index'] += 1
ctx['fragment_index'] = state['fragment_index']
progress.thread_reset()
state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
state['speed'] = ctx['speed'] = progress.speed.smooth
state['eta'] = progress.eta.smooth
self._hook_progress(state, info_dict)
ctx['dl'].add_progress_hook(frag_progress_hook)
return ctx['started']
def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx):
self.try_remove(self.ytdl_filename(ctx['filename']))
elapsed = time.time() - ctx['started']
to_file = ctx['tmpfilename'] != '-'
if to_file:
downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
else:
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
if not downloaded_bytes:
if to_file:
self.try_remove(ctx['tmpfilename'])
self.report_error('The downloaded file is empty')
return False
elif to_file:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
filetime = ctx.get('fragment_filetime')
if self.params.get('updatetime') and filetime:
with contextlib.suppress(Exception):
os.utime(ctx['filename'], (time.time(), filetime))
self._hook_progress({
'downloaded_bytes': downloaded_bytes,
'total_bytes': downloaded_bytes,
'filename': ctx['filename'],
'status': 'finished',
'elapsed': elapsed,
'ctx_id': ctx.get('ctx_id'),
'max_progress': ctx.get('max_progress'),
'progress_idx': ctx.get('progress_idx'),
}, info_dict)
return True
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
tmpfilename = self.temp_name(ctx['filename'])
# Should be initialized before ytdl file check
ctx.update({
'tmpfilename': tmpfilename,
'fragment_index': 0,
})
def decrypter(self, info_dict):
_key_cache = {}
def _get_key(url):
if url not in _key_cache:
_key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
return _key_cache[url]
def decrypt_fragment(fragment, frag_content):
if frag_content is None:
return
decrypt_info = fragment.get('decrypt_info')
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
return frag_content
iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = (decrypt_info.get('KEY')
or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI']))
            # Don't decrypt the content in tests since the data is explicitly truncated and is not padded to a valid block
            # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data was downloaded,
# not what it decrypts to.
if self.params.get('test', False):
return frag_content
return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))
return decrypt_fragment
def download_and_append_fragments_multiple(self, *args, **kwargs):
"""
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
"""
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
return self.download_and_append_fragments(*args[0], **kwargs)
max_workers = self.params.get('concurrent_fragment_downloads', 1)
if max_progress > 1:
self._prepare_multiline_status(max_progress)
is_live = any(traverse_obj(args, (..., 2, 'is_live')))
def thread_func(idx, ctx, fragments, info_dict, tpe):
ctx['max_progress'] = max_progress
ctx['progress_idx'] = idx
return self.download_and_append_fragments(
ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)
class FTPE(concurrent.futures.ThreadPoolExecutor):
            # __exit__ must not wait on shutdown, or it would block on the worker thread itself
def __exit__(self, exc_type, exc_val, exc_tb):
pass
if os.name == 'nt':
def future_result(future):
while True:
try:
return future.result(0.1)
except KeyboardInterrupt:
raise
except concurrent.futures.TimeoutError:
continue
else:
def future_result(future):
return future.result()
def interrupt_trigger_iter(fg):
for f in fg:
if not interrupt_trigger[0]:
break
yield f
spins = []
for idx, (ctx, fragments, info_dict) in enumerate(args):
tpe = FTPE(math.ceil(max_workers / max_progress))
job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe)
spins.append((tpe, job))
result = True
for tpe, job in spins:
try:
result = result and future_result(job)
except KeyboardInterrupt:
interrupt_trigger[0] = False
finally:
tpe.shutdown(wait=True)
if not interrupt_trigger[0] and not is_live:
raise KeyboardInterrupt
        # We expect the user wants to stop and DOES want the preceding postprocessors to run,
        # so return an intermediate result here instead of raising KeyboardInterrupt on live
return result
def download_and_append_fragments(
self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
pack_func=(lambda content, idx: content), finish_func=None,
tpe=None, interrupt_trigger=(True, )):
if not self.params.get('skip_unavailable_fragments', True):
is_fatal = lambda _: True
def download_fragment(fragment, ctx):
if not interrupt_trigger[0]:
return
frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None
headers = HTTPHeaderDict(info_dict.get('http_headers'))
byte_range = fragment.get('byte_range')
if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment
fatal = is_fatal(fragment.get('index') or (frag_index - 1))
def error_callback(err, count, retries):
if fatal and count > retries:
ctx['dest_stream'].close()
self.report_retry(err, count, retries, frag_index, fatal)
ctx['last_error'] = err
for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
try:
ctx['fragment_count'] = fragment.get('fragment_count')
if not self._download_fragment(
ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
return
except (HTTPError, IncompleteRead) as err:
retry.error = err
continue
except DownloadError: # has own retry settings
if fatal:
raise
def append_fragment(frag_content, frag_index, ctx):
if frag_content:
self._append_fragment(ctx, pack_func(frag_content, frag_index))
elif not is_fatal(frag_index - 1):
self.report_skip_fragment(frag_index, 'fragment not found')
else:
ctx['dest_stream'].close()
self.report_error(f'fragment {frag_index} not found, unable to continue')
return False
return True
decrypt_fragment = self.decrypter(info_dict)
max_workers = math.ceil(
self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
if max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
download_fragment(fragment, ctx_copy)
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
try:
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
ctx.update({
'fragment_filename_sanitized': frag_filename,
'fragment_index': frag_index,
})
if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
return False
except KeyboardInterrupt:
self._finish_multiline_status()
self.report_error(
'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False)
pool.shutdown(wait=False)
raise
else:
for fragment in fragments:
if not interrupt_trigger[0]:
break
try:
download_fragment(fragment, ctx)
result = append_fragment(
decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
except KeyboardInterrupt:
if info_dict.get('is_live'):
break
raise
if not result:
return False
if finish_func is not None:
ctx['dest_stream'].write(finish_func())
ctx['dest_stream'].flush()
return self._finish_frag_download(ctx, info_dict)

View File

@@ -0,0 +1,409 @@
import binascii
import io
import re
import urllib.parse
from . import get_suitable_downloader
from .external import FFmpegFD
from .fragment import FragmentFD
from .. import webvtt
from ..dependencies import Cryptodome
from ..utils import (
bug_reports_message,
parse_m3u8_attributes,
remove_start,
traverse_obj,
update_url_query,
urljoin,
)
from ..utils._utils import _request_dump_filename
class HlsFD(FragmentFD):
"""
    Download segments in an m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
    redefining the 'supports_manifest' function
"""
FD_NAME = 'hlsnative'
@staticmethod
def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
return bool(re.search('|'.join((
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
)), manifest))
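    # Illustrative manifest lines (assumed examples) that the patterns above match:
    #   #EXT-X-KEY:METHOD=SAMPLE-AES,URI="skd://<key-id>"
    #   #EXT-X-SESSION-KEY:METHOD=SAMPLE-AES,URI="...",KEYFORMAT="com.apple.streamingkeydelivery"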
@classmethod
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
            # The live-streams heuristic does not always work (e.g. this one is geo-restricted to Germany:
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
            # This heuristic is also incorrect, since segments may never be appended:
            # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
            # no segments will be appended to the end of the playlist.
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
# # event media playlists [4]
# r'#EXT-X-MAP:', # media initialization [5]
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
]
if not allow_unplayable_formats:
UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
]
def check_results():
yield not info_dict.get('is_live')
for feature in UNSUPPORTED_FEATURES:
yield not re.search(feature, manifest)
if not allow_unplayable_formats:
yield not cls._has_drm(manifest)
return all(check_results())
def real_download(self, filename, info_dict):
man_url = info_dict['url']
s = info_dict.get('hls_media_playlist_data')
if s:
self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info')
else:
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
s_bytes = urlh.read()
if self.params.get('write_pages'):
dump_filename = _request_dump_filename(
man_url, info_dict['id'], None,
trim_length=self.params.get('trim_file_name'))
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
with open(dump_filename, 'wb') as outf:
outf.write(s_bytes)
s = s_bytes.decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download:
has_ffmpeg = FFmpegFD.available()
if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s:
# Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg
ffmpeg_can_dl = not traverse_obj(info_dict, ((
'extra_param_to_segment_url', 'extra_param_to_key_url',
'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')),
), any))
message = 'The stream has AES-128 encryption and {} available'.format(
'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else
'pycryptodomex is not')
if has_ffmpeg and ffmpeg_can_dl:
can_download = False
else:
message += '; decryption will be performed natively, but will be extremely slow'
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
if info_dict.get('has_drm') and self.params.get('test'):
self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
else:
self.report_error(
'This format is DRM protected; Try selecting another format with --format or '
'add --check-formats to automatically fallback to the next best format', tb=False)
return False
message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params)
self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
return fd.real_download(filename, info_dict)
elif message:
self.report_warning(message)
is_webvtt = info_dict['ext'] == 'vtt'
if is_webvtt:
            real_downloader = None # Packing the fragments is not currently supported for external downloaders
else:
real_downloader = get_suitable_downloader(
info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
if real_downloader and not real_downloader.supports_manifest(s):
real_downloader = None
if real_downloader:
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s):
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
def is_ad_fragment_end(s):
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
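        # Illustrative marker lines (assumed examples) for the two helpers above:
        #   '#ANVATO-SEGMENT-INFO:type=ad'   -> an ad break starts
        #   '#UPLYNK-SEGMENT:...,segment'    -> the ad break ends, content resumes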
fragments = []
media_frags = 0
ad_frags = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if not line:
continue
if line.startswith('#'):
if is_ad_fragment_start(line):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
continue
if ad_frag_next:
ad_frags += 1
continue
media_frags += 1
ctx = {
'filename': filename,
'total_frags': media_frags,
'ad_frags': ad_frags,
}
if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx, info_dict)
extra_state = ctx.setdefault('extra_state', {})
format_index = info_dict.get('format_index')
extra_segment_query = None
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_key_query = None
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key'))
if external_aes_key:
external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x'))
assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key'
external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv'))
if external_aes_iv:
external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
byte_range = {}
byte_range_offset = 0
discontinuity_count = 0
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
if format_index is not None and discontinuity_count != format_index:
continue
if ad_frag_next:
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
frag_url = urljoin(man_url, line)
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
fragments.append({
'frag_index': frag_index,
'url': frag_url,
'decrypt_info': decrypt_info,
'byte_range': byte_range,
'media_sequence': media_sequence,
})
media_sequence += 1
# If the byte_range is truthy, reset it after appending a fragment that uses it
if byte_range:
byte_range_offset = byte_range['end']
byte_range = {}
elif line.startswith('#EXT-X-MAP'):
if format_index is not None and discontinuity_count != format_index:
continue
if frag_index > 0:
self.report_error(
'Initialization fragment found after media fragments, unable to download')
return False
frag_index += 1
map_info = parse_m3u8_attributes(line[11:])
frag_url = urljoin(man_url, map_info.get('URI'))
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
map_byte_range = {}
if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0
map_byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
fragments.append({
'frag_index': frag_index,
'url': frag_url,
'decrypt_info': decrypt_info,
'byte_range': map_byte_range,
'media_sequence': media_sequence,
})
media_sequence += 1
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
if decrypt_info['METHOD'] == 'AES-128':
if external_aes_iv:
decrypt_info['IV'] = external_aes_iv
elif 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
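                            # e.g. an attribute like IV=0x1A2B... is a hex string:
                            # [2:] drops the '0x' and zfill(32) left-pads to the full
                            # 16-byte (32 hex digit) AES block size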
if external_aes_key:
decrypt_info['KEY'] = external_aes_key
else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_key_query or extra_segment_query:
                                # Fall back to extra_segment_query for the key URL, for backwards compat
decrypt_info['URI'] = update_url_query(
decrypt_info['URI'], extra_key_query or extra_segment_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'):
splitted_byte_range = line[17:].split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset
byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
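                    # e.g. '#EXT-X-BYTERANGE:1000@2000' yields {'start': 2000, 'end': 3000};
                    # without the '@offset' part, the range continues from the previous
                    # fragment's end (byte_range_offset)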
elif is_ad_fragment_start(line):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
elif line.startswith('#EXT-X-DISCONTINUITY'):
discontinuity_count += 1
i += 1
# We only download the first fragment during the test
if self.params.get('test', False):
fragments = [fragments[0] if fragments else None]
if real_downloader:
info_dict['fragments'] = fragments
fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
if is_webvtt:
def pack_fragment(frag_content, frag_index):
output = io.StringIO()
adjust = 0
overflow = False
mpegts_last = None
for block in webvtt.parse_fragment(frag_content):
if isinstance(block, webvtt.CueBlock):
extra_state['webvtt_mpegts_last'] = mpegts_last
if overflow:
extra_state['webvtt_mpegts_adjust'] += 1
overflow = False
block.start += adjust
block.end += adjust
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
ready = []
i = 0
is_new = True
while i < len(dedup_window):
wcue = dedup_window[i]
wblock = webvtt.CueBlock.from_json(wcue)
i += 1
if wblock.hinges(block):
wcue['end'] = block.end
is_new = False
continue
if wblock == block:
is_new = False
continue
if wblock.end > block.start:
continue
ready.append(wblock)
i -= 1
del dedup_window[i]
if is_new:
dedup_window.append(block.as_json)
for block in ready:
block.write_into(output)
# we only emit cues once they fall out of the duplicate window
continue
elif isinstance(block, webvtt.Magic):
# take care of MPEG PES timestamp overflow
if block.mpegts is None:
block.mpegts = 0
extra_state.setdefault('webvtt_mpegts_adjust', 0)
block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
overflow = True
block.mpegts += 1 << 33
mpegts_last = block.mpegts
if frag_index == 1:
extra_state['webvtt_mpegts'] = block.mpegts or 0
extra_state['webvtt_local'] = block.local or 0
# XXX: block.local = block.mpegts = None ?
else:
if block.mpegts is not None and block.local is not None:
adjust = (
(block.mpegts - extra_state.get('webvtt_mpegts', 0))
- (block.local - extra_state.get('webvtt_local', 0))
)
continue
elif isinstance(block, webvtt.HeaderBlock):
if frag_index != 1:
# XXX: this should probably be silent as well
# or verify that all segments contain the same data
self.report_warning(bug_reports_message(
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
'if the subtitles display incorrectly,'))
continue
block.write_into(output)
return output.getvalue().encode()
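            # Rough arithmetic behind the overflow handling above: MPEG-TS PES
            # timestamps are 33-bit values on a 90 kHz clock, so they wrap every
            # 2**33 / 90000 seconds (roughly 26.5 hours); each detected wrap adds
            # 1 << 33 ticks back onto the running timestamp.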
def fin_fragments():
dedup_window = extra_state.get('webvtt_dedup_window')
if not dedup_window:
return b''
output = io.StringIO()
for cue in dedup_window:
webvtt.CueBlock.from_json(cue).write_into(output)
return output.getvalue().encode()
if len(fragments) == 1:
self.download_and_append_fragments(ctx, fragments, info_dict)
else:
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
else:
return self.download_and_append_fragments(ctx, fragments, info_dict)

View File

@@ -0,0 +1,376 @@
import os
import random
import time
from .common import FileDownloader
from ..networking import Request
from ..networking.exceptions import (
CertificateVerifyError,
HTTPError,
TransportError,
)
from ..utils import (
ContentTooShortError,
RetryManager,
ThrottledDownload,
int_or_none,
parse_http_range,
try_call,
)
from ..utils.networking import HTTPHeaderDict
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
request_data = info_dict.get('request_data', None)
request_extensions = {}
impersonate_target = self._get_impersonate_target(info_dict)
if impersonate_target is not None:
request_extensions['impersonate'] = impersonate_target
class DownloadContext(dict):
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
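        # DownloadContext maps attribute access onto dict access, so ctx.resume_len
        # below is the same item as ctx['resume_len'], and a missing attribute
        # reads as None (dict.get semantics) instead of raising AttributeError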
ctx = DownloadContext()
ctx.filename = filename
ctx.tmpfilename = self.temp_name(filename)
ctx.stream = None
# Disable compression
headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
self.params.get('http_chunk_size')
or info_dict.get('downloader_options', {}).get('http_chunk_size')
or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
# parse given Range
req_start, req_end, _ = parse_http_range(headers.get('Range'))
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(ctx.tmpfilename):
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
ctx.is_resume = ctx.resume_len > 0
class SucceedDownload(Exception):
pass
class RetryDownload(Exception):
def __init__(self, source_error):
self.source_error = source_error
class NextFragment(Exception):
pass
def establish_connection():
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0:
range_start = ctx.resume_len
if req_start is not None:
# offset the beginning of Range to be within request
range_start += req_start
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
elif req_start is not None:
range_start = req_start
elif ctx.chunk_size > 0:
range_start = 0
else:
range_start = None
ctx.is_resume = False
if ctx.chunk_size:
chunk_aware_end = range_start + ctx.chunk_size - 1
# we're not allowed to download outside Range
range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
elif req_end is not None:
# there's no need for chunked downloads, so download until the end of Range
range_end = req_end
else:
range_end = None
if try_call(lambda: range_start > range_end):
ctx.resume_len = 0
ctx.open_mode = 'wb'
raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1
request = Request(url, request_data, headers, extensions=request_extensions)
has_range = range_start is not None
if has_range:
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
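            # e.g. resuming at byte 1048576 with a 1 MiB chunk sends
            # 'Range: bytes=1048576-2097151'; when range_end is None the suffix
            # is left empty ('bytes=1048576-'), requesting everything to EOF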
# Establish connection
try:
ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
                # to match the value of the requested Range HTTP header. This is due to
                # webservers that don't support resuming and serve the whole file with no
                # Content-Range set in the response despite the requested Range (see
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
content_range_start, content_range_end, content_len = parse_http_range(content_range)
# Content-Range is present and matches requested Range, resume is possible
if range_start == content_range_start and (
# Non-chunked download
not ctx.chunk_size
                        # Chunked download, and the requested piece
                        # (or a part of it) is promised to be served
or content_range_end == range_end
or content_len < range_end):
ctx.content_len = content_len
if content_len or req_end:
ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
return
                    # Content-Range is either not present or invalid. Assume the remote
                    # webserver is trying to send the whole file; resume is not possible,
                    # so wipe the local file and perform a full redownload
elif range_start > 0:
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
except HTTPError as err:
if err.status == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
Request(url, request_data, headers))
content_length = ctx.data.headers['Content-Length']
except HTTPError as err:
if err.status < 500 or err.status >= 600:
raise
else:
# Examine the reported length
if (content_length is not None
and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation of the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs by less than 100 bytes from
                            # the one on the hard drive.
self.report_file_already_downloaded(ctx.filename)
self.try_rename(ctx.tmpfilename, ctx.filename)
self._hook_progress({
'filename': ctx.filename,
'status': 'finished',
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
}, info_dict)
raise SucceedDownload
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
elif err.status < 500 or err.status >= 600:
# Unexpected HTTP error
raise
raise RetryDownload(err)
except CertificateVerifyError:
raise
except TransportError as err:
raise RetryDownload(err)
def close_stream():
if ctx.stream is not None:
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
def download():
data_len = ctx.data.headers.get('Content-length')
if ctx.data.headers.get('Content-encoding'):
# Content-encoding is present, Content-length is not reliable anymore as we are
# doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
data_len = None
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + ctx.resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen(
f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.')
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(
f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.')
return False
byte_counter = 0 + ctx.resume_len
block_size = ctx.block_size
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
def retry(e):
close_stream()
if ctx.tmpfilename == '-':
ctx.resume_len = byte_counter
else:
try:
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
except FileNotFoundError:
ctx.resume_len = 0
raise RetryDownload(e)
while True:
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
except TransportError as err:
retry(err)
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if ctx.stream is None:
try:
ctx.stream, ctx.tmpfilename = self.sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except OSError as err:
self.report_error(f'unable to open for writing: {err}')
return False
try:
ctx.stream.write(data_block)
except OSError as err:
self.to_stderr('\n')
self.report_error(f'unable to write data: {err}')
return False
# Apply rate limit
self.slow_down(start, now, byte_counter - ctx.resume_len)
# end measuring of one loop run
now = time.time()
after = now
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if ctx.data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': ctx.data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
'elapsed': now - ctx.start_time,
'ctx_id': info_dict.get('ctx_id'),
}, info_dict)
if data_len is not None and byte_counter == data_len:
break
if speed and speed < (self.params.get('throttledratelimit') or 0):
# The speed must stay below the limit for 3 seconds
                    # This prevents raising an error when the speed temporarily goes down
if ctx.throttle_start is None:
ctx.throttle_start = now
elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close()
raise ThrottledDownload
elif speed:
ctx.throttle_start = None
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
return False
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
raise NextFragment
if ctx.tmpfilename != '-':
ctx.stream.close()
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
retry(err)
self.try_rename(ctx.tmpfilename, ctx.filename)
# Update file modification time
if self.params.get('updatetime'):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - ctx.start_time,
'ctx_id': info_dict.get('ctx_id'),
}, info_dict)
return True
for retry in RetryManager(self.params.get('retries'), self.report_retry):
try:
establish_connection()
return download()
except RetryDownload as err:
retry.error = err.source_error
continue
except NextFragment:
retry.error = None
retry.attempt -= 1
continue
except SucceedDownload:
return True
except: # noqa: E722
close_stream()
raise
return False

View File

@@ -0,0 +1,283 @@
import binascii
import io
import struct
import time
from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import RetryManager
u8 = struct.Struct('>B')
u88 = struct.Struct('>Bx')
u16 = struct.Struct('>H')
u1616 = struct.Struct('>Hxx')
u32 = struct.Struct('>I')
u64 = struct.Struct('>Q')
s88 = struct.Struct('>bx')
s16 = struct.Struct('>h')
s1616 = struct.Struct('>hxx')
s32 = struct.Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
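# Note on the packed formats above: u1616/s1616 write 16.16 fixed-point values
# with a zero fraction and s88 writes 8.8 fixed point. unity_matrix is the
# ISO BMFF identity transform: 0x10000 is 1.0 in 16.16 notation and 0x40000000
# is 1.0 in the 2.30 notation used for the matrix's last column (u, v, w).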
TRACK_ENABLED = 0x1
TRACK_IN_MOVIE = 0x2
TRACK_IN_PREVIEW = 0x4
SELF_CONTAINED = 0x1
def box(box_type, payload):
return u32.pack(8 + len(payload)) + box_type + payload
def full_box(box_type, version, flags, payload):
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
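# Illustrative layout of what the helpers above emit: a box is
# '4-byte big-endian size (8 + payload) | 4-byte type | payload', and a full box
# prefixes the payload with a version byte plus 24 bits of flags
# (u32.pack(flags)[1:] keeps only the low three bytes), e.g.:
#   box(b'free', b'')            -> b'\x00\x00\x00\x08free'
#   full_box(b'mvhd', 1, 0, b'') -> b'\x00\x00\x00\x0cmvhd\x01\x00\x00\x00'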
def write_piff_header(stream, params):
track_id = params['track_id']
fourcc = params['fourcc']
duration = params['duration']
timescale = params.get('timescale', 10000000)
language = params.get('language', 'und')
height = params.get('height', 0)
width = params.get('width', 0)
stream_type = params['stream_type']
creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand
ftyp_payload += u32.pack(1) # minor version
ftyp_payload += b'piff' + b'iso2' # compatible brands
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
mvhd_payload = u64.pack(creation_time)
mvhd_payload += u64.pack(modification_time)
mvhd_payload += u32.pack(timescale)
mvhd_payload += u64.pack(duration)
mvhd_payload += s1616.pack(1) # rate
mvhd_payload += s88.pack(1) # volume
mvhd_payload += u16.pack(0) # reserved
mvhd_payload += u32.pack(0) * 2 # reserved
mvhd_payload += unity_matrix
mvhd_payload += u32.pack(0) * 6 # pre defined
mvhd_payload += u32.pack(0xffffffff) # next track id
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
tkhd_payload = u64.pack(creation_time)
tkhd_payload += u64.pack(modification_time)
tkhd_payload += u32.pack(track_id) # track id
tkhd_payload += u32.pack(0) # reserved
tkhd_payload += u64.pack(duration)
tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group
tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width)
tkhd_payload += u1616.pack(height)
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
mdhd_payload = u64.pack(creation_time)
mdhd_payload += u64.pack(modification_time)
mdhd_payload += u32.pack(timescale)
mdhd_payload += u64.pack(duration)
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
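    # e.g. the ISO 639-2 code is packed 5 bits per character (each char minus
    # 0x60), so 'und' becomes (0x15 << 10) | (0x0e << 5) | 0x04 == 0x55c4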
mdhd_payload += u16.pack(0) # pre defined
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined
if stream_type == 'audio': # handler type
hdlr_payload += b'soun'
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'SoundHandler\0' # name
elif stream_type == 'video':
hdlr_payload += b'vide'
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'VideoHandler\0' # name
elif stream_type == 'text':
hdlr_payload += b'subt'
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += b'SubtitleHandler\0' # name
else:
assert False
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
if stream_type == 'audio':
smhd_payload = s88.pack(0) # balance
smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
elif stream_type == 'video':
vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
elif stream_type == 'text':
media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header
else:
assert False
minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
stsd_payload = u32.pack(1) # entry count
sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index
if stream_type == 'audio':
sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u1616.pack(params['sampling_rate'])
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
if fourcc == 'EC-3':
sample_entry_box = box(b'ec-3', sample_entry_payload)
elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
sample_entry_payload += u16.pack(width)
sample_entry_payload += u16.pack(height)
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
sample_entry_payload += u32.pack(0) # reserved
sample_entry_payload += u16.pack(1) # frame count
sample_entry_payload += u8.pack(0) * 32 # compressor name
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
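            # codec_private_data is typically Annex-B style: the SPS and PPS NAL
            # units each preceded by a 0x00000001 start code, so splitting on
            # u32.pack(1) leaves [b'', sps, pps] and [1:] keeps the two units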
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
avcc_payload += sps
avcc_payload += u8.pack(1) # number of pps
avcc_payload += u16.pack(len(pps))
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
else:
assert False
elif stream_type == 'text':
if fourcc == 'TTML':
sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace
sample_entry_payload += b'\0' # schema location
            sample_entry_payload += b'\0' # auxiliary mime types (??)
sample_entry_box = box(b'stpp', sample_entry_payload)
else:
assert False
else:
assert False
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
stts_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
stsc_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
stco_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
mdia_payload += box(b'minf', minf_payload) # Media Information Box
trak_payload += box(b'mdia', mdia_payload) # Media Box
moov_payload += box(b'trak', trak_payload) # Track Box
mehd_payload = u64.pack(duration)
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
trex_payload = u32.pack(track_id) # track id
trex_payload += u32.pack(1) # default sample description index
trex_payload += u32.pack(0) # default sample duration
trex_payload += u32.pack(0) # default sample size
trex_payload += u32.pack(0) # default sample flags
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
stream.write(box(b'moov', moov_payload)) # Movie Box
def extract_box_data(data, box_sequence):
data_reader = io.BytesIO(data)
while True:
box_size = u32.unpack(data_reader.read(4))[0]
box_type = data_reader.read(4)
if box_type == box_sequence[0]:
box_data = data_reader.read(box_size - 8)
if len(box_sequence) == 1:
return box_data
return extract_box_data(box_data, box_sequence[1:])
data_reader.seek(box_size - 8, 1)
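# Illustrative usage (mirroring the call made in IsmFD below):
#   extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
# descends the size-prefixed boxes and returns the tfhd payload bytes.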
class IsmFD(FragmentFD):
"""
    Download segments in an ISM manifest
"""
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
'total_frags': len(segments),
}
self._prepare_and_start_frag_download(ctx, info_dict)
extra_state = ctx.setdefault('extra_state', {
'ism_track_written': False,
})
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
for segment in segments:
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=frag_index, fatal=not skip_unavailable_fragments)
for retry in retry_manager:
try:
success = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
return False
frag_content = self._read_fragment(ctx)
if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
except HTTPError as err:
retry.error = err
continue
if retry_manager.error:
if not skip_unavailable_fragments:
return False
self.report_skip_fragment(frag_index)
return self._finish_frag_download(ctx, info_dict)

View File

@@ -0,0 +1,174 @@
import io
import quopri
import re
import uuid
from .fragment import FragmentFD
from ..compat import imghdr
from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
_STYLESHEET = '''\
html, body {
margin: 0;
padding: 0;
height: 100vh;
}
html {
overflow-y: scroll;
scroll-snap-type: y mandatory;
}
body {
scroll-snap-type: y mandatory;
display: flex;
flex-flow: column;
}
body > figure {
max-width: 100vw;
max-height: 100vh;
scroll-snap-align: center;
}
body > figure > figcaption {
text-align: center;
height: 2.5em;
}
body > figure > img {
display: block;
margin: auto;
max-width: 100%;
max-height: calc(100vh - 5em);
}
'''
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
@staticmethod
def _escape_mime(s):
return '=?utf-8?Q?' + (b''.join(
bytes((b,)) if b >= 0x20 else b'=%02X' % b
for b in quopri.encodestring(s.encode(), header=True)
)).decode('us-ascii') + '?='
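    # Illustrative (assumed example): this emits an RFC 2047 encoded-word, e.g.
    #   MhtmlFD._escape_mime('Café')  ->  '=?utf-8?Q?Caf=C3=A9?='
    # with quopri supplying the quoted-printable escapes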
def _gen_cid(self, i, fragment, frag_boundary):
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
def _gen_stub(self, *, fragments, frag_boundary, title):
output = io.StringIO()
output.write(
'<!DOCTYPE html>'
'<html>'
'<head>'
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
f'<title>{escapeHTML(title)}</title>'
f'<style>{self._STYLESHEET}</style>'
'<body>')
t0 = 0
for i, frag in enumerate(fragments):
output.write('<figure>')
try:
t1 = t0 + frag['duration']
output.write((
'<figcaption>Slide #{num}: {t0} {t1} (duration: {duration})</figcaption>'
).format(
num=i + 1,
t0=srt_subtitles_timecode(t0),
t1=srt_subtitles_timecode(t1),
duration=formatSeconds(frag['duration'], msec=True),
))
except (KeyError, ValueError, TypeError):
t1 = None
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
output.write('</figure>')
t0 = t1
return output.getvalue()
def real_download(self, filename, info_dict):
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
title = info_dict.get('title', info_dict['format_id'])
origin = info_dict.get('webpage_url', info_dict['url'])
ctx = {
'filename': filename,
'total_frags': len(fragments),
}
self._prepare_and_start_frag_download(ctx, info_dict)
extra_state = ctx.setdefault('extra_state', {
'header_written': False,
'mime_boundary': str(uuid.uuid4()).replace('-', ''),
})
frag_boundary = extra_state['mime_boundary']
if not extra_state['header_written']:
stub = self._gen_stub(
fragments=fragments,
frag_boundary=frag_boundary,
title=title,
)
ctx['dest_stream'].write((
'MIME-Version: 1.0\r\n'
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
f'Subject: {self._escape_mime(title)}\r\n'
'Content-type: multipart/related; '
f'boundary="{frag_boundary}"; '
'type="text/html"\r\n'
f'X.yt-dlp.Origin: {origin}\r\n'
'\r\n'
f'--{frag_boundary}\r\n'
'Content-Type: text/html; charset=utf-8\r\n'
f'Content-Length: {len(stub)}\r\n'
'\r\n'
f'{stub}\r\n').encode())
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):
if (i + 1) <= ctx['fragment_index']:
continue
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
continue
frag_content = self._read_fragment(ctx)
frag_header = io.BytesIO()
frag_header.write(
b'--%b\r\n' % frag_boundary.encode('us-ascii'))
frag_header.write(
b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
frag_header.write(
b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
frag_header.write(
b'Content-length: %u\r\n' % len(frag_content))
frag_header.write(
b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
frag_header.write(
b'X.yt-dlp.Duration: %f\r\n' % fragment['duration'])
frag_header.write(b'\r\n')
self._append_fragment(
ctx, frag_header.getvalue() + frag_content + b'\r\n')
ctx['dest_stream'].write(
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
return self._finish_frag_download(ctx, info_dict)

View File

@@ -0,0 +1,90 @@
import json
import threading
import time
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
from ..networking.websocket import WebSocketResponse
from ..utils import DownloadError, str_or_none, truncate_string
from ..utils.traversal import traverse_obj
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
video_id = info_dict['id']
opts = info_dict['downloader_options']
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
new_info_dict['protocol'] = 'm3u8'
def communicate_ws(reconnect):
            # Support --load-info-json as if it were a reconnect attempt
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
ws = self.ydl.urlopen(Request(
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
if self.ydl.params.get('verbose', False):
self.write_debug('Sending startWatching request')
ws.send(json.dumps({
'data': {
'reconnect': True,
'room': {
'commentable': True,
'protocol': 'webSocket',
},
'stream': {
'accessRightMethod': 'single_cookie',
'chasePlay': False,
'latency': 'high',
'protocol': 'hls',
'quality': quality,
},
},
'type': 'startWatching',
}))
else:
ws = ws_extractor
with ws:
while True:
recv = ws.recv()
if not recv:
continue
data = json.loads(recv)
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
self.write_debug(data)
return True
elif data.get('type') == 'error':
self.write_debug(data)
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
def ws_main():
reconnect = False
while True:
try:
ret = communicate_ws(reconnect)
if ret is True:
return
except BaseException as e:
self.to_screen(
                    f'[niconico:live] {video_id}: Connection error occurred, reconnecting after 10 seconds: {e}')
time.sleep(10)
continue
finally:
reconnect = True
thread = threading.Thread(target=ws_main, daemon=True)
thread.start()
return dl.download(filename, new_info_dict)

View File

@@ -0,0 +1,212 @@
import os
import re
import subprocess
import time
from .common import FileDownloader
from ..utils import (
Popen,
check_executable,
encodeArgument,
get_exe_version,
)
def rtmpdump_version():
return get_exe_version(
'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
class RtmpFD(FileDownloader):
def real_download(self, filename, info_dict):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
try:
while not proc_stderr_closed:
# read line from stderr
line = ''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
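                    # Illustrative stderr line (assumed format) that this regex targets:
                    #   '3064.123 kB / 12.45 sec (5.2%)'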
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
}, info_dict)
cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
}, info_dict)
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
if not cursor_in_new_line:
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
proc.kill(timeout=None)
raise
url = info_dict['url']
player_url = info_dict.get('player_url')
page_url = info_dict.get('page_url')
app = info_dict.get('app')
play_path = info_dict.get('play_path')
tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn')
protocol = info_dict.get('rtmp_protocol')
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
continue_dl = self.params.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
# Check for rtmpdump first
if not check_executable('rtmpdump', ['-h']):
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,
'-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if app is not None:
basic_args += ['--app', app]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', tc_url]
if test:
basic_args += ['--stop', '1']
if flash_version is not None:
basic_args += ['--flashVer', flash_version]
if live:
basic_args += ['--live']
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]
if real_time:
basic_args += ['--realtime']
args = basic_args
if not no_resume and continue_dl and not live:
args += ['--resume']
if not live and continue_dl:
args += ['--skip', '1']
args = [encodeArgument(a) for a in args]
self._debug_cmd(args, exe='rtmpdump')
RD_SUCCESS = 0
RD_FAILED = 1
RD_INCOMPLETE = 2
RD_NO_CONNECT = 3
started = time.time()
try:
retval = run_rtmpdump(args)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
raise
retval = RD_SUCCESS
self.to_screen('\n[rtmpdump] Interrupted by user')
if retval == RD_NO_CONNECT:
self.report_error('[rtmpdump] Could not connect to RTMP server.')
return False
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed
args = [*basic_args, '--resume']
if retval == RD_FAILED:
args += ['--skip', '1']
args = [encodeArgument(a) for a in args]
retval = run_rtmpdump(args)
cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == RD_FAILED:
break
            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = RD_SUCCESS
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
}, info_dict)
return True
else:
self.to_stderr('\n')
self.report_error('rtmpdump exited with code %d' % retval)
return False

View File

@@ -0,0 +1,42 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import check_executable
class RtspFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
if check_executable('mplayer', ['-h']):
args = [
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
'-dumpstream', '-dumpfile', tmpfilename, url]
elif check_executable('mpv', ['-h']):
args = [
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
else:
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
return False
self._debug_cmd(args)
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(tmpfilename)
self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
}, info_dict)
return True
else:
self.to_stderr('\n')
self.report_error('%s exited with code %d' % (args[0], retval))
return False

View File

@@ -0,0 +1,53 @@
import asyncio
import contextlib
import os
import signal
import threading
from .common import FileDownloader
from .external import FFmpegFD
from ..dependencies import websockets
class FFmpegSinkFD(FileDownloader):
""" A sink to ffmpeg for downloading fragments in any form """
def real_download(self, filename, info_dict):
info_copy = info_dict.copy()
info_copy['url'] = '-'
async def call_conn(proc, stdin):
try:
await self.real_connection(stdin, info_dict)
except OSError:
pass
finally:
with contextlib.suppress(OSError):
stdin.flush()
stdin.close()
os.kill(os.getpid(), signal.SIGINT)
class FFmpegStdinFD(FFmpegFD):
@classmethod
def get_basename(cls):
return FFmpegFD.get_basename()
def on_process_started(self, proc, stdin):
thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), ))
thread.start()
return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy)
async def real_connection(self, sink, info_dict):
""" Override this in subclasses """
raise NotImplementedError('This method must be implemented by subclasses')
class WebSocketFragmentFD(FFmpegSinkFD):
async def real_connection(self, sink, info_dict):
async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws:
while True:
recv = await ws.recv()
if isinstance(recv, str):
recv = recv.encode('utf8')
sink.write(recv)

View File

@@ -0,0 +1,228 @@
import json
import time
from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import (
RegexNotFoundError,
RetryManager,
dict_get,
int_or_none,
try_get,
)
from ..utils.networking import HTTPHeaderDict
class YoutubeLiveChatFD(FragmentFD):
""" Downloads YouTube live chats fragment by fragment """
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
test = self.params.get('test', False)
ctx = {
'filename': filename,
'live': True,
'total_frags': None,
}
from ..extractor.youtube import YoutubeBaseInfoExtractor
ie = YoutubeBaseInfoExtractor(self.ydl)
start_time = int(time.time() * 1000)
def dl_fragment(url, data=None, headers=None):
http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
return self._download_fragment(ctx, url, info_dict, http_headers, data)
def parse_actions_replay(live_chat_continuation):
offset = continuation_id = click_tracking_params = None
processed_fragment = bytearray()
for action in live_chat_continuation.get('actions', []):
if 'replayChatItemAction' in action:
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode() + b'\n')
if offset is not None:
continuation = try_get(
live_chat_continuation,
lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
if continuation:
continuation_id = continuation.get('continuation')
click_tracking_params = continuation.get('clickTrackingParams')
self._append_fragment(ctx, processed_fragment)
return continuation_id, offset, click_tracking_params
def try_refresh_replay_beginning(live_chat_continuation):
# choose the second option that contains the unfiltered live chat replay
refresh_continuation = try_get(
live_chat_continuation,
lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
if refresh_continuation:
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')
refresh_continuation_id = refresh_continuation.get('continuation')
offset = 0
click_tracking_params = refresh_continuation.get('trackingParams')
return refresh_continuation_id, offset, click_tracking_params
return parse_actions_replay(live_chat_continuation)
live_offset = 0
def parse_actions_live(live_chat_continuation):
nonlocal live_offset
continuation_id = click_tracking_params = None
processed_fragment = bytearray()
for action in live_chat_continuation.get('actions', []):
timestamp = self.parse_live_timestamp(action)
if timestamp is not None:
live_offset = timestamp - start_time
# compatibility with replay format
pseudo_action = {
'replayChatItemAction': {'actions': [action]},
'videoOffsetTimeMsec': str(live_offset),
'isLive': True,
}
processed_fragment.extend(
json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
continuation_data_getters = [
lambda x: x['continuations'][0]['invalidationContinuationData'],
lambda x: x['continuations'][0]['timedContinuationData'],
]
continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
if continuation_data:
continuation_id = continuation_data.get('continuation')
click_tracking_params = continuation_data.get('clickTrackingParams')
timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
if timeout_ms is not None:
time.sleep(timeout_ms / 1000)
self._append_fragment(ctx, processed_fragment)
return continuation_id, live_offset, click_tracking_params
def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
try:
success = dl_fragment(url, request_data, headers)
if not success:
return False, None, None, None
raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
data = None
if not data:
data = json.loads(raw_fragment)
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
or (frag_index == 1 and try_refresh_replay_beginning)
or parse_actions_replay)
return (True, *func(live_chat_continuation))
except HTTPError as err:
retry.error = err
continue
return False, None, None, None
self._prepare_and_start_frag_download(ctx, info_dict)
success = dl_fragment(info_dict['url'])
if not success:
return False
raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
return False
continuation_id = try_get(
data,
lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')
ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
if not ytcfg:
return False
api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
if not api_key or not innertube_context:
return False
visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
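# Replays page through get_live_chat_replay; ongoing streams poll get_live_chat.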
if info_dict['protocol'] == 'youtube_live_chat_replay':
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
elif info_dict['protocol'] == 'youtube_live_chat':
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
frag_index = offset = 0
click_tracking_params = None
while continuation_id is not None:
frag_index += 1
request_data = {
'context': innertube_context,
'continuation': continuation_id,
}
if frag_index > 1:
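# rewind 5 seconds of overlap so messages near the last reported offset are not skipped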
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
if click_tracking_params:
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
headers.update({'content-type': 'application/json'})
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
url, frag_index, fragment_request_data, headers)
else:
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
chat_page_url, frag_index)
if not success:
return False
if test:
break
return self._finish_frag_download(ctx, info_dict)
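# Return the timestamp (in ms) of a live chat action, digging through the
# message, ticker and banner renderer variants YouTube may use.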
@staticmethod
def parse_live_timestamp(action):
action_content = dict_get(
action,
['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
if not isinstance(action_content, dict):
return None
item = dict_get(action_content, ['item', 'bannerRenderer'])
if not isinstance(item, dict):
return None
renderer = dict_get(item, [
# text
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
# ticker
'liveChatTickerPaidMessageItemRenderer',
'liveChatTickerSponsorItemRenderer',
# banner
'liveChatBannerRenderer',
])
if not isinstance(renderer, dict):
return None
parent_item_getters = [
lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
lambda x: x['contents'],
]
parent_item = try_get(renderer, parent_item_getters, dict)
if parent_item:
renderer = dict_get(parent_item, [
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
])
if not isinstance(renderer, dict):
return None
return int_or_none(renderer.get('timestampUsec'), 1000)

View File

@@ -0,0 +1,54 @@
from ..compat.compat_utils import passthrough_module
from ..globals import extractors as _extractors_context
from ..globals import plugin_ies as _plugin_ies_context
from ..plugins import PluginSpec, register_plugin_spec
passthrough_module(__name__, '.extractors')
del passthrough_module
register_plugin_spec(PluginSpec(
module_name='extractor',
suffix='IE',
destination=_extractors_context,
plugin_destination=_plugin_ies_context,
))
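# Registering this spec lets externally installed extractor plugin packages feed
# their classes into the same registry as the built-in extractors below.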
def gen_extractor_classes():
""" Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL.
"""
import_extractors()
return list(_extractors_context.value.values())
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
"""
return [klass() for klass in gen_extractor_classes()]
def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
from .generic import GenericIE
yield from sorted(filter(
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
yield GenericIE
def list_extractors(age_limit=None):
"""Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
return [ie() for ie in list_extractor_classes(age_limit)]
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
import_extractors()
return _extractors_context.value[f'{ie_name}IE']
def import_extractors():
from . import extractors # noqa: F401
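# A minimal usage sketch (hypothetical call site, not part of this module):
#
#   from yt_dlp.extractor import get_info_extractor, list_extractor_classes
#   ie_class = get_info_extractor('Youtube')  # lazily resolves the YoutubeIE class
#   names = [ie.IE_NAME for ie in list_extractor_classes(age_limit=18)]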

Some files were not shown because too many files have changed in this diff