From f043ce4b38aba9e687185cc41d554763e66f7bc4 Mon Sep 17 00:00:00 2001 From: Mike Kazantsev Date: Sat, 6 Apr 2024 12:06:59 +0500 Subject: [PATCH] desktop.media.audio-split-m4b: update to py3 --- README.md | 20 +++--- desktop/media/audio-split-m4b | 100 ++++++++++++++++++++++++++ desktop/media/audio_split_m4b | 130 ---------------------------------- dev/markdown-checks | 5 +- 4 files changed, 112 insertions(+), 143 deletions(-) create mode 100755 desktop/media/audio-split-m4b delete mode 100755 desktop/media/audio_split_m4b diff --git a/README.md b/README.md index 77dde544..887121be 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ Contents - links to doc section for each script here: - [toogg](#hdr-toogg) - [totty](#hdr-totty) - [split](#hdr-split) - - [audio_split_m4b](#hdr-audio_split_m4b) + - [audio-split-m4b](#hdr-audio-split-m4b) - [video-concat-xfade](#hdr-video-concat-xfade) - [pick-tracks](#hdr-pick-tracks) - [twitch_vod_fetch](#hdr-twitch_vod_fetch) @@ -3231,17 +3231,17 @@ Uses ffprobe (ffmpeg) to get duration and ffmpeg with "-acodec copy -vn" (default, changed by passing these after duration arg) to grab only audio chunks from the source file. - - -##### [audio_split_m4b](desktop/media/audio_split_m4b) + + +##### [audio-split-m4b](desktop/media/audio-split-m4b) -Splits m4b audiobook files on chapters (list of which are encoded into -m4b as metadata) with ffprobe/ffmpeg. +Splits audio files (typically m4b audiobooks) into per-chapter files using ffprobe/ffmpeg, +based on the chapter list encoded in the file's metadata. -Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`, and -then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`, -producing aac files with names corresponding to metadata titles (by default, can -be controlled with --name-format, default is `{n:03d}__{title}.aac`). +Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`, +and then each gets extracted with `ffmpeg -i ... 
-acodec copy -ss ... -to ...`, +producing aac files with names corresponding to metadata titles +(by default, can be controlled with --name-format, e.g. `{n:03d}__{title}.aac`). Doesn't do any transcoding, which can easily be performed later to e.g. convert resulting aac files to mp3 or ogg, if necessary. diff --git a/desktop/media/audio-split-m4b b/desktop/media/audio-split-m4b new file mode 100755 index 00000000..36baa5ae --- /dev/null +++ b/desktop/media/audio-split-m4b @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +import os, sys, re, math, json, subprocess as sp, datetime as dt + + +err_fmt = lambda err: f'[{err.__class__.__name__}] {err}' # "[ExcClass] message" text for an exception + +class adict(dict): # dict whose items can also be read and set as attributes + def __init__(self, *args, **kws): + super().__init__(*args, **kws) + self.__dict__ = self # attribute namespace is the dict itself + +def td_repr( ts, ts0=None, units_max=2, units_res=None, printf=None, # time delta as short text like "1h 5m" + _units=dict(h=3600,m=60,s=1,y=365.25*86400,mo=30.5*86400,w=7*86400,d=1*86400) ): + if ts0 is None and isinstance(ts, dt.datetime): ts0 = dt.datetime.now() # datetime without a reference is compared to now + delta = ts if ts0 is None else (ts - ts0) + if isinstance(delta, dt.timedelta): delta = delta.total_seconds() + res, s, n_last = list(), abs(delta), units_max - 1 + units = sorted(_units.items(), key=lambda v: v[1], reverse=True) # largest unit first + for unit, unit_s in units: + if not (val := math.floor(val_raw := s / unit_s)): # unit does not fit into remaining seconds + if units_res == unit: break + continue + elif val_raw - val > 0.98: val += 1 # count an almost-complete unit as a whole one + if len(res) == n_last or units_res == unit: # last unit to print: round instead of flooring + val, n_last = round(s / unit_s), True + res.append(f'{val:.0f}{unit}') + if n_last is True: break + if (s := s - val * unit_s) < 1: break + if not res: return 'now' # no unit had a non-zero count + res = ' '.join(res) + if printf: res = printf % res + return res + + +title_subs = { # regex: replacement pairs, applied to titles in this order + r'[\\/]': '_', r'^\.+': '_', r'[\x00-\x1f]': '_', r':': '-_', + r'<': '(', r'>': ')', r'\*': '+', r'[|!"]': '-', r'[\?\*]': '_', + '[\'’]': '', r'\.+$': '_', r'\s+$': '', r'\s': '_' } + +def title_subs_apply(title, _res=list()): # _res default list caches compiled patterns between calls + if title_subs and not _res: _res.extend((re.compile(k), v) for k,v in 
title_subs.items()) + for sub_re, sub in _res: title = sub_re.sub(sub, title) + return title + + +def main(args=None): # CLI entry point, args default to sys.argv[1:] + import argparse + parser = argparse.ArgumentParser( + description='Split specified m4b audio file on chapters.' + ' Does not do any transcoding, which can be done on resulting aac files afterwards.') + + parser.add_argument('path', help='Path to source m4b file.') + + parser.add_argument('-n', '--name-format', + metavar='str.format', default='{n:03d}__{title}.aac', + help='Template for output filenames as python str.format template string.' + ' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.') + parser.add_argument('--name-format-raw', action='store_true', + help='Avoid doing any string replacements on filename (to make it more fs-friendly).') + + parser.add_argument('--dry-run', action='store_true', + help='Do not slice the file, just print output filenames.') + parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.') + opts = parser.parse_args(sys.argv[1:] if args is None else args) + + import logging + logging.basicConfig( + datefmt='%Y-%m-%d %H:%M:%S', + format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s', + level=logging.DEBUG if opts.debug else logging.INFO ) + log = logging.getLogger() + + log.debug( 'Getting file chapter times with: %s', + ' '.join(cmd := [*'ffprobe -v 0 -output_format json -show_chapters'.split(), opts.path]) ) + meta = json.loads(sp.run(cmd, stdout=sp.PIPE, check=True).stdout) # ffprobe prints the chapter list as JSON on stdout + meta = sorted(( adict( id=c.id, a=float(c.start_time), + b=float(c.end_time), title=(c.get('tags') or dict()).get('title') ) + for c in map(adict, meta['chapters']) ), key=lambda c: c.id) + log.debug('Parsed %s chapters from: %s', len(meta), opts.path) + + ts_fmt = '{:f}' + try: # relabel only when every title is just its own 1-based chapter number + if not all(int(c.title) == n for n, c in enumerate(meta, 1)): raise ValueError + log.info('Auto-labelling number-only chapters as "cXYZ"') + for c in meta: c.title = f'c{int(c.title):03,d}' + 
except (TypeError, ValueError): pass # non-numeric or missing titles are kept as they are + for n, c in enumerate(meta, 1): + c.update(n=n, title_raw=c.title) + if not opts.name_format_raw: c.title = title_subs_apply(c.title) + dst_path = opts.name_format.format(**c) + log.info( 'Copying slice %s - %s [ start: %s, len: %s, title: %s ] to file: %s', + c.a, c.b, td_repr(c.a), td_repr(c.b - c.a), c.title_raw, dst_path ) + if not opts.dry_run: # stream-copy the chapter time range, no re-encoding + sp.run([ 'ffmpeg', '-loglevel', 'warning', '-y', '-i', opts.path, '-acodec', 'copy', + '-ss', ts_fmt.format(c.a), '-to', ts_fmt.format(c.b), dst_path ], check=True) + + log.debug('Finished') + +if __name__ == '__main__': sys.exit(main()) diff --git a/desktop/media/audio_split_m4b b/desktop/media/audio_split_m4b deleted file mode 100755 index 85188e37..00000000 --- a/desktop/media/audio_split_m4b +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -from __future__ import print_function - -import itertools as it, operator as op, functools as ft -import os, sys, math, re, subprocess, datetime as dt - - -def force_bytes(bytes_or_unicode, encoding='utf-8', errors='backslashreplace'): - if isinstance(bytes_or_unicode, bytes): return bytes_or_unicode - return bytes_or_unicode.encode(encoding, errors) - -def force_unicode(bytes_or_unicode, encoding='utf-8', errors='replace'): - if isinstance(bytes_or_unicode, unicode): return bytes_or_unicode - return bytes_or_unicode.decode(encoding, errors) - -def naturaltime_diff( ts, ts0=None, ext=None, - _units=dict( h=3600, m=60, s=1, - y=365.25*86400, mo=30.5*86400, w=7*86400, d=1*86400 ) ): - if ts0 is None: ts0 = dt.datetime.now() - if not isinstance(ts0, dt.datetime): ts0 = dt.datetime.fromtimestamp(ts0) - if not isinstance(ts, dt.timedelta): - if not isinstance(ts, dt.datetime): ts = dt.datetime.fromtimestamp(ts) - ts = abs(ts - ts0) - res, s = list(), ts.total_seconds() - for unit, unit_s in sorted(_units.viewitems(), key=op.itemgetter(1), reverse=True): - val = math.floor(s / float(unit_s)) - if not val: continue - 
res.append('{:.0f}{}'.format(val, unit)) - if len(res) >= 2: break - s -= val * unit_s - - if not res: return 'now' - else: - if ext: res.append(ext) - return ' '.join(res) - - -class MetaParseError(Exception): pass - -def get_chapter_info(path): - cmd = ['ffprobe', '-v', '0', '-show_chapters', path] - log.debug('Getting file chapter times with: %s', ' '.join(cmd)) - cmd = subprocess.Popen(cmd, stdout=subprocess.PIPE, close_fds=True) - chaps = cmd.stdout.read() - cmd = cmd.wait() - if cmd: raise MetaParseError('ffprobe failed (exit code: %s)', cmd) - chaps = re.findall(r'(?sm)^\[CHAPTER\]$(.*?)^\[/CHAPTER\]$', chaps) - for n, chap_str in enumerate(chaps): - chap = dict() - for k, k_dst, v, conv in [ - ('start_time', 'a', '[\d.]+', float), ('end_time', 'b', '[\d.]+', float), - ('id', 'id', r'\d+', int), ('TAG:title', 'title', '.*', force_bytes) ]: - m = re.search(r'(?m)^{}=({})$'.format(k, v), chap_str) - if not m: - raise MetaParseError( 'Failed to match key' - ' {!r} from chapter info: {!r}'.format(chap_str, k) ) - try: v = conv(m.group(1)) - except Exception as err: - raise MetaParseError( 'Failed to convert (func: {})' - ' value for key {!r} (raw value: {!r}): {}'.format(conv, k, v, err) ) - chap[k_dst] = v - chaps[n] = chap - for n, chap in enumerate(chaps): # sanity checks - assert chap['id'] == n, [n, chap] - assert chap['title'], chap - log.debug('Parsed %s chapters from: %s', len(chaps), path) - return chaps - - -title_subs = { - r'[\\/]': '_', r'^\.+': '_', r'[\x00-\x1f]': '_', r':': '-_', - r'<': '(', r'>': ')', r'\*': '+', r'[|!"]': '-', r'[\?\*]': '_', - '[\'’]': '', r'\.+$': '_', r'\s+$': '', r'\s': '_' } - -def title_subs_apply(title, _res=list()): - if title_subs and not _res: _res.extend((re.compile(k), v) for k,v in title_subs.viewitems()) - for sub_re, sub in _res: title = sub_re.sub(sub, title) - return title - - -def main(args=None): - import argparse - parser = argparse.ArgumentParser( - description='Split specified m4b audio file on chapters.' 
- ' Does not do any transcoding, which can be done on resulting aac files afterwards.') - - parser.add_argument('path', help='Path to source m4b file.') - - parser.add_argument('-n', '--name-format', - metavar='str.format', default='{n:03d}__{title}.aac', - help='Template for output filenames as python str.format template string.' - ' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.') - parser.add_argument('--name-format-raw', action='store_true', - help='Avoid doing any string replacements on filename (to make it more fs-friendly).') - - parser.add_argument('--dry-run', action='store_true', - help='Do not slice the file, just print output filenames.') - parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.') - opts = parser.parse_args(sys.argv[1:] if args is None else args) - - global log - import logging - logging.basicConfig( - datefmt='%Y-%m-%d %H:%M:%S', - format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s', - level=logging.DEBUG if opts.debug else logging.INFO ) - log = logging.getLogger() - - ts_fmt = '{:f}' - chaps = get_chapter_info(opts.path) - for n, chap in enumerate(chaps, 1): - meta = dict(n=n) - meta.update((k, chap[k]) for k in ['id', 'title', 'a', 'b']) - meta['title_raw'] = meta['title'] - if not opts.name_format_raw: meta['title'] = title_subs_apply(meta['title']) - dst_path = opts.name_format.format(**meta) - log.info( - 'Copying slice %s - %s (len: %s, start: %s, title: %s) to file: %s', - meta['a'], meta['b'], - naturaltime_diff(meta['b'] - meta['a'], 0), - naturaltime_diff(meta['a'], 0), meta['title_raw'], dst_path ) - if not opts.dry_run: - subprocess.check_call([ 'ffmpeg', '-loglevel', 'warning', - '-y', '-i', opts.path, '-acodec', 'copy', - '-ss', ts_fmt.format(meta['a']), '-to', ts_fmt.format(meta['b']), dst_path ]) - - log.debug('Finished') - -if __name__ == '__main__': sys.exit(main()) diff --git a/dev/markdown-checks b/dev/markdown-checks index 
faf0c977..caf6f107 100755 --- a/dev/markdown-checks +++ b/dev/markdown-checks @@ -132,9 +132,8 @@ def md_check_quirks(md_lines, errs): def md_check_header_anchors(md_lines, errs, name_max_len=40): 'Check/return a list of header/anchor lines that needs some kind of fixing' - anchors, str_map = dict(), dict() - anchor_re = re.compile(r')') - str_map.update((c, c) for c in string.ascii_lowercase + string.digits + '-._~') + anchors, anchor_re = dict(), re.compile(r')') + str_map = dict((c, c) for c in string.ascii_lowercase + string.digits + '-._~') def _line_prev(last_offset): if k - last_offset < 0: return '' n_last, line_prev = md_lines[k - last_offset]