From f043ce4b38aba9e687185cc41d554763e66f7bc4 Mon Sep 17 00:00:00 2001
From: Mike Kazantsev <mk.fraggod@gmail.com>
Date: Sat, 6 Apr 2024 12:06:59 +0500
Subject: [PATCH] desktop.media.audio-split-m4b: update to py3

---
 README.md                     |  20 +++---
 desktop/media/audio-split-m4b | 100 ++++++++++++++++++++++++++
 desktop/media/audio_split_m4b | 130 ----------------------------------
 dev/markdown-checks           |   5 +-
 4 files changed, 112 insertions(+), 143 deletions(-)
 create mode 100755 desktop/media/audio-split-m4b
 delete mode 100755 desktop/media/audio_split_m4b
diff --git a/README.md b/README.md
index 77dde544..887121be 100644
--- a/README.md
+++ b/README.md
@@ -153,7 +153,7 @@ Contents - links to doc section for each script here:
         - [toogg](#hdr-toogg)
         - [totty](#hdr-totty)
         - [split](#hdr-split)
-        - [audio_split_m4b](#hdr-audio_split_m4b)
+        - [audio-split-m4b](#hdr-audio-split-m4b)
         - [video-concat-xfade](#hdr-video-concat-xfade)
         - [pick-tracks](#hdr-pick-tracks)
         - [twitch_vod_fetch](#hdr-twitch_vod_fetch)
@@ -3231,17 +3231,17 @@ Uses ffprobe (ffmpeg) to get duration and ffmpeg with "-acodec copy -vn"
 (default, changed by passing these after duration arg) to grab only audio
 chunks from the source file.
 
-<a name=hdr-audio_split_m4b></a>
-<a name=user-content-hdr-audio_split_m4b></a>
-##### [audio_split_m4b](desktop/media/audio_split_m4b)
+<a name=hdr-audio-split-m4b></a>
+<a name=user-content-hdr-audio-split-m4b></a>
+##### [audio-split-m4b](desktop/media/audio-split-m4b)
 
-Splits m4b audiobook files on chapters (list of which are encoded into
-m4b as metadata) with ffprobe/ffmpeg.
+Splits audio files (typically m4b audiobooks) on chapters using ffprobe/ffmpeg.
+List of chapters should be encoded into the file metadata.
 
-Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`, and
-then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
-producing aac files with names corresponding to metadata titles (by default, can
-be controlled with --name-format, default is `{n:03d}__{title}.aac`).
+Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`,
+and then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
+producing aac files named after metadata titles by default (can be changed
+with --name-format, default template is `{n:03d}__{title}.aac`).
 
 Doesn't do any transcoding, which can easily be performed later to e.g.
 convert resulting aac files to mp3 or ogg, if necessary.
diff --git a/desktop/media/audio-split-m4b b/desktop/media/audio-split-m4b
new file mode 100755
index 00000000..36baa5ae
--- /dev/null
+++ b/desktop/media/audio-split-m4b
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+import os, sys, re, math, json, subprocess as sp, datetime as dt
+
+
+err_fmt = lambda err: f'[{err.__class__.__name__}] {err}' # "[ClassName] message" string; not referenced elsewhere in this script
+
+class adict(dict): # dict with keys also readable/writable as attributes
+	def __init__(self, *args, **kws):
+		super().__init__(*args, **kws)
+		self.__dict__ = self # instance attribute namespace is the dict itself
+
+def td_repr( ts, ts0=None, units_max=2, units_res=None, printf=None, # short time-delta string like "1h 5m"
+		_units=dict(h=3600,m=60,s=1,y=365.25*86400,mo=30.5*86400,w=7*86400,d=1*86400) ): # unit -> seconds
+	if ts0 is None and isinstance(ts, dt.datetime): ts0 = dt.datetime.now() # lone datetime is measured from now
+	delta = ts if ts0 is None else (ts - ts0) # without ts0, ts itself is the delta
+	if isinstance(delta, dt.timedelta): delta = delta.total_seconds() # delta is seconds from here on
+	res, s, n_last = list(), abs(delta), units_max - 1 # sign is dropped; n_last = len(res) at which last unit is added
+	units = sorted(_units.items(), key=lambda v: v[1], reverse=True) # largest unit first
+	for unit, unit_s in units:
+		if not (val := math.floor(val_raw := s / unit_s)): # less than one whole unit fits
+			if units_res == unit: break # nothing is printed for units below units_res
+			continue
+		elif val_raw - val > 0.98: val += 1 # almost a whole extra unit - count it as one
+		if len(res) == n_last or units_res == unit: # this is the last unit to be printed
+			val, n_last = round(s / unit_s), True # so round to nearest instead of floor, and flag the stop
+		res.append(f'{val:.0f}{unit}')
+		if n_last is True: break
+		if (s := s - val * unit_s) < 1: break # less than 1s remains after this unit
+	if not res: return 'now' # every unit count floored to zero
+	res = ' '.join(res)
+	if printf: res = printf % res # printf is a %-format template wrapping the result
+	return res
+
+
+title_subs = { # regexp -> replacement, applied in this order to make titles more fs-friendly
+	r'[\\/]': '_', r'^\.+': '_', r'[\x00-\x1f]': '_', r':': '-_',
+	r'<': '(', r'>': ')', r'\*': '+', r'[|!"]': '-', r'[\?\*]': '_',
+	'[\'’]': '', r'\.+$': '_', r'\s+$': '', r'\s': '_' }
+
+def title_subs_apply(title, _res=list()): # _res default list persists between calls as a compiled-regexp cache
+	if title_subs and not _res: _res.extend((re.compile(k), v) for k,v in title_subs.items()) # filled on first call
+	for sub_re, sub in _res: title = sub_re.sub(sub, title) # each substitution sees result of the previous ones
+	return title
+
+
+def main(args=None):
+	'Split file from cli args (sys.argv[1:] if args is None) into per-chapter files via ffprobe/ffmpeg.'
+	import argparse, logging
+	parser = argparse.ArgumentParser(
+		description='Split specified m4b audio file on chapters.'
+			' Does not do any transcoding, which can be done on resulting aac files afterwards.')
+
+	parser.add_argument('path', help='Path to source m4b file.')
+
+	parser.add_argument('-n', '--name-format',
+		metavar='str.format', default='{n:03d}__{title}.aac',
+		help='Template for output filenames as python str.format template string.'
+			' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.')
+	parser.add_argument('--name-format-raw', action='store_true',
+		help='Avoid doing any string replacements on filename (to make it more fs-friendly).')
+
+	parser.add_argument('--dry-run', action='store_true',
+		help='Do not slice the file, just print output filenames.')
+	parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.')
+	opts = parser.parse_args(sys.argv[1:] if args is None else args)
+
+	logging.basicConfig(
+		datefmt='%Y-%m-%d %H:%M:%S',
+		format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s',
+		level=logging.DEBUG if opts.debug else logging.INFO )
+	log = logging.getLogger()
+
+	log.debug( 'Getting file chapter times with: %s',
+		' '.join(cmd := [*'ffprobe -v 0 -output_format json -show_chapters'.split(), opts.path]) )
+	meta = json.loads(sp.run(cmd, stdout=sp.PIPE, check=True).stdout)
+	meta = sorted(( adict( id=c.id, a=float(c.start_time), # chapter without title tag gets empty title
+			b=float(c.end_time), title=(c.get('tags') or dict()).get('title') or '' )
+		for c in map(adict, meta['chapters']) ), key=lambda c: c.id)
+	log.debug('Parsed %s chapters from: %s', len(meta), opts.path)
+
+	ts_fmt = '{:f}'
+	try: # relabel chapters only if every title is just its own 1-based number
+		if not all(int(c.title) == n for n, c in enumerate(meta, 1)): raise ValueError
+		log.info('Auto-labelling number-only chapters as "cXYZ"')
+		for c in meta: c.title = f'c{int(c.title):03,d}'
+	except ValueError: pass # titles are not all "1", "2", ... - keep them as they are
+	for n, c in enumerate(meta, 1):
+		c.update(n=n, title_raw=c.title)
+		if not opts.name_format_raw: c.title = title_subs_apply(c.title)
+		dst_path = opts.name_format.format(**c)
+		log.info( 'Copying slice %s - %s [ start: %s, len: %s, title: %s ] to file: %s',
+			c.a, c.b, td_repr(c.a), td_repr(c.b - c.a), c.title_raw, dst_path )
+		if not opts.dry_run:
+			sp.run([ 'ffmpeg', '-loglevel', 'warning', '-y', '-i', opts.path, '-acodec', 'copy',
+				'-ss', ts_fmt.format(c.a), '-to', ts_fmt.format(c.b), dst_path ], check=True)
+
+	log.debug('Finished')
+
+if __name__ == '__main__': sys.exit(main())
diff --git a/desktop/media/audio_split_m4b b/desktop/media/audio_split_m4b
deleted file mode 100755
index 85188e37..00000000
--- a/desktop/media/audio_split_m4b
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-
-import itertools as it, operator as op, functools as ft
-import os, sys, math, re, subprocess, datetime as dt
-
-
-def force_bytes(bytes_or_unicode, encoding='utf-8', errors='backslashreplace'):
-	if isinstance(bytes_or_unicode, bytes): return bytes_or_unicode
-	return bytes_or_unicode.encode(encoding, errors)
-
-def force_unicode(bytes_or_unicode, encoding='utf-8', errors='replace'):
-	if isinstance(bytes_or_unicode, unicode): return bytes_or_unicode
-	return bytes_or_unicode.decode(encoding, errors)
-
-def naturaltime_diff( ts, ts0=None, ext=None,
-		_units=dict( h=3600, m=60, s=1,
-			y=365.25*86400, mo=30.5*86400, w=7*86400, d=1*86400 ) ):
-	if ts0 is None: ts0 = dt.datetime.now()
-	if not isinstance(ts0, dt.datetime): ts0 = dt.datetime.fromtimestamp(ts0)
-	if not isinstance(ts, dt.timedelta):
-		if not isinstance(ts, dt.datetime): ts = dt.datetime.fromtimestamp(ts)
-		ts = abs(ts - ts0)
-	res, s = list(), ts.total_seconds()
-	for unit, unit_s in sorted(_units.viewitems(), key=op.itemgetter(1), reverse=True):
-		val = math.floor(s / float(unit_s))
-		if not val: continue
-		res.append('{:.0f}{}'.format(val, unit))
-		if len(res) >= 2: break
-		s -= val * unit_s
-
-	if not res: return 'now'
-	else:
-		if ext: res.append(ext)
-		return ' '.join(res)
-
-
-class MetaParseError(Exception): pass
-
-def get_chapter_info(path):
-	cmd = ['ffprobe', '-v', '0', '-show_chapters', path]
-	log.debug('Getting file chapter times with: %s', ' '.join(cmd))
-	cmd = subprocess.Popen(cmd, stdout=subprocess.PIPE, close_fds=True)
-	chaps = cmd.stdout.read()
-	cmd = cmd.wait()
-	if cmd: raise MetaParseError('ffprobe failed (exit code: %s)', cmd)
-	chaps = re.findall(r'(?sm)^\[CHAPTER\]$(.*?)^\[/CHAPTER\]$', chaps)
-	for n, chap_str in enumerate(chaps):
-		chap = dict()
-		for k, k_dst, v, conv in [
-				('start_time', 'a', '[\d.]+', float), ('end_time', 'b', '[\d.]+', float),
-				('id', 'id', r'\d+', int), ('TAG:title', 'title', '.*', force_bytes) ]:
-			m = re.search(r'(?m)^{}=({})$'.format(k, v), chap_str)
-			if not m:
-				raise MetaParseError( 'Failed to match key'
-					' {!r} from chapter info: {!r}'.format(chap_str, k) )
-			try: v = conv(m.group(1))
-			except Exception as err:
-				raise MetaParseError( 'Failed to convert (func: {})'
-					' value for key {!r} (raw value: {!r}): {}'.format(conv, k, v, err) )
-			chap[k_dst] = v
-		chaps[n] = chap
-	for n, chap in enumerate(chaps): # sanity checks
-		assert chap['id'] == n, [n, chap]
-		assert chap['title'], chap
-	log.debug('Parsed %s chapters from: %s', len(chaps), path)
-	return chaps
-
-
-title_subs = {
-	r'[\\/]': '_', r'^\.+': '_', r'[\x00-\x1f]': '_', r':': '-_',
-	r'<': '(', r'>': ')', r'\*': '+', r'[|!"]': '-', r'[\?\*]': '_',
-	'[\'’]': '', r'\.+$': '_', r'\s+$': '', r'\s': '_' }
-
-def title_subs_apply(title, _res=list()):
-	if title_subs and not _res: _res.extend((re.compile(k), v) for k,v in title_subs.viewitems())
-	for sub_re, sub in _res: title = sub_re.sub(sub, title)
-	return title
-
-
-def main(args=None):
-	import argparse
-	parser = argparse.ArgumentParser(
-		description='Split specified m4b audio file on chapters.'
-			' Does not do any transcoding, which can be done on resulting aac files afterwards.')
-
-	parser.add_argument('path', help='Path to source m4b file.')
-
-	parser.add_argument('-n', '--name-format',
-		metavar='str.format', default='{n:03d}__{title}.aac',
-		help='Template for output filenames as python str.format template string.'
-			' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.')
-	parser.add_argument('--name-format-raw', action='store_true',
-		help='Avoid doing any string replacements on filename (to make it more fs-friendly).')
-
-	parser.add_argument('--dry-run', action='store_true',
-		help='Do not slice the file, just print output filenames.')
-	parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.')
-	opts = parser.parse_args(sys.argv[1:] if args is None else args)
-
-	global log
-	import logging
-	logging.basicConfig(
-		datefmt='%Y-%m-%d %H:%M:%S',
-		format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s',
-		level=logging.DEBUG if opts.debug else logging.INFO )
-	log = logging.getLogger()
-
-	ts_fmt = '{:f}'
-	chaps = get_chapter_info(opts.path)
-	for n, chap in enumerate(chaps, 1):
-		meta = dict(n=n)
-		meta.update((k, chap[k]) for k in ['id', 'title', 'a', 'b'])
-		meta['title_raw'] = meta['title']
-		if not opts.name_format_raw: meta['title'] = title_subs_apply(meta['title'])
-		dst_path = opts.name_format.format(**meta)
-		log.info(
-			'Copying slice %s - %s (len: %s, start: %s, title: %s) to file: %s',
-			meta['a'], meta['b'],
-			naturaltime_diff(meta['b'] - meta['a'], 0),
-			naturaltime_diff(meta['a'], 0), meta['title_raw'], dst_path )
-		if not opts.dry_run:
-			subprocess.check_call([ 'ffmpeg', '-loglevel', 'warning',
-				'-y', '-i', opts.path, '-acodec', 'copy',
-				'-ss', ts_fmt.format(meta['a']), '-to', ts_fmt.format(meta['b']), dst_path ])
-
-	log.debug('Finished')
-
-if __name__ == '__main__': sys.exit(main())
diff --git a/dev/markdown-checks b/dev/markdown-checks
index faf0c977..caf6f107 100755
--- a/dev/markdown-checks
+++ b/dev/markdown-checks
@@ -132,9 +132,8 @@ def md_check_quirks(md_lines, errs):
 
 def md_check_header_anchors(md_lines, errs, name_max_len=40):
 	'Check/return a list of header/anchor lines that needs some kind of fixing'
-	anchors, str_map = dict(), dict()
-	anchor_re = re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
-	str_map.update((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
+	anchors, anchor_re = dict(), re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
+	str_map = dict((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
 	def _line_prev(last_offset):
 		if k - last_offset < 0: return ''
 		n_last, line_prev = md_lines[k - last_offset]