desktop.media.audio-split-m4b: update to py3

mk-fg · Apr 6, 2024 · f043ce4 · f043ce4
1 parent 5a72827
commit f043ce4
Show file tree

Hide file tree

Showing 4 changed files with 112 additions and 143 deletions.
diff --git a/README.md b/README.md
@@ -153,7 +153,7 @@ Contents - links to doc section for each script here:
         - [toogg](#hdr-toogg)
         - [totty](#hdr-totty)
         - [split](#hdr-split)
-        - [audio_split_m4b](#hdr-audio_split_m4b)
+        - [audio-split-m4b](#hdr-audio-split-m4b)
         - [video-concat-xfade](#hdr-video-concat-xfade)
         - [pick-tracks](#hdr-pick-tracks)
         - [twitch_vod_fetch](#hdr-twitch_vod_fetch)
@@ -3231,17 +3231,17 @@ Uses ffprobe (ffmpeg) to get duration and ffmpeg with "-acodec copy -vn"
 (default, changed by passing these after duration arg) to grab only audio
 chunks from the source file.
 
-<a name=hdr-audio_split_m4b></a>
-<a name=user-content-hdr-audio_split_m4b></a>
-##### [audio_split_m4b](desktop/media/audio_split_m4b)
+<a name=hdr-audio-split-m4b></a>
+<a name=user-content-hdr-audio-split-m4b></a>
+##### [audio-split-m4b](desktop/media/audio-split-m4b)
 
-Splits m4b audiobook files on chapters (list of which are encoded into
-m4b as metadata) with ffprobe/ffmpeg.
+Splits audio files (typically m4b audiobooks) on chapters using ffprobe/ffmpeg.
+The list of chapters should be encoded into the file metadata.
 
-Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`, and
-then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
-producing aac files with names corresponding to metadata titles (by default, can
-be controlled with --name-format, default is `{n:03d}__{title}.aac`).
+Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`,
+and then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
+producing aac files with names corresponding to metadata titles
+(can be controlled with --name-format, default is `{n:03d}__{title}.aac`).
 
 Doesn't do any transcoding, which can easily be performed later to e.g.
 convert resulting aac files to mp3 or ogg, if necessary.

diff --git a/desktop/media/audio-split-m4b b/desktop/media/audio-split-m4b
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+import os, sys, re, math, json, subprocess as sp, datetime as dt
+
+
+# Formats an exception instance as a "[ClassName] message" string
+err_fmt = lambda err: f'[{err.__class__.__name__}] {err}'
+
+class adict(dict):
+	'Dict with keys also accessible as attributes - instance __dict__ is the dict itself'
+	def __init__(self, *args, **kws):
+		super().__init__(*args, **kws)
+		self.__dict__ = self # attribute reads/writes go straight to dict items
+
+def td_repr( ts, ts0=None, units_max=2, units_res=None, printf=None,
+		_units=dict(h=3600,m=60,s=1,y=365.25*86400,mo=30.5*86400,w=7*86400,d=1*86400) ):
+	'''Return short human-readable string for a time interval, e.g. "1h 5m".
+	ts: interval as a number of seconds, datetime.timedelta, or a datetime
+		(interval is then ts - ts0, with ts0 set to current time if it is None).
+	ts0: optional value to subtract from ts to get the interval.
+	units_max: max number of units to include in the result.
+	units_res: name of the smallest unit (key in _units) to go down to.
+	printf: optional %-template to apply to a non-empty result string.
+	Sign of the interval is discarded, "now" is returned if no unit has non-zero value.'''
+	if ts0 is None and isinstance(ts, dt.datetime): ts0 = dt.datetime.now()
+	delta = ts if ts0 is None else (ts - ts0)
+	if isinstance(delta, dt.timedelta): delta = delta.total_seconds()
+	res, s, n_last = list(), abs(delta), units_max - 1 # n_last = len(res) at which last unit is added
+	units = sorted(_units.items(), key=lambda v: v[1], reverse=True) # largest unit first
+	for unit, unit_s in units:
+		if not (val := math.floor(val_raw := s / unit_s)):
+			if units_res == unit: break # resolution limit reached with nothing to add
+			continue
+		elif val_raw - val > 0.98: val += 1 # almost a whole next unit - round up
+		if len(res) == n_last or units_res == unit:
+			# Last unit in the output - rounded instead of floored, n_last=True flags the stop
+			val, n_last = round(s / unit_s), True
+		res.append(f'{val:.0f}{unit}')
+		if n_last is True: break
+		if (s := s - val * unit_s) < 1: break # less than a second remains
+	if not res: return 'now'
+	res = ' '.join(res)
+	if printf: res = printf % res
+	return res
+
+
+# Regexp -> replacement map used to make chapter titles more fs-friendly.
+# Replacements are applied one after another, in the order they are listed here.
+title_subs = {
+	r'[\\/]': '_', r'^\.+': '_', r'[\x00-\x1f]': '_', r':': '-_',
+	r'<': '(', r'>': ')', r'\*': '+', r'[|!"]': '-', r'[\?\*]': '_',
+	'[\'’]': '', r'\.+$': '_', r'\s+$': '', r'\s': '_' }
+
+def title_subs_apply(title, _res=list()):
+	'Return title string with all title_subs replacements applied to it in order'
+	# _res default list is shared between calls - used as a cache of compiled regexps
+	if title_subs and not _res: _res.extend((re.compile(k), v) for k,v in title_subs.items())
+	for sub_re, sub in _res: title = sub_re.sub(sub, title)
+	return title
+
+
+def main(args=None):
+	import argparse
+	parser = argparse.ArgumentParser(
+		description='Split specified m4b audio file on chapters.'
+			' Does not do any transcoding, which can be done on resulting aac files afterwards.')
+
+	parser.add_argument('path', help='Path to source m4b file.')
+
+	parser.add_argument('-n', '--name-format',
+		metavar='str.format', default='{n:03d}__{title}.aac',
+		help='Template for output filenames as python str.format template string.'
+			' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.')
+	parser.add_argument('--name-format-raw', action='store_true',
+		help='Avoid doing any string replacements on filename (to make it more fs-friendly).')
+
+	parser.add_argument('--dry-run', action='store_true',
+		help='Do not slice the file, just print output filenames.')
+	parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.')
+	opts = parser.parse_args(sys.argv[1:] if args is None else args)
+
+	import logging
+	logging.basicConfig(
+		datefmt='%Y-%m-%d %H:%M:%S',
+		format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s',
+		level=logging.DEBUG if opts.debug else logging.INFO )
+	log = logging.getLogger()
+
+	log.debug( 'Getting file chapter times with: %s',
+		' '.join(cmd := [*'ffprobe -v 0 -output_format json -show_chapters'.split(), opts.path]) )
+	meta = json.loads(sp.run(cmd, stdout=sp.PIPE, check=True).stdout)
+	meta = sorted(( adict( id=c.id, a=float(c.start_time),
+			b=float(c.end_time), title=(c.get('tags') or dict()).get('title') )
+		for c in map(adict, meta['chapters']) ), key=lambda c: c.id)
+	log.debug('Parsed %s chapters from: %s', len(meta), opts.path)
+
+	ts_fmt = '{:f}'
+	try:
+		if not all(int(c.title) == n for n, c in enumerate(meta, 1)): raise ValueError
+		log.info('Auto-labelling number-only chapters as "cXYZ"')
+		for c in meta: c.title = f'c{int(c.title):03,d}'
+	except: raise
+	for n, c in enumerate(meta, 1):
+		c.update(n=n, title_raw=c.title)
+		if not opts.name_format_raw: c.title = title_subs_apply(c.title)
+		dst_path = opts.name_format.format(**c)
+		log.info( 'Copying slice %s - %s [ start: %s, len: %s, title: %s ] to file: %s',
+			c.a, c.b, td_repr(c.a), td_repr(c.b - c.a), c.title_raw, dst_path )
+		if not opts.dry_run:
+			sp.run([ 'ffmpeg', '-loglevel', 'warning', '-y', '-i', opts.path, '-acodec', 'copy',
+				'-ss', ts_fmt.format(c.a), '-to', ts_fmt.format(c.b), dst_path ], check=True)
+
+	log.debug('Finished')
+
+if __name__ == '__main__': sys.exit(main())
diff --git a/desktop/media/audio_split_m4b b/desktop/media/audio_split_m4b
diff --git a/dev/markdown-checks b/dev/markdown-checks
@@ -132,9 +132,8 @@ def md_check_quirks(md_lines, errs):
 
 def md_check_header_anchors(md_lines, errs, name_max_len=40):
 	'Check/return a list of header/anchor lines that needs some kind of fixing'
-	anchors, str_map = dict(), dict()
-	anchor_re = re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
-	str_map.update((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
+	anchors, anchor_re = dict(), re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
+	str_map = dict((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
 	def _line_prev(last_offset):
 		if k - last_offset < 0: return ''
 		n_last, line_prev = md_lines[k - last_offset]