Skip to content

Commit

Permalink
desktop.media.audio-split-m4b: update to py3
Browse files Browse the repository at this point in the history
  • Loading branch information
mk-fg committed Apr 6, 2024
1 parent 5a72827 commit f043ce4
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 143 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ Contents - links to doc section for each script here:
- [toogg](#hdr-toogg)
- [totty](#hdr-totty)
- [split](#hdr-split)
- [audio_split_m4b](#hdr-audio_split_m4b)
- [audio-split-m4b](#hdr-audio-split-m4b)
- [video-concat-xfade](#hdr-video-concat-xfade)
- [pick-tracks](#hdr-pick-tracks)
- [twitch_vod_fetch](#hdr-twitch_vod_fetch)
Expand Down Expand Up @@ -3231,17 +3231,17 @@ Uses ffprobe (ffmpeg) to get duration and ffmpeg with "-acodec copy -vn"
(default, changed by passing these after duration arg) to grab only audio
chunks from the source file.

<a name=hdr-audio_split_m4b></a>
<a name=user-content-hdr-audio_split_m4b></a>
##### [audio_split_m4b](desktop/media/audio_split_m4b)
<a name=hdr-audio-split-m4b></a>
<a name=user-content-hdr-audio-split-m4b></a>
##### [audio-split-m4b](desktop/media/audio-split-m4b)

Splits m4b audiobook files on chapters (list of which are encoded into
m4b as metadata) with ffprobe/ffmpeg.
Splits audio files (typically m4b audiobooks) on chapters using ffprobe/ffmpeg.
The list of chapters should be encoded into file metadata.

Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`, and
then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
producing aac files with names corresponding to metadata titles (by default, can
be controlled with --name-format, default is `{n:03d}__{title}.aac`).
Chapter offsets and titles are detected via `ffprobe -v 0 -show_chapters`,
and then each gets extracted with `ffmpeg -i ... -acodec copy -ss ... -to ...`,
producing aac files with names corresponding to metadata titles
(naming can be controlled with --name-format, default is `{n:03d}__{title}.aac`).

Doesn't do any transcoding, which can easily be performed later to e.g.
convert resulting aac files to mp3 or ogg, if necessary.
Expand Down
100 changes: 100 additions & 0 deletions desktop/media/audio-split-m4b
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env python

import os, sys, re, math, json, subprocess as sp, datetime as dt


def err_fmt(err):
    'Format an error object (e.g. exception) as "[ClassName] message" string.'
    return f'[{err.__class__.__name__}] {err}'

class adict(dict):
    'Dict subclass where keys are also readable/writable as instance attributes.'

    def __init__(self, *args, **kws):
        # Accepts same arguments as the plain dict constructor
        dict.__init__(self, *args, **kws)
        # Use the mapping itself as attribute storage, so that d.key is d['key']
        self.__dict__ = self

def td_repr( ts, ts0=None, units_max=2, units_res=None, printf=None,
        _units=dict(h=3600,m=60,s=1,y=365.25*86400,mo=30.5*86400,w=7*86400,d=1*86400) ):
    '''Return short human-readable representation of a time interval, e.g. "1h 30m".

    Arguments:
        ts: interval as a number of seconds or timedelta, or a datetime,
            in which case interval between it and ts0 (or current time) is used.
        ts0: optional reference value, subtracted from ts to get the interval.
        units_max: max number of unit components to include in the result.
        units_res: name of the unit (key in _units) to stop at,
            with nothing below that unit being added to the result.
        printf: optional %-style template with one placeholder to wrap the result in.
        _units: mapping of unit suffix to its length in seconds, only read here.

    Sign of the interval is discarded. Returns "now" if interval
        is too short to have non-zero count of any (allowed) unit.'''
    if ts0 is None and isinstance(ts, dt.datetime):
        # Match tz-awareness of ts - subtracting naive now() from
        #  an aware datetime raises TypeError. tzinfo=None gives the usual naive now().
        ts0 = dt.datetime.now(ts.tzinfo)
    delta = ts if ts0 is None else (ts - ts0)
    if isinstance(delta, dt.timedelta): delta = delta.total_seconds()
    # n_last is index of the last allowed component, replaced by True when it gets emitted
    res, s, n_last = list(), abs(delta), units_max - 1
    units = sorted(_units.items(), key=lambda v: v[1], reverse=True) # largest first
    for unit, unit_s in units:
        if not (val := math.floor(val_raw := s / unit_s)):
            # Less than one whole unit left - skip it, unless resolution limit is reached
            if units_res == unit: break
            continue
        elif val_raw - val > 0.98: val += 1 # almost a whole extra unit, count it as such
        if len(res) == n_last or units_res == unit:
            # Last component gets rounded instead of truncated, as remainder is dropped
            val, n_last = round(s / unit_s), True
        res.append(f'{val:.0f}{unit}')
        if n_last is True: break
        if (s := s - val * unit_s) < 1: break # no meaningful remainder left
    if not res: return 'now'
    res = ' '.join(res)
    if printf: res = printf % res
    return res


# Ordered table of regexp -> replacement pairs, used to make chapter titles
#  more filesystem-friendly. Applied sequentially, so order of entries matters.
title_subs = {
    r'[\\/]': '_',
    r'^\.+': '_',
    r'[\x00-\x1f]': '_',
    r':': '-_',
    r'<': '(',
    r'>': ')',
    r'\*': '+',
    r'[|!"]': '-',
    r'[\?\*]': '_',
    '[\'’]': '',
    r'\.+$': '_',
    r'\s+$': '',
    r'\s': '_',
}


def title_subs_apply(title, _res=list()):
    '''Return title string with all title_subs replacements applied, in order.
    _res default list is a cache of compiled patterns, filled in on the first call.'''
    if title_subs and not _res:
        for pattern, repl in title_subs.items():
            _res.append((re.compile(pattern), repl))
    result = title
    for regexp, repl in _res:
        result = regexp.sub(repl, result)
    return result


def main(args=None):
    '''Command-line entry point - split audio file into per-chapter files.

    Chapter list is read via ffprobe, then each chapter is copied
        out (without transcoding) into a separate file by running ffmpeg.

    Arguments:
        args: list of command-line arguments, sys.argv[1:] is used if None.

    Raises subprocess.CalledProcessError if ffprobe or ffmpeg
        exit with non-zero code, due to check=True on these calls.'''
    import argparse
    parser = argparse.ArgumentParser(
        description='Split specified m4b audio file on chapters.'
            ' Does not do any transcoding, which can be done on resulting aac files afterwards.')

    parser.add_argument('path', help='Path to source m4b file.')

    parser.add_argument('-n', '--name-format',
        metavar='str.format', default='{n:03d}__{title}.aac',
        help='Template for output filenames as python str.format template string.'
            ' Can contain following keys: n, id, title, title_raw, a, b. Default: %(default)s.')
    parser.add_argument('--name-format-raw', action='store_true',
        help='Avoid doing any string replacements on filename (to make it more fs-friendly).')

    parser.add_argument('--dry-run', action='store_true',
        help='Do not slice the file, just print output filenames.')
    parser.add_argument('-d', '--debug', action='store_true', help='Verbose operation mode.')
    opts = parser.parse_args(sys.argv[1:] if args is None else args)

    import logging
    logging.basicConfig(
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s :: %(name)s %(levelname)s :: %(message)s',
        level=logging.DEBUG if opts.debug else logging.INFO )
    log = logging.getLogger()

    log.debug( 'Getting file chapter times with: %s',
        ' '.join(cmd := [*'ffprobe -v 0 -output_format json -show_chapters'.split(), opts.path]) )
    meta = json.loads(sp.run(cmd, stdout=sp.PIPE, check=True).stdout)
    # Keep only the fields used below, with chapters ordered by their id
    meta = sorted(( adict( id=c.id, a=float(c.start_time),
            b=float(c.end_time), title=(c.get('tags') or dict()).get('title') )
        for c in map(adict, meta['chapters']) ), key=lambda c: c.id)
    log.debug('Parsed %s chapters from: %s', len(meta), opts.path)

    ts_fmt = '{:f}'

    # Titles that are just sequential 1..N numbers get replaced by "cXYZ" labels.
    # Any non-numeric or missing title fails int() conversion, which only
    #  means that such relabelling does not apply, and is not an error.
    try: numbered = bool(meta) and all(int(c.title) == n for n, c in enumerate(meta, 1))
    except (TypeError, ValueError): numbered = False
    if numbered:
        log.info('Auto-labelling number-only chapters as "cXYZ"')
        for c in meta: c.title = f'c{int(c.title):03,d}'

    for n, c in enumerate(meta, 1):
        # Chapter without title tag gets same kind of number-based label,
        #  as None title cannot be used in regexp substitutions below
        if c.title is None: c.title = f'c{n:03,d}'
        c.update(n=n, title_raw=c.title)
        if not opts.name_format_raw: c.title = title_subs_apply(c.title)
        dst_path = opts.name_format.format(**c)
        log.info( 'Copying slice %s - %s [ start: %s, len: %s, title: %s ] to file: %s',
            c.a, c.b, td_repr(c.a), td_repr(c.b - c.a), c.title_raw, dst_path )
        if not opts.dry_run:
            sp.run([ 'ffmpeg', '-loglevel', 'warning', '-y', '-i', opts.path, '-acodec', 'copy',
                '-ss', ts_fmt.format(c.a), '-to', ts_fmt.format(c.b), dst_path ], check=True)

    log.debug('Finished')

# Run main() only when executed as a script, using its return value as exit code
if __name__ == '__main__':
    sys.exit(main())
130 changes: 0 additions & 130 deletions desktop/media/audio_split_m4b

This file was deleted.

5 changes: 2 additions & 3 deletions dev/markdown-checks
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,8 @@ def md_check_quirks(md_lines, errs):

def md_check_header_anchors(md_lines, errs, name_max_len=40):
'Check/return a list of header/anchor lines that needs some kind of fixing'
anchors, str_map = dict(), dict()
anchor_re = re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
str_map.update((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
anchors, anchor_re = dict(), re.compile(r'<a name=((?:user-content-)?hdr(x?)-(\S+)>)</a>')
str_map = dict((c, c) for c in string.ascii_lowercase + string.digits + '-._~')
def _line_prev(last_offset):
if k - last_offset < 0: return ''
n_last, line_prev = md_lines[k - last_offset]
Expand Down

0 comments on commit f043ce4

Please sign in to comment.