-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_mp3.py
57 lines (52 loc) · 1.71 KB
/
read_mp3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Forked: @kylemcdonald kylemcdonald/ffmpeg_load_audio.py
import numpy as np
import subprocess as sp
import os
# Write-only handle on the platform's null device, opened once at import time
# and handed to the ffmpeg subprocess as its stderr so its log output is
# discarded.
DEVNULL = open(os.devnull, mode='w')
# load_audio cannot detect the input type
def ffmpeg_load_audio(filename, sr=44100, mono=False, normalize=False,
                      in_type=np.int16, out_type=np.float32):
    """Decode an audio file into a NumPy array by piping it through ffmpeg.

    ffmpeg is asked to resample the input to ``sr`` Hz, mix it to one or two
    channels and write raw little-endian PCM to stdout. That byte stream is
    parsed as ``in_type`` samples and converted to ``out_type``.

    Args:
        filename: Input passed to ffmpeg after ``-i``.
        sr: Target sample rate in Hz (ffmpeg ``-ar``).
        mono: If true, request a single channel; otherwise two channels.
        normalize: Only relevant when ``out_type`` is a floating type. If
            true, scale the signal so its absolute peak is 1.0 (an all-zero
            signal is left untouched). If false and ``in_type`` is an integer
            type, divide by the largest value representable in ``in_type``.
        in_type: NumPy scalar type of the PCM samples requested from ffmpeg.
            Must be one of the keys of the format table below.
        out_type: NumPy scalar type of the returned array.

    Returns:
        A tuple ``(audio, sr)``. ``audio`` has shape ``(2, n_frames)`` for
        two-channel output and ``(n_samples,)`` for mono. If ffmpeg wrote
        nothing to stdout the array is empty and no scaling is attempted.

    Raises:
        KeyError: If ``in_type`` is not in the format table.
        OSError: If the ``ffmpeg`` executable cannot be started.
    """
    channels = 1 if mono else 2
    # Maps the requested sample dtype to ffmpeg's raw PCM format name.
    format_strings = {
        np.float64: 'f64le',
        np.float32: 'f32le',
        np.int16: 's16le',
        np.int32: 's32le',
        np.uint32: 'u32le',
    }
    format_string = format_strings[in_type]
    command = [
        'ffmpeg',
        '-i', filename,
        '-f', format_string,
        '-acodec', 'pcm_' + format_string,
        '-ar', str(sr),
        '-ac', str(channels),
        '-',  # write the decoded stream to stdout
    ]
    p = sp.Popen(command, stdout=sp.PIPE, stderr=DEVNULL)
    # communicate() drains stdout to EOF in one pass (no repeated bytes
    # concatenation), closes the pipe and waits for ffmpeg to exit, so the
    # child process is always reaped instead of lingering as a zombie.
    raw, _ = p.communicate()

    # np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported replacement. It yields a read-only view over `raw`, and
    # astype() then makes the writable copy that the in-place scaling needs.
    audio = np.frombuffer(raw, dtype=in_type).astype(out_type)
    if channels > 1:
        # De-interleave: (n_frames, channels) -> (channels, n_frames).
        audio = audio.reshape((-1, channels)).transpose()
    if audio.size == 0:
        # Nothing decoded; skip scaling (max() of an empty array would raise).
        return audio, sr
    if issubclass(out_type, np.floating):
        if normalize:
            peak = np.abs(audio).max()
            if peak > 0:
                audio /= peak
        elif issubclass(in_type, np.integer):
            # Bring integer PCM into roughly [-1, 1] using the dtype's range.
            audio /= np.iinfo(in_type).max
    return audio, sr
if __name__ == '__main__':
    # Manual smoke run: decode a sample MP3 from the current working
    # directory using the loader's default settings.
    audio, sample_rate = ffmpeg_load_audio(filename='test_file.mp3')