diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a3b28af..7d7c49f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -271,7 +271,7 @@ jobs:
         LLVM_CONFIG_PATH: ${{ github.workspace }}/clang/bin/llvm-config
       run: |
         copy ${{ github.workspace }}/ffmpeg_prebuilt_cross/lib/libffmpeg.dll .
-        cargo test --tests --target i686-pc-windows-msvc -vv -- --skip transcoding
+        cargo test --tests --target i686-pc-windows-msvc -vv -- --skip transcode
 
   # Check if correct documentation can be generated by docs.rs
   docs_rs_check:
diff --git a/src/avfilter/avfilter.rs b/src/avfilter/avfilter.rs
index a66cdff..b8ebbd5 100644
--- a/src/avfilter/avfilter.rs
+++ b/src/avfilter/avfilter.rs
@@ -1,12 +1,12 @@
 use std::{
     ffi::CStr,
-    mem::size_of,
+    mem::{size_of, MaybeUninit},
     ops::Drop,
     ptr::{self, NonNull},
 };
 
 use crate::{
-    avutil::AVFrame,
+    avutil::{AVChannelLayout, AVFrame},
     error::{Result, RsmpegError},
     ffi,
     shared::*,
@@ -100,6 +100,51 @@ impl AVFilterContext {
             Err(err) => Err(RsmpegError::BufferSinkGetFrameError(err)),
         }
     }
+
+    pub fn get_type(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_type(self.as_ptr()) }
+    }
+
+    pub fn get_time_base(&self) -> ffi::AVRational {
+        unsafe { ffi::av_buffersink_get_time_base(self.as_ptr()) }
+    }
+
+    pub fn get_format(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_format(self.as_ptr()) }
+    }
+
+    pub fn get_frame_rate(&self) -> ffi::AVRational {
+        unsafe { ffi::av_buffersink_get_frame_rate(self.as_ptr()) }
+    }
+
+    pub fn get_w(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_w(self.as_ptr()) }
+    }
+
+    pub fn get_h(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_h(self.as_ptr()) }
+    }
+
+    pub fn get_sample_aspect_ratio(&self) -> ffi::AVRational {
+        unsafe { ffi::av_buffersink_get_sample_aspect_ratio(self.as_ptr()) }
+    }
+
+    pub fn get_channels(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_channels(self.as_ptr()) }
+    }
+
+    pub fn get_ch_layout(&self) -> AVChannelLayout {
+        let mut ch_layout = MaybeUninit::<ffi::AVChannelLayout>::uninit();
+        unsafe { ffi::av_buffersink_get_ch_layout(self.as_ptr(), ch_layout.as_mut_ptr()) }
+            .upgrade()
+            .unwrap();
+        let ch_layout = Box::leak(Box::new(unsafe { ch_layout.assume_init() }));
+        unsafe { AVChannelLayout::from_raw(NonNull::new(ch_layout).unwrap()) }
+    }
+
+    pub fn get_sample_rate(&self) -> i32 {
+        unsafe { ffi::av_buffersink_get_sample_rate(self.as_ptr()) }
+    }
 }
 
 wrap!(AVFilterInOut: ffi::AVFilterInOut);
diff --git a/src/avutil/frame.rs b/src/avutil/frame.rs
index 0ebe782..522f919 100644
--- a/src/avutil/frame.rs
+++ b/src/avutil/frame.rs
@@ -12,6 +12,7 @@ settable!(AVFrame {
     width: i32,
     height: i32,
     pts: i64,
+    time_base: ffi::AVRational,
     pict_type: ffi::AVPictureType,
     nb_samples: i32,
     format: i32,
diff --git a/tests/transcode.rs b/tests/transcode.rs
new file mode 100644
index 0000000..b00992e
--- /dev/null
+++ b/tests/transcode.rs
@@ -0,0 +1,542 @@
+//! RIIR: https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/transcode.c
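+//!
+//! Pipeline per audio/video stream:
+//! demux -> decode -> buffersrc -> null/anull filter -> buffersink -> encode -> mux.
+//!
+//! Illustrative sketch (not compiled; assumes an already configured audio
+//! `buffersink_ctx: AVFilterContextMut`) of the buffersink getters this file
+//! relies on:
+//! ```ignore
+//! let time_base = buffersink_ctx.get_time_base(); // ffi::AVRational of the sink output
+//! let sample_rate = buffersink_ctx.get_sample_rate(); // meaningful for audio sinks only
+//! ```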
+use anyhow::{anyhow, bail, Context, Result};
+use cstr::cstr;
+use rsmpeg::{
+    self,
+    avcodec::{AVCodec, AVCodecContext},
+    avfilter::{AVFilter, AVFilterContextMut, AVFilterGraph, AVFilterInOut},
+    avformat::{AVFormatContextInput, AVFormatContextOutput},
+    avutil::{
+        av_inv_q, av_rescale_q, get_sample_fmt_name, ra, AVChannelLayout, AVDictionary, AVFrame,
+    },
+    error::RsmpegError,
+    ffi,
+};
+use std::ffi::{CStr, CString};
+
+struct FilteringContext<'graph> {
+    dec_ctx: AVCodecContext,
+    enc_ctx: AVCodecContext,
+    stream_index: usize,
+    buffersrc_ctx: AVFilterContextMut<'graph>,
+    buffersink_ctx: AVFilterContextMut<'graph>,
+}
+
+struct StreamContext {
+    dec_ctx: AVCodecContext,
+    enc_ctx: AVCodecContext,
+    stream_index: usize,
+}
+
+struct FilterContext<'graph> {
+    buffersrc_ctx: AVFilterContextMut<'graph>,
+    buffersink_ctx: AVFilterContextMut<'graph>,
+}
+
+/// Get `decode_contexts` and `input_format_context`. The length of
+/// `decode_contexts` equals the number of streams in the input file, and each
+/// decode context corresponds to one stream; if a stream is neither video nor
+/// audio, the decode context at its index is set to `None`.
+fn open_input_file(filename: &CStr) -> Result<(Vec<Option<AVCodecContext>>, AVFormatContextInput)> {
+    let mut ifmt_ctx = AVFormatContextInput::open(filename, None, &mut None)?;
+    let mut stream_ctx = Vec::with_capacity(ifmt_ctx.nb_streams as usize);
+
+    for (i, input_stream) in ifmt_ctx.streams().into_iter().enumerate() {
+        let codecpar = input_stream.codecpar();
+        let codec_type = codecpar.codec_type();
+        let dec_ctx = if codec_type.is_video() || codec_type.is_audio() {
+            let decoder = AVCodec::find_decoder(codecpar.codec_id)
+                .with_context(|| anyhow!("Failed to find decoder for stream #{}", i))?;
+
+            let mut dec_ctx = AVCodecContext::new(&decoder);
+            dec_ctx.apply_codecpar(&codecpar).with_context(|| {
+                anyhow!(
+                    "Failed to copy decoder parameters to input decoder context for stream #{}",
+                    i
+                )
+            })?;
+            dec_ctx.set_pkt_timebase(input_stream.time_base);
+            if codec_type.is_video() {
+                if let Some(framerate) = input_stream.guess_framerate() {
+                    dec_ctx.set_framerate(framerate);
+                }
+            }
+            dec_ctx
+                .open(None)
+                .with_context(|| anyhow!("Failed to open decoder for stream #{}", i))?;
+            Some(dec_ctx)
+        } else {
+            None
+        };
+
+        stream_ctx.push(dec_ctx);
+    }
+    ifmt_ctx.dump(0, filename)?;
+    Ok((stream_ctx, ifmt_ctx))
+}
+
+/// Accepts an output filename, attaches an `encode_context` to each
+/// corresponding `decode_context` and wraps them into a `stream_context`. A
+/// `stream_context` is `None` when the `decode_context` at the same index is
+/// `None`.
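+///
+/// Illustrative usage sketch (hypothetical paths, not compiled):
+/// ```ignore
+/// let (dec_ctx, ifmt_ctx) = open_input_file(cstr!("in.mp4"))?;
+/// // Non-A/V streams stay `None` in the returned stream contexts.
+/// let (stream_ctx, ofmt_ctx) = open_output_file(cstr!("out.mp4"), dec_ctx, &mut None)?;
+/// ```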
+fn open_output_file(
+    filename: &CStr,
+    dec_ctx: Vec<Option<AVCodecContext>>,
+    dict: &mut Option<AVDictionary>,
+) -> Result<(Vec<Option<StreamContext>>, AVFormatContextOutput)> {
+    let mut ofmt_ctx = AVFormatContextOutput::create(filename, None)?;
+    let mut stream_ctx = vec![];
+
+    for (i, dec_ctx) in dec_ctx.into_iter().enumerate() {
+        let Some(dec_ctx) = dec_ctx else {
+            stream_ctx.push(None);
+            continue;
+        };
+        let encoder = AVCodec::find_encoder(dec_ctx.codec_id)
+            .with_context(|| anyhow!("encoder({}) not found.", dec_ctx.codec_id))?;
+
+        let mut enc_ctx = AVCodecContext::new(&encoder);
+
+        if dec_ctx.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
+            enc_ctx.set_height(dec_ctx.height);
+            enc_ctx.set_width(dec_ctx.width);
+            enc_ctx.set_sample_aspect_ratio(dec_ctx.sample_aspect_ratio);
+            // take first format from list of supported formats
+            enc_ctx.set_pix_fmt(encoder.pix_fmts().unwrap()[0]);
+            enc_ctx.set_time_base(av_inv_q(dec_ctx.framerate));
+        } else if dec_ctx.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_AUDIO {
+            enc_ctx.set_sample_rate(dec_ctx.sample_rate);
+            enc_ctx.set_ch_layout(dec_ctx.ch_layout().clone().into_inner());
+            // take first format from list of supported formats
+            enc_ctx.set_sample_fmt(encoder.sample_fmts().unwrap()[0]);
+            enc_ctx.set_time_base(ra(1, dec_ctx.sample_rate));
+        } else {
+            bail!(
+                "Elementary stream #{} is of unknown type, cannot proceed",
+                i
+            );
+        }
+
+        // Some formats want stream headers to be separate.
+        if ofmt_ctx.oformat().flags & ffi::AVFMT_GLOBALHEADER as i32 != 0 {
+            enc_ctx.set_flags(enc_ctx.flags | ffi::AV_CODEC_FLAG_GLOBAL_HEADER as i32);
+        }
+
+        enc_ctx.open(None).with_context(|| {
+            anyhow!(
+                "Cannot open {} encoder for stream #{}",
+                encoder.name().to_str().unwrap(),
+                i
+            )
+        })?;
+
+        let mut out_stream = ofmt_ctx.new_stream();
+        out_stream.set_codecpar(enc_ctx.extract_codecpar());
+        out_stream.set_time_base(enc_ctx.time_base);
+
+        stream_ctx.push(Some(StreamContext {
+            enc_ctx,
+            dec_ctx,
+            stream_index: out_stream.index as usize,
+        }));
+    }
+
+    ofmt_ctx.dump(0, filename)?;
+    ofmt_ctx
+        .write_header(dict)
+        .context("Error occurred when opening output file")?;
+
+    Ok((stream_ctx, ofmt_ctx))
+}
+
+/// Init a filter between a `decode_context` and an `encode_context`
+/// corresponding to the given `filter_spec`.
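+///
+/// With the passthrough specs used below, the resulting graph is just
+/// `buffer -> null -> buffersink` (or `abuffer -> anull -> abuffersink`).
+/// Illustrative call for a video stream (not compiled):
+/// ```ignore
+/// let mut graph = AVFilterGraph::new();
+/// let FilterContext { buffersrc_ctx, buffersink_ctx } =
+///     init_filter(&mut graph, &mut dec_ctx, &mut enc_ctx, cstr!("null"))?;
+/// ```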
+fn init_filter<'graph>(
+    filter_graph: &'graph mut AVFilterGraph,
+    dec_ctx: &mut AVCodecContext,
+    enc_ctx: &mut AVCodecContext,
+    filter_spec: &CStr,
+) -> Result<FilterContext<'graph>> {
+    let (mut buffersrc_ctx, mut buffersink_ctx) =
+        if dec_ctx.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
+            let buffersrc = AVFilter::get_by_name(cstr!("buffer")).unwrap();
+            let buffersink = AVFilter::get_by_name(cstr!("buffersink")).unwrap();
+
+            let args = format!(
+                "video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect={}/{}",
+                dec_ctx.width,
+                dec_ctx.height,
+                dec_ctx.pix_fmt,
+                dec_ctx.pkt_timebase.num,
+                dec_ctx.pkt_timebase.den,
+                dec_ctx.sample_aspect_ratio.num,
+                dec_ctx.sample_aspect_ratio.den,
+            );
+
+            let args = &CString::new(args).unwrap();
+
+            let buffer_src_context = filter_graph
+                .create_filter_context(&buffersrc, cstr!("in"), Some(args))
+                .context("Cannot create buffer source")?;
+
+            let mut buffer_sink_context = filter_graph
+                .create_filter_context(&buffersink, cstr!("out"), None)
+                .context("Cannot create buffer sink")?;
+
+            buffer_sink_context
+                .opt_set_bin(cstr!("pix_fmts"), &enc_ctx.pix_fmt)
+                .context("Cannot set output pixel format")?;
+
+            (buffer_src_context, buffer_sink_context)
+        } else if dec_ctx.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_AUDIO {
+            let buffersrc = AVFilter::get_by_name(cstr!("abuffer")).unwrap();
+            let buffersink = AVFilter::get_by_name(cstr!("abuffersink")).unwrap();
+
+            if dec_ctx.ch_layout.order == ffi::AVChannelOrder_AV_CHANNEL_ORDER_UNSPEC {
+                dec_ctx.set_ch_layout(
+                    AVChannelLayout::from_nb_channels(dec_ctx.ch_layout.nb_channels).into_inner(),
+                );
+            }
+
+            let args = format!(
+                "time_base={}/{}:sample_rate={}:sample_fmt={}:channel_layout={}",
+                dec_ctx.pkt_timebase.num,
+                dec_ctx.pkt_timebase.den,
+                dec_ctx.sample_rate,
+                // We can unwrap here, because we are sure that the given
+                // sample_fmt is valid.
+                get_sample_fmt_name(dec_ctx.sample_fmt)
+                    .unwrap()
+                    .to_string_lossy(),
+                dec_ctx.ch_layout().describe().unwrap().to_string_lossy(),
+            );
+            let args = &CString::new(args).unwrap();
+
+            let buffersrc_ctx = filter_graph
+                .create_filter_context(&buffersrc, cstr!("in"), Some(args))
+                .context("Cannot create audio buffer source")?;
+
+            let mut buffersink_ctx = filter_graph
+                .create_filter_context(&buffersink, cstr!("out"), None)
+                .context("Cannot create audio buffer sink")?;
+            buffersink_ctx
+                .opt_set_bin(cstr!("sample_fmts"), &enc_ctx.sample_fmt)
+                .context("Cannot set output sample format")?;
+            buffersink_ctx
+                .opt_set(
+                    cstr!("ch_layouts"),
+                    &enc_ctx.ch_layout().describe().unwrap(),
+                )
+                .context("Cannot set output channel layout")?;
+            buffersink_ctx
+                .opt_set_bin(cstr!("sample_rates"), &enc_ctx.sample_rate)
+                .context("Cannot set output sample rate")?;
+
+            (buffersrc_ctx, buffersink_ctx)
+        } else {
+            bail!("Only video and audio need filter initialization")
+        };
+
+    // Endpoints for the filter graph
+    //
+    // Yes, the outputs' name is `in` -_-b
+    let outputs = AVFilterInOut::new(cstr!("in"), &mut buffersrc_ctx, 0);
+    let inputs = AVFilterInOut::new(cstr!("out"), &mut buffersink_ctx, 0);
+
+    let (_inputs, _outputs) = filter_graph.parse_ptr(filter_spec, Some(inputs), Some(outputs))?;
+
+    filter_graph.config()?;
+
+    Ok(FilterContext {
+        buffersrc_ctx,
+        buffersink_ctx,
+    })
+}
+
+/// Create transcoding contexts corresponding to the given `stream_contexts`;
+/// the added filter contexts are mutable references to objects stored in
+/// `filter_graphs`.
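+///
+/// One graph is allocated per stream, and `None` entries pass through
+/// unchanged, e.g. (not compiled):
+/// ```ignore
+/// let mut filter_graphs: Vec<_> = (0..stream_ctx.len())
+///     .map(|_| AVFilterGraph::new())
+///     .collect();
+/// let filter_ctx = init_filters(&mut filter_graphs, stream_ctx)?;
+/// ```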
+fn init_filters(
+    filter_graphs: &mut [AVFilterGraph],
+    stream_contexts: Vec<Option<StreamContext>>,
+) -> Result<Vec<Option<FilteringContext>>> {
+    let mut filter_ctx = Vec::with_capacity(stream_contexts.len());
+
+    for (filter_graph, stream_context) in filter_graphs.iter_mut().zip(stream_contexts.into_iter())
+    {
+        let Some(stream_context) = stream_context else {
+            filter_ctx.push(None);
+            continue;
+        };
+
+        let StreamContext {
+            mut dec_ctx,
+            mut enc_ctx,
+            stream_index,
+        } = stream_context;
+
+        // dummy filter
+        let filter_spec = if dec_ctx.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
+            cstr!("null")
+        } else {
+            cstr!("anull")
+        };
+
+        let FilterContext {
+            buffersrc_ctx,
+            buffersink_ctx,
+        } = init_filter(filter_graph, &mut dec_ctx, &mut enc_ctx, filter_spec)?;
+
+        filter_ctx.push(Some(FilteringContext {
+            enc_ctx,
+            dec_ctx,
+            stream_index,
+            buffersrc_ctx,
+            buffersink_ctx,
+        }));
+    }
+
+    Ok(filter_ctx)
+}
+
+/// encode -> write_frame
+fn encode_write_frame(
+    mut filt_frame: Option<AVFrame>,
+    enc_ctx: &mut AVCodecContext,
+    ofmt_ctx: &mut AVFormatContextOutput,
+    stream_index: usize,
+) -> Result<()> {
+    if let Some(filt_frame) = filt_frame.as_mut() {
+        if filt_frame.pts != ffi::AV_NOPTS_VALUE {
+            filt_frame.set_pts(av_rescale_q(
+                filt_frame.pts,
+                filt_frame.time_base,
+                enc_ctx.time_base,
+            ));
+        }
+    }
+
+    enc_ctx
+        .send_frame(filt_frame.as_ref())
+        .context("Encode frame failed.")?;
+
+    loop {
+        let mut enc_pkt = match enc_ctx.receive_packet() {
+            Ok(packet) => packet,
+            Err(RsmpegError::EncoderDrainError) | Err(RsmpegError::EncoderFlushedError) => break,
+            Err(e) => bail!(e),
+        };
+
+        enc_pkt.set_stream_index(stream_index as i32);
+        enc_pkt.rescale_ts(
+            enc_ctx.time_base,
+            ofmt_ctx.streams()[stream_index].time_base,
+        );
+
+        ofmt_ctx
+            .interleaved_write_frame(&mut enc_pkt)
+            .context("Interleaved write frame failed.")?;
+    }
+
+    Ok(())
+}
+
+/// filter -> encode -> write_frame
+fn filter_encode_write_frame(
+    frame: Option<AVFrame>,
+    buffersrc_ctx: &mut AVFilterContextMut,
+    buffersink_ctx: &mut AVFilterContextMut,
+    enc_ctx: &mut AVCodecContext,
+    ofmt_ctx: &mut AVFormatContextOutput,
+    stream_index: usize,
+) -> Result<()> {
+    buffersrc_ctx
+        .buffersrc_add_frame(frame, None)
+        .context("Error submitting the frame to the filtergraph:")?;
+    loop {
+        let mut filtered_frame = match buffersink_ctx.buffersink_get_frame(None) {
+            Ok(frame) => frame,
+            Err(RsmpegError::BufferSinkDrainError) | Err(RsmpegError::BufferSinkEofError) => break,
+            Err(_) => bail!("Get frame from buffer sink failed."),
+        };
+
+        filtered_frame.set_time_base(buffersink_ctx.get_time_base());
+        filtered_frame.set_pict_type(ffi::AVPictureType_AV_PICTURE_TYPE_NONE);
+
+        encode_write_frame(Some(filtered_frame), enc_ctx, ofmt_ctx, stream_index)?;
+    }
+    Ok(())
+}
+
+/// Send an empty packet to the `encode_context` for packet flushing.
+fn flush_encoder(
+    enc_ctx: &mut AVCodecContext,
+    ofmt_ctx: &mut AVFormatContextOutput,
+    stream_index: usize,
+) -> Result<()> {
+    if enc_ctx.codec().capabilities & ffi::AV_CODEC_CAP_DELAY as i32 == 0 {
+        return Ok(());
+    }
+    encode_write_frame(None, enc_ctx, ofmt_ctx, stream_index)?;
+    Ok(())
+}
+
+/// Transcode audio and video streams in a multimedia file.
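+///
+/// Each filtered frame is stamped with `buffersink_ctx.get_time_base()`, so
+/// `encode_write_frame` can rescale its pts into `enc_ctx.time_base`.
+/// Illustrative call (hypothetical paths; pass muxer options via the dict):
+/// ```ignore
+/// transcode(cstr!("in.mp4"), cstr!("out.mp4"), &mut None)?;
+/// ```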
+pub fn transcode(
+    input_file: &CStr,
+    output_file: &CStr,
+    dict: &mut Option<AVDictionary>,
+) -> Result<()> {
+    let (dec_ctx, mut ifmt_ctx) = open_input_file(input_file)?;
+    let (stream_ctx, mut ofmt_ctx) = open_output_file(output_file, dec_ctx, dict)?;
+    let mut filter_graphs: Vec<_> = (0..stream_ctx.len())
+        .map(|_| AVFilterGraph::new())
+        .collect();
+    let mut filter_ctx = init_filters(&mut filter_graphs, stream_ctx)?;
+
+    loop {
+        let packet = match ifmt_ctx.read_packet() {
+            Ok(Some(x)) => x,
+            // No more frames
+            Ok(None) => break,
+            Err(e) => bail!("Read frame error: {:?}", e),
+        };
+
+        let in_stream_index = packet.stream_index as usize;
+
+        if let Some(FilteringContext {
+            dec_ctx: decode_context,
+            enc_ctx: encode_context,
+            stream_index,
+            buffersrc_ctx,
+            buffersink_ctx,
+        }) = filter_ctx[in_stream_index].as_mut()
+        {
+            decode_context.send_packet(Some(&packet)).unwrap();
+
+            loop {
+                let mut frame = match decode_context.receive_frame() {
+                    Ok(frame) => frame,
+                    Err(RsmpegError::DecoderDrainError) | Err(RsmpegError::DecoderFlushedError) => {
+                        break
+                    }
+                    Err(e) => bail!(e),
+                };
+
+                frame.set_pts(frame.best_effort_timestamp);
+                filter_encode_write_frame(
+                    Some(frame),
+                    buffersrc_ctx,
+                    buffersink_ctx,
+                    encode_context,
+                    &mut ofmt_ctx,
+                    *stream_index,
+                )?;
+            }
+        }
+    }
+
+    // Flush the filter graph by pushing EOF packet to buffer_src_context.
+    // Flush the encoder by pushing EOF frame to encode_context.
+    for filter_ctx in filter_ctx.iter_mut() {
+        match filter_ctx {
+            Some(FilteringContext {
+                dec_ctx: _,
+                enc_ctx,
+                stream_index,
+                buffersrc_ctx,
+                buffersink_ctx,
+            }) => {
+                filter_encode_write_frame(
+                    None,
+                    buffersrc_ctx,
+                    buffersink_ctx,
+                    enc_ctx,
+                    &mut ofmt_ctx,
+                    *stream_index,
+                )
+                .context("Flushing filter failed")?;
+                flush_encoder(enc_ctx, &mut ofmt_ctx, *stream_index)
+                    .context("Flushing encoder failed")?;
+            }
+            None => (),
+        }
+    }
+    ofmt_ctx.write_trailer()?;
+    Ok(())
+}
+
+#[test]
+fn transcode_test0() {
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    transcode(
+        cstr!("tests/assets/vids/mov_sample.mov"),
+        cstr!("tests/output/transcode/mov_sample.mov"),
+        &mut None,
+    )
+    .unwrap();
+}
+
+#[test]
+fn transcode_test1() {
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    transcode(
+        cstr!("tests/assets/vids/centaur.mpg"),
+        cstr!("tests/output/transcode/centaur.mpg"),
+        &mut None,
+    )
+    .unwrap();
+}
+
+#[test]
+fn transcode_test2() {
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    transcode(
+        cstr!("tests/assets/vids/bear.mp4"),
+        cstr!("tests/output/transcode/bear.mp4"),
+        &mut None,
+    )
+    .unwrap();
+}
+
+#[test]
+fn transcode_test3() {
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    transcode(
+        cstr!("tests/assets/vids/vp8.mp4"),
+        cstr!("tests/output/transcode/vp8.webm"),
+        &mut None,
+    )
+    .unwrap();
+}
+
+#[test]
+fn transcode_test4() {
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    transcode(
+        cstr!("tests/assets/vids/big_buck_bunny.mp4"),
+        cstr!("tests/output/transcode/big_buck_bunny.mp4"),
+        &mut None,
+    )
+    .unwrap();
+}
+
+#[test]
+fn transcode_test5() {
+    // Fragmented MP4 transcode.
+    std::fs::create_dir_all("tests/output/transcode/").unwrap();
+    let mut dict = Some(AVDictionary::new(
+        cstr!("movflags"),
+        cstr!("frag_keyframe+empty_moov"),
+        0,
+    ));
+
+    transcode(
+        cstr!("tests/assets/vids/big_buck_bunny.mp4"),
+        cstr!("tests/output/transcode/big_buck_bunny.fmp4.mp4"),
+        &mut dict,
+    )
+    .unwrap();
+
+    // Ensure `dict` is consumed.
+    assert!(dict.is_none());
+}
diff --git a/tests/transcoding.rs b/tests/transcoding.rs
deleted file mode 100644
index bc98f81..0000000
--- a/tests/transcoding.rs
+++ /dev/null
@@ -1,573 +0,0 @@
-use anyhow::{anyhow, bail, Context, Result};
-use cstr::cstr;
-use rsmpeg::{
-    self,
-    avcodec::{AVCodec, AVCodecContext},
-    avfilter::{AVFilter, AVFilterContextMut, AVFilterGraph, AVFilterInOut},
-    avformat::{AVFormatContextInput, AVFormatContextOutput},
-    avutil::{av_inv_q, av_mul_q, get_sample_fmt_name, ra, AVChannelLayout, AVDictionary, AVFrame},
-    error::RsmpegError,
-    ffi,
-};
-use std::ffi::{CStr, CString};
-
-struct StreamContext {
-    decode_context: AVCodecContext,
-    encode_context: AVCodecContext,
-    out_stream_index: usize,
-}
-
-struct FilterContext<'graph> {
-    buffer_src_context: AVFilterContextMut<'graph>,
-    buffer_sink_context: AVFilterContextMut<'graph>,
-}
-
-struct TranscodingContext<'graph> {
-    decode_context: AVCodecContext,
-    encode_context: AVCodecContext,
-    out_stream_index: usize,
-    buffer_src_context: AVFilterContextMut<'graph>,
-    buffer_sink_context: AVFilterContextMut<'graph>,
-}
-
-/// Get `decode_contexts`, `input_format_context`, the length of
-/// `decode_context` equals to the stream num of the input file. And each decode
-/// context corresponds to each stream, if the stream is neither audio nor
-/// audio, decode context at this index is set to `None`.
-fn open_input_file(filename: &CStr) -> Result<(Vec<Option<AVCodecContext>>, AVFormatContextInput)> {
-    let mut stream_contexts = vec![];
-    let mut input_format_context = AVFormatContextInput::open(filename, None, &mut None)?;
-
-    for input_stream in input_format_context.streams().into_iter() {
-        let codecpar = input_stream.codecpar();
-        let codec_type = codecpar.codec_type();
-
-        let decode_context = if codec_type.is_video() {
-            let codec_id = codecpar.codec_id;
-            let decoder = AVCodec::find_decoder(codec_id)
-                .with_context(|| anyhow!("video decoder ({}) not found.", codec_id))?;
-            let mut decode_context = AVCodecContext::new(&decoder);
-            decode_context.apply_codecpar(&codecpar)?;
-            if let Some(framerate) = input_stream.guess_framerate() {
-                decode_context.set_framerate(framerate);
-            }
-            decode_context.open(None)?;
-            Some(decode_context)
-        } else if codec_type.is_audio() {
-            let codec_id = codecpar.codec_id;
-            let decoder = AVCodec::find_decoder(codec_id)
-                .with_context(|| anyhow!("audio decoder ({}) not found.", codec_id))?;
-            let mut decode_context = AVCodecContext::new(&decoder);
-            decode_context.apply_codecpar(&codecpar)?;
-            decode_context.open(None)?;
-            Some(decode_context)
-        } else {
-            None
-        };
-
-        stream_contexts.push(decode_context);
-    }
-    input_format_context.dump(0, filename)?;
-    Ok((stream_contexts, input_format_context))
-}
-
-/// Accepts a output filename, attach `encode_context` to the corresponding
-/// `decode_context` and wrap them into a `stream_context`. `stream_context` is
-/// None when the given `decode_context` in the same index is None.
-fn open_output_file(
-    filename: &CStr,
-    decode_contexts: Vec<Option<AVCodecContext>>,
-    dict: &mut Option<AVDictionary>,
-) -> Result<(Vec<Option<StreamContext>>, AVFormatContextOutput)> {
-    let mut output_format_context = AVFormatContextOutput::create(filename, None)?;
-    let mut stream_contexts = vec![];
-
-    for decode_context in decode_contexts {
-        let stream_context = if let Some(decode_context) = decode_context {
-            let encoder = AVCodec::find_encoder(decode_context.codec_id)
-                .with_context(|| anyhow!("encoder({}) not found.", decode_context.codec_id))?;
-            let mut new_encode_context = AVCodecContext::new(&encoder);
-
-            if decode_context.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
-                new_encode_context.set_height(decode_context.height);
-                new_encode_context.set_width(decode_context.width);
-                new_encode_context.set_sample_aspect_ratio(decode_context.sample_aspect_ratio);
-                new_encode_context.set_pix_fmt(if let Some(pix_fmts) = encoder.pix_fmts() {
-                    pix_fmts[0]
-                } else {
-                    decode_context.pix_fmt
-                });
-                new_encode_context.set_time_base(av_inv_q(av_mul_q(
-                    decode_context.framerate,
-                    ra(decode_context.ticks_per_frame, 1),
-                )));
-            } else if decode_context.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_AUDIO {
-                new_encode_context.set_sample_rate(decode_context.sample_rate);
-                new_encode_context.set_ch_layout(decode_context.ch_layout().clone().into_inner());
-                new_encode_context.set_channels(decode_context.ch_layout.nb_channels);
-                new_encode_context.set_sample_fmt(encoder.sample_fmts().unwrap()[0]);
-                new_encode_context.set_time_base(ra(1, decode_context.sample_rate));
-            } else {
-                unreachable!("Shouldn't have decode_context when a codec is non-av!")
-            }
-
-            // Some formats want stream headers to be separate.
-            if output_format_context.oformat().flags & ffi::AVFMT_GLOBALHEADER as i32 != 0 {
-                new_encode_context
-                    .set_flags(new_encode_context.flags | ffi::AV_CODEC_FLAG_GLOBAL_HEADER as i32);
-            }
-
-            new_encode_context.open(None)?;
-
-            let mut out_stream = output_format_context.new_stream();
-            out_stream.set_codecpar(new_encode_context.extract_codecpar());
-            out_stream.set_time_base(new_encode_context.time_base);
-
-            Some(StreamContext {
-                encode_context: new_encode_context,
-                decode_context,
-                out_stream_index: out_stream.index as usize,
-            })
-        } else {
-            None
-        };
-        stream_contexts.push(stream_context);
-    }
-
-    output_format_context.dump(0, filename)?;
-    output_format_context.write_header(dict)?;
-
-    Ok((stream_contexts, output_format_context))
-}
-
-/// Init a filter between a `decode_context` and a `encode_context`
-/// corresponds to the given `filter_spec`.
-fn init_filter<'graph>(
-    filter_graph: &'graph mut AVFilterGraph,
-    decode_context: &mut AVCodecContext,
-    encode_context: &mut AVCodecContext,
-    filter_spec: &CStr,
-) -> Result<FilterContext<'graph>> {
-    let (mut buffer_src_context, mut buffer_sink_context) =
-        if decode_context.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
-            let buffer_src = AVFilter::get_by_name(cstr!("buffer")).unwrap();
-            let buffer_sink = AVFilter::get_by_name(cstr!("buffersink")).unwrap();
-
-            let time_base = ffi::AVRational {
-                num: decode_context.framerate.den,
-                den: decode_context.framerate.num,
-            };
-
-            let args = format!(
-                "video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect={}/{}",
-                decode_context.width,
-                decode_context.height,
-                decode_context.pix_fmt,
-                time_base.num,
-                time_base.den,
-                decode_context.sample_aspect_ratio.num,
-                decode_context.sample_aspect_ratio.den,
-            );
-
-            let args = &CString::new(args).unwrap();
-
-            let buffer_src_context =
-                filter_graph.create_filter_context(&buffer_src, cstr!("in"), Some(args))?;
-
-            let mut buffer_sink_context =
-                filter_graph.create_filter_context(&buffer_sink, cstr!("out"), None)?;
-            buffer_sink_context.opt_set_bin(cstr!("pix_fmts"), &encode_context.pix_fmt)?;
-
-            (buffer_src_context, buffer_sink_context)
-        } else if decode_context.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_AUDIO {
-            let buffer_src = AVFilter::get_by_name(cstr!("abuffer")).unwrap();
-            let buffer_sink = AVFilter::get_by_name(cstr!("abuffersink")).unwrap();
-
-            if decode_context.ch_layout.order == ffi::AVChannelOrder_AV_CHANNEL_ORDER_UNSPEC {
-                decode_context.set_ch_layout(
-                    AVChannelLayout::from_nb_channels(decode_context.ch_layout.nb_channels)
-                        .into_inner(),
-                );
-            }
-
-            let args = format!(
-                "time_base={}/{}:sample_rate={}:sample_fmt={}:channel_layout={}",
-                decode_context.time_base.num,
-                decode_context.time_base.den,
-                decode_context.sample_rate,
-                // We can unwrap here, because we are sure that the given
-                // sample_fmt is valid.
-                get_sample_fmt_name(decode_context.sample_fmt)
-                    .unwrap()
-                    .to_string_lossy(),
-                decode_context
-                    .ch_layout()
-                    .describe()
-                    .unwrap()
-                    .to_string_lossy(),
-            );
-            let args = &CString::new(args).unwrap();
-
-            let buffer_src_context =
-                filter_graph.create_filter_context(&buffer_src, cstr!("in"), Some(args))?;
-
-            let mut buffer_sink_context =
-                filter_graph.create_filter_context(&buffer_sink, cstr!("out"), None)?;
-            buffer_sink_context.opt_set_bin(cstr!("sample_fmts"), &encode_context.sample_fmt)?;
-            buffer_sink_context.opt_set(
-                cstr!("ch_layouts"),
-                &encode_context.ch_layout().describe().unwrap(),
-            )?;
-            buffer_sink_context.opt_set_bin(cstr!("sample_rates"), &encode_context.sample_rate)?;
-
-            (buffer_src_context, buffer_sink_context)
-        } else {
-            bail!("Only video and audio needs filter initialization")
-        };
-
-    // Yes the outputs' name is `in` -_-b
-    let outputs = AVFilterInOut::new(cstr!("in"), &mut buffer_src_context, 0);
-    let inputs = AVFilterInOut::new(cstr!("out"), &mut buffer_sink_context, 0);
-    let (_inputs, _outputs) = filter_graph.parse_ptr(filter_spec, Some(inputs), Some(outputs))?;
-
-    filter_graph.config()?;
-
-    Ok(FilterContext {
-        buffer_src_context,
-        buffer_sink_context,
-    })
-}
-
-/// Create transcoding context corresponding to the given `stream_contexts`, the
-/// added filter contexts is mutable reference to objects stored in
-/// `filter_graphs`.
-fn init_filters(
-    filter_graphs: &mut [AVFilterGraph],
-    stream_contexts: Vec<Option<StreamContext>>,
-) -> Result<Vec<Option<TranscodingContext>>> {
-    let mut filter_contexts = vec![];
-
-    for (filter_graph, stream_context) in filter_graphs.iter_mut().zip(stream_contexts.into_iter())
-    {
-        let filter_context = if let Some(StreamContext {
-            mut decode_context,
-            mut encode_context,
-            out_stream_index,
-        }) = stream_context
-        {
-            // dummy filter
-            let filter_spec = if decode_context.codec_type == ffi::AVMediaType_AVMEDIA_TYPE_VIDEO {
-                cstr!("null")
-            } else {
-                cstr!("anull")
-            };
-
-            let FilterContext {
-                buffer_src_context,
-                buffer_sink_context,
-            } = init_filter(
-                filter_graph,
-                &mut decode_context,
-                &mut encode_context,
-                filter_spec,
-            )?;
-
-            Some(TranscodingContext {
-                encode_context,
-                decode_context,
-                out_stream_index,
-                buffer_src_context,
-                buffer_sink_context,
-            })
-        } else {
-            None
-        };
-        filter_contexts.push(filter_context);
-    }
-
-    Ok(filter_contexts)
-}
-
-/// encode -> write_frame
-fn encode_write_frame(
-    frame_after: Option<&AVFrame>,
-    encode_context: &mut AVCodecContext,
-    output_format_context: &mut AVFormatContextOutput,
-    out_stream_index: usize,
-) -> Result<()> {
-    encode_context
-        .send_frame(frame_after)
-        .context("Encode frame failed.")?;
-
-    loop {
-        let mut packet = match encode_context.receive_packet() {
-            Ok(packet) => packet,
-            Err(RsmpegError::EncoderDrainError) | Err(RsmpegError::EncoderFlushedError) => break,
-            Err(e) => bail!(e),
-        };
-
-        packet.set_stream_index(out_stream_index as i32);
-        packet.rescale_ts(
-            encode_context.time_base,
-            output_format_context.streams()[out_stream_index].time_base,
-        );
-
-        match output_format_context.interleaved_write_frame(&mut packet) {
-            Ok(()) => Ok(()),
-            Err(RsmpegError::InterleavedWriteFrameError(-22)) => Ok(()),
-            Err(e) => Err(e),
-        }
-        .context("Interleaved write frame failed.")?;
-    }
-
-    Ok(())
-}
-
-/// filter -> encode -> write_frame
-fn filter_encode_write_frame(
-    frame_before: Option<AVFrame>,
-    buffer_src_context: &mut AVFilterContextMut,
-    buffer_sink_context: &mut AVFilterContextMut,
-    encode_context: &mut AVCodecContext,
-    output_format_context: &mut AVFormatContextOutput,
-    out_stream_index: usize,
-) -> Result<()> {
-    buffer_src_context
-        .buffersrc_add_frame(frame_before, None)
-        .context("Error while feeding the filtergraph")?;
-    loop {
-        let mut frame_after = match buffer_sink_context.buffersink_get_frame(None) {
-            Ok(frame) => frame,
-            Err(RsmpegError::BufferSinkDrainError) | Err(RsmpegError::BufferSinkEofError) => break,
-            Err(_) => bail!("Get frame from buffer sink failed."),
-        };
-        frame_after.set_pict_type(ffi::AVPictureType_AV_PICTURE_TYPE_NONE);
-
-        encode_write_frame(
-            Some(&frame_after),
-            encode_context,
-            output_format_context,
-            out_stream_index,
-        )?;
-    }
-    Ok(())
-}
-
-/// Send an empty packet to the `encode_context` for packet flushing.
-fn flush_encoder(
-    encode_context: &mut AVCodecContext,
-    output_format_context: &mut AVFormatContextOutput,
-    out_stream_index: usize,
-) -> Result<()> {
-    if encode_context.codec().capabilities & ffi::AV_CODEC_CAP_DELAY as i32 == 0 {
-        return Ok(());
-    }
-    encode_write_frame(
-        None,
-        encode_context,
-        output_format_context,
-        out_stream_index,
-    )?;
-    Ok(())
-}
-
-/// Transcoding audio and video stream in a multi media file.
-pub fn transcoding(
-    input_file: &CStr,
-    output_file: &CStr,
-    dict: &mut Option<AVDictionary>,
-) -> Result<()> {
-    let (decode_contexts, mut input_format_context) = open_input_file(input_file)?;
-    let (stream_contexts, mut output_format_context) =
-        open_output_file(output_file, decode_contexts, dict)?;
-    let mut filter_graphs: Vec<_> = (0..stream_contexts.len())
-        .map(|_| AVFilterGraph::new())
-        .collect();
-    let mut transcoding_contexts = init_filters(&mut filter_graphs, stream_contexts)?;
-    let mut last_timestamp = vec![-1; transcoding_contexts.len()];
-
-    loop {
-        let mut packet = match input_format_context.read_packet() {
-            Ok(Some(x)) => x,
-            // No more frames
-            Ok(None) => break,
-            Err(e) => bail!("Read frame error: {:?}", e),
-        };
-
-        let in_stream_index = packet.stream_index as usize;
-
-        if let Some(TranscodingContext {
-            decode_context,
-            encode_context,
-            out_stream_index,
-            buffer_src_context,
-            buffer_sink_context,
-        }) = transcoding_contexts[in_stream_index].as_mut()
-        {
-            let input_stream = &input_format_context.streams()[in_stream_index];
-            packet.rescale_ts(input_stream.time_base, encode_context.time_base);
-
-            decode_context.send_packet(Some(&packet)).unwrap();
-
-            loop {
-                let mut frame = match decode_context.receive_frame() {
-                    Ok(frame) => frame,
-                    Err(RsmpegError::DecoderDrainError) | Err(RsmpegError::DecoderFlushedError) => {
-                        break
-                    }
-                    Err(e) => bail!(e),
-                };
-
-                let mut best_effort_timestamp = frame.best_effort_timestamp;
-                if best_effort_timestamp == last_timestamp[in_stream_index] {
-                    best_effort_timestamp += 1;
-                    eprintln!(
-                        "fix timestamp: {} -> {}",
-                        last_timestamp[in_stream_index], best_effort_timestamp
-                    );
-                }
-                last_timestamp[in_stream_index] = best_effort_timestamp;
-                frame.set_pts(best_effort_timestamp);
-                filter_encode_write_frame(
-                    Some(frame),
-                    buffer_src_context,
-                    buffer_sink_context,
-                    encode_context,
-                    &mut output_format_context,
-                    *out_stream_index,
-                )?;
-            }
-        }
-    }
-
-    // Flush the filter graph by pushing EOF packet to buffer_src_context.
-    // Flush the encoder by pushing EOF frame to encode_context.
-    for transcoding_context in transcoding_contexts.iter_mut() {
-        match transcoding_context {
-            Some(TranscodingContext {
-                decode_context: _,
-                encode_context,
-                out_stream_index,
-                buffer_src_context,
-                buffer_sink_context,
-            }) => {
-                filter_encode_write_frame(
-                    None,
-                    buffer_src_context,
-                    buffer_sink_context,
-                    encode_context,
-                    &mut output_format_context,
-                    *out_stream_index,
-                )?;
-                flush_encoder(
-                    encode_context,
-                    &mut output_format_context,
-                    *out_stream_index,
-                )?;
-            }
-            None => (),
-        }
-    }
-    output_format_context.write_trailer()?;
-    Ok(())
-}
-
-#[test]
-fn transcoding_test0() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/mov_sample.mov"),
-        cstr!("tests/output/transcoding/mov_sample.mov"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-fn transcoding_test1() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/centaur.mpg"),
-        cstr!("tests/output/transcoding/centaur.mpg"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-fn transcoding_test2() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/bear.mp4"),
-        cstr!("tests/output/transcoding/bear.mp4"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-fn transcoding_test3() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/vp8.mp4"),
-        cstr!("tests/output/transcoding/vp8.webm"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-fn transcoding_test4() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/big_buck_bunny.mp4"),
-        cstr!("tests/output/transcoding/big_buck_bunny.mp4"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-#[ignore = "FFmpeg 6.0 frame size bug"]
-fn transcoding_test5() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/with_pic.mp4"),
-        cstr!("tests/output/transcoding/with_pic.mp4"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-fn transcoding_test6() {
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    transcoding(
-        cstr!("tests/assets/vids/screen-fragment.mp4"),
-        cstr!("tests/output/transcoding/screen-fragment.mp4"),
-        &mut None,
-    )
-    .unwrap();
-}
-
-#[test]
-#[ignore = "FFmpeg 6.0 frame size bug"]
-fn transcoding_test7() {
-    // Fragmented MP4 transcoding.
-    std::fs::create_dir_all("tests/output/transcoding/").unwrap();
-    let mut dict = Some(AVDictionary::new(
-        cstr!("movflags"),
-        cstr!("frag_keyframe+empty_moov"),
-        0,
-    ));
-
-    transcoding(
-        cstr!("tests/assets/vids/with_pic.mp4"),
-        cstr!("tests/output/transcoding/with_pic_fragmented.mp4"),
-        &mut dict,
-    )
-    .unwrap();
-
-    // Ensure `dict` is consumed.
-    assert!(dict.is_none());
-}