From 38d3c7207cd6da5bac1a604f10499f3ae273ae43 Mon Sep 17 00:00:00 2001 From: zoff99 Date: Tue, 19 Mar 2024 19:32:37 +0100 Subject: [PATCH] make H265 encoder optional, and autodetection for H265 encoder --- toxav/codecs/h264/codec.c | 2 +- toxav/rtp.c | 5 ++ toxav/rtp.h | 5 ++ toxav/toxav.c | 128 ++++++++++++++++++++++---------------- toxav/toxav.h | 3 + toxav/video.c | 87 +++++++++++++++++--------- 6 files changed, 144 insertions(+), 86 deletions(-) diff --git a/toxav/codecs/h264/codec.c b/toxav/codecs/h264/codec.c index a39b805d0e..579079e490 100644 --- a/toxav/codecs/h264/codec.c +++ b/toxav/codecs/h264/codec.c @@ -2468,7 +2468,7 @@ uint32_t send_frames_h265(ToxAV *av, uint32_t friend_number, uint16_t width, uin 0, *video_frame_record_timestamp, (int32_t)0, - TOXAV_ENCODER_CODEC_USED_H264, + TOXAV_ENCODER_CODEC_USED_H265, call->video_bit_rate, call->video->client_video_capture_delay_ms, call->video->video_encoder_frame_orientation_angle, diff --git a/toxav/rtp.c b/toxav/rtp.c index f1775a344a..70206a9a31 100644 --- a/toxav/rtp.c +++ b/toxav/rtp.c @@ -1150,6 +1150,11 @@ int rtp_send_data(RTPSession *session, const uint8_t *data, uint32_t length, boo header.flags = header.flags | RTP_ENCODER_IS_H264; } + if ((codec_used == TOXAV_ENCODER_CODEC_USED_H265) && + (is_video_payload == 1)) { + header.flags = header.flags | RTP_ENCODER_IS_H265; + } + if (video_frame_orientation_angle == TOXAV_CLIENT_INPUT_VIDEO_ORIENTATION_90) { header.flags = header.flags | RTP_ENCODER_VIDEO_ROTATION_ANGLE_BIT0; } else if (video_frame_orientation_angle == TOXAV_CLIENT_INPUT_VIDEO_ORIENTATION_180) { diff --git a/toxav/rtp.h b/toxav/rtp.h index 34b719f319..4bb4e42979 100644 --- a/toxav/rtp.h +++ b/toxav/rtp.h @@ -105,6 +105,11 @@ typedef enum RTPFlags { RTP_ENCODER_VIDEO_ROTATION_ANGLE_BIT0 = 1 << 4, RTP_ENCODER_VIDEO_ROTATION_ANGLE_BIT1 = 1 << 5, + /** + * Whether H265 codec was used to encode this video frame + */ + RTP_ENCODER_IS_H265 = 1 << 6, + } RTPFlags; diff --git a/toxav/toxav.c 
b/toxav/toxav.c index f4c4f48f40..ec03df506d 100644 --- a/toxav/toxav.c +++ b/toxav/toxav.c @@ -819,7 +819,7 @@ bool toxav_option_set(ToxAV *av, uint32_t friend_number, TOXAV_OPTIONS_OPTION op VCSession *vc = (VCSession *)call->video; if (((int32_t)value >= TOXAV_ENCODER_CODEC_USED_VP8) - && ((int32_t)value <= TOXAV_ENCODER_CODEC_USED_H264)) { + && ((int32_t)value <= TOXAV_ENCODER_CODEC_USED_H265)) { if (vc->video_encoder_coded_used == (int32_t)value) { LOGGER_API_WARNING(av->tox, "video video_encoder_coded_used already set to: %d", (int)value); @@ -1517,7 +1517,8 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt pthread_mutex_lock(call->toxav_call_mutex); // HINT: auto switch encoder, if we got capabilities packet from friend ------ - if (call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H264) { + if ((call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H264) && + (call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H265)) { const uint64_t friend_caps = tox_friend_get_capabilities(av->tox, friend_number); LOGGER_API_DEBUG(av->tox, "-------> CCCCCC:%ld", (long)friend_caps); if ((friend_caps & TOX_CAPABILITY_TOXAV_H264) != 0) { @@ -1528,7 +1529,10 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt if ((call->video->h264_video_capabilities_received == 1) && - (call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H264)) { + (call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H264) + && + (call->video->video_encoder_coded_used != TOXAV_ENCODER_CODEC_USED_H265) + ) { // when switching to H264 set default video bitrate if (call->video_bit_rate > 0) { @@ -1686,24 +1690,10 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt // for the H265 encoder ------- #endif - uint32_t result2_h265 = 1; int h265_num_nals = 0; { /* Encode */ -#ifdef HAVE_H265_ENCODER - LOGGER_API_DEBUG(av->tox, "**__** encoding H265 
frame **__**"); - result2_h265 = encode_frame_h265(av, friend_number, width, height, - y, u, v, call, - &video_frame_record_timestamp, - vpx_encode_flags, - &h265_num_nals, - &nal, - &i_frame_size, &h265_nals); -#endif - - - /* if ((call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_VP8) || (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_VP9)) { @@ -1721,21 +1711,42 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt goto END; } } else { - LOGGER_API_DEBUG(av->tox, "**##** encoding H264 frame **##**"); - uint32_t result = encode_frame_h264(av, friend_number, width, height, - y, u, v, call, - &video_frame_record_timestamp, - vpx_encode_flags, - &nal, - &i_frame_size); - if (result != 0) { - pthread_mutex_unlock(call->mutex_video); - rc = TOXAV_ERR_SEND_FRAME_INVALID; - goto END; +#ifdef HAVE_H265_ENCODER + if (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H265) + { + LOGGER_API_DEBUG(av->tox, "**__** encoding H265 frame **__**"); + uint32_t result = encode_frame_h265(av, friend_number, width, height, + y, u, v, call, + &video_frame_record_timestamp, + vpx_encode_flags, + &h265_num_nals, + &nal, + &i_frame_size, &h265_nals); + if (result != 0) { + pthread_mutex_unlock(call->mutex_video); + rc = TOXAV_ERR_SEND_FRAME_INVALID; + goto END; + } + } +#endif + + if (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H264) /* H264 selected: run the H264 encoder so the H264 send path below has a fresh frame */ + { + LOGGER_API_DEBUG(av->tox, "**##** encoding H264 frame **##**"); + uint32_t result = encode_frame_h264(av, friend_number, width, height, + y, u, v, call, + &video_frame_record_timestamp, + vpx_encode_flags, + &nal, + &i_frame_size); + if (result != 0) { + pthread_mutex_unlock(call->mutex_video); + rc = TOXAV_ERR_SEND_FRAME_INVALID; + goto END; + } } } - */ } @@ -1746,21 +1757,7 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt { /* Send frames */ -#ifdef HAVE_H265_ENCODER - if (result2_h265 == 0) { - uint32_t result3 = 
send_frames_h265(av, friend_number, width, height, - y, u, v, call, - &video_frame_record_timestamp, - vpx_encode_flags, - &nal, - &i_frame_size, - h265_num_nals, - &h265_nals, - &rc); - } -#endif - /* if ((call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_VP8) || (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_VP9)) { @@ -1778,20 +1775,40 @@ bool toxav_video_send_frame_age(ToxAV *av, uint32_t friend_number, uint16_t widt } } else { - uint32_t result = send_frames_h264(av, friend_number, width, height, - y, u, v, call, - &video_frame_record_timestamp, - vpx_encode_flags, - &nal, - &i_frame_size, - &rc); - if (result != 0) { - pthread_mutex_unlock(call->mutex_video); - goto END; +#ifdef HAVE_H265_ENCODER + if (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H265) { + uint32_t result = send_frames_h265(av, friend_number, width, height, + y, u, v, call, + &video_frame_record_timestamp, + vpx_encode_flags, + &nal, + &i_frame_size, + h265_num_nals, + &h265_nals, + &rc); + if (result != 0) { + pthread_mutex_unlock(call->mutex_video); + goto END; + } + } +#endif + + if (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H264) { + uint32_t result = send_frames_h264(av, friend_number, width, height, + y, u, v, call, + &video_frame_record_timestamp, + vpx_encode_flags, + &nal, + &i_frame_size, + &rc); + + if (result != 0) { + pthread_mutex_unlock(call->mutex_video); + goto END; + } } } - */ } pthread_mutex_unlock(call->mutex_video); @@ -2174,7 +2191,8 @@ void callback_bwc(BWController *bwc, uint32_t friend_number, float loss, void *u } // HINT: sanity check -------------- - if (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H264) { + if ((call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H264) || + (call->video->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H265)) { if (call->video_bit_rate < VIDEO_BITRATE_MIN_AUTO_VALUE_H264) { call->video_bit_rate = 
VIDEO_BITRATE_MIN_AUTO_VALUE_H264; } else if (call->video_bit_rate > VIDEO_BITRATE_MAX_AUTO_VALUE_H264) { diff --git a/toxav/toxav.h b/toxav/toxav.h index 27a53caa75..5f356f2454 100644 --- a/toxav/toxav.h +++ b/toxav/toxav.h @@ -297,9 +297,11 @@ bool toxav_answer(ToxAV *av, uint32_t friend_number, uint32_t audio_bit_rate, ui typedef enum TOXAV_CALL_COMM_INFO { TOXAV_CALL_COMM_DECODER_IN_USE_VP8 = 0, TOXAV_CALL_COMM_DECODER_IN_USE_H264 = 1, + TOXAV_CALL_COMM_DECODER_IN_USE_H265 = 16, TOXAV_CALL_COMM_ENCODER_IN_USE_VP8 = 2, TOXAV_CALL_COMM_ENCODER_IN_USE_H264 = 3, TOXAV_CALL_COMM_ENCODER_IN_USE_H264_OMX_PI = 6, + TOXAV_CALL_COMM_ENCODER_IN_USE_H265 = 15, TOXAV_CALL_COMM_DECODER_CURRENT_BITRATE = 4, TOXAV_CALL_COMM_ENCODER_CURRENT_BITRATE = 5, TOXAV_CALL_COMM_NETWORK_ROUND_TRIP_MS = 7, @@ -984,6 +986,7 @@ typedef enum TOXAV_ENCODER_CODEC_USED_VALUE { TOXAV_ENCODER_CODEC_USED_VP8 = 0, TOXAV_ENCODER_CODEC_USED_VP9 = 1, TOXAV_ENCODER_CODEC_USED_H264 = 2, + TOXAV_ENCODER_CODEC_USED_H265 = 3, } TOXAV_ENCODER_CODEC_USED_VALUE; typedef enum TOXAV_ENCODER_KF_METHOD_VALUE { diff --git a/toxav/video.c b/toxav/video.c index 9f425b2af1..9a715357ee 100644 --- a/toxav/video.c +++ b/toxav/video.c @@ -175,6 +175,9 @@ VCSession *vc_new(Mono_Time *mono_time, const Logger *log, ToxAV *av, uint32_t f if (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H264) { cmi = TOXAV_CALL_COMM_DECODER_IN_USE_H264; } + if (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H265) { + cmi = TOXAV_CALL_COMM_DECODER_IN_USE_H265; + } av->call_comm_cb(av, friend_number, cmi, 0, av->call_comm_cb_user_data); @@ -187,6 +190,9 @@ VCSession *vc_new(Mono_Time *mono_time, const Logger *log, ToxAV *av, uint32_t f cmi = TOXAV_CALL_COMM_ENCODER_IN_USE_H264; } } + if (vc->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H265) { + cmi = TOXAV_CALL_COMM_ENCODER_IN_USE_H265; + } av->call_comm_cb(av, friend_number, cmi, 0, av->call_comm_cb_user_data); } @@ -250,6 +256,7 @@ void 
video_switch_decoder(VCSession *vc, TOXAV_ENCODER_CODEC_USED_VALUE decoder_ if (vc->video_decoder_codec_used != (int32_t)decoder_to_use) { if ((decoder_to_use == TOXAV_ENCODER_CODEC_USED_VP8) || (decoder_to_use == TOXAV_ENCODER_CODEC_USED_VP9) + || (decoder_to_use == TOXAV_ENCODER_CODEC_USED_H265) || (decoder_to_use == TOXAV_ENCODER_CODEC_USED_H264)) { vc->video_decoder_codec_used = decoder_to_use; @@ -265,6 +272,9 @@ void video_switch_decoder(VCSession *vc, TOXAV_ENCODER_CODEC_USED_VALUE decoder_ if (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H264) { cmi = TOXAV_CALL_COMM_DECODER_IN_USE_H264; } + if (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H265) { + cmi = TOXAV_CALL_COMM_DECODER_IN_USE_H265; + } vc->av->call_comm_cb(vc->av, vc->friend_number, cmi, 0, vc->av->call_comm_cb_user_data); @@ -307,6 +317,7 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a uint64_t frame_flags = 0; uint8_t data_type = 0; uint8_t h264_encoded_video_frame = 0; + uint8_t h265_encoded_video_frame = 0; uint32_t full_data_len = 0; uint32_t timestamp_out_ = 0; uint32_t timestamp_min = 0; @@ -609,6 +620,7 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a data_type = (uint8_t)((frame_flags & RTP_KEY_FRAME) != 0); h264_encoded_video_frame = (uint8_t)((frame_flags & RTP_ENCODER_IS_H264) != 0); + h265_encoded_video_frame = (uint8_t)((frame_flags & RTP_ENCODER_IS_H265) != 0); uint8_t video_orientation_bit0 = (uint8_t)((frame_flags & RTP_ENCODER_VIDEO_ROTATION_ANGLE_BIT0) != 0); uint8_t video_orientation_bit1 = (uint8_t)((frame_flags & RTP_ENCODER_VIDEO_ROTATION_ANGLE_BIT1) != 0); @@ -684,7 +696,9 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a LOGGER_API_WARNING(tox, "missing %d video frames (m1)", (int)missing_frames_count); #endif - if (vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H264) { + if ((vc->video_decoder_codec_used != 
TOXAV_ENCODER_CODEC_USED_H264) && + (vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H265)) + { rc = vpx_codec_decode(vc->decoder, NULL, 0, NULL, VPX_DL_REALTIME); } @@ -755,11 +769,18 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a && (h264_encoded_video_frame == 1)) { LOGGER_API_WARNING(tox, "h264_encoded_video_frame:AA"); video_switch_decoder(vc, TOXAV_ENCODER_CODEC_USED_H264); - - } else if ((vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H264) - && (h264_encoded_video_frame == 0)) { - LOGGER_API_WARNING(tox, "h264_encoded_video_frame:BB"); - // HINT: once we switched to H264 never switch back to VP8 until this call ends + } else if ((vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H265) + && (h265_encoded_video_frame == 1)) { + LOGGER_API_WARNING(tox, "h265_encoded_video_frame:AA"); + video_switch_decoder(vc, TOXAV_ENCODER_CODEC_USED_H265); + } else if ( + ((vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H264) || + (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H265)) + && ((h264_encoded_video_frame == 0) && (h265_encoded_video_frame == 0)) + ) + { + LOGGER_API_WARNING(tox, "h26(4|5)_encoded_video_frame:BB"); + // HINT: once we switched to H264 (or H265) never switch back to VP8 until this call ends } } @@ -767,8 +788,11 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a // as workaround send it again on the first 30 frames if (DISABLE_H264_DECODER_FEATURE != 1) { - if ((vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H264) - && ((long)header_v3->sequnum < 30)) { + if ( + ((vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H264) && + (vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H265)) + && ((long)header_v3->sequnum < 30)) + { // HINT: tell friend that we have H264 decoder capabilities (3) ------- uint32_t pkg_buf_len = 2; @@ -786,7 +810,7 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, 
uint64_t *a //* MID UNLOCK *// // pthread_mutex_unlock(vc->queue_mutex); - if (vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H264) { + if ((vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H264) && (vc->video_decoder_codec_used != TOXAV_ENCODER_CODEC_USED_H265)) { decode_frame_vpx(vc, tox, skip_video_flag, a_r_timestamp, a_l_timestamp, v_r_timestamp, v_l_timestamp, @@ -794,25 +818,24 @@ uint8_t vc_iterate(VCSession *vc, Tox *tox, uint8_t skip_video_flag, uint64_t *a rc, full_data_len, &ret_value); } else { - -// --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- -// --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- - decode_frame_h265(vc, tox, skip_video_flag, a_r_timestamp, - a_l_timestamp, - v_r_timestamp, v_l_timestamp, - header_v3, p, - rc, full_data_len, - &ret_value); -// --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- -// --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- H265 --- -/* - decode_frame_h264(vc, tox, skip_video_flag, a_r_timestamp, - a_l_timestamp, - v_r_timestamp, v_l_timestamp, - header_v3, p, - rc, full_data_len, - &ret_value); -*/ + if (vc->video_decoder_codec_used == TOXAV_ENCODER_CODEC_USED_H265) + { + decode_frame_h265(vc, tox, skip_video_flag, a_r_timestamp, + a_l_timestamp, + v_r_timestamp, v_l_timestamp, + header_v3, p, + rc, full_data_len, + &ret_value); + } + else + { + decode_frame_h264(vc, tox, skip_video_flag, a_r_timestamp, + a_l_timestamp, + v_r_timestamp, v_l_timestamp, + header_v3, p, + rc, full_data_len, + &ret_value); + } } //* NEW UNLOCK *// @@ -1047,9 +1070,13 @@ int vc_reconfigure_encoder(Logger *log, VCSession *vc, uint32_t bit_rate, uint16 ret = vc_reconfigure_encoder_vpx(log, vc, bit_rate, width, height, kf_max_dist); } else { #ifdef HAVE_H265_ENCODER - vc_reconfigure_encoder_h265(log, vc, bit_rate, width, height, kf_max_dist); + if (vc->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H265) { + vc_reconfigure_encoder_h265(log, vc, 
bit_rate, width, height, kf_max_dist); + } #endif - ret = vc_reconfigure_encoder_h264(log, vc, bit_rate, width, height, kf_max_dist); + if (vc->video_encoder_coded_used == TOXAV_ENCODER_CODEC_USED_H264) { + ret = vc_reconfigure_encoder_h264(log, vc, bit_rate, width, height, kf_max_dist); + } } vc->video_encoder_coded_used_prev = vc->video_encoder_coded_used;