From 0b54b0e13b381c4bbbbe8668f0ffbdc967ba890c Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 24 Dec 2022 18:28:39 +0800 Subject: [PATCH] MP3: Fix bug for TS or HLS with mp3 codec. v4.0.269 (#296) 1. Refresh HLS audio codec if changed in stream. 2. Refresh TS audio codec if changed in stream. 3. Fix mp3 demux bug in SrsFormat::audio_mp3_demux. 4. Use 0x03 (ISO/IEC 11172 Audio, MPEG-1) instead of 0x04 (ISO/IEC 13818-3 Audio, MPEG-2) as the PMT stream type for MP3, following FFmpeg. --- trunk/doc/CHANGELOG.md | 1 + trunk/src/app/srs_app_hls.cpp | 28 +++++++++++++++++++++++++++ trunk/src/app/srs_app_hls.hpp | 6 ++++++ trunk/src/core/srs_core_version4.hpp | 2 +- trunk/src/kernel/srs_kernel_codec.cpp | 13 +++---------- trunk/src/kernel/srs_kernel_ts.cpp | 25 ++++++++++++++++++++---- trunk/src/kernel/srs_kernel_ts.hpp | 10 +++++++--- 7 files changed, 67 insertions(+), 18 deletions(-) diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md index 4c4372600f..170633de08 100644 --- a/trunk/doc/CHANGELOG.md +++ b/trunk/doc/CHANGELOG.md @@ -8,6 +8,7 @@ The changelog for SRS. ## SRS 4.0 Changelog +* v4.0, 2022-12-24, For [#296](https://github.com/ossrs/srs/issues/296): MP3: Fix bug for TS or HLS with mp3 codec. v4.0.269 * v4.0, 2022-11-22, Pick [#3079](https://github.com/ossrs/srs/issues/3079): WebRTC: Fix no audio and video issue for Firefox. v4.0.268 * v4.0, 2022-10-10, For [#2901](https://github.com/ossrs/srs/issues/2901): Edge: Fast disconnect and reconnect. v4.0.267 * v4.0, 2022-09-27, For [#3167](https://github.com/ossrs/srs/issues/3167): WebRTC: Refine sequence jitter algorithm. 
v4.0.266 diff --git a/trunk/src/app/srs_app_hls.cpp b/trunk/src/app/srs_app_hls.cpp index 8ec6407612..b8dec2b300 100644 --- a/trunk/src/app/srs_app_hls.cpp +++ b/trunk/src/app/srs_app_hls.cpp @@ -202,6 +202,7 @@ SrsHlsMuxer::SrsHlsMuxer() async = new SrsAsyncCallWorker(); context = new SrsTsContext(); segments = new SrsFragmentWindow(); + latest_acodec_ = SrsAudioCodecIdForbidden; memset(key, 0, 16); memset(iv, 0, 16); @@ -263,6 +264,24 @@ int SrsHlsMuxer::deviation() return deviation_ts; } +SrsAudioCodecId SrsHlsMuxer::latest_acodec() +{ + // If current context writer exists, we query from it. + if (current && current->tscw) return current->tscw->acodec(); + + // Get the configured or updated config. + return latest_acodec_; +} + +void SrsHlsMuxer::set_latest_acodec(SrsAudioCodecId v) +{ + // Refresh the codec in context writer for current segment. + if (current && current->tscw) current->tscw->set_acodec(v); + + // Refresh the codec for future segments. + latest_acodec_ = v; +} + srs_error_t SrsHlsMuxer::initialize() { return srs_success; @@ -371,6 +390,8 @@ srs_error_t SrsHlsMuxer::segment_open() srs_warn("hls: use aac for other codec=%s", default_acodec_str.c_str()); } } + // Now that we know the latest audio codec in stream, use it. + if (latest_acodec_ != SrsAudioCodecIdForbidden) default_acodec = latest_acodec_; // load the default vcodec from config. SrsVideoCodecId default_vcodec = SrsVideoCodecIdAVC; @@ -963,6 +984,13 @@ srs_error_t SrsHlsController::on_sequence_header() srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts) { srs_error_t err = srs_success; + + // Refresh the codec ASAP. + if (muxer->latest_acodec() != frame->acodec()->id) { + srs_trace("HLS: Switch audio codec %d(%s) to %d(%s)", muxer->latest_acodec(), srs_audio_codec_id2str(muxer->latest_acodec()).c_str(), + frame->acodec()->id, srs_audio_codec_id2str(frame->acodec()->id).c_str()); + muxer->set_latest_acodec(frame->acodec()->id); + } // write audio to cache. 
if ((err = tsmc->cache_audio(frame, pts)) != srs_success) { diff --git a/trunk/src/app/srs_app_hls.hpp b/trunk/src/app/srs_app_hls.hpp index 90b13736d5..1e683c0478 100644 --- a/trunk/src/app/srs_app_hls.hpp +++ b/trunk/src/app/srs_app_hls.hpp @@ -156,6 +156,9 @@ class SrsHlsMuxer SrsHlsSegment* current; // The ts context, to keep cc continous between ts. SrsTsContext* context; +private: + // Latest audio codec, parsed from stream. + SrsAudioCodecId latest_acodec_; public: SrsHlsMuxer(); virtual ~SrsHlsMuxer(); @@ -166,6 +169,9 @@ class SrsHlsMuxer virtual std::string ts_url(); virtual srs_utime_t duration(); virtual int deviation(); +public: + SrsAudioCodecId latest_acodec(); + void set_latest_acodec(SrsAudioCodecId v); public: // Initialize the hls muxer. virtual srs_error_t initialize(); diff --git a/trunk/src/core/srs_core_version4.hpp b/trunk/src/core/srs_core_version4.hpp index d26350ab1e..0e99a52e55 100644 --- a/trunk/src/core/srs_core_version4.hpp +++ b/trunk/src/core/srs_core_version4.hpp @@ -9,6 +9,6 @@ #define VERSION_MAJOR 4 #define VERSION_MINOR 0 -#define VERSION_REVISION 268 +#define VERSION_REVISION 269 #endif diff --git a/trunk/src/kernel/srs_kernel_codec.cpp b/trunk/src/kernel/srs_kernel_codec.cpp index c0289c00a5..224feaf74a 100644 --- a/trunk/src/kernel/srs_kernel_codec.cpp +++ b/trunk/src/kernel/srs_kernel_codec.cpp @@ -1407,20 +1407,13 @@ srs_error_t SrsFormat::audio_mp3_demux(SrsBuffer* stream, int64_t timestamp) // we always decode aac then mp3. srs_assert(acodec->id == SrsAudioCodecIdMP3); - // Update the RAW MP3 data. + // Update the RAW MP3 data. Note that the data starts with the 12-bit syncword 0xFFF, so we must not skip any bytes; + // for details, please see ISO_IEC_11172-3-MP3-1993.pdf, pages 20 and 26. raw = stream->data() + stream->pos(); nb_raw = stream->size() - stream->pos(); - stream->skip(1); - if (stream->empty()) { - return err; - } - - char* data = stream->data() + stream->pos(); - int size = stream->size() - stream->pos(); - // mp3 payload. 
- if ((err = audio->add_sample(data, size)) != srs_success) { + if ((err = audio->add_sample(raw, nb_raw)) != srs_success) { return srs_error_wrap(err, "add audio frame"); } diff --git a/trunk/src/kernel/srs_kernel_ts.cpp b/trunk/src/kernel/srs_kernel_ts.cpp index 96a95c0ffa..0016e5720a 100644 --- a/trunk/src/kernel/srs_kernel_ts.cpp +++ b/trunk/src/kernel/srs_kernel_ts.cpp @@ -2598,8 +2598,8 @@ SrsTsContextWriter::SrsTsContextWriter(ISrsStreamWriter* w, SrsTsContext* c, Srs { writer = w; context = c; - - acodec = ac; + + acodec_ = ac; vcodec = vc; } @@ -2614,7 +2614,7 @@ srs_error_t SrsTsContextWriter::write_audio(SrsTsMessage* audio) srs_info("hls: write audio pts=%" PRId64 ", dts=%" PRId64 ", size=%d", audio->pts, audio->dts, audio->PES_packet_length); - if ((err = context->encode(writer, audio, vcodec, acodec)) != srs_success) { + if ((err = context->encode(writer, audio, vcodec, acodec_)) != srs_success) { return srs_error_wrap(err, "ts: write audio"); } srs_info("hls encode audio ok"); @@ -2629,7 +2629,7 @@ srs_error_t SrsTsContextWriter::write_video(SrsTsMessage* video) srs_info("hls: write video pts=%" PRId64 ", dts=%" PRId64 ", size=%d", video->pts, video->dts, video->PES_packet_length); - if ((err = context->encode(writer, video, vcodec, acodec)) != srs_success) { + if ((err = context->encode(writer, video, vcodec, acodec_)) != srs_success) { return srs_error_wrap(err, "ts: write video"); } srs_info("hls encode video ok"); @@ -2642,6 +2642,16 @@ SrsVideoCodecId SrsTsContextWriter::video_codec() return vcodec; } +SrsAudioCodecId SrsTsContextWriter::acodec() +{ + return acodec_; +} + +void SrsTsContextWriter::set_acodec(SrsAudioCodecId v) +{ + acodec_ = v; +} + SrsEncFileWriter::SrsEncFileWriter() { memset(iv,0,16); @@ -3079,6 +3089,13 @@ srs_error_t SrsTsTransmuxer::write_audio(int64_t timestamp, char* data, int size if (format->acodec->id == SrsAudioCodecIdAAC && format->audio->aac_packet_type == SrsAudioAacFrameTraitSequenceHeader) { return err; } + + // 
Switch the audio codec when the stream's codec differs from the TS writer's current one, for example MP3 instead of AAC. + if (tscw->acodec() != format->acodec->id) { + srs_trace("TS: Switch audio codec %d(%s) to %d(%s)", tscw->acodec(), srs_audio_codec_id2str(tscw->acodec()).c_str(), + format->acodec->id, srs_audio_codec_id2str(format->acodec->id).c_str()); + tscw->set_acodec(format->acodec->id); + } // the dts calc from rtmp/flv header. // @remark for http ts stream, the timestamp is always monotonically increase, diff --git a/trunk/src/kernel/srs_kernel_ts.hpp b/trunk/src/kernel/srs_kernel_ts.hpp index 4e411802eb..8c4f4f7c11 100644 --- a/trunk/src/kernel/srs_kernel_ts.hpp +++ b/trunk/src/kernel/srs_kernel_ts.hpp @@ -97,7 +97,7 @@ enum SrsTsPidApply SrsTsPidApplyAudio, // vor audio }; -// Table 2-29 - Stream type assignments +// Table 2-29 - Stream type assignments, hls-mpeg-ts-iso13818-1.pdf, page 66 enum SrsTsStream { // ITU-T | ISO/IEC Reserved @@ -106,8 +106,8 @@ enum SrsTsStream // ISO/IEC 11172 Video // ITU-T Rec. H.262 | ISO/IEC 13818-2 Video or ISO/IEC 11172-2 constrained parameter video stream // ISO/IEC 11172 Audio + SrsTsStreamAudioMp3 = 0x03, // ISO/IEC 13818-3 Audio - SrsTsStreamAudioMp3 = 0x04, // ITU-T Rec. H.222.0 | ISO/IEC 13818-1 private_sections // ITU-T Rec. H.222.0 | ISO/IEC 13818-1 PES packets containing private data // ISO/IEC 13522 MHEG @@ -1243,7 +1243,7 @@ class SrsTsContextWriter // User must config the codec in right way. // @see https://github.com/ossrs/srs/issues/301 SrsVideoCodecId vcodec; - SrsAudioCodecId acodec; + SrsAudioCodecId acodec_; private: SrsTsContext* context; ISrsStreamWriter* writer; @@ -1259,6 +1259,10 @@ class SrsTsContextWriter public: // get the video codec of ts muxer. virtual SrsVideoCodecId video_codec(); +public: + // Get and set the audio codec. + SrsAudioCodecId acodec(); + void set_acodec(SrsAudioCodecId v); }; // Used for HLS Encryption