From 0b54b0e13b381c4bbbbe8668f0ffbdc967ba890c Mon Sep 17 00:00:00 2001
From: winlin <winlin@vip.126.com>
Date: Sat, 24 Dec 2022 18:28:39 +0800
Subject: [PATCH] MP3: Fix bug for TS or HLS with mp3 codec. v4.0.269 (#296)

1. Refresh HLS audio codec if changed in stream.
2. Refresh TS audio codec if changed in stream.
3. Fix mp3 demux bug in SrsFormat::audio_mp3_demux.
4. Use 0x03 (ISO/IEC 11172, MPEG-1 audio) instead of 0x04 (ISO/IEC 13818-3, MPEG-2 audio) as the PMT stream type for MP3, following FFmpeg.
---
 trunk/doc/CHANGELOG.md                |  1 +
 trunk/src/app/srs_app_hls.cpp         | 28 +++++++++++++++++++++++++++
 trunk/src/app/srs_app_hls.hpp         |  6 ++++++
 trunk/src/core/srs_core_version4.hpp  |  2 +-
 trunk/src/kernel/srs_kernel_codec.cpp | 13 +++----------
 trunk/src/kernel/srs_kernel_ts.cpp    | 25 ++++++++++++++++++++----
 trunk/src/kernel/srs_kernel_ts.hpp    | 10 +++++++---
 7 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md
index 4c4372600f..170633de08 100644
--- a/trunk/doc/CHANGELOG.md
+++ b/trunk/doc/CHANGELOG.md
@@ -8,6 +8,7 @@ The changelog for SRS.
 
 ## SRS 4.0 Changelog
 
+* v4.0, 2022-12-24, For [#296](https://github.com/ossrs/srs/issues/296): MP3: Fix bug for TS or HLS with mp3 codec. v4.0.269
 * v4.0, 2022-11-22, Pick [#3079](https://github.com/ossrs/srs/issues/3079): WebRTC: Fix no audio and video issue for Firefox. v4.0.268
 * v4.0, 2022-10-10, For [#2901](https://github.com/ossrs/srs/issues/2901): Edge: Fast disconnect and reconnect. v4.0.267
 * v4.0, 2022-09-27, For [#3167](https://github.com/ossrs/srs/issues/3167): WebRTC: Refine sequence jitter algorithm. v4.0.266
diff --git a/trunk/src/app/srs_app_hls.cpp b/trunk/src/app/srs_app_hls.cpp
index 8ec6407612..b8dec2b300 100644
--- a/trunk/src/app/srs_app_hls.cpp
+++ b/trunk/src/app/srs_app_hls.cpp
@@ -202,6 +202,7 @@ SrsHlsMuxer::SrsHlsMuxer()
     async = new SrsAsyncCallWorker();
     context = new SrsTsContext();
     segments = new SrsFragmentWindow();
+    latest_acodec_ = SrsAudioCodecIdForbidden;
     
     memset(key, 0, 16);
     memset(iv, 0, 16);
@@ -263,6 +264,24 @@ int SrsHlsMuxer::deviation()
     return deviation_ts;
 }
 
+SrsAudioCodecId SrsHlsMuxer::latest_acodec()
+{
+    // If current context writer exists, we query from it.
+    if (current && current->tscw) return current->tscw->acodec();
+
+    // Otherwise, return the codec last recorded by set_latest_acodec(), or Forbidden if it was never set.
+    return latest_acodec_;
+}
+
+void SrsHlsMuxer::set_latest_acodec(SrsAudioCodecId v)
+{
+    // Refresh the codec in context writer for current segment.
+    if (current && current->tscw) current->tscw->set_acodec(v);
+
+    // Refresh the codec for future segments.
+    latest_acodec_ = v;
+}
+
 srs_error_t SrsHlsMuxer::initialize()
 {
     return srs_success;
@@ -371,6 +390,8 @@ srs_error_t SrsHlsMuxer::segment_open()
             srs_warn("hls: use aac for other codec=%s", default_acodec_str.c_str());
         }
     }
+    // If the audio codec has already been detected in the stream, prefer it over the configured default.
+    if (latest_acodec_ != SrsAudioCodecIdForbidden) default_acodec = latest_acodec_;
     
     // load the default vcodec from config.
     SrsVideoCodecId default_vcodec = SrsVideoCodecIdAVC;
@@ -963,6 +984,13 @@ srs_error_t SrsHlsController::on_sequence_header()
 srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts)
 {
     srs_error_t err = srs_success;
+
+    // Refresh the codec ASAP.
+    if (muxer->latest_acodec() != frame->acodec()->id) {
+        srs_trace("HLS: Switch audio codec %d(%s) to %d(%s)", muxer->latest_acodec(), srs_audio_codec_id2str(muxer->latest_acodec()).c_str(),
+            frame->acodec()->id, srs_audio_codec_id2str(frame->acodec()->id).c_str());
+        muxer->set_latest_acodec(frame->acodec()->id);
+    }
     
     // write audio to cache.
     if ((err = tsmc->cache_audio(frame, pts)) != srs_success) {
diff --git a/trunk/src/app/srs_app_hls.hpp b/trunk/src/app/srs_app_hls.hpp
index 90b13736d5..1e683c0478 100644
--- a/trunk/src/app/srs_app_hls.hpp
+++ b/trunk/src/app/srs_app_hls.hpp
@@ -156,6 +156,9 @@ class SrsHlsMuxer
     SrsHlsSegment* current;
     // The ts context, to keep cc continous between ts.
     SrsTsContext* context;
+private:
+    // Latest audio codec, parsed from stream.
+    SrsAudioCodecId latest_acodec_;
 public:
     SrsHlsMuxer();
     virtual ~SrsHlsMuxer();
@@ -166,6 +169,9 @@ class SrsHlsMuxer
     virtual std::string ts_url();
     virtual srs_utime_t duration();
     virtual int deviation();
+public:
+    SrsAudioCodecId latest_acodec();
+    void set_latest_acodec(SrsAudioCodecId v);
 public:
     // Initialize the hls muxer.
     virtual srs_error_t initialize();
diff --git a/trunk/src/core/srs_core_version4.hpp b/trunk/src/core/srs_core_version4.hpp
index d26350ab1e..0e99a52e55 100644
--- a/trunk/src/core/srs_core_version4.hpp
+++ b/trunk/src/core/srs_core_version4.hpp
@@ -9,6 +9,6 @@
 
 #define VERSION_MAJOR       4
 #define VERSION_MINOR       0
-#define VERSION_REVISION    268
+#define VERSION_REVISION    269
 
 #endif
diff --git a/trunk/src/kernel/srs_kernel_codec.cpp b/trunk/src/kernel/srs_kernel_codec.cpp
index c0289c00a5..224feaf74a 100644
--- a/trunk/src/kernel/srs_kernel_codec.cpp
+++ b/trunk/src/kernel/srs_kernel_codec.cpp
@@ -1407,20 +1407,13 @@ srs_error_t SrsFormat::audio_mp3_demux(SrsBuffer* stream, int64_t timestamp)
     // we always decode aac then mp3.
     srs_assert(acodec->id == SrsAudioCodecIdMP3);
     
-    // Update the RAW MP3 data.
+    // Update the RAW MP3 data. Note that the data starts with the 12-bit syncword 0xFFF, so we must not skip any
+    // bytes; for details, please see ISO_IEC_11172-3-MP3-1993.pdf pages 20 and 26.
     raw = stream->data() + stream->pos();
     nb_raw = stream->size() - stream->pos();
     
-    stream->skip(1);
-    if (stream->empty()) {
-        return err;
-    }
-    
-    char* data = stream->data() + stream->pos();
-    int size = stream->size() - stream->pos();
-    
     // mp3 payload.
-    if ((err = audio->add_sample(data, size)) != srs_success) {
+    if ((err = audio->add_sample(raw, nb_raw)) != srs_success) {
         return srs_error_wrap(err, "add audio frame");
     }
     
diff --git a/trunk/src/kernel/srs_kernel_ts.cpp b/trunk/src/kernel/srs_kernel_ts.cpp
index 96a95c0ffa..0016e5720a 100644
--- a/trunk/src/kernel/srs_kernel_ts.cpp
+++ b/trunk/src/kernel/srs_kernel_ts.cpp
@@ -2598,8 +2598,8 @@ SrsTsContextWriter::SrsTsContextWriter(ISrsStreamWriter* w, SrsTsContext* c, Srs
 {
     writer = w;
     context = c;
-    
-    acodec = ac;
+
+    acodec_ = ac;
     vcodec = vc;
 }
 
@@ -2614,7 +2614,7 @@ srs_error_t SrsTsContextWriter::write_audio(SrsTsMessage* audio)
     srs_info("hls: write audio pts=%" PRId64 ", dts=%" PRId64 ", size=%d",
         audio->pts, audio->dts, audio->PES_packet_length);
     
-    if ((err = context->encode(writer, audio, vcodec, acodec)) != srs_success) {
+    if ((err = context->encode(writer, audio, vcodec, acodec_)) != srs_success) {
         return srs_error_wrap(err, "ts: write audio");
     }
     srs_info("hls encode audio ok");
@@ -2629,7 +2629,7 @@ srs_error_t SrsTsContextWriter::write_video(SrsTsMessage* video)
     srs_info("hls: write video pts=%" PRId64 ", dts=%" PRId64 ", size=%d",
         video->pts, video->dts, video->PES_packet_length);
     
-    if ((err = context->encode(writer, video, vcodec, acodec)) != srs_success) {
+    if ((err = context->encode(writer, video, vcodec, acodec_)) != srs_success) {
         return srs_error_wrap(err, "ts: write video");
     }
     srs_info("hls encode video ok");
@@ -2642,6 +2642,16 @@ SrsVideoCodecId SrsTsContextWriter::video_codec()
     return vcodec;
 }
 
+SrsAudioCodecId SrsTsContextWriter::acodec()
+{
+    return acodec_;
+}
+
+void SrsTsContextWriter::set_acodec(SrsAudioCodecId v)
+{
+    acodec_ = v;
+}
+
 SrsEncFileWriter::SrsEncFileWriter()
 {
     memset(iv,0,16);
@@ -3079,6 +3089,13 @@ srs_error_t SrsTsTransmuxer::write_audio(int64_t timestamp, char* data, int size
     if (format->acodec->id == SrsAudioCodecIdAAC && format->audio->aac_packet_type == SrsAudioAacFrameTraitSequenceHeader) {
         return err;
     }
+
+    // Refresh the audio codec of the TS writer whenever it differs from the codec of the current frame.
+    if (tscw->acodec() != format->acodec->id) {
+        srs_trace("TS: Switch audio codec %d(%s) to %d(%s)", tscw->acodec(), srs_audio_codec_id2str(tscw->acodec()).c_str(),
+            format->acodec->id, srs_audio_codec_id2str(format->acodec->id).c_str());
+        tscw->set_acodec(format->acodec->id);
+    }
     
     // the dts calc from rtmp/flv header.
     // @remark for http ts stream, the timestamp is always monotonically increase,
diff --git a/trunk/src/kernel/srs_kernel_ts.hpp b/trunk/src/kernel/srs_kernel_ts.hpp
index 4e411802eb..8c4f4f7c11 100644
--- a/trunk/src/kernel/srs_kernel_ts.hpp
+++ b/trunk/src/kernel/srs_kernel_ts.hpp
@@ -97,7 +97,7 @@ enum SrsTsPidApply
     SrsTsPidApplyAudio, // vor audio
 };
 
-// Table 2-29 - Stream type assignments
+// Table 2-29 - Stream type assignments, hls-mpeg-ts-iso13818-1.pdf, page 66
 enum SrsTsStream
 {
     // ITU-T | ISO/IEC Reserved
@@ -106,8 +106,8 @@ enum SrsTsStream
     // ISO/IEC 11172 Video
     // ITU-T Rec. H.262 | ISO/IEC 13818-2 Video or ISO/IEC 11172-2 constrained parameter video stream
     // ISO/IEC 11172 Audio
+    SrsTsStreamAudioMp3 = 0x03,
     // ISO/IEC 13818-3 Audio
-    SrsTsStreamAudioMp3 = 0x04,
     // ITU-T Rec. H.222.0 | ISO/IEC 13818-1 private_sections
     // ITU-T Rec. H.222.0 | ISO/IEC 13818-1 PES packets containing private data
     // ISO/IEC 13522 MHEG
@@ -1243,7 +1243,7 @@ class SrsTsContextWriter
     // User must config the codec in right way.
     // @see https://github.com/ossrs/srs/issues/301
     SrsVideoCodecId vcodec;
-    SrsAudioCodecId acodec;
+    SrsAudioCodecId acodec_;
 private:
     SrsTsContext* context;
     ISrsStreamWriter* writer;
@@ -1259,6 +1259,10 @@ class SrsTsContextWriter
 public:
     // get the video codec of ts muxer.
     virtual SrsVideoCodecId video_codec();
+public:
+    // Get and set the audio codec.
+    SrsAudioCodecId acodec();
+    void set_acodec(SrsAudioCodecId v);
 };
 
 // Used for HLS Encryption