[MP3] Parse complete Xing/Info header info

author: trav90 <travawine@openmailbox.org> 2017-09-14 02:32:46 -0500
committer: trav90 <travawine@openmailbox.org> 2017-09-14 02:32:46 -0500
commit: bcd4aadcd6aa4f75f4c83e0c0b6d48b5a50331cf (patch)
tree: b0a249dc5749942794b587dbe4ed418e18d26359
parent: ab06e5040bc4b0a15352101eba292cb35a291e97 (diff)
download: palemoon-bcd4aadcd6aa4f75f4c83e0c0b6d48b5a50331cf.tar.gz
3 files changed, 129 insertions, 37 deletions
diff --git a/dom/media/MP3Demuxer.cpp b/dom/media/MP3Demuxer.cpp
index 643416be4..346ce32ea 100644
--- a/dom/media/MP3Demuxer.cpp
+++ b/dom/media/MP3Demuxer.cpp
@@ -280,10 +280,13 @@ MP3TrackDemuxer::Duration() const {
     return TimeUnit::FromMicroseconds(-1);
   }
 
-  const int64_t streamLen = StreamLength();
-  // Assume we know the exact number of frames from the VBR header.
-  int64_t numFrames = mParser.VBRInfo().NumFrames();
-  if (numFrames < 0) {
+  int64_t numFrames = 0;
+  const auto numAudioFrames = mParser.VBRInfo().NumAudioFrames();
+  if (numAudioFrames) {
+    // The VBR header's frame count excludes the VBR header frame itself.
+    numFrames = numAudioFrames.value() + 1;
+  } else {
+    const int64_t streamLen = StreamLength();
     if (streamLen < 0) {
       // Unknown length, we can't estimate duration.
       return TimeUnit::FromMicroseconds(-1);
@@ -467,10 +470,15 @@ MP3TrackDemuxer::Read(uint8_t* aBuffer, int64_t aOffset, int32_t aSize) {
 
 double
 MP3TrackDemuxer::AverageFrameLength() const {
-  if (!mNumParsedFrames) {
-    return 0.0;
+  if (mNumParsedFrames) {
+    return static_cast<double>(mTotalFrameLen) / mNumParsedFrames;
+  }
+  const auto& vbr = mParser.VBRInfo();
+  if (vbr.NumBytes() && vbr.NumAudioFrames()) {
+    return static_cast<double>(vbr.NumBytes().value()) /
+           (vbr.NumAudioFrames().value() + 1);
   }
-  return static_cast<double>(mTotalFrameLen) / mNumParsedFrames;
+  return 0.0;
 }
 
 // FrameParser
@@ -764,9 +772,13 @@ FrameParser::FrameHeader::Update(uint8_t c) {
 
 // FrameParser::VBRHeader
 
+namespace vbr_header {
+static const char* TYPE_STR[3] = {"NONE", "XING", "VBRI"};
+static const uint32_t TOC_SIZE = 100;
+} // namespace vbr_header
+
 FrameParser::VBRHeader::VBRHeader()
-  : mNumFrames(-1),
-    mType(NONE)
+  : mType(NONE)
 {
 }
 
@@ -775,17 +787,51 @@ FrameParser::VBRHeader::Type() const {
   return mType;
 }
 
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::NumAudioFrames() const {
+  return mNumAudioFrames;
+}
+
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::NumBytes() const {
+  return mNumBytes;
+}
+
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::Scale() const {
+  return mScale;
+}
+
+bool
+FrameParser::VBRHeader::IsTOCPresent() const {
+  return mTOC.size() == vbr_header::TOC_SIZE;
+}
+
 int64_t
-FrameParser::VBRHeader::NumFrames() const {
-  return mNumFrames;
+FrameParser::VBRHeader::Offset(float aDurationFac) const {
+  if (!IsTOCPresent()) {
+    return -1;
+  }
+
+  // Constrain the duration percentage to [0, 99].
+  const float durationPer = 100.0f * std::min(0.99f, std::max(0.0f, aDurationFac));
+  const size_t fullPer = durationPer;
+  const float rest = durationPer - fullPer;
+
+  MOZ_ASSERT(fullPer < mTOC.size());
+  int64_t offset = mTOC.at(fullPer);
+
+  if (rest > 0.0 && fullPer + 1 < mTOC.size()) {
+    offset += rest * (mTOC.at(fullPer + 1) - offset);
+  }
+
+  return offset;
 }
 
 bool
 FrameParser::VBRHeader::ParseXing(ByteReader* aReader) {
-  static const uint32_t TAG = BigEndian::readUint32("Xing");
-  static const uint32_t TAG2 = BigEndian::readUint32("Info");
-  static const uint32_t FRAME_COUNT_OFFSET = 8;
-  static const uint32_t FRAME_COUNT_SIZE = 4;
+  static const uint32_t XING_TAG = BigEndian::readUint32("Xing");
+  static const uint32_t INFO_TAG = BigEndian::readUint32("Info");
 
   enum Flags {
     NUM_FRAMES = 0x01,
@@ -798,24 +844,44 @@ FrameParser::VBRHeader::ParseXing(ByteReader* aReader) {
   const size_t prevReaderOffset = aReader->Offset();
 
   // We have to search for the Xing header as its position can change.
-  while (aReader->Remaining() >= FRAME_COUNT_OFFSET + FRAME_COUNT_SIZE) {
-    if (aReader->PeekU32() != TAG && aReader->PeekU32() != TAG2) {
-      aReader->Read(1);
-      continue;
-    }
-    // Skip across the VBR header ID tag.
-    aReader->Read(sizeof(TAG));
+  while (aReader->CanRead32() &&
+         aReader->PeekU32() != XING_TAG && aReader->PeekU32() != INFO_TAG) {
+    aReader->Read(1);
+  }
 
-    const uint32_t flags = aReader->ReadU32();
-    if (flags & NUM_FRAMES) {
-      mNumFrames = aReader->ReadU32();
-    }
+  if (aReader->CanRead32()) {
+    // Skip across the VBR header ID tag.
+    aReader->ReadU32();
     mType = XING;
-    aReader->Seek(prevReaderOffset);
-    return true;
+    }
+  uint32_t flags = 0;
+  if (aReader->CanRead32()) {
+    flags = aReader->ReadU32();
+  }
+  if (flags & NUM_FRAMES && aReader->CanRead32()) {
+    mNumAudioFrames = Some(aReader->ReadU32());
   }
+  if (flags & NUM_BYTES && aReader->CanRead32()) {
+    mNumBytes = Some(aReader->ReadU32());
+  }
+  if (flags & TOC && aReader->Remaining() >= vbr_header::TOC_SIZE) {
+    if (!mNumBytes) {
+      // We don't have the stream size to calculate offsets, skip the TOC.
+      aReader->Read(vbr_header::TOC_SIZE);
+    } else {
+      mTOC.clear();
+      mTOC.reserve(vbr_header::TOC_SIZE);
+      for (size_t i = 0; i < vbr_header::TOC_SIZE; ++i) {
+        mTOC.push_back(1.0f / 256.0f * aReader->ReadU8() * mNumBytes.value());
+      }
+    }
+  }
+  if (flags & VBR_SCALE && aReader->CanRead32()) {
+    mScale = Some(aReader->ReadU32());
+  }
+
   aReader->Seek(prevReaderOffset);
-  return false;
+  return mType == XING;
 }
 
 bool
@@ -836,7 +902,7 @@ FrameParser::VBRHeader::ParseVBRI(ByteReader* aReader) {
     aReader->Seek(prevReaderOffset + OFFSET);
     if (aReader->ReadU32() == TAG) {
       aReader->Seek(prevReaderOffset + FRAME_COUNT_OFFSET);
-      mNumFrames = aReader->ReadU32();
+      mNumAudioFrames = Some(aReader->ReadU32());
       mType = VBRI;
       aReader->Seek(prevReaderOffset);
       return true;
@@ -848,7 +914,8 @@ FrameParser::VBRHeader::ParseVBRI(ByteReader* aReader) {
 
 bool
 FrameParser::VBRHeader::Parse(ByteReader* aReader) {
-  return ParseVBRI(aReader) || ParseXing(aReader);
+  const bool rv = ParseVBRI(aReader) || ParseXing(aReader);
+  return rv;
 }
 
 // FrameParser::Frame
diff --git a/dom/media/MP3Demuxer.h b/dom/media/MP3Demuxer.h
index 2b20c4254..c362f25af 100644
--- a/dom/media/MP3Demuxer.h
+++ b/dom/media/MP3Demuxer.h
@@ -6,6 +6,7 @@
 #define MP3_DEMUXER_H_
 
 #include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
 #include "MediaDataDemuxer.h"
 #include "MediaResource.h"
 #include "mp4_demuxer/ByteReader.h"
@@ -201,8 +202,9 @@ public:
   // this class to parse them and access this info.
   class VBRHeader {
   public:
+    // Synchronize with vbr_header TYPE_STR on change.
     enum VBRHeaderType {
-      NONE,
+      NONE = 0,
       XING,
       VBRI
     };
@@ -213,8 +215,22 @@ public:
     // Returns the parsed VBR header type, or NONE if no valid header found.
     VBRHeaderType Type() const;
 
-    // Returns the total number of frames expected in the stream/file.
-    int64_t NumFrames() const;
+    // Returns the total number of audio frames (excluding the VBR header frame)
+    // expected in the stream/file.
+    const Maybe<uint32_t>& NumAudioFrames() const;
+
+    // Returns the expected size of the stream in bytes.
+    const Maybe<uint32_t>& NumBytes() const;
+
+    // Returns the VBR scale factor (0: best quality, 100: lowest quality).
+    const Maybe<uint32_t>& Scale() const;
+
+    // Returns true iff Xing/Info TOC (table of contents) is present.
+    bool IsTOCPresent() const;
+
+    // Returns the byte offset for the given duration fraction (0: begin,
+    // 1.0: end), or -1 if no TOC is present.
+    int64_t Offset(float aDurationFac) const;
 
     // Parses contents of given ByteReader for a valid VBR header.
     // The offset of the passed ByteReader needs to point to an MPEG frame begin,
@@ -236,7 +252,16 @@ public:
     bool ParseVBRI(mp4_demuxer::ByteReader* aReader);
 
     // The total number of frames expected as parsed from a VBR header.
-    int64_t mNumFrames;
+    Maybe<uint32_t> mNumAudioFrames;
+
+    // The total number of bytes expected in the stream.
+    Maybe<uint32_t> mNumBytes;
+
+    // The VBR scale factor.
+    Maybe<uint32_t> mScale;
+
+    // The TOC table mapping duration percentage to byte offset.
+    std::vector<int64_t> mTOC;
 
     // The detected VBR header type.
     VBRHeaderType mType;
diff --git a/dom/media/gtest/TestMP3Demuxer.cpp b/dom/media/gtest/TestMP3Demuxer.cpp
index f3f42a7e0..5bd576ffc 100644
--- a/dom/media/gtest/TestMP3Demuxer.cpp
+++ b/dom/media/gtest/TestMP3Demuxer.cpp
@@ -224,10 +224,10 @@ TEST_F(MP3DemuxerTest, VBRHeader) {
     if (target.mIsVBR) {
       EXPECT_EQ(FrameParser::VBRHeader::XING, vbr.Type());
       // TODO: find reference number which accounts for trailing headers.
-      // EXPECT_EQ(target.mNumSamples / target.mSamplesPerFrame, vbr.NumFrames());
+      // EXPECT_EQ(target.mNumSamples / target.mSamplesPerFrame, vbr.NumAudioFrames().value());
     } else {
       EXPECT_EQ(FrameParser::VBRHeader::NONE, vbr.Type());
-      EXPECT_EQ(-1, vbr.NumFrames());
+      EXPECT_FALSE(vbr.NumAudioFrames());
     }
   }
 }
author	trav90 <travawine@openmailbox.org>	2017-09-14 02:32:46 -0500
committer	trav90 <travawine@openmailbox.org>	2017-09-14 02:32:46 -0500
commit	bcd4aadcd6aa4f75f4c83e0c0b6d48b5a50331cf (patch)
tree	b0a249dc5749942794b587dbe4ed418e18d26359
parent	ab06e5040bc4b0a15352101eba292cb35a291e97 (diff)
download	palemoon-bcd4aadcd6aa4f75f4c83e0c0b6d48b5a50331cf.tar.gz