Skip to content

Commit

Permalink
feat: add ID3 parsing for text, link, and APIC frames (#412)
Browse files Browse the repository at this point in the history
* Make parsing ID3 APIC frames clearer. Fix assertion description in tests.
* Handle multiple string values in text frames
* Return plain and split frame values on multiple string text frames
* Rename `tag` to `frame` as we're parsing frames here
  • Loading branch information
pszemus committed Jun 8, 2022
1 parent 6d7bd5b commit 5454bdd
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 47 deletions.
175 changes: 129 additions & 46 deletions lib/m2ts/metadata-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
var
Stream = require('../utils/stream'),
StreamTypes = require('./stream-types'),
// Frames that allow different types of text encoding contain a text
// encoding description byte as the first byte of the frame data
// [ID3v2.4.0 section 4.]. The frame parsers in this file compare
// frame.data[0] against these values; the parsers that inspect it
// currently accept only Utf8 and ignore frames using any other encoding.
textEncodingDescriptionByte = {
Iso88591: 0x00, // ISO-8859-1, terminated with \0
Utf16: 0x01, // UTF-16 encoded Unicode with BOM, terminated with \0\0
Utf16be: 0x02, // UTF-16BE encoded Unicode without BOM, terminated with \0\0
Utf8: 0x03 // UTF-8 encoded Unicode, terminated with \0
},
// return a percent-encoded representation of the specified byte range
// @see http://en.wikipedia.org/wiki/Percent-encoding
percentEncode = function(bytes, start, end) {
Expand All @@ -37,53 +45,118 @@ var
(data[2] << 7) |
(data[3]);
},
tagParsers = {
TXXX: function(tag) {
frameParsers = {
'APIC': function(frame) {
var
i = 1,
mimeTypeEndIndex,
descriptionEndIndex,
LINK_MIME_TYPE = '-->';

if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parsing fields [ID3v2.4.0 section 4.14.]
mimeTypeEndIndex = frame.data.indexOf(0, i);
if (mimeTypeEndIndex < 0) {
// malformed frame
return;
}

// parsing Mime type field (terminated with \0)
frame.mimeType = parseIso88591(frame.data, i, mimeTypeEndIndex);
i = mimeTypeEndIndex + 1;

// parsing 1-byte Picture Type field
frame.pictureType = frame.data[i];
i++

descriptionEndIndex = frame.data.indexOf(0, i);
if (descriptionEndIndex < 0) {
// malformed frame
return;
}

// parsing Description field (terminated with \0)
frame.description = parseUtf8(frame.data, i, descriptionEndIndex);
i = descriptionEndIndex + 1;

if (frame.mimeType === LINK_MIME_TYPE) {
// parsing Picture Data field as URL (always represented as ISO-8859-1 [ID3v2.4.0 section 4.])
frame.url = parseIso88591(frame.data, i, frame.data.length)
} else {
// parsing Picture Data field as binary data
frame.pictureData = frame.data.subarray(i, frame.data.length);
}
},
'T*': function(frame) {
if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parse text field, do not include null terminator in the frame value
// frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
frame.value = parseUtf8(frame.data, 1, frame.data.length).replace(/\0*$/, '');
// text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
frame.values = frame.value.split('\0');
},
'TXXX': function(frame) {
var i;
if (tag.data[0] !== 3) {
if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

for (i = 1; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
for (i = 1; i < frame.data.length; i++) {
if (frame.data[i] === 0) {
// parse the text fields
tag.description = parseUtf8(tag.data, 1, i);
frame.description = parseUtf8(frame.data, 1, i);
// do not include the null terminator in the tag value
tag.value = parseUtf8(tag.data, i + 1, tag.data.length).replace(/\0*$/, '');
// frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
frame.value = parseUtf8(frame.data, i + 1, frame.data.length).replace(/\0*$/, '');
break;
}
}
tag.data = tag.value;
frame.data = frame.value;
},
WXXX: function(tag) {
'W*': function(frame) {
// parse URL field; URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
frame.url = parseIso88591(frame.data, 0, frame.data.length).replace(/\0.*$/, '');
},
'WXXX': function(frame) {
var i;
if (tag.data[0] !== 3) {
if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

for (i = 1; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
for (i = 1; i < frame.data.length; i++) {
if (frame.data[i] === 0) {
// parse the description and URL fields
tag.description = parseUtf8(tag.data, 1, i);
tag.url = parseUtf8(tag.data, i + 1, tag.data.length);
frame.description = parseUtf8(frame.data, 1, i);
// URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
frame.url = parseIso88591(frame.data, i + 1, frame.data.length).replace(/\0.*$/, '');
break;
}
}
},
PRIV: function(tag) {
'PRIV': function(frame) {
var i;

for (i = 0; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
for (i = 0; i < frame.data.length; i++) {
if (frame.data[i] === 0) {
// parse the description and URL fields
tag.owner = parseIso88591(tag.data, 0, i);
frame.owner = parseIso88591(frame.data, 0, i);
break;
}
}
tag.privateData = tag.data.subarray(i + 1);
tag.data = tag.privateData;
frame.privateData = frame.data.subarray(i + 1);
frame.data = frame.privateData;
}
},
MetadataStream;
Expand Down Expand Up @@ -215,34 +288,44 @@ MetadataStream = function(options) {
data: tag.data.subarray(frameStart + 10, frameStart + frameSize + 10)
};
frame.key = frame.id;
if (tagParsers[frame.id]) {
tagParsers[frame.id](frame);

// handle the special PRIV frame used to indicate the start
// time for raw AAC data
if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
var
d = frame.data,
size = ((d[3] & 0x01) << 30) |
(d[4] << 22) |
(d[5] << 14) |
(d[6] << 6) |
(d[7] >>> 2);

size *= 4;
size += d[7] & 0x03;
frame.timeStamp = size;
// in raw AAC, all subsequent data will be timestamped based
// on the value of this frame
// we couldn't have known the appropriate pts and dts before
// parsing this ID3 tag so set those values now
if (tag.pts === undefined && tag.dts === undefined) {
tag.pts = frame.timeStamp;
tag.dts = frame.timeStamp;
}
this.trigger('timestamp', frame);

// parse frame values
if (frameParsers[frame.id]) {
// use frame specific parser
frameParsers[frame.id](frame);
} else if (frame.id[0] === 'T') {
// use text frame generic parser
frameParsers['T*'](frame);
} else if (frame.id[0] === 'W') {
// use URL link frame generic parser
frameParsers['W*'](frame);
}

// handle the special PRIV frame used to indicate the start
// time for raw AAC data
if (frame.owner === 'com.apple.streaming.transportStreamTimestamp') {
var
d = frame.data,
size = ((d[3] & 0x01) << 30) |
(d[4] << 22) |
(d[5] << 14) |
(d[6] << 6) |
(d[7] >>> 2);

size *= 4;
size += d[7] & 0x03;
frame.timeStamp = size;
// in raw AAC, all subsequent data will be timestamped based
// on the value of this frame
// we couldn't have known the appropriate pts and dts before
// parsing this ID3 tag so set those values now
if (tag.pts === undefined && tag.dts === undefined) {
tag.pts = frame.timeStamp;
tag.dts = frame.timeStamp;
}
this.trigger('timestamp', frame);
}

tag.frames.push(frame);

frameStart += 10; // advance past the frame header
Expand Down
94 changes: 93 additions & 1 deletion test/metadata-stream.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,48 @@ QUnit.test('should skip tag frame parsing on malformed frame, preserving previou
})

assert.equal(events.length, 1, 'parsed 1 tag')
assert.equal(events[0].frames.length, 1, 'parsed one frame');
assert.equal(events[0].frames.length, 1, 'parsed 1 frame');
assert.equal(events[0].frames[0].key, 'TIT2');
});

// Sends one ID3 tag holding two APIC frames to the worker: the first carries
// inline picture bytes, the second uses the '-->' MIME type so its payload is
// expected to be exposed as `url` instead of `pictureData`.
QUnit.test('can parse APIC frame in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
      done = assert.async();

  worker.addEventListener('message', function(e) {
    // frame with embedded picture data
    assert.equal(e.data.frames[0].key, 'APIC', 'frame key is APIC');
    assert.equal(e.data.frames[0].mimeType, 'image/jpeg', 'parsed MIME type is "image/jpeg"');
    assert.equal(e.data.frames[0].pictureType, 0x03, 'parsed picture type is 0x03');
    assert.equal(e.data.frames[0].description, 'sample description', 'parsed description');
    assert.deepEqual(e.data.frames[0].pictureData, new Uint8Array(stringToInts('picture binary data')), 'parsed picture data');

    // frame whose picture is referenced by URL
    assert.equal(e.data.frames[1].key, 'APIC', 'frame key is APIC');
    assert.equal(e.data.frames[1].mimeType, '-->', 'parsed MIME type is "-->"');
    assert.equal(e.data.frames[1].pictureType, 0x04, 'parsed picture type is 0x04');
    assert.equal(e.data.frames[1].description, 'sample description 2', 'parsed description');
    assert.equal(e.data.frames[1].url, 'http://example.org/cover-back.jpg', 'parsed picture URL');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    data: new Uint8Array(id3Tag(id3Frame('APIC',
                                         0x03, // Text encoding: UTF-8
                                         stringToCString('image/jpeg'), // MIME type + \0
                                         0x03, // Picture type: Cover (front) [ID3v2.4.0 section 4.14]
                                         stringToCString('sample description'), // Description + \0
                                         stringToInts('picture binary data')
                                ),
                                id3Frame('APIC',
                                         0x03, // Text encoding: UTF-8
                                         stringToCString('-->'), // MIME type: link to the image [ID3v2.4.0 section 4.14] + \0
                                         0x04, // Picture type: Cover (back) [ID3v2.4.0 section 4.14]
                                         stringToCString('sample description 2'), // Description + \0
                                         stringToInts('http://example.org/cover-back.jpg')
                                )))
  });
});

QUnit.test('can parse PRIV frames in web worker', function(assert) {
var payload = stringToInts('arbitrary'),
worker = new MetadataStreamTestWorker(),
Expand Down Expand Up @@ -591,6 +629,60 @@ QUnit.test('can parse TXXX frames in web worker', function(assert) {
});
});

// Sends one ID3 tag holding two text information frames to the worker: TIT2
// with a single string and TIT3 with two terminator-separated strings, then
// checks both the joined `value` and the split `values` of each frame.
QUnit.test('should parse text frames in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
      done = assert.async();

  worker.addEventListener('message', function(e) {
    assert.equal(e.data.frames.length, 2, 'got 2 frames');

    // single string frame
    assert.equal(e.data.frames[0].key, 'TIT2', 'frame key is TIT2');
    assert.equal(e.data.frames[0].value, 'sample song title', 'parsed value');
    assert.equal(e.data.frames[0].values.length, 1, 'parsed value is an array of size 1');
    assert.equal(e.data.frames[0].values[0], 'sample song title', 'parsed a non multiple strings value');

    // multiple strings frame: `value` keeps the \0 separator, `values` is split on it
    assert.equal(e.data.frames[1].key, 'TIT3', 'frame key is TIT3');
    assert.equal(e.data.frames[1].value, 'sample title 1\0sample title 2', 'parsed value');
    assert.equal(e.data.frames[1].values.length, 2, 'parsed value is an array of size 2');
    assert.equal(e.data.frames[1].values[0], 'sample title 1', 'parsed 1st multiple strings value');
    assert.equal(e.data.frames[1].values[1], 'sample title 2', 'parsed 2nd multiple strings value');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    data: new Uint8Array(id3Tag(id3Frame('TIT2',
                                         0x03, // utf-8
                                         // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
                                         stringToCString('sample song title')),
                                id3Frame('TIT3',
                                         0x03, // utf-8
                                         // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
                                         // text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
                                         stringToCString('sample title 1'), stringToCString('sample title 2'))))
  });
});

// Sends a WOAF frame whose payload contains data after a string terminator
// and checks that only the part before the first \0 is exposed as `url`.
QUnit.test('should parse URL link frames in web worker', function(assert) {
  var worker = new MetadataStreamTestWorker(),
      done = assert.async(),
      payloadBytes;

  // if the payload is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
  payloadBytes = stringToInts('http://example.org\0 ignored \0 part');

  worker.addEventListener('message', function(e) {
    assert.equal(e.data.frames[0].key, 'WOAF', 'frame key is WOAF');
    assert.equal(e.data.frames[0].url, 'http://example.org', 'parsed URL');
    worker.terminate();
    done();
  });

  worker.postMessage({
    type: 'timed-metadata',
    // URL link frames have no text encoding byte, so the payload is the whole frame body
    data: new Uint8Array(id3Tag(id3Frame('WOAF', payloadBytes)))
  });
});

QUnit.test('triggers special event after parsing a timestamp ID3 tag', function(assert) {
var
array = new Uint8Array(73),
Expand Down

0 comments on commit 5454bdd

Please sign in to comment.