Skip to content

Commit

Permalink
avi: Add extended chunks support and option
Browse files Browse the repository at this point in the history
This is used for >1gb files. Disabling this decoding will speed up
decoding a lot but will probably also produce some gaps, as some
parts of the movi chunks will not be referenced by the indx index.
  • Loading branch information
wader committed Oct 12, 2023
1 parent 5af1174 commit 4df6f52
Show file tree
Hide file tree
Showing 10 changed files with 1,049 additions and 997 deletions.
18 changes: 13 additions & 5 deletions doc/formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -267,20 +267,21 @@ Decode value as avc_au

### Options

|Name |Default|Description|
|- |- |-|
|`decode_samples`|true |Decode samples|
|Name |Default|Description|
|- |- |-|
|`decode_extended_chunks`|true |Decode extended chunks|
|`decode_samples` |true |Decode samples|

### Examples

Decode file using avi options
```
$ fq -d avi -o decode_samples=true . file
$ fq -d avi -o decode_extended_chunks=true -o decode_samples=true . file
```

Decode value as avi
```
... | avi({decode_samples:true})
... | avi({decode_extended_chunks:true,decode_samples:true})
```

### Samples
Expand All @@ -298,6 +299,13 @@ $ fq '.streams[1].samples[] | tobytes' file.avi > stream01.mp3
$ fq -o decode_samples=false '[.chunks[0] | grep_by(.id=="LIST" and .type=="strl") | grep_by(.id=="strh") as {$type} | grep_by(.id=="strf") as {$format_tag, $compression} | {$type,$format_tag,$compression}]' *.avi
```

### Speed up decoding by disabling sample and extended chunks decoding

If you're not interested in sample details or extended chunks you can speed up decoding by using:
```sh
$ fq -o decode_samples=false -o decode_extended_chunks=false d file.avi
```

### References

- [AVI RIFF File Reference](https://learn.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference)
Expand Down
1,788 changes: 894 additions & 894 deletions doc/formats.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion format/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,8 @@ type MP4_In struct {
}

type AVI_In struct {
DecodeSamples bool `doc:"Decode samples"`
DecodeSamples bool `doc:"Decode samples"`
DecodeExtendedChunks bool `doc:"Decode extended chunks"`
}

type Zip_In struct {
Expand Down
213 changes: 119 additions & 94 deletions format/riff/avi.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package riff
// DV handler https://learn.microsoft.com/en-us/windows/win32/directshow/dv-data-in-the-avi-file-format
// palette change
// rec groups
// AVIX, multiple RIFF headers?
// nested indexes
// unknown fields for unreachable chunk header for > 1gb samples
// 2fields, field index?
Expand Down Expand Up @@ -42,7 +41,8 @@ func init() {
Description: "Audio Video Interleaved",
DecodeFn: aviDecode,
DefaultInArg: format.AVI_In{
DecodeSamples: true,
DecodeSamples: true,
DecodeExtendedChunks: true,
},
Dependencies: []decode.Dependency{
{Groups: []*decode.Group{format.AVC_AU}, Out: &aviMpegAVCAUGroup},
Expand Down Expand Up @@ -110,8 +110,6 @@ var aviStreamChunkTypeDescriptions = scalar.StrMapDescription{
aviStreamChunkTypeIndex: "Index",
}

const aviRiffType = "AVI "

type idx1Sample struct {
offset int64
size int64
Expand Down Expand Up @@ -220,17 +218,17 @@ func aviDecodeChunkIndex(d *decode.D) []ranges.Range {
return rs
}

func aviDecode(d *decode.D) any {
var ai format.AVI_In
d.ArgAs(&ai)

d.Endian = decode.LittleEndian

func aviDecodeEx(d *decode.D, ai format.AVI_In, extendedChunk bool) {
var streams []*aviStream
var idx1Samples []idx1Sample
var moviListPos int64 // point to first bit after type

var riffType string
requiredRiffType := "AVI "
if extendedChunk {
requiredRiffType = "AVIX"
}
var foundRiffType string

riffDecode(
d,
nil,
Expand All @@ -243,7 +241,7 @@ func aviDecode(d *decode.D) any {
func(d *decode.D, id string, path path) (bool, any) {
switch id {
case "RIFF":
riffType = d.FieldUTF8("type", 4, d.StrAssert(aviRiffType))
foundRiffType = d.FieldUTF8("type", 4, d.StrAssert(requiredRiffType))
return true, nil

case "LIST":
Expand Down Expand Up @@ -536,97 +534,124 @@ func aviDecode(d *decode.D) any {
},
)

if riffType != aviRiffType {
d.Errorf("wrong or no AVI riff type found (%s)", riffType)
if foundRiffType != requiredRiffType {
d.Errorf("wrong or no AVI riff type found (%s)", requiredRiffType)
}

d.FieldArray("streams", func(d *decode.D) {
for streamIndex, stream := range streams {
if !extendedChunk {
d.FieldArray("streams", func(d *decode.D) {
for streamIndex, stream := range streams {

d.FieldStruct("stream", func(d *decode.D) {
d.FieldValueStr("type", stream.typ)
d.FieldValueStr("handler", stream.handler)
switch stream.typ {
case aviStrhTypeAudio:
d.FieldValueUint("format_tag", stream.formatTag, format.WAVTagNames)
case aviStrhTypeVideo:
d.FieldValueStr("compression", stream.compression)
}

d.FieldStruct("stream", func(d *decode.D) {
d.FieldValueStr("type", stream.typ)
d.FieldValueStr("handler", stream.handler)
switch stream.typ {
case aviStrhTypeAudio:
d.FieldValueUint("format_tag", stream.formatTag, format.WAVTagNames)
case aviStrhTypeVideo:
d.FieldValueStr("compression", stream.compression)
}
var streamIndexSampleRanges []ranges.Range
if len(stream.indexes) > 0 {
d.FieldArray("indexes", func(d *decode.D) {
for _, i := range stream.indexes {
d.FieldStruct("index", func(d *decode.D) {
d.RangeFn(i.Start, i.Len, func(d *decode.D) {
d.FieldUTF8("type", 4)
d.FieldU32("cb")
sampleRanges := aviDecodeChunkIndex(d)
streamIndexSampleRanges = append(streamIndexSampleRanges, sampleRanges...)
})
})
}
})
}

// TODO: palette change
decodeSample := func(d *decode.D, sr ranges.Range) {
d.RangeFn(sr.Start, sr.Len, func(d *decode.D) {
if sr.Len == 0 {
d.FieldRawLen("sample", d.BitsLeft())
return
}

var streamIndexSampleRanges []ranges.Range
if len(stream.indexes) > 0 {
d.FieldArray("indexes", func(d *decode.D) {
for _, i := range stream.indexes {
d.FieldStruct("index", func(d *decode.D) {
d.RangeFn(i.Start, i.Len, func(d *decode.D) {
d.FieldUTF8("type", 4)
d.FieldU32("cb")
sampleRanges := aviDecodeChunkIndex(d)
streamIndexSampleRanges = append(streamIndexSampleRanges, sampleRanges...)
subSampleSize := int64(stream.sampleSize) * 8
// TODO: <= no format and <= 8*8 heuristics to not create separate pcm samples
if subSampleSize == 0 || (!stream.hasFormat && subSampleSize <= 8*8) {
subSampleSize = sr.Len
}

for d.BitsLeft() > 0 {
d.FramedFn(subSampleSize, func(d *decode.D) {
if ai.DecodeSamples && stream.hasFormat {
d.FieldFormat("sample", stream.format, stream.formatInArg)
} else {
d.FieldRawLen("sample", d.BitsLeft())
}
})
})
}
})
}
}
})
}

// TODO: palette change
decodeSample := func(d *decode.D, sr ranges.Range) {
d.RangeFn(sr.Start, sr.Len, func(d *decode.D) {
if sr.Len == 0 {
d.FieldRawLen("sample", d.BitsLeft())
return
}

subSampleSize := int64(stream.sampleSize) * 8
// TODO: <= no format and <= 8*8 heuristics to not create separate pcm samples
if subSampleSize == 0 || (!stream.hasFormat && subSampleSize <= 8*8) {
subSampleSize = sr.Len
}

for d.BitsLeft() > 0 {
d.FramedFn(subSampleSize, func(d *decode.D) {
if ai.DecodeSamples && stream.hasFormat {
d.FieldFormat("sample", stream.format, stream.formatInArg)
} else {
d.FieldRawLen("sample", d.BitsLeft())
// try only add indexed samples once with priority:
// stream index
// ix chunks (might be same as stream index)
// idx1 chunks
if len(streamIndexSampleRanges) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, sr := range streamIndexSampleRanges {
decodeSample(d, sr)
}
})
} else if len(stream.ixSamples) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, sr := range stream.ixSamples {
decodeSample(d, sr)
}
})
} else if len(idx1Samples) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, is := range idx1Samples {
if is.streamNr != streamIndex {
continue
}
})
}
})
}

// try only add indexed samples once with priority:
// stream index
// ix chunks (might be same as stream index)
// idx chunks
if len(streamIndexSampleRanges) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, sr := range streamIndexSampleRanges {
decodeSample(d, sr)
}
})
} else if len(stream.ixSamples) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, sr := range stream.ixSamples {
decodeSample(d, sr)
}
})
} else if len(idx1Samples) > 0 {
d.FieldArray("samples", func(d *decode.D) {
for _, is := range idx1Samples {
if is.streamNr != streamIndex {
continue
decodeSample(d, ranges.Range{
Start: moviListPos + is.offset + 32, // +32 skip size field
Len: is.size,
})
}
decodeSample(d, ranges.Range{
Start: moviListPos + is.offset + 32, // +32 skip size field
Len: is.size,
})
}
})
})
}
})
}
})
}
}

func aviDecode(d *decode.D) any {
var ai format.AVI_In
d.ArgAs(&ai)

d.Endian = decode.LittleEndian

aviDecodeEx(d, ai, false)

if ai.DecodeExtendedChunks {
d.FieldArray("extended_chunks", func(d *decode.D) {
for {
// TODO: other way? spec says check hdrx chunk but there seems to be none?
riff, _ := d.TryPeekBytes(4)
if string(riff) != "RIFF" {
break
}
})
}
})

d.FieldStruct("chunk", func(d *decode.D) {
aviDecodeEx(d, ai, true)
})
}
})
}

return nil
}
7 changes: 7 additions & 0 deletions format/riff/avi.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ $ fq '.streams[1].samples[] | tobytes' file.avi > stream01.mp3
$ fq -o decode_samples=false '[.chunks[0] | grep_by(.id=="LIST" and .type=="strl") | grep_by(.id=="strh") as {$type} | grep_by(.id=="strf") as {$format_tag, $compression} | {$type,$format_tag,$compression}]' *.avi
```

### Speed up decoding by disabling sample and extended chunks decoding

If you're not interested in sample details or extended chunks you can speed up decoding by using:
```sh
$ fq -o decode_samples=false -o decode_extended_chunks=false d file.avi
```

### References

- [AVI RIFF File Reference](https://learn.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference)
Expand Down
1 change: 1 addition & 0 deletions format/riff/testdata/avc.avi.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -490,3 +490,4 @@ $ fq dv avc.avi
| | | type: "vids"
| | | handler: "H264"
| | | compression: "H264"
| | | extended_chunks[0:0]: 0x2442-NA (0)
1 change: 1 addition & 0 deletions format/riff/testdata/flac.avi.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,4 @@ $ fq dv flac.avi
| | | type: "auds"
| | | handler: "\x01\x00\x00\x00"
| | | format_tag: "flac" (61868)
| | | extended_chunks[0:0]: 0x18cc-NA (0)
13 changes: 10 additions & 3 deletions format/riff/testdata/help_avi.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ avi: Audio Video Interleaved decoder
Options
=======

decode_samples=true Decode samples
decode_extended_chunks=true Decode extended chunks
decode_samples=true Decode samples

Decode examples
===============
Expand All @@ -14,9 +15,9 @@ Decode examples
# Decode value as avi
... | avi
# Decode file using avi options
$ fq -d avi -o decode_samples=true . file
$ fq -d avi -o decode_extended_chunks=true -o decode_samples=true . file
# Decode value as avi
... | avi({decode_samples:true})
... | avi({decode_extended_chunks:true,decode_samples:true})

Samples
=======
Expand All @@ -31,6 +32,12 @@ Show stream summary
===================
$ fq -o decode_samples=false '[.chunks[0] | grep_by(.id=="LIST" and .type=="strl") | grep_by(.id=="strh") as {$type} | grep_by(.id=="strf") as {$format_tag, $compression} | {$type,$format_tag,$compression}]' *.avi

Speed up decoding by disabling sample and extended chunks decoding
==================================================================
If you're not interested in sample details or extended chunks you can speed up decoding by using:

$ fq -o decode_samples=false -o decode_extended_chunks=false d file.avi

References
==========
- AVI RIFF File Reference (https://learn.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference)
Expand Down
1 change: 1 addition & 0 deletions format/riff/testdata/mp3.avi.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,4 @@ $ fq dv mp3.avi
| | | type: "auds"
| | | handler: "\x01\x00\x00\x00"
| | | format_tag: "mp3" (85)
| | | extended_chunks[0:0]: 0x18e8-NA (0)
1 change: 1 addition & 0 deletions format/riff/testdata/pcm.avi.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,4 @@ $ fq dv pcm.avi
| | | type: "auds"
| | | handler: "\x01\x00\x00\x00"
| | | format_tag: "pcm_s16le" (1)
| | | extended_chunks[0:0]: 0x390a-NA (0)

0 comments on commit 4df6f52

Please sign in to comment.