Skip to content

Commit

Permalink
WavWriter enforces a maximum number of samples per frame (chosen at …
Browse files Browse the repository at this point in the history
…construction time).

  - Helps align behavior with `SampleProcessorBase`, before becoming one in the near future.
  - Drive-by:
    - Add a test for trying to render with multiple codec configs that have different numbers of samples per frame (invalid in IAMF v1.1.0).
    - Remove obsolete doc that the `WavWriter` requires 1 or 2 channels. It has long been used with multiple channels. Add test coverage to mimic use with 9.1.6.
    - b/384048095: Defer by adding a note to the bug that `WavFileSplicer` needs more coverage.

PiperOrigin-RevId: 715007629
  • Loading branch information
jwcullen committed Jan 13, 2025
1 parent abd7698 commit 0c88d6e
Show file tree
Hide file tree
Showing 10 changed files with 272 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include "iamf/cli/tests/cli_test_utils.h"
#include "iamf/obu/ia_sequence_header.h"

// TODO(b/384048095): Add better tests for spliced wav files with LFE channels.

namespace iamf_tools {
namespace adm_to_user_metadata {
namespace {
Expand Down
21 changes: 15 additions & 6 deletions iamf/cli/adm_to_user_metadata/adm/wav_file_splicer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ namespace {

constexpr int32_t kBitsPerByte = 8;
constexpr size_t kSizeToFlush = 4096;

// Arbitrary limit on how many samples will be written to the wav file at
// once. Passed to `WavWriter::Create` as the maximum number of samples per
// frame.
//
// Chosen to agree with `kSizeToFlush` even in the smallest case of 16-bit
// samples and one channel: `kSizeToFlush / 2` samples at 2 bytes per sample
// amount to `kSizeToFlush` bytes.
// NOTE(review): this assumes `kSizeToFlush` is a byte count -- confirm.
constexpr size_t kMaxNumSamplesPerFrame = kSizeToFlush / 2;

// Error tolerance set to the minimum precision allowed by ADM file to describe
// timing related parameters.
constexpr double kErrorTolerance = 1e-5;
Expand Down Expand Up @@ -305,7 +311,7 @@ absl::Status ConvertFromObjectsTo3OA(
// Output channels set to 16 as objects get panned to 3OA.
auto output_wav_writer = WavWriter::Create(
output_file.string(), kOutputWavChannels, wav_file_fmt.samples_per_sec,
wav_file_fmt.bits_per_sample);
wav_file_fmt.bits_per_sample, kMaxNumSamplesPerFrame);

// Calculate number of bytes per sample based on bits per sample.
const int32_t bytes_per_sample =
Expand Down Expand Up @@ -383,7 +389,8 @@ absl::Status ConvertFromObjectsTo3OA(
{
auto wav_writer = WavWriter::Create(
input_file.string(), wav_file_fmt.num_channels,
wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample);
wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
kMaxNumSamplesPerFrame);
// Compute the length of audio samples corresponding to the current
// segment duration. The samples excluded due the rounding error at each
// segment is accounted in the next segment.
Expand Down Expand Up @@ -445,14 +452,15 @@ absl::Status SeparateLfeChannels(const std::filesystem::path& output_file_path,
// the wav writer corresponding to non-LFE channels and subsequent indices
// correspond to each LFE channel present.
std::vector<std::unique_ptr<WavWriter>> nonlfe_lfe_wav_writer;
nonlfe_lfe_wav_writer.emplace_back(WavWriter::Create(
non_lfe_file_path, non_lfe_count, samples_per_sec, bits_per_sample));
nonlfe_lfe_wav_writer.emplace_back(
WavWriter::Create(non_lfe_file_path, non_lfe_count, samples_per_sec,
bits_per_sample, kMaxNumSamplesPerFrame));
for (int lfe_index = 1; lfe_index <= lfe_ids.size(); ++lfe_index) {
nonlfe_lfe_wav_writer.emplace_back(WavWriter::Create(
(output_file_path /
absl::StrCat(file_prefix, "_converted", lfe_index + 1, ".wav"))
.string(),
1, samples_per_sec, bits_per_sample));
1, samples_per_sec, bits_per_sample, kMaxNumSamplesPerFrame));
}

// The samples in the input wav are packed in a channel-interleaved fashion.
Expand Down Expand Up @@ -603,7 +611,8 @@ absl::Status SpliceWavFilesFromAdm(
audio_object_index + 1, ".wav"))
.string(),
audio_tracks_for_audio_objects[audio_object_index].size(),
wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample));
wav_file_fmt.samples_per_sec, wav_file_fmt.bits_per_sample,
kMaxNumSamplesPerFrame));
}

// Write audio samples into the corresponding output wav file(s).
Expand Down
6 changes: 4 additions & 2 deletions iamf/cli/encoder_main_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/
#include "iamf/cli/encoder_main_lib.h"

#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <limits>
Expand Down Expand Up @@ -57,11 +58,12 @@ using iamf_tools_cli_proto::UserMetadata;
// Builds a wav writer for the given mix presentation, sub-mix, and layout.
//
// The output path is `prefix` followed by
// "_rendered_id_<mix_presentation_id>_sub_mix_<sub_mix_index>_layout_<layout_index>.wav".
// The unnamed `Layout` argument is ignored. `num_channels`, `sample_rate`,
// `bit_depth`, and `max_input_samples_per_frame` are forwarded unchanged to
// `WavWriter::Create`, and its result is returned as-is.
std::unique_ptr<WavWriter> ProduceAllWavWriters(
DecodedUleb128 mix_presentation_id, int sub_mix_index, int layout_index,
const Layout&, const std::filesystem::path& prefix, int num_channels,
int sample_rate, int bit_depth) {
int sample_rate, int bit_depth, size_t max_input_samples_per_frame) {
const auto wav_path = absl::StrCat(
prefix.string(), "_rendered_id_", mix_presentation_id, "_sub_mix_",
sub_mix_index, "_layout_", layout_index, ".wav");
return WavWriter::Create(wav_path, num_channels, sample_rate, bit_depth);
return WavWriter::Create(wav_path, num_channels, sample_rate, bit_depth,
max_input_samples_per_frame);
}

absl::Status PartitionParameterMetadata(UserMetadata& user_metadata) {
Expand Down
25 changes: 18 additions & 7 deletions iamf/cli/rendering_mix_presentation_finalizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,18 @@ absl::Status CollectAudioElementsInSubMix(
return absl::OkStatus();
}

absl::Status GetCommonSampleRateAndBitDepthFromAudioElementIds(
absl::Status GetCommonCodecConfigPropertiesFromAudioElementIds(
const std::vector<const AudioElementWithData*>& audio_elements_in_sub_mix,
uint32_t& common_sample_rate, uint8_t& common_bit_depth,
bool& requires_resampling) {
uint32_t& common_num_samples_per_frame, bool& requires_resampling) {
absl::flat_hash_set<uint32_t> sample_rates;
absl::flat_hash_set<uint32_t> num_samples_per_frame;
absl::flat_hash_set<uint8_t> bit_depths;

// Get all the numbers of samples per frame, sample rates, and bit-depths from
// each Audio Element.
for (const auto* audio_element : audio_elements_in_sub_mix) {
num_samples_per_frame.insert(
audio_element->codec_config->GetNumSamplesPerFrame());
sample_rates.insert(audio_element->codec_config->GetOutputSampleRate());
bit_depths.insert(
audio_element->codec_config->GetBitDepthToMeasureLoudness());
Expand All @@ -94,6 +97,13 @@ absl::Status GetCommonSampleRateAndBitDepthFromAudioElementIds(
RETURN_IF_NOT_OK(GetCommonSampleRateAndBitDepth(
sample_rates, bit_depths, common_sample_rate, common_bit_depth,
requires_resampling));
if (num_samples_per_frame.size() != 1) {
return absl::InvalidArgumentError(
"Audio elements in a submix must have the same number of samples per "
"frame.");
}
common_num_samples_per_frame = *num_samples_per_frame.begin();

return absl::OkStatus();
}

Expand Down Expand Up @@ -526,7 +536,7 @@ absl::Status GenerateRenderingMetadataForLayouts(
int sub_mix_index,
std::vector<const AudioElementWithData*> audio_elements_in_sub_mix,
uint32_t common_sample_rate, uint8_t loudness_calculator_bit_depth,
uint8_t wav_file_bit_depth,
uint8_t wav_file_bit_depth, uint32_t common_num_samples_per_frame,
std::vector<LayoutRenderingMetadata>& output_layout_rendering_metadata) {
output_layout_rendering_metadata.resize(sub_mix.layouts.size());
for (int layout_index = 0; layout_index < sub_mix.layouts.size();
Expand Down Expand Up @@ -561,7 +571,7 @@ absl::Status GenerateRenderingMetadataForLayouts(
layout_rendering_metadata.wav_writer = wav_writer_factory(
mix_presentation_id, sub_mix_index, layout_index,
layout.loudness_layout, file_path_prefix, num_channels,
common_sample_rate, wav_file_bit_depth);
common_sample_rate, wav_file_bit_depth, common_num_samples_per_frame);
}

return absl::OkStatus();
Expand Down Expand Up @@ -606,10 +616,11 @@ absl::Status GenerateRenderingMetadataForSubmixes(

// Data common to all audio elements and layouts.
bool requires_resampling;
RETURN_IF_NOT_OK(GetCommonSampleRateAndBitDepthFromAudioElementIds(
uint32_t common_num_samples_per_frame;
RETURN_IF_NOT_OK(GetCommonCodecConfigPropertiesFromAudioElementIds(
audio_elements_in_sub_mix, submix_rendering_metadata.common_sample_rate,
submix_rendering_metadata.loudness_calculator_bit_depth,
requires_resampling));
common_num_samples_per_frame, requires_resampling));
if (requires_resampling) {
// TODO(b/274689885): Convert to a common sample rate and/or bit-depth.
return absl::UnimplementedError(
Expand All @@ -630,7 +641,7 @@ absl::Status GenerateRenderingMetadataForSubmixes(
audio_elements_in_sub_mix, submix_rendering_metadata.common_sample_rate,
submix_rendering_metadata.loudness_calculator_bit_depth,
submix_rendering_metadata.wav_file_bit_depth,
layout_rendering_metadata));
common_num_samples_per_frame, layout_rendering_metadata));
}
return absl::OkStatus();
}
Expand Down
5 changes: 4 additions & 1 deletion iamf/cli/rendering_mix_presentation_finalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_
#define CLI_RENDERING_MIX_PRESENTATION_FINALIZER_H_

#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <list>
Expand Down Expand Up @@ -109,12 +110,14 @@ class RenderingMixPresentationFinalizer {
* \param num_channels Number of channels.
* \param sample_rate Sample rate.
* \param bit_depth Bit depth.
* \param num_samples_per_frame Number of samples per frame.
* \return Unique pointer to a wav writer or `nullptr` if none is desired.
*/
typedef absl::AnyInvocable<std::unique_ptr<WavWriter>(
DecodedUleb128 mix_presentation_id, int sub_mix_index, int layout_index,
const Layout& layout, const std::filesystem::path& prefix,
int num_channels, int sample_rate, int bit_depth) const>
int num_channels, int sample_rate, int bit_depth,
size_t num_samples_per_frame) const>
WavWriterFactory;

/*!\brief Creates a rendering mix presentation finalizer.
Expand Down
4 changes: 4 additions & 0 deletions iamf/cli/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,10 @@ cc_test(
"//iamf/cli:rendering_mix_presentation_finalizer",
"//iamf/cli:wav_reader",
"//iamf/cli:wav_writer",
"//iamf/cli/proto:codec_config_cc_proto",
"//iamf/cli/proto_to_obu:codec_config_generator",
"//iamf/cli/renderer:audio_element_renderer_base",
"//iamf/cli/user_metadata_builder:codec_config_obu_metadata_builder",
"//iamf/cli/user_metadata_builder:iamf_input_layout",
"//iamf/obu:audio_element",
"//iamf/obu:codec_config",
Expand All @@ -401,6 +404,7 @@ cc_test(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
"@com_google_googletest//:gtest_main",
"@com_google_protobuf//:protobuf",
],
)

Expand Down
63 changes: 53 additions & 10 deletions iamf/cli/tests/rendering_mix_presentation_finalizer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@
#include "iamf/cli/loudness_calculator_base.h"
#include "iamf/cli/loudness_calculator_factory_base.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/proto/codec_config.pb.h"
#include "iamf/cli/proto_to_obu/codec_config_generator.h"
#include "iamf/cli/renderer/audio_element_renderer_base.h"
#include "iamf/cli/renderer_factory.h"
#include "iamf/cli/tests/cli_test_utils.h"
#include "iamf/cli/user_metadata_builder/codec_config_obu_metadata_builder.h"
#include "iamf/cli/user_metadata_builder/iamf_input_layout.h"
#include "iamf/cli/wav_reader.h"
#include "iamf/cli/wav_writer.h"
Expand All @@ -50,6 +53,7 @@
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definitions.h"
#include "iamf/obu/types.h"
#include "src/google/protobuf/repeated_ptr_field.h"

namespace iamf_tools {
namespace {
Expand All @@ -76,8 +80,15 @@ constexpr uint32_t kCommonParameterRate = kSampleRate;
constexpr uint32_t kNumSamplesPerFrame = 8;
constexpr uint8_t kCodecConfigBitDepth = 16;
constexpr uint8_t kNoTrimFromEnd = 0;
constexpr std::array<DecodedUleb128, 1> kMonoSubstreamIds = {0};
constexpr std::array<DecodedUleb128, 1> kStereoSubstreamIds = {1};

constexpr std::array<ChannelLabel::Label, 2> kStereoLabels = {kL2, kR2};

// Repeated proto field holding `CodecConfigObuMetadata` messages; the tests
// fill one of these and hand it to `CodecConfigGenerator`.
using CodecConfigObuMetadatas = ::google::protobuf::RepeatedPtrField<
    iamf_tools_cli_proto::CodecConfigObuMetadata>;

class MockRenderer : public AudioElementRendererBase {
public:
MockRenderer(absl::Span<const ChannelLabel::Label> ordered_labels,
Expand Down Expand Up @@ -167,32 +178,32 @@ std::string GetFirstSubmixFirstLayoutExpectedPath() {
kMixPresentationId, kSuffixAfterMixPresentationId);
}

// Wav writer factory for tests that do not want any wav output: every
// argument is ignored and `nullptr` is always returned.
std::unique_ptr<WavWriter> ProduceNoWavWriters(DecodedUleb128, int, int,
const Layout&,
const std::filesystem::path&,
int, int, int) {
std::unique_ptr<WavWriter> ProduceNoWavWriters(
DecodedUleb128 /*mix_presentation_id*/, int /*sub_mix_index*/,
int /*layout_index*/, const Layout& /*layout*/,
const std::filesystem::path& /*prefix*/, int /*num_channels*/,
int /*sample_rate*/, int /*bit_depth*/, size_t /*num_samples_per_frame*/) {
return nullptr;
}

// Wav writer factory which only produces a writer for the first layout of the
// first sub-mix.
//
// Returns `nullptr` when `sub_mix_index` or `layout_index` is non-zero.
// Otherwise the output path is `prefix`, then "_id_", then
// `mix_presentation_id`, then `kSuffixAfterMixPresentationId`. The unnamed
// `Layout` argument is ignored; `num_channels`, `sample_rate`, `bit_depth`,
// and `num_samples_per_frame` are forwarded unchanged to `WavWriter::Create`.
std::unique_ptr<WavWriter> ProduceFirstSubMixFirstLayoutWavWriter(
DecodedUleb128 mix_presentation_id, int sub_mix_index, int layout_index,
const Layout&, const std::filesystem::path& prefix, int num_channels,
int sample_rate, int bit_depth) {
int sample_rate, int bit_depth, size_t num_samples_per_frame) {
// Only the very first (sub-mix, layout) pair gets a wav writer.
if (sub_mix_index != 0 || layout_index != 0) {
return nullptr;
}

const auto wav_path =
absl::StrCat(prefix.string(), "_id_", mix_presentation_id,
kSuffixAfterMixPresentationId);
return WavWriter::Create(wav_path, num_channels, sample_rate, bit_depth);
return WavWriter::Create(wav_path, num_channels, sample_rate, bit_depth,
num_samples_per_frame);
}

class FinalizerTest : public ::testing::Test {
public:
void InitPrerequisiteObusForMonoInput(DecodedUleb128 audio_element_id) {
const std::vector<DecodedUleb128> kMonoSubstreamIds = {0};

AddLpcmCodecConfigWithIdAndSampleRate(kCodecConfigId, kSampleRate,
codec_configs_);
AddScalableAudioElementWithSubstreamIds(
Expand All @@ -201,8 +212,6 @@ class FinalizerTest : public ::testing::Test {
}

void InitPrerequisiteObusForStereoInput(DecodedUleb128 audio_element_id) {
const std::vector<DecodedUleb128> kStereoSubstreamIds = {0};

AddLpcmCodecConfigWithIdAndSampleRate(kCodecConfigId, kSampleRate,
codec_configs_);
AddScalableAudioElementWithSubstreamIds(
Expand Down Expand Up @@ -321,6 +330,40 @@ TEST_F(FinalizerTest,
CreateFinalizerExpectOk();
}

TEST_F(FinalizerTest, CreateFailsWitMismatchingNumSamplesPerFrame) {
  constexpr uint32_t kSecondCodecConfigId = kCodecConfigId + 1;
  constexpr DecodedUleb128 kStereoAudioElementId = kAudioElementId + 1;
  renderer_factory_ = std::make_unique<AlwaysNullRendererFactory>();

  // Generate two Opus codec configs. They are built with different values
  // (960 vs. 1920) for what this test treats as the number of samples per
  // frame.
  CodecConfigObuMetadatas codec_config_metadatas;
  codec_config_metadatas.Add(
      CodecConfigObuMetadataBuilder::GetOpusCodecConfigObuMetadata(
          kCodecConfigId, 960));
  codec_config_metadatas.Add(
      CodecConfigObuMetadataBuilder::GetOpusCodecConfigObuMetadata(
          kSecondCodecConfigId, 1920));
  CodecConfigGenerator codec_config_generator(codec_config_metadatas);
  ASSERT_THAT(codec_config_generator.Generate(codec_configs_), IsOk());

  // The mono audio element references the first codec config.
  AddScalableAudioElementWithSubstreamIds(
      IamfInputLayout::kMono, kAudioElementId, kCodecConfigId,
      kMonoSubstreamIds, codec_configs_, audio_elements_);
  // The stereo audio element references the second codec config, which has a
  // different number of samples per frame.
  AddScalableAudioElementWithSubstreamIds(
      IamfInputLayout::kStereo, kStereoAudioElementId, kSecondCodecConfigId,
      kStereoSubstreamIds, codec_configs_, audio_elements_);

  // Putting both audio elements in one mix presentation is invalid because
  // there must be only one codec config in IAMF v1.1.0.
  AddMixPresentationObuWithAudioElementIds(
      kMixPresentationId, {kAudioElementId, kStereoAudioElementId},
      /*common_parameter_id=*/999, kCommonParameterRate, obus_to_finalize_);

  // Creating the finalizer must be rejected.
  EXPECT_FALSE(RenderingMixPresentationFinalizer::Create(
                   output_directory_, output_wav_file_bit_depth_override_,
                   renderer_factory_.get(), loudness_calculator_factory_.get(),
                   audio_elements_, wav_writer_factory_, obus_to_finalize_)
                   .ok());
}

// =========== Tests that work is delegated to the renderer factory. ===========
TEST_F(FinalizerTest, ForwardsAudioElementToRenderer) {
InitPrerequisiteObusForStereoInput(kAudioElementId);
Expand Down
Loading

0 comments on commit 0c88d6e

Please sign in to comment.