-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generalize
SampleProcessorBase
out of ResamplerBase
and harden mi…
…suse. - Harden misuse: - Base `PushFrame` validates input vector shape is reasonable. - Base prevents usage after `Flush`. - Base sets `num_output_ticks_` to 0 before calling the derived virtual functions. - Generalize name and documentation `SampleProcessorBase`. - In this generalization, some processors may adjust the frame size, or introduce delay. So the base helps keep assumptions in check. - Anticipated possible future concrete implementations: - `MixGainApplier`, which applies mix gain. - `LoudnessLimiter`/`LoudnessNormalizer`, which implements the IAMF spec 7.5 recomendations. - `WavFileRedirector`: tees intermediate output to wav file. - For example when rendering with all post-processing enabled we may have a chain like: `MixGainApplier` -> `LoudnessLimiter` -> `ResamplerQWrapper` -> `WavFileRedirector`. - But if certain post-processing is not desired, the relevant components could be omitted. - b/382257677: Finish a TODO, by enforcing improper flush usage at the base level; this was the final loose end of the bug. PiperOrigin-RevId: 713280314
- Loading branch information
Showing
10 changed files
with
339 additions
and
212 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Copyright (c) 2024, Alliance for Open Media. All rights reserved | ||
* | ||
* This source code is subject to the terms of the BSD 3-Clause Clear License | ||
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear | ||
* License was not distributed with this source code in the LICENSE file, you | ||
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the | ||
* Alliance for Open Media Patent License 1.0 was not distributed with this | ||
* source code in the PATENTS file, you can obtain it at | ||
* www.aomedia.org/license/patent. | ||
*/ | ||
#include "iamf/cli/sample_processor_base.h" | ||
|
||
#include <cstdint> | ||
#include <vector> | ||
|
||
#include "absl/status/status.h" | ||
#include "absl/strings/str_cat.h" | ||
#include "absl/types/span.h" | ||
|
||
namespace iamf_tools { | ||
|
||
SampleProcessorBase::~SampleProcessorBase() {}; | ||
|
||
absl::Status SampleProcessorBase::PushFrame( | ||
absl::Span<const std::vector<int32_t>> time_channel_samples) { | ||
if (state_ != State::kTakingSamples) { | ||
return absl::FailedPreconditionError(absl::StrCat( | ||
"Do not use PushFrame() after Flush() is called. State= ", state_)); | ||
} | ||
|
||
// Check the shape of the input data. | ||
if (time_channel_samples.size() > max_input_samples_per_frame_) { | ||
return absl::InvalidArgumentError( | ||
"Too many samples per frame. The maximum number of samples per frame " | ||
"is: " + | ||
absl::StrCat(max_input_samples_per_frame_) + | ||
". The number of samples per frame received is: " + | ||
absl::StrCat(time_channel_samples.size())); | ||
} | ||
for (const auto& channel_samples : time_channel_samples) { | ||
if (channel_samples.size() != num_channels_) { | ||
return absl::InvalidArgumentError(absl::StrCat( | ||
"Number of channels does not match the expected number of channels, " | ||
"num_channels_", | ||
num_channels_, " vs. ", channel_samples.size())); | ||
} | ||
} | ||
|
||
num_valid_ticks_ = 0; | ||
return PushFrameDerived(time_channel_samples); | ||
} | ||
|
||
absl::Status SampleProcessorBase::Flush() { | ||
if (state_ == State::kFlushCalled) { | ||
return absl::FailedPreconditionError( | ||
"Flush() called in unexpected state. Do not call Flush() twice."); | ||
} | ||
|
||
state_ = State::kFlushCalled; | ||
num_valid_ticks_ = 0; | ||
return FlushDerived(); | ||
} | ||
|
||
absl::Span<const std::vector<int32_t>> | ||
SampleProcessorBase::GetOutputSamplesAsSpan() const { | ||
return absl::MakeConstSpan(output_time_channel_samples_) | ||
.first(num_valid_ticks_); | ||
} | ||
|
||
} // namespace iamf_tools |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
* Copyright (c) 2024, Alliance for Open Media. All rights reserved | ||
* | ||
* This source code is subject to the terms of the BSD 3-Clause Clear License | ||
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear | ||
* License was not distributed with this source code in the LICENSE file, you | ||
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the | ||
* Alliance for Open Media Patent License 1.0 was not distributed with this | ||
* source code in the PATENTS file, you can obtain it at | ||
* www.aomedia.org/license/patent. | ||
*/ | ||
#ifndef CLI_SAMPLE_PROCESSOR_BASE_H_ | ||
#define CLI_SAMPLE_PROCESSOR_BASE_H_ | ||
|
||
#include <cstddef> | ||
#include <cstdint> | ||
#include <vector> | ||
|
||
#include "absl/status/status.h" | ||
#include "absl/types/span.h" | ||
|
||
namespace iamf_tools { | ||
|
||
/*!\brief Abstract class to process PCM samples. | ||
* | ||
* This class represents an abstract interface to reprocess PCM samples. In | ||
* general, processors could introduce delay or could result in a different | ||
* number of samples per frame. | ||
* | ||
* Usage pattern: | ||
* - While input samples are available: | ||
* - Call `PushFrame()` to push in samples. | ||
* - Call `GetOutputSamplesAsSpan()` to retrieve the samples. | ||
* - Call `Flush()` to signal that no more frames will be pushed. | ||
* - Call `GetOutputSamplesAsSpan()` one last time to retrieve any remaining | ||
* samples. | ||
* | ||
* - Note: Results from `GetOutputSamplesAsSpan()` are invalidated by | ||
* further calls to `PushFrame()` or `Flush()`. | ||
*/ | ||
class SampleProcessorBase { | ||
public: | ||
/*!\brief Constructor. | ||
* | ||
* \param max_input_samples_per_frame Maximum number of samples per frame in | ||
* the input timescale. | ||
* \param num_channels Number of channels. Later calls to `PushFrame()` must | ||
* contain this many channels. | ||
* \param max_output_samples_per_frame Maximum number of samples per frame in | ||
* the output timescale. | ||
*/ | ||
SampleProcessorBase(size_t max_input_samples_per_frame, size_t num_channels, | ||
size_t max_output_samples_per_frame) | ||
: max_input_samples_per_frame_(max_input_samples_per_frame), | ||
num_channels_(num_channels), | ||
output_time_channel_samples_(max_output_samples_per_frame, | ||
std::vector<int32_t>(num_channels)) {} | ||
|
||
/*!\brief Destructor. */ | ||
virtual ~SampleProcessorBase() = 0; | ||
|
||
/*!\brief Pushes a frame of samples to the processor. | ||
* | ||
* \param time_channel_samples Samples to push arranged in (time, channel). | ||
* \return `absl::OkStatus()` on success. `absl::FailedPreconditionError` if | ||
* called after `Flush()`. Other specific statuses on failure. | ||
*/ | ||
absl::Status PushFrame( | ||
absl::Span<const std::vector<int32_t>> time_channel_samples); | ||
|
||
/*!\brief Signals to close the processor and flush any remaining samples. | ||
* | ||
* After calling `Flush()`, it is invalid to call `PushFrame()` | ||
* or `Flush()` again. | ||
* | ||
* \return `absl::OkStatus()` on success. `absl::FailedPreconditionError` if | ||
* called after `Flush()`. Other specific statuses on failure. | ||
*/ | ||
absl::Status Flush(); | ||
|
||
/*!\brief Gets a span of the output samples. | ||
* | ||
* \return Span of the output samples. The span will be invalidated when | ||
* `PushFrame()` or `Flush()` is called. | ||
*/ | ||
absl::Span<const std::vector<int32_t>> GetOutputSamplesAsSpan() const; | ||
|
||
protected: | ||
/*!\brief Pushes a frame of samples to the processor. | ||
* | ||
* \param time_channel_samples Samples to push arranged in (time, channel). | ||
* \return `absl::OkStatus()` on success. A specific status on failure. | ||
*/ | ||
virtual absl::Status PushFrameDerived( | ||
absl::Span<const std::vector<int32_t>> time_channel_samples) = 0; | ||
|
||
/*!\brief Signals to close the processor and flush any remaining samples. | ||
* | ||
* \return `absl::OkStatus()` on success. A specific status on failure. | ||
*/ | ||
virtual absl::Status FlushDerived() = 0; | ||
|
||
const size_t max_input_samples_per_frame_; | ||
const size_t num_channels_; | ||
|
||
// Stores the output decoded frames arranged in (time, sample) axes. That | ||
// is to say, each inner vector has one sample for per channel and the outer | ||
// vector contains one inner vector for each time tick. When the decoded | ||
// samples is shorter than a frame, only the first `num_valid_ticks_` ticks | ||
// should be used. | ||
std::vector<std::vector<int32_t>> output_time_channel_samples_; | ||
|
||
// Number of ticks (time samples) in `output_time_channel_samples_` that are | ||
// valid. | ||
size_t num_valid_ticks_ = 0; | ||
|
||
private: | ||
enum class State { | ||
kTakingSamples, | ||
kFlushCalled, | ||
} state_ = State::kTakingSamples; | ||
}; | ||
|
||
} // namespace iamf_tools | ||
|
||
#endif // CLI_SAMPLE_PROCESSOR_BASE_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.