Skip to content

Commit

Permalink
Tiny quality improvements for ATRAC3 compatible mode:
Browse files Browse the repository at this point in the history
* Use adaptive ATH, but its efficiency is limited due to QMF aliasing.
* Use loudness instead of energy to split M/S frame size.
  • Loading branch information
dcherednik committed Dec 1, 2024
1 parent 4be2e38 commit 2ca3d7f
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 54 deletions.
56 changes: 44 additions & 12 deletions src/atrac/atrac3_bitstream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,31 @@ static const uint32_t FixedBitAllocTable[TAtrac3Data::MaxBfus] = {
1, 0
};

// Per-block absolute-threshold-of-hearing table shared by every writer instance.
// It is filled once, by the first constructed writer (see the constructor below).
std::vector<TFloat> TAtrac3BitStreamWriter::ATH;

// Constructs a bitstream writer and, on first use, populates the shared ATH table.
//
// container   - output sink, stored in Container
// params      - container parameters, stored in Params
// bfuIdxConst - stored in BfuIdxConst
//
// NOTE(review): the lazy fill of the static ATH table is not synchronized;
// confirm that writers are never constructed concurrently.
TAtrac3BitStreamWriter::TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst)
    : Container(container)
    , Params(params)
    , BfuIdxConst(bfuIdxConst)
{
    NEnv::SetRoundFloat();

    // The table is static: a previously constructed writer has already built it.
    if (!ATH.empty()) {
        return;
    }

    ATH.reserve(MaxBfus);

    // Threshold curve for 1024 spectral lines at a 44100 Hz sample rate.
    const auto athSpectrum = CalcATH(1024, 44100);

    for (size_t qmfBand = 0; qmfBand < this->NumQMF; ++qmfBand) {
        for (size_t block = this->BlocksPerBand[qmfBand]; block < this->BlocksPerBand[qmfBand + 1]; ++block) {
            const size_t firstLine = this->SpecsStartLong[block];
            const size_t endLine = firstLine + this->SpecsPerBlock[block];

            // Take the lowest threshold among the spectral lines of this block.
            float threshold = 999;
            for (size_t line = firstLine; line < endLine; ++line) {
                threshold = fmin(threshold, athSpectrum[line]);
            }

            // 10^(x/10): presumably converts a dB value to a linear power ratio - TODO confirm
            // against CalcATH.
            threshold = pow(10, 0.1 * threshold);

            // Dividing by 100 reduces the efficiency of the ATH but prevents the aliasing problem.
            // TODO: fix it?
            ATH.push_back(threshold / 100);
        }
    }
}

uint32_t TAtrac3BitStreamWriter::CLCEnc(const uint32_t selector, const int mantissas[MaxSpecsPerBlock],
const uint32_t blockSize, NBitStream::TBitStream* bitStream)
{
Expand Down Expand Up @@ -165,7 +190,7 @@ static inline bool CheckBfus(uint16_t* numBfu, const vector<uint32_t>& precision
static const std::pair<uint8_t, vector<uint32_t>> DUMMY_ALLOC{1, vector<uint32_t>{0}};

std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(const TSingleChannelElement& sce,
const uint16_t targetBits, int mt[MaxSpecs])
const uint16_t targetBits, int mt[MaxSpecs], float laudness)
{
const vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
if (scaledBlocks.empty()) {
Expand Down Expand Up @@ -194,7 +219,7 @@ std::pair<uint8_t, vector<uint32_t>> TAtrac3BitStreamWriter::CreateAllocation(co
TFloat minShift = -8;
for (;;) {
TFloat shift = (maxShift + minShift) / 2;
const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift);
const vector<uint32_t>& tmpAlloc = CalcBitsAllocation(scaledBlocks, numBfu, spread, shift, laudness);
auto consumption = CalcSpecsBitsConsumption(sce, tmpAlloc, mt);

auto bitsUsedByTonal = EncodeTonalComponents(sce, tmpAlloc, nullptr);
Expand Down Expand Up @@ -453,18 +478,25 @@ uint16_t TAtrac3BitStreamWriter::EncodeTonalComponents(const TSingleChannelEleme
vector<uint32_t> TAtrac3BitStreamWriter::CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
const uint32_t bfuNum,
const TFloat spread,
const TFloat shift)
const TFloat shift,
const TFloat loudness)
{
vector<uint32_t> bitsPerEachBlock(bfuNum);
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
const uint32_t fix = FixedBitAllocTable[i];
int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
if (tmp > 7) {
bitsPerEachBlock[i] = 7;
} else if (tmp < 0) {
float ath = ATH[i] * loudness;
//std::cerr << "block: " << i << " Loudness: " << loudness << " " << 10 * log10(scaledBlocks[i].MaxEnergy / ath) << std::endl;
if (scaledBlocks[i].MaxEnergy < ath) {
bitsPerEachBlock[i] = 0;
} else {
bitsPerEachBlock[i] = tmp;
const uint32_t fix = FixedBitAllocTable[i];
int tmp = spread * ( (TFloat)scaledBlocks[i].ScaleFactorIndex/3.2) + (1.0 - spread) * fix - shift;
if (tmp > 7) {
bitsPerEachBlock[i] = 7;
} else if (tmp < 0) {
bitsPerEachBlock[i] = 0;
} else {
bitsPerEachBlock[i] = tmp;
}
}
}
return bitsPerEachBlock;
Expand Down Expand Up @@ -503,13 +535,13 @@ static int32_t CalcMSBytesShift(uint32_t frameSz,
if (elements[1].ScaledBlocks.empty()) {
return maxAllowedShift;
} else {
TFloat ratio = CalcMSRatio(elements[0].Energy, elements[1].Energy);
TFloat ratio = CalcMSRatio(elements[0].Loudness, elements[1].Loudness);
//std::cerr << ratio << std::endl;
return std::max(std::min(ToInt(frameSz * ratio), maxAllowedShift), -maxAllowedShift);
}
}

void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements)
void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>& singleChannelElements, float laudness)
{

ASSERT(singleChannelElements.size() == 1 || singleChannelElements.size() == 2);
Expand Down Expand Up @@ -566,7 +598,7 @@ void TAtrac3BitStreamWriter::WriteSoundUnit(const vector<TSingleChannelElement>&

for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
const TSingleChannelElement& sce = singleChannelElements[channel];
allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel]);
allocations[channel] = CreateAllocation(sce, bitsToAlloc[channel], mt[channel], laudness);
}

for (uint32_t channel = 0; channel < singleChannelElements.size(); channel++) {
Expand Down
17 changes: 6 additions & 11 deletions src/atrac/atrac3_bitstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ class TAtrac3BitStreamWriter : public virtual TAtrac3Data {
TAtrac3Data::SubbandInfo SubbandInfo;
std::vector<TTonalBlock> TonalBlocks;
std::vector<TScaledBlock> ScaledBlocks;
TFloat Energy;
TFloat Loudness;
};
private:
static std::vector<TFloat> ATH;

struct TTonalComponentsSubGroup {
std::vector<uint8_t> SubGroupMap;
Expand All @@ -65,10 +66,10 @@ class TAtrac3BitStreamWriter : public virtual TAtrac3Data {
const uint32_t blockSize, NBitStream::TBitStream* bitStream);

std::vector<uint32_t> CalcBitsAllocation(const std::vector<TScaledBlock>& scaledBlocks,
uint32_t bfuNum, TFloat spread, TFloat shift);
uint32_t bfuNum, TFloat spread, TFloat shift, TFloat loudness);

std::pair<uint8_t, std::vector<uint32_t>> CreateAllocation(const TSingleChannelElement& sce,
uint16_t targetBits, int mt[MaxSpecs]);
uint16_t targetBits, int mt[MaxSpecs], float laudness);

std::pair<uint8_t, uint32_t> CalcSpecsBitsConsumption(const TSingleChannelElement& sce,
const std::vector<uint32_t>& precisionPerEachBlocks,
Expand All @@ -85,15 +86,9 @@ class TAtrac3BitStreamWriter : public virtual TAtrac3Data {
const std::vector<uint32_t>& allocTable,
NBitStream::TBitStream* bitStream);
public:
TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst) //no mono mode for atrac3
: Container(container)
, Params(params)
, BfuIdxConst(bfuIdxConst)
{
NEnv::SetRoundFloat();
}
TAtrac3BitStreamWriter(ICompressedOutput* container, const TContainerParams& params, uint32_t bfuIdxConst);

void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements);
void WriteSoundUnit(const std::vector<TSingleChannelElement>& singleChannelElements, float laudness);
};

} // namespace NAtrac3
Expand Down
19 changes: 0 additions & 19 deletions src/atrac/atrac_psy_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,25 +135,6 @@ vector<float> CalcATH(int len, int sampleRate)
return res;
}

// Updates a smoothed loudness estimate from per-line energies.
//
// prevLoud - previous smoothed loudness value
// e0       - energies of the first channel, sz elements
// e1       - energies of the second channel (sz elements), or nullptr when there is
//            only one channel
// weight   - per-line weights, sz elements
// sz       - number of elements to process
//
// Returns 0.98 * prevLoud + 0.02 * s, where s is the weighted energy sum
// (halved after summing both channels when e1 is given).
float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz)
{
    float acc = 0;

    if (e1 == nullptr) {
        // Single channel: plain weighted sum.
        for (size_t k = 0; k < sz; ++k) {
            acc += e0[k] * weight[k];
        }
    } else {
        // Two channels: weighted sum of both, then halved.
        for (size_t k = 0; k < sz; ++k) {
            acc += (e0[k] + e1[k]) * weight[k];
        }

        acc *= 0.5;
    }

    return 0.98 * prevLoud + 0.02 * acc;
}

vector<float> CreateLoudnessCurve(size_t sz)
{
std::vector<float> res;
Expand Down
12 changes: 11 additions & 1 deletion src/atrac/atrac_psy_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ namespace NAtracDEnc {

TFloat AnalizeScaleFactorSpread(const std::vector<TScaledBlock>& scaledBlocks);
std::vector<float> CalcATH(int len, int sampleRate);
float TrackLoudness(float prevLoud, const TFloat* e0, const TFloat* e1, const float* weight, size_t sz);

// Two-channel loudness tracker: exponential smoothing of the running loudness.
//
// prevLoud - previous smoothed loudness value
// l0, l1   - loudness of the current frame for each of the two channels
//
// The previous value keeps a weight of 0.98 and each channel contributes with a
// weight of 0.01 (i.e. 0.02 times the mean of the two channels).
inline float TrackLoudness(float prevLoud, float l0, float l1)
{
    const float bothChannels = l0 + l1;
    const double history = 0.98 * prevLoud;
    const double update = 0.01 * bothChannels;
    return static_cast<float>(history + update);
}

// Single-channel loudness tracker: exponential smoothing of the running loudness.
//
// prevLoud - previous smoothed loudness value
// l        - loudness of the current frame
//
// The previous value keeps a weight of 0.98, the new frame gets 0.02.
inline float TrackLoudness(float prevLoud, float l)
{
    const double history = 0.98 * prevLoud;
    const double update = 0.02 * l;
    return static_cast<float>(history + update);
}

std::vector<float> CreateLoudnessCurve(size_t sz);

} //namespace NAtracDEnc
15 changes: 8 additions & 7 deletions src/atrac1denc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,11 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {
struct TChannelData {
TChannelData()
: Specs(NumSamples)
, Energy(NumSamples)
, Loudness(0.0)
{}

vector<TFloat> Specs;
vector<TFloat> Energy;
float Loudness;
};

using TData = vector<TChannelData>;
Expand Down Expand Up @@ -219,17 +219,18 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac1Encoder::GetLambda() {

Mdct(&specs[0], &PcmBufLow[channel][0], &PcmBufMid[channel][0], &PcmBufHi[channel][0], blockSz[channel]);

auto& erg = (*buf)[channel].Energy;

float l = 0.0;
for (size_t i = 0; i < specs.size(); i++) {
erg[i] = specs[i] * specs[i];
float e = specs[i] * specs[i];
l += e * LoudnessCurve[i];
}
(*buf)[channel].Loudness = l;
}

if (srcChannels == 2 && windowMasks[0] == 0 && windowMasks[1] == 0) {
Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), (*buf)[1].Energy.data(), LoudnessCurve.data(), NumSamples);
Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness, (*buf)[1].Loudness);
} else if (windowMasks[0] == 0) {
Loudness = TrackLoudness(Loudness, (*buf)[0].Energy.data(), nullptr, LoudnessCurve.data(), NumSamples);
Loudness = TrackLoudness(Loudness, (*buf)[0].Loudness);
}

for (uint32_t channel = 0; channel < srcChannels; channel++) {
Expand Down
37 changes: 33 additions & 4 deletions src/atrac3denc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "atrac3denc.h"
#include "transient_detector.h"
#include "atrac/atrac_psy_common.h"
#include <assert.h>
#include <algorithm>
#include <iostream>
Expand Down Expand Up @@ -91,6 +92,7 @@ void TAtrac3MDCT::Midct(TFloat specs[1024], TFloat* bands[4], TGainDemodulatorAr
// Constructs the ATRAC3 encoder.
//
// oma             - compressed output; ownership is moved into Oma
// encoderSettings - encoder settings; moved into Params
//
// The constructor body is empty: all setup happens in the initializer list.
TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSettings&& encoderSettings)
: Oma(std::move(oma))
, Params(std::move(encoderSettings))
// Loudness weighting curve with NumSamples entries, built once per encoder.
, LoudnessCurve(CreateLoudnessCurve(NumSamples))
// One single-channel element per source channel.
, SingleChannelElements(Params.SourceChannels)
// Per source channel, a history of 4 transient parameters
// (presumably one per band - TODO confirm).
, TransientParamsHistory(Params.SourceChannels, std::vector<TTransientParam>(4))
{}
Expand Down Expand Up @@ -289,7 +291,19 @@ void TAtrac3Encoder::Matrixing()
TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
{
std::shared_ptr<TAtrac3BitStreamWriter> bitStreamWriter(new TAtrac3BitStreamWriter(Oma.get(), *Params.ConteinerParams, Params.BfuIdxConst));
return [this, bitStreamWriter](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {

// Per-channel scratch data kept alive between invocations of the processing lambda.
struct TChannelData {
    // Spectrum buffer of one channel, pre-sized to NumSamples elements.
    vector<TFloat> Specs;

    TChannelData()
        : Specs(NumSamples)
    {
    }
};

using TData = vector<TChannelData>;
auto buf = std::make_shared<TData>(2);

return [this, bitStreamWriter, buf](TFloat* data, const TPCMEngine<TFloat>::ProcessMeta& meta) {
using TSce = TAtrac3BitStreamWriter::TSingleChannelElement;

for (uint32_t channel = 0; channel < meta.Channels; channel++) {
Expand All @@ -310,7 +324,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
}

for (uint32_t channel = 0; channel < meta.Channels; channel++) {
vector<TFloat> specs(1024);
auto& specs = (*buf)[channel].Specs;
TSce* sce = &SingleChannelElements[channel];

sce->SubbandInfo.Reset();
Expand All @@ -326,11 +340,26 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
Mdct(specs.data(), p, maxOverlapLevels, MakeGainModulatorArray(sce->SubbandInfo));
}

sce->Energy = CalcEnergy(specs);
float l = 0;
for (size_t i = 0; i < specs.size(); i++) {
float e = specs[i] * specs[i];
l += e * LoudnessCurve[i];
}

sce->Loudness = l;

//TBlockSize for ATRAC3 - 4 subband, all are long (no short window)
sce->ScaledBlocks = Scaler.ScaleFrame(specs, TBlockSize());
}

if (meta.Channels == 2 && !Params.ConteinerParams->Js) {
const TSce& sce0 = SingleChannelElements[0];
const TSce& sce1 = SingleChannelElements[1];
Loudness = TrackLoudness(Loudness, sce0.Loudness, sce1.Loudness);
} else {
// 1 channel or Js. In case of Js we do not use side channel to adjust loudness
const TSce& sce0 = SingleChannelElements[0];
Loudness = TrackLoudness(Loudness, sce0.Loudness);
}

if (Params.ConteinerParams->Js && meta.Channels == 1) {
Expand All @@ -341,7 +370,7 @@ TPCMEngine<TFloat>::TProcessLambda TAtrac3Encoder::GetLambda()
SingleChannelElements[1].SubbandInfo.Info.resize(1);
}

bitStreamWriter->WriteSoundUnit(SingleChannelElements);
bitStreamWriter->WriteSoundUnit(SingleChannelElements, Loudness);
};
}

Expand Down
4 changes: 4 additions & 0 deletions src/atrac3denc.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ class TAtrac3MDCT : public NAtrac3::TAtrac3Data {
class TAtrac3Encoder : public IProcessor<TFloat>, public TAtrac3MDCT {
TCompressedOutputPtr Oma;
const NAtrac3::TAtrac3EncoderSettings Params;
const std::vector<float> LoudnessCurve;
TDelayBuffer<TFloat, 8, 256> PcmBuffer; //8 = 2 channels * 4 bands

TFloat PrevPeak[2][4]; //2 channel, 4 band - peak level (after windowing), used to check overflow during scalling

Atrac3AnalysisFilterBank<TFloat> AnalysisFilterBank[2];

TScaler<TAtrac3Data> Scaler;
std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements;
public:
Expand All @@ -100,6 +102,8 @@ class TAtrac3Encoder : public IProcessor<TFloat>, public TAtrac3MDCT {
};
private:
std::vector<std::vector<TTransientParam>> TransientParamsHistory;
static constexpr float LoudFactor = 0.006;
float Loudness = LoudFactor;
#ifdef ATRAC_UT_PUBLIC
public:
#endif
Expand Down

0 comments on commit 2ca3d7f

Please sign in to comment.