Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions src/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,37 +73,49 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*

} // unnamed namespace

// Factory that builds a Cluster from the data located at `clusterOffset`
// inside `zimReader`.
//
// getClusterReader() supplies the stream reader and fills in the compression
// type and the "extended" flag; all three are forwarded to the Cluster
// constructor. `maxBlobCount` is passed through to the constructor unchanged.
//
// Returns a shared pointer to the newly constructed Cluster.
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset)
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount)
{
Compression comp;
bool extended;
// comp and extended are output parameters filled by getClusterReader().
auto reader = getClusterReader(zimReader, clusterOffset, &comp, &extended);
return std::make_shared<Cluster>(std::move(reader), comp, extended);
return std::make_shared<Cluster>(std::move(reader), comp, extended, maxBlobCount);
}

// Constructs a cluster on top of the given stream reader (ownership of the
// reader is moved into m_reader) and immediately parses the cluster header.
//
// The header is read with 64-bit offsets when `isExtended` is true and with
// 32-bit offsets otherwise. `maxBlobCount` is forwarded to read_header().
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended)
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended, size_t maxBlobCount)
: compression(comp),
isExtended(isExtended),
m_reader(std::move(reader_))
{
// The offset width used in the header depends on the "extended" flag.
if (isExtended) {
read_header<uint64_t>();
read_header<uint64_t>(maxBlobCount);
} else {
read_header<uint32_t>();
read_header<uint32_t>(maxBlobCount);
}
}

Cluster::~Cluster() = default;

/* Reads the cluster header: the first offset tells how many offsets follow. Returns nothing. */
template<typename OFFSET_TYPE>
void Cluster::read_header()
void Cluster::read_header(size_t maxBlobCount)
{
// read first offset, which specifies, how many offsets we need to read
OFFSET_TYPE offset = m_reader->read<OFFSET_TYPE>();

if ( offset < 2 * sizeof(OFFSET_TYPE) ) {
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is too small.");
}

size_t n_offset = offset / sizeof(OFFSET_TYPE);

if ( n_offset * sizeof(OFFSET_TYPE) != offset ) {
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is not properly aligned.");
}

if ( n_offset > maxBlobCount + 1 ) {
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is too large.");
}

// read offsets
m_blobOffsets.clear();
m_blobOffsets.reserve(n_offset);
Expand Down
6 changes: 3 additions & 3 deletions src/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ namespace zim


template<typename OFFSET_TYPE>
void read_header();
void read_header(size_t maxBlobCount);
const Reader& getReader(blob_index_t n) const;

public:
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended);
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended, size_t maxBlobCount);
~Cluster();
Compression getCompression() const { return compression; }
bool isCompressed() const { return compression != Compression::None; }
Expand All @@ -92,7 +92,7 @@ namespace zim

size_t getMemorySize() const;

static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset);
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount);
};

struct ClusterMemorySize {
Expand Down
8 changes: 7 additions & 1 deletion src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,11 +500,17 @@ class Grouping
return entry_index_t(m_articleListByCluster[idx.v]);
}

// Upper limit on the number of blobs for the cluster `idx`.
// The value returned is the archive-wide article count; `idx` itself is
// not consulted by this implementation.
size_t FileImpl::getMaxBlobCountInCluster(cluster_index_t idx) const
{
  const auto articleCount = getCountArticles();
  return articleCount.v;
}

// Reads the cluster with index `idx`.
//
// Resolves the cluster's offset via getClusterOffset(), logs the access, and
// delegates to Cluster::read() with the reader, that offset, and the blob
// count limit obtained from getMaxBlobCountInCluster().
ClusterHandle FileImpl::readCluster(cluster_index_t idx) const
{
offset_t clusterOffset(getClusterOffset(idx));
log_debug("read cluster " << idx << " from offset " << clusterOffset);
return Cluster::read(*zimReader, clusterOffset);
// Limit handed to Cluster::read() as its maxBlobCount argument.
const auto maxBlobCountInCluster = getMaxBlobCountInCluster(idx);
return Cluster::read(*zimReader, clusterOffset, maxBlobCountInCluster);
}

ClusterHandle FileImpl::getCluster(cluster_index_t idx) const
Expand Down
1 change: 1 addition & 0 deletions src/fileimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ namespace zim
offset_type getMimeListEndUpperLimit() const;
void readMimeTypes();
void quickCheckForCorruptFile();
size_t getMaxBlobCountInCluster(cluster_index_t idx) const;

bool checkChecksum();
bool checkDirentPtrs();
Expand Down
35 changes: 35 additions & 0 deletions test/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,41 @@ TEST_F(ZimArchive, validate)
"Invalid cluster pointer\n"
);

TEST_BROKEN_ZIM_NAME(
"invalid.too_small_offset_of_first_blob_in_cluster_0.zim",
"Error parsing cluster. Offset of the first blob is too small.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.too_small_offset_of_first_blob_in_cluster_4.zim",
"Error parsing cluster. Offset of the first blob is too small.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.too_small_offset_of_first_blob_in_cluster_7.zim",
"Error parsing cluster. Offset of the first blob is too small.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.misaligned_offset_of_first_blob_in_cluster_9.zim",
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.misaligned_offset_of_first_blob_in_cluster_10.zim",
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.misaligned_offset_of_first_blob_in_cluster_11.zim",
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.too_large_offset_of_first_blob_in_cluster.zim",
"Error parsing cluster. Offset of the first blob is too large.\n"
)

TEST_BROKEN_ZIM_NAME(
"invalid.offset_in_cluster.zim",
"Error parsing cluster. Offsets are not ordered.\n"
Expand Down
Loading