Skip to content

Commit d2e8f88

Browse files
veloman-yunkan authored and kelson42 committed
Quick detection of too large offset of 1st blob in cluster
The offset of the first blob in a cluster is treated as corrupted if it implies that the cluster contains more blobs than there are articles in the ZIM file. Note that a stronger check could be performed if a tighter upper limit on the number of blobs in the given cluster were known. In particular, a precise check would be possible if the blob count of the cluster were available; however, the only quick source of that information is the very value that has to be checked.
1 parent f6d4bdd commit d2e8f88

File tree

5 files changed

+26
-10
lines changed

5 files changed

+26
-10
lines changed

src/cluster.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,31 +73,31 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*
7373

7474
} // unnamed namespace
7575

76-
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset)
76+
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount)
7777
{
7878
Compression comp;
7979
bool extended;
8080
auto reader = getClusterReader(zimReader, clusterOffset, &comp, &extended);
81-
return std::make_shared<Cluster>(std::move(reader), comp, extended);
81+
return std::make_shared<Cluster>(std::move(reader), comp, extended, maxBlobCount);
8282
}
8383

84-
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended)
84+
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended, size_t maxBlobCount)
8585
: compression(comp),
8686
isExtended(isExtended),
8787
m_reader(std::move(reader_))
8888
{
8989
if (isExtended) {
90-
read_header<uint64_t>();
90+
read_header<uint64_t>(maxBlobCount);
9191
} else {
92-
read_header<uint32_t>();
92+
read_header<uint32_t>(maxBlobCount);
9393
}
9494
}
9595

9696
Cluster::~Cluster() = default;
9797

9898
/* This return the number of char read */
9999
template<typename OFFSET_TYPE>
100-
void Cluster::read_header()
100+
void Cluster::read_header(size_t maxBlobCount)
101101
{
102102
// read first offset, which specifies, how many offsets we need to read
103103
OFFSET_TYPE offset = m_reader->read<OFFSET_TYPE>();
@@ -112,6 +112,10 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*
112112
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is not properly aligned.");
113113
}
114114

115+
if ( n_offset > maxBlobCount + 1 ) {
116+
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is too large.");
117+
}
118+
115119
// read offsets
116120
m_blobOffsets.clear();
117121
m_blobOffsets.reserve(n_offset);

src/cluster.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ namespace zim
7373

7474

7575
template<typename OFFSET_TYPE>
76-
void read_header();
76+
void read_header(size_t maxBlobCount);
7777
const Reader& getReader(blob_index_t n) const;
7878

7979
public:
80-
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended);
80+
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended, size_t maxBlobCount);
8181
~Cluster();
8282
Compression getCompression() const { return compression; }
8383
bool isCompressed() const { return compression != Compression::None; }
@@ -92,7 +92,7 @@ namespace zim
9292

9393
size_t getMemorySize() const;
9494

95-
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset);
95+
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount);
9696
};
9797

9898
struct ClusterMemorySize {

src/fileimpl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,17 @@ class Grouping
500500
return entry_index_t(m_articleListByCluster[idx.v]);
501501
}
502502

503+
size_t FileImpl::getMaxBlobCountInCluster(cluster_index_t idx) const
504+
{
505+
return getCountArticles().v;
506+
}
507+
503508
ClusterHandle FileImpl::readCluster(cluster_index_t idx) const
504509
{
505510
offset_t clusterOffset(getClusterOffset(idx));
506511
log_debug("read cluster " << idx << " from offset " << clusterOffset);
507-
return Cluster::read(*zimReader, clusterOffset);
512+
const auto maxBlobCountInCluster = getMaxBlobCountInCluster(idx);
513+
return Cluster::read(*zimReader, clusterOffset, maxBlobCountInCluster);
508514
}
509515

510516
ClusterHandle FileImpl::getCluster(cluster_index_t idx) const

src/fileimpl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ namespace zim
192192
offset_type getMimeListEndUpperLimit() const;
193193
void readMimeTypes();
194194
void quickCheckForCorruptFile();
195+
size_t getMaxBlobCountInCluster(cluster_index_t idx) const;
195196

196197
bool checkChecksum();
197198
bool checkDirentPtrs();

test/archive.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,11 @@ TEST_F(ZimArchive, validate)
821821
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
822822
)
823823

824+
TEST_BROKEN_ZIM_NAME(
825+
"invalid.too_large_offset_of_first_blob_in_cluster.zim",
826+
"Error parsing cluster. Offset of the first blob is too large.\n"
827+
)
828+
824829
TEST_BROKEN_ZIM_NAME(
825830
"invalid.offset_in_cluster.zim",
826831
"Error parsing cluster. Offsets are not ordered.\n"

0 commit comments

Comments
 (0)