Skip to content

Commit 387a794

Browse files
Add bz2 decompression for non archive file (emscripten-forge#34)
* Add bz2 decompression for non archive file * Refactoring * Fix eslint * Fix typos
1 parent e96a535 commit 387a794

File tree

3 files changed

+34
-17
lines changed

3 files changed

+34
-17
lines changed

src/index.ts

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@ const fetchByteArray = async (url: string): Promise<Uint8Array> => {
1313
export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
1414
const wasmModule = await initializeWasm();
1515

16-
const extractData = async (data: Uint8Array): Promise<FilesData> => {
16+
const extractData = async (
17+
data: Uint8Array,
18+
decompressionOnly: boolean = false
19+
): Promise<FilesData> => {
1720
/** In WebAssembly, memory is accessed through pointers,
1821
and the first parameter of extract_archive method from unpack.c, which is Uint8Array of file data, should be a pointer
1922
so we have to allocate memory for file data
@@ -27,9 +30,10 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
2730
let resultPtr: number | null = wasmModule._extract_archive(
2831
inputPtr,
2932
data.length,
30-
fileCountPtr
33+
fileCountPtr,
34+
decompressionOnly
3135
);
32-
36+
const files: FilesData = {};
3337
/**
3438
* Since extract_archive returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
3539
typedef struct {
@@ -71,8 +75,6 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
7175
const filesPtr = wasmModule.getValue(resultPtr, 'i32');
7276
const fileCount = wasmModule.getValue(resultPtr + 4, 'i32');
7377

74-
const files: FilesData = {};
75-
7678
/**
7779
* filesPtr is a pointer that refers to an instance of the FileData struct in unpack.c
7880
typedef struct {
@@ -102,26 +104,38 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
102104
dataPtr,
103105
dataSize
104106
);
105-
106-
const fileDataCopy = fileData.slice(0);
107107

108+
const fileDataCopy = fileData.slice(0);
108109
files[filename] = fileDataCopy;
109110
}
110-
111+
111112
wasmModule._free(inputPtr);
112113
wasmModule._free(fileCountPtr);
113114
wasmModule._free_extracted_archive(resultPtr);
114115
inputPtr = null;
115116
fileCountPtr = null;
116117
resultPtr = null;
117118
errorMessagePtr = null;
119+
118120
return files;
119121
};
120122

121123
const extract = async (url: string): Promise<FilesData> => {
124+
let isArchive: boolean = checkIsArchive(url);
122125
const data = await fetchByteArray(url);
123-
return extractData(data);
124-
}
126+
return extractData(data, !isArchive);
127+
};
128+
129+
const checkIsArchive = (url: string): boolean => {
130+
let isArchive: boolean = false;
131+
let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz'];
132+
archiveExtArr.forEach(type => {
133+
if (url.toLowerCase().endsWith(type)) {
134+
isArchive = true;
135+
}
136+
});
137+
return isArchive;
138+
};
125139

126140
return {
127141
extract,

src/unpack.d.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ export interface IWasmModule {
88
_extract_archive(
99
inputPtr: number,
1010
inputSize: number,
11-
fileCountPtr: number
11+
fileCountPtr: number,
12+
decompressionOnly: boolean
1213
): number;
1314
getValue(ptr: number, type: string): number;
1415
}

unpack.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <stdio.h>
22
#include <string.h>
33
#include <stdlib.h>
4+
#include <stdbool.h>
45
#include <archive.h>
56
#include <archive_entry.h>
67
#include <emscripten.h>
@@ -18,8 +19,9 @@ typedef struct {
1819
char error_message[256];
1920
} ExtractedArchive;
2021

22+
2123
EMSCRIPTEN_KEEPALIVE
22-
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, size_t* fileCount) {
24+
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, size_t* fileCount, bool decompressionOnly ) {
2325
struct archive* archive;
2426
struct archive_entry* entry;
2527
FileData* files = NULL;
@@ -38,6 +40,9 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, size_t*
3840
archive = archive_read_new();
3941
archive_read_support_filter_all(archive);
4042
archive_read_support_format_all(archive);
43+
if (decompressionOnly) {
44+
archive_read_support_format_raw(archive);
45+
}
4146

4247
if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
4348
result->status = 0;
@@ -47,10 +52,8 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, size_t*
4752
}
4853

4954
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
50-
const char* filename = archive_entry_pathname(entry);
51-
size_t entrySize = archive_entry_size(entry);
52-
53-
55+
const char* filename = decompressionOnly ? "decompression.json": archive_entry_pathname(entry);
56+
size_t entrySize = decompressionOnly ? inputSize: archive_entry_size(entry);
5457
files= realloc(files, sizeof(FileData) * (files_count + 1));
5558
if (!files) {
5659
archive_read_free(archive);
@@ -96,7 +99,6 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, size_t*
9699
return result;
97100
}
98101

99-
100102
EMSCRIPTEN_KEEPALIVE
101103
void free_extracted_archive(ExtractedArchive* archive) {
102104
if (!archive) {

0 commit comments

Comments
 (0)