Skip to content

Commit 9545af9

Browse files
Fix decompression (emscripten-forge#40)
* Add decompression method * Saving data into a temporary file, fix reading decompressed data * Refactoring code
1 parent a303beb commit 9545af9

File tree

4 files changed

+166
-30
lines changed

4 files changed

+166
-30
lines changed

build_wasm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,6 @@ emcc unpack.c -o $WASM_LIB/unpack.js \
8888
-s INITIAL_MEMORY=128MB \
8989
-s ENVIRONMENT=web \
9090
-s EXPORTED_RUNTIME_METHODS='["ccall", "cwrap", "getValue", "UTF8ToString", "wasmMemory"]' \
91-
-s EXPORTED_FUNCTIONS="['_extract_archive', '_free_extracted_archive', '_malloc', '_free']"
91+
-s EXPORTED_FUNCTIONS="['_extract', '_free_extracted_archive', '_malloc', '_free']"
9292

9393
echo "Build completed successfully!"

src/index.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
1818
decompressionOnly: boolean = false
1919
): Promise<FilesData> => {
2020
/**Since WebAssembly, memory is accessed using pointers
21-
and the first parameter of extract_archive method from unpack.c, which is Uint8Array of file data, should be a pointer
21+
and the first parameter of extract method from unpack.c, which is Uint8Array of file data, should be a pointer
2222
so we have to allocate memory for file data
2323
**/
2424
let inputPtr: number | null = wasmModule._malloc(data.length);
2525
wasmModule.HEAPU8.set(data, inputPtr);
2626

2727

28-
let resultPtr: number | null = wasmModule._extract_archive(
28+
let resultPtr: number | null = wasmModule._extract(
2929
inputPtr,
3030
data.length,
3131
decompressionOnly
3232
);
3333
const files: FilesData = {};
3434
/**
35-
* Since extract_archive returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
35+
* Since extract returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
3636
typedef struct {
3737
FileData* files;
3838
size_t fileCount;
@@ -99,7 +99,6 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
9999
dataPtr,
100100
dataSize
101101
);
102-
103102
const fileDataCopy = fileData.slice(0);
104103
files[filename] = fileDataCopy;
105104
}
@@ -121,7 +120,7 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
121120

122121
const checkIsArchive = (url: string): boolean => {
123122
let isArchive: boolean = false;
124-
let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz'];
123+
let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz', '.zip'];
125124
archiveExtArr.forEach(type => {
126125
if (url.toLowerCase().endsWith(type)) {
127126
isArchive = true;

src/unpack.d.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export interface IWasmModule {
55
HEAPU8: Uint8Array;
66
_malloc(size: number): number;
77
_free(ptr: number): void;
8-
_extract_archive(
8+
_extract(
99
inputPtr: number,
1010
inputSize: number,
1111
decompressionOnly: boolean

unpack.c

Lines changed: 160 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,28 @@ typedef struct {
1919
char error_message[256];
2020
} ExtractedArchive;
2121

22+
/**
 * Marks `result` as failed, stores `error_message` in it and releases `archive`.
 *
 * The message is copied into `result->error_message` BEFORE the archive is
 * freed, because callers pass `archive_error_string(archive)`, whose storage
 * belongs to the archive object.
 *
 * @param result         Result object to fill in; may be NULL, in which case
 *                       only the archive is released and NULL is returned.
 * @param error_message  Human-readable message; NULL is replaced by a generic
 *                       text instead of being handed to "%s".
 * @param archive        libarchive reader to release; may be NULL when the
 *                       failure happened before a reader was created.
 * @return `result` with `status == 0`, or NULL when `result` is NULL.
 */
ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) {
    if (result) {
        result->status = 0;
        snprintf(result->error_message, sizeof(result->error_message), "%s",
                 error_message ? error_message : "Unknown error");
    } else {
        fprintf(stderr, "Result is null\n");
    }

    /* A missing archive must not discard the error already recorded in result. */
    if (archive) {
        archive_read_free(archive);
    }
    return result;
}
2235

2336
EMSCRIPTEN_KEEPALIVE
24-
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
37+
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
2538
struct archive* archive;
2639
struct archive_entry* entry;
2740
size_t files_struct_length = 100;
2841
FileData* files = NULL;
2942
size_t files_count = 0;
43+
const char *error_message;
3044

3145
ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
3246
if (!result) {
@@ -41,32 +55,24 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
4155
archive = archive_read_new();
4256
archive_read_support_filter_all(archive);
4357
archive_read_support_format_all(archive);
44-
if (decompressionOnly) {
45-
archive_read_support_format_raw(archive);
46-
}
4758

4859
if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
49-
result->status = 0;
50-
snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
51-
archive_read_free(archive);
52-
return result;
60+
return error_handler(result,archive_error_string(archive), archive);
5361
}
5462
files = malloc(sizeof(FileData) * files_struct_length);
5563

5664
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
57-
const char* filename = decompressionOnly ? "decompression.json": archive_entry_pathname(entry);
58-
size_t entrySize = decompressionOnly ? inputSize: archive_entry_size(entry);
65+
const char* filename = archive_entry_pathname(entry);
66+
size_t entrySize = archive_entry_size(entry);
5967
if (files_count + 1 > files_struct_length) {
6068
files_struct_length *= 2; // double the length
6169
FileData* oldfiles = files;
6270
files= realloc(files, sizeof(FileData) * files_struct_length);
6371
if (!files) {
64-
archive_read_free(archive);
65-
result->status = 0;
6672
result->fileCount = files_count;
6773
result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed.
68-
snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file data.");
69-
return result;
74+
error_message = "Memory allocation error for file data.";
75+
return error_handler(result, error_message, archive);
7076
}
7177
}
7278
files[files_count].filename = strdup(filename);
@@ -75,13 +81,11 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
7581

7682
if (!files[files_count].data) {
7783
free(files[files_count].filename);
78-
files[files_count].filename = NULL;
79-
archive_read_free(archive);
80-
result->status = 0;
84+
files[files_count].filename = NULL;
8185
result->fileCount = files_count;
8286
result->files = files; // otherwise memory is lost, alternatively also everything can be freed.
83-
snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file contents.");
84-
return result;
87+
error_message = "Memory allocation error for file contents.";
88+
return error_handler(result, error_message, archive);
8589
}
8690

8791
size_t bytesRead = 0;
@@ -94,10 +98,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
9498
}
9599
free(files);
96100
result->files = NULL;
97-
result->status = 0;
98-
snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
99-
archive_read_free(archive);
100-
return result;
101+
return error_handler(result, archive_error_string(archive), archive);
101102
}
102103
bytesRead += ret;
103104
}
@@ -111,6 +112,142 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
111112
return result;
112113
}
113114

115+
char* write_to_temp_file(uint8_t* data, size_t size) {
116+
char* temp_file_name = strdup("/tmp/decompressionXXXXXX");
117+
int fd = mkstemp(temp_file_name);
118+
if (fd == -1) {
119+
perror("Failed to create temporary file for decompression file");
120+
free(temp_file_name);
121+
return NULL;
122+
}
123+
124+
FILE* temp_file = fdopen(fd, "wb");
125+
if (!temp_file) {
126+
perror("Failed to open temporary file");
127+
close(fd);
128+
unlink(temp_file_name);
129+
free(temp_file_name);
130+
return NULL;
131+
}
132+
133+
if (fwrite(data, 1, size, temp_file) != size) {
134+
perror("Failed to write to temporary file");
135+
fclose(temp_file);
136+
unlink(temp_file_name);
137+
free(temp_file_name);
138+
return NULL;
139+
}
140+
141+
fclose(temp_file);
142+
return temp_file_name;
143+
}
144+
145+
EMSCRIPTEN_KEEPALIVE
146+
ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) {
147+
struct archive* archive;
148+
struct archive_entry* entry;
149+
size_t files_count = 0;
150+
151+
const size_t buffsize = 64 * 1024;
152+
char buff[buffsize];
153+
size_t total_size = 0;
154+
const char *error_message;
155+
156+
FileData* files = malloc(sizeof(FileData) * (files_count + 1));
157+
158+
if (!files) {
159+
printf("Failed to allocate memory for files array\n");
160+
return NULL;
161+
}
162+
163+
ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
164+
if (!result) {
165+
free(files);
166+
return NULL;
167+
}
168+
169+
result->files = NULL;
170+
result->fileCount = 0;
171+
result->status = 1;
172+
result->error_message[0] = '\0';
173+
174+
char* temp_file_name = write_to_temp_file(inputData, inputSize);
175+
if (!temp_file_name) {
176+
free(files);
177+
error_message = "Failed to create temporary file";
178+
return error_handler(result, error_message, archive);
179+
}
180+
181+
archive = archive_read_new();
182+
archive_read_support_filter_all(archive);
183+
archive_read_support_format_raw(archive);
184+
185+
if (archive_read_open_filename(archive, temp_file_name, inputSize) != ARCHIVE_OK) {
186+
unlink(temp_file_name);
187+
free(temp_file_name);
188+
free(files);
189+
return error_handler(result, archive_error_string(archive), archive);
190+
}
191+
192+
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
193+
const char* filename = archive_entry_pathname(entry);
194+
if (!filename) filename = "decompression";
195+
196+
files[files_count].filename = strdup(filename);
197+
files[files_count].data = NULL;
198+
files[files_count].data_size = 0;
199+
200+
ssize_t ret;
201+
202+
for (;;) {
203+
ret = archive_read_data(archive, buff, buffsize);
204+
if (ret < 0) {
205+
for (size_t i = 0; i <= files_count; i++) {
206+
free(files[i].filename);
207+
free(files[i].data);
208+
}
209+
free(files);
210+
result->files = NULL;
211+
return error_handler(result, archive_error_string(archive), archive);
212+
}
213+
if (ret == 0) {
214+
break;
215+
}
216+
217+
void* new_data = realloc(files[files_count].data, total_size + ret);
218+
if (!new_data) {
219+
free(files[files_count].data);
220+
error_message = "Memory allocation error";
221+
return error_handler(result, error_message, archive);
222+
}
223+
224+
files[files_count].data = new_data;
225+
memcpy(files[files_count].data + total_size, buff, ret);
226+
total_size += ret;
227+
}
228+
files[files_count].data_size = total_size;
229+
files_count++;
230+
}
231+
232+
archive_read_free(archive);
233+
unlink(temp_file_name);
234+
free(temp_file_name);
235+
236+
result->files = files;
237+
result->fileCount = files_count;
238+
result->status = 1;
239+
return result;
240+
}
241+
242+
EMSCRIPTEN_KEEPALIVE
243+
ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
244+
if (!decompressionOnly) {
245+
return extract_archive(inputData, inputSize);
246+
} else {
247+
return decompression(inputData, inputSize);
248+
}
249+
}
250+
114251
EMSCRIPTEN_KEEPALIVE
115252
void free_extracted_archive(ExtractedArchive* archive) {
116253
if (!archive) {

0 commit comments

Comments
 (0)