Skip to content

Commit b898025

Browse files
authored
Merge pull request emscripten-forge#22 from AnastasiaSliusar/separation-files
Separation of files
2 parents 58b242f + 93f233e commit b898025

File tree

7 files changed

+104
-51
lines changed

7 files changed

+104
-51
lines changed

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,22 @@ Fetching and unpacking archives. This package uses compiled `libarchive` into wa
77
## Using
88

99
This package has 2 methods:
10-
- extract(url) - downloads an archive throught the url and returns extracted data in Uint8Array.
11-
- exctractData(data) - accepts Uint8Array archive data and returns exracted data.
10+
- extract(url) - downloads an archive from the given URL and returns the extracted data as an array of objects, each of which has the following structure:
11+
```
12+
{
13+
"data": new Uint8Array([5, 6, 7, 8]),
14+
"filename": "info/paths.json"
15+
}
16+
```
17+
- extractData(data) - accepts archive data as a Uint8Array and returns the extracted data in the same format as the `extract` method.
1218

1319
The example of using:
1420
```js
1521
import untarjs from "@emscripten-forge/untarjs";
1622

1723
const condaPackageUrl = 'https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2';
18-
untarjs.extract(condaPackageUrl).then((data)=>{
19-
console.log(data);
24+
untarjs.extract(condaPackageUrl).then((files)=>{
25+
console.log(files);
2026
});
2127
```
2228
> Note: If this package is used in a project that uses Webpack, then webpack.config should include the following:

build_wasm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ emcc unpack.c -o $WASM_LIB/unpack.js \
8686
${PREFIX}/lib/libz.a ${PREFIX}/lib/libbz2.a ${PREFIX}/lib/libzstd.a ${PREFIX}/lib/libiconv.a\
8787
-s MODULARIZE=1 -s WASM=1 -O3 -s ALLOW_MEMORY_GROWTH=1 \
8888
-s ENVIRONMENT=web \
89-
-s EXPORTED_RUNTIME_METHODS='["ccall", "cwrap", "getValue"]' \
89+
-s EXPORTED_RUNTIME_METHODS='["ccall", "cwrap", "getValue", "UTF8ToString"]' \
9090
-s EXPORTED_FUNCTIONS="['_extract_archive', '_malloc', '_free']"
9191

9292
echo "Build completed successfully!"

src/index.ts

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import initializeWasm from './helper';
2+
import { IFileData } from './types';
23
import { IWasmModule } from './unpack';
34

45
const fetchByteArray = async (url: string): Promise<Uint8Array> => {
@@ -20,55 +21,66 @@ const init = async (): Promise<IWasmModule | null> => {
2021
}
2122
};
2223

23-
const extractData = async (data: Uint8Array): Promise<Uint8Array | null> => {
24+
const extractData = async (data: Uint8Array): Promise<IFileData[]> => {
2425
const wasmModule = await init();
25-
2626
if (!wasmModule) {
2727
console.error('WASM module not initialized.');
28-
return null;
28+
return [];
2929
}
30+
const inputPtr = wasmModule._malloc(data.length);
31+
wasmModule.HEAPU8.set(data, inputPtr);
32+
const fileCountPtr = wasmModule._malloc(4);
33+
const outputSizePtr = wasmModule._malloc(4);
3034

3135
try {
32-
const inputPtr = wasmModule._malloc(data.length);
33-
wasmModule.HEAPU8.set(data, inputPtr);
34-
35-
const outputSizePtr = wasmModule._malloc(data.length);
36-
const extractedDataPtr = wasmModule._extract_archive(
36+
const extractedFilesPtr = wasmModule._extract_archive(
3737
inputPtr,
3838
data.length,
39-
outputSizePtr
39+
outputSizePtr,
40+
fileCountPtr
4041
);
41-
const extractedSize = wasmModule.getValue(outputSizePtr, 'i32');
42-
if (extractedDataPtr === 0) {
43-
throw new Error('Archive extraction failed.');
42+
43+
const fileCount = wasmModule.getValue(fileCountPtr, 'i32');
44+
const files: IFileData[] = [];
45+
46+
for (let i = 0; i < fileCount; i++) {
47+
const fileDataPtr = extractedFilesPtr + i * (3 * 4);
48+
const filenamePtr = wasmModule.getValue(fileDataPtr, 'i32');
49+
const dataSize = wasmModule.getValue(fileDataPtr + 8, 'i32');
50+
const dataPtr = wasmModule.getValue(fileDataPtr + 4, 'i32');
51+
const filename = wasmModule.UTF8ToString(filenamePtr);
52+
const fileData = new Uint8Array(
53+
wasmModule.HEAPU8.buffer,
54+
dataPtr,
55+
dataSize
56+
);
57+
58+
files.push({
59+
filename: filename,
60+
data: fileData
61+
});
4462
}
45-
const extractedData = new Uint8Array(
46-
wasmModule.HEAPU8.subarray(
47-
extractedDataPtr,
48-
extractedDataPtr + extractedSize
49-
)
50-
);
5163

52-
wasmModule._free(inputPtr);
64+
wasmModule._free(fileCountPtr);
5365
wasmModule._free(outputSizePtr);
54-
wasmModule._free(extractedDataPtr);
66+
wasmModule._free(inputPtr);
67+
wasmModule._free(extractedFilesPtr);
5568

56-
console.log('Extracted size:', extractedSize);
57-
return extractedData;
69+
return files;
5870
} catch (error) {
5971
console.error('Error during extracting:', error);
60-
return null;
72+
return [];
6173
}
6274
};
6375

64-
const extract = async (url: string): Promise<Uint8Array | null> => {
76+
const extract = async (url: string): Promise<IFileData[]> => {
6577
try {
6678
const data = await fetchByteArray(url);
6779
console.log('Data downloaded:', data);
6880
return await extractData(data);
6981
} catch (error) {
7082
console.error('Error during extracting:', error);
71-
return null;
83+
return [];
7284
}
7385
};
7486

src/types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
export interface IFileData {
2+
filename: string;
3+
data: Uint8Array;
4+
}

src/unpack.d.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
export interface IWasmModule {
2+
UTF8ToString(filenamePtr: number): string;
23
HEAPU8: Uint8Array;
34
_malloc(size: number): number;
45
_free(ptr: number): void;
56
_extract_archive(
67
inputPtr: number,
78
inputSize: number,
8-
outputSizePtr: number
9+
outputSizePtr: number,
10+
fileCountPtr: number
911
): number;
1012
getValue(ptr: number, type: string): number;
1113
}

tests/index.spec.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ jest.mock('../src/index', () => {
1515
describe('extract', () => {
1616
it('should download and extract data successfully', async () => {
1717
const mockData = new Uint8Array([1, 2, 3, 4]);
18-
const extractedData = new Uint8Array([5, 6, 7, 8]);
19-
18+
const extractedData = {
19+
data: new Uint8Array([5, 6, 7, 8]),
20+
filename: "info/paths.json"
21+
}
2022
const mockFetchByteArray = jest.fn().mockResolvedValue(mockData);
2123
const mockExtractData = jest.fn().mockResolvedValue(extractedData);
2224

unpack.c

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,52 +5,79 @@
55
#include <archive_entry.h>
66
#include <emscripten.h>
77

8+
typedef struct {
9+
char* filename;
10+
uint8_t* data;
11+
size_t data_size;
12+
} FileData;
13+
814
EMSCRIPTEN_KEEPALIVE
9-
char* extract_archive(uint8_t* inputData, size_t inputSize, size_t* outputSize) {
15+
FileData* extract_archive(uint8_t* inputData, size_t inputSize, size_t* outputSize, size_t* fileCount) {
1016
struct archive* archive;
1117
struct archive_entry* entry;
12-
char* outputBuffer = NULL;
13-
size_t bufferCapacity = 0;
14-
size_t totalBytes = 0;
18+
FileData* files = NULL;
19+
size_t files_count = 0;
1520

1621
archive = archive_read_new();
17-
archive_read_support_filter_all(archive);
18-
archive_read_support_format_all(archive);
19-
22+
archive_read_support_filter_all(archive);
23+
archive_read_support_format_all(archive);
24+
2025
if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
2126
fprintf(stderr, "Error opening archive: %s\n", archive_error_string(archive));
2227
archive_read_free(archive);
2328
return NULL;
2429
}
30+
printf("Archive opened successfully.\n");
2531

2632
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
2733
const char* filename = archive_entry_pathname(entry);
2834
size_t entrySize = archive_entry_size(entry);
35+
printf("Extracting file: %s, size: %zu\n", filename, entrySize);
36+
37+
files = realloc(files, sizeof(FileData) * (files_count + 1));
38+
if (!files) {
39+
fprintf(stderr, "Memory allocation error for FileData array.\n");
40+
archive_read_free(archive);
41+
return NULL;
42+
}
2943

30-
if (totalBytes + entrySize > bufferCapacity) {
31-
bufferCapacity = totalBytes + entrySize + 1024;
32-
outputBuffer = realloc(outputBuffer, bufferCapacity);
44+
files[files_count].filename = strdup(filename);
45+
files[files_count].data = malloc(entrySize);
46+
printf("Setting data_size for file: %s, size: %zu\n", filename, entrySize);
47+
files[files_count].data_size = entrySize;
48+
49+
if (!files[files_count].data) {
50+
fprintf(stderr, "Memory allocation error for file data.\n");
51+
free(files[files_count].filename);
52+
archive_read_free(archive);
53+
return NULL;
3354
}
3455

3556
size_t bytesRead = 0;
3657
while (bytesRead < entrySize) {
37-
ssize_t ret = archive_read_data(archive, outputBuffer + totalBytes, entrySize - bytesRead);
58+
ssize_t ret = archive_read_data(archive, files[files_count].data + bytesRead, entrySize - bytesRead);
3859
if (ret < 0) {
39-
fprintf(stderr, "Error reading data: %s\n", archive_error_string(archive));
40-
free(outputBuffer);
60+
fprintf(stderr, "Error reading data for %s: %s\n", filename, archive_error_string(archive));
61+
for (size_t i = 0; i <= files_count; i++) {
62+
free(files[i].filename);
63+
free(files[i].data);
64+
}
65+
free(files);
4166
archive_read_free(archive);
4267
return NULL;
4368
}
4469
bytesRead += ret;
45-
totalBytes += ret;
70+
printf("Read %zd bytes for file: %s\n", ret, filename);
4671
}
47-
48-
printf("Extracted file: %s, Size: %zu bytes\n", filename, entrySize);
72+
files_count++;
4973
}
5074

5175
archive_read_free(archive);
76+
*outputSize = files_count;
77+
*fileCount = files_count;
5278

53-
*outputSize = totalBytes;
54-
return outputBuffer;
79+
return files;
5580
}
5681

82+
83+

0 commit comments

Comments
 (0)