Skip to content

Commit 5b09c90

Browse files
authored
Resolve symlinks content (#49)
* Resolve symlinks content * Recursive resolve of symlinks * Rename * Iterate
1 parent 866ffca commit 5b09c90

File tree

1 file changed

+147
-7
lines changed

1 file changed

+147
-7
lines changed

unpack.c

Lines changed: 147 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#include <libgen.h>
12
#include <stdio.h>
23
#include <string.h>
34
#include <stdlib.h>
@@ -19,6 +20,11 @@ typedef struct {
1920
char error_message[256];
2021
} ExtractedArchive;
2122

23+
/*
 * One symbolic-link entry collected from the archive.
 * Both strings are heap-allocated by the symlink pass in extract_archive()
 * and released there once every link has been processed.
 */
typedef struct SymlinkInfo {
    char *linkname; /* archive pathname of the symlink entry itself */
    char *target;   /* link target joined onto the directory of the link */
} SymlinkInfo;
27+
2228
ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) {
2329

2430
if (!result || !archive) {
@@ -33,14 +39,52 @@ ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_mess
3339
return result;
3440
}
3541

42+
/*
 * Build "<dir>/<relative>" in a freshly allocated string.
 *
 * dir      - directory prefix; when NULL or empty the result is just a copy
 *            of `relative` (no leading separator is added).
 * relative - path to append; must be a NUL-terminated string.
 *
 * Returns a malloc'd string the caller must free(), or NULL when `relative`
 * is NULL or the allocation fails. No normalisation is performed: "." and
 * ".." components and duplicate slashes are kept exactly as given.
 */
static char *join_paths(const char *dir, const char *relative) {
    if (!relative) {
        return NULL;
    }

    size_t dir_len = dir ? strlen(dir) : 0;
    size_t rel_len = strlen(relative);
    size_t sep_len = dir_len > 0 ? 1 : 0;

    char *buf = malloc(dir_len + sep_len + rel_len + 1);
    if (!buf) {
        return NULL;
    }

    if (dir_len > 0) {
        memcpy(buf, dir, dir_len);
        buf[dir_len] = '/';
    }
    /* rel_len + 1 copies the terminating NUL as well. */
    memcpy(buf + dir_len + sep_len, relative, rel_len + 1);
    return buf;
}
49+
50+
static const FileData *resolve_symlink(
51+
const FileData *files, size_t file_count,
52+
const SymlinkInfo *symlinks, size_t symlink_count,
53+
const char *target, int depth
54+
) {
55+
if (!target || depth > 32) // prevent infinite recursion
56+
return NULL;
57+
58+
// First, check if target is a regular file
59+
for (size_t i = 0; i < file_count; i++) {
60+
if (strcmp(files[i].filename, target) == 0) {
61+
if (files[i].data && files[i].data_size > 0) {
62+
return &files[i]; // Found real file
63+
}
64+
}
65+
}
66+
67+
// If not found among files, maybe it's another symlink
68+
for (size_t i = 0; i < symlink_count; i++) {
69+
if (strcmp(symlinks[i].linkname, target) == 0) {
70+
// Recurse into that symlink's target
71+
return resolve_symlink(files, file_count, symlinks, symlink_count,
72+
symlinks[i].target, depth + 1);
73+
}
74+
}
75+
76+
return NULL; // Not found
77+
}
78+
3679
EMSCRIPTEN_KEEPALIVE
3780
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
3881
struct archive* archive;
3982
struct archive_entry* entry;
4083
size_t files_struct_length = 100;
4184
FileData* files = NULL;
4285
size_t files_count = 0;
43-
const char *error_message;
86+
const char* error_message;
87+
bool hasSymLinks = false;
4488

4589
ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
4690
if (!result) {
@@ -57,24 +101,32 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
57101
archive_read_support_format_all(archive);
58102

59103
if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
60-
return error_handler(result,archive_error_string(archive), archive);
104+
return error_handler(result,archive_error_string(archive), archive);
61105
}
62106
files = malloc(sizeof(FileData) * files_struct_length);
63107

64108
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
65109
const char* filename = archive_entry_pathname(entry);
66110
size_t entrySize = archive_entry_size(entry);
111+
112+
// Ignore symbolic links for now
113+
if (archive_entry_filetype(entry) == AE_IFLNK) {
114+
hasSymLinks = true;
115+
continue;
116+
}
117+
67118
if (files_count + 1 > files_struct_length) {
68119
files_struct_length *= 2; // double the length
69120
FileData* oldfiles = files;
70-
files= realloc(files, sizeof(FileData) * files_struct_length);
121+
files = realloc(files, sizeof(FileData) * files_struct_length);
71122
if (!files) {
72123
result->fileCount = files_count;
73124
result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed.
74125
error_message = "Memory allocation error for file data.";
75126
return error_handler(result, error_message, archive);
76-
}
127+
}
77128
}
129+
78130
files[files_count].filename = strdup(filename);
79131
files[files_count].data = malloc(entrySize);
80132
files[files_count].data_size = entrySize;
@@ -105,6 +157,94 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
105157
files_count++;
106158
}
107159

160+
// Resolve symlinks
161+
if (hasSymLinks) {
162+
// Reopen the archive to iterate over symlinks
163+
archive_read_free(archive);
164+
archive = archive_read_new();
165+
archive_read_support_filter_all(archive);
166+
archive_read_support_format_all(archive);
167+
168+
if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
169+
return error_handler(result, archive_error_string(archive), archive);
170+
}
171+
172+
struct archive_entry *symlink_entry;
173+
174+
size_t symlink_count = 0;
175+
size_t symlink_alloc = 16;
176+
SymlinkInfo *symlinks = malloc(sizeof(SymlinkInfo) * symlink_alloc);
177+
178+
// Collect all symlink entries
179+
while (archive_read_next_header(archive, &symlink_entry) == ARCHIVE_OK) {
180+
if (archive_entry_filetype(symlink_entry) != AE_IFLNK)
181+
continue;
182+
183+
const char *tgt = archive_entry_symlink(symlink_entry);
184+
185+
if (!tgt) {
186+
continue;
187+
}
188+
189+
if (symlink_count + 1 > symlink_alloc) {
190+
symlink_alloc *= 2;
191+
symlinks = realloc(symlinks, sizeof(SymlinkInfo) * symlink_alloc);
192+
}
193+
194+
// Compute directory of the symlink
195+
char *link_dir = strdup(archive_entry_pathname(symlink_entry));
196+
char *dir = dirname(link_dir);
197+
char *resolved_target_path = join_paths(dir, tgt);
198+
free(dir);
199+
free(link_dir);
200+
201+
symlinks[symlink_count].linkname = strdup(archive_entry_pathname(symlink_entry));
202+
symlinks[symlink_count].target = strdup(resolved_target_path);
203+
symlink_count++;
204+
}
205+
206+
// Resolve and populate symlinks
207+
for (size_t i = 0; i < symlink_count; i++) {
208+
const char *linkname = symlinks[i].linkname;
209+
const char *target = symlinks[i].target;
210+
211+
const FileData *resolved = resolve_symlink(files, files_count,
212+
symlinks, symlink_count,
213+
target, 0);
214+
215+
if (!resolved) {
216+
error_message = "Failed to resolve symlink.";
217+
return error_handler(result, error_message, archive);
218+
}
219+
220+
if (files_count + 1 > files_struct_length) {
221+
files_struct_length *= 2;
222+
FileData *oldfiles = files;
223+
files = realloc(files, sizeof(FileData) * files_struct_length);
224+
if (!files) {
225+
result->fileCount = files_count;
226+
result->files = oldfiles;
227+
error_message = "Memory allocation error for symlink data.";
228+
return error_handler(result, error_message, archive);
229+
}
230+
}
231+
232+
files[files_count].filename = strdup(linkname);
233+
234+
files[files_count].data_size = resolved->data_size;
235+
files[files_count].data = malloc(resolved->data_size);
236+
memcpy(files[files_count].data, resolved->data, resolved->data_size);
237+
238+
files_count++;
239+
}
240+
241+
for (size_t i = 0; i < symlink_count; i++) {
242+
free(symlinks[i].linkname);
243+
free(symlinks[i].target);
244+
}
245+
free(symlinks);
246+
}
247+
108248
archive_read_free(archive);
109249
result->files = files;
110250
result->fileCount = files_count;
@@ -150,7 +290,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) {
150290

151291
const size_t buffsize = 64 * 1024;
152292
char buff[buffsize];
153-
size_t total_size = 0;
293+
size_t total_size = 0;
154294
const char *error_message;
155295

156296
FileData* files = malloc(sizeof(FileData) * (files_count + 1));
@@ -159,7 +299,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) {
159299
printf("Failed to allocate memory for files array\n");
160300
return NULL;
161301
}
162-
302+
163303
ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
164304
if (!result) {
165305
free(files);
@@ -259,4 +399,4 @@ void free_extracted_archive(ExtractedArchive* archive) {
259399
}
260400
free(archive->files);
261401
free(archive);
262-
}
402+
}

0 commit comments

Comments
 (0)