Skip to content

Commit

Permalink
Use pg_read_binary_file_all #9
Browse files (browse the repository at this point in the history)
  • Loading branch information
Florents-Tselai authored Nov 7, 2024
1 parent e159c25 commit 866e95d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 37 deletions.
50 changes: 14 additions & 36 deletions pgpdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,58 +52,36 @@ PG_FUNCTION_INFO_V1(pdf_in);
Datum
pdf_in(PG_FUNCTION_ARGS)
{
char* file_path = PG_GETARG_CSTRING(0);
Datum filename_t = CStringGetTextDatum(PG_GETARG_CSTRING(0));
Datum pdf_bytes;
int32 pdf_bytes_len;

pdftype* result;
GBytes* g_bytes = NULL;
PopplerDocument* doc = NULL;
GError* error = NULL;
int fd;
struct stat file_info;
ssize_t bytes_read;
GBytes* pdf_data;

// Open file and check if accessible
fd = open(file_path, O_RDONLY);
if (fd == -1)
{
elog(ERROR, "Could not open PDF file: %s", file_path);
}

// Get file size
if (fstat(fd, &file_info) == -1)
{
close(fd);
elog(ERROR, "Could not retrieve file information for: %s", file_path);
}
pdf_bytes = DirectFunctionCall1(pg_read_binary_file_all, filename_t);
pdf_bytes_len = VARSIZE_ANY_EXHDR(pdf_bytes);

// Allocate pdftype struct with space for PDF data
int32 data_size = file_info.st_size;
result = (pdftype*)palloc(VARHDRSZ + data_size);
SET_VARSIZE(result, VARHDRSZ + data_size);
result = (pdftype*)palloc(VARHDRSZ + pdf_bytes_len);
SET_VARSIZE(result, VARHDRSZ + pdf_bytes_len);

// Read file contents into pdftype->data
bytes_read = read(fd, VARDATA(result), data_size);
if (bytes_read != data_size)
{
close(fd);
elog(ERROR, "Could not read entire PDF file: %s", file_path);
}
memcpy(VARDATA(result), VARDATA_ANY(pdf_bytes), pdf_bytes_len);

// Close file
close(fd);
g_bytes = g_bytes_new(VARDATA(result), pdf_bytes_len);

// Create GBytes from the PDF data for validation
pdf_data = g_bytes_new(VARDATA(result), data_size);
doc = poppler_document_new_from_bytes(g_bytes, NULL, &error);
g_bytes_unref(g_bytes);

// Validate PDF using Poppler
doc = poppler_document_new_from_bytes(pdf_data, NULL, &error);
g_bytes_unref(pdf_data); // Free GBytes after use
if (!doc)
{
elog(ERROR, "Error parsing PDF document: %s", error->message);
pfree(result);
g_clear_error(&error);
PG_RETURN_NULL();
}

g_object_unref(doc);

PG_RETURN_POINTER(result);
Expand Down
2 changes: 1 addition & 1 deletion test/expected/pgpdf.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* Errors */
SELECT 'notexists.pdf'::pdf;
ERROR: Could not open PDF file: notexists.pdf
ERROR: could not open file "notexists.pdf" for reading: No such file or directory
LINE 2: SELECT 'notexists.pdf'::pdf;
^
SELECT '/tmp/bad.pdf'::pdf;
Expand Down

0 comments on commit 866e95d

Please sign in to comment.