Skip to content

Commit

Permalink
Save literal value of the parsed number to preserve it for the output
Browse files Browse the repository at this point in the history
Extend jv_number to use decNumber for storing number literals. Any math
operations on the numbers will truncate them to double precision.
Comparisons when both numbers are literal numbers will compare them
without truncation.

Delay conversion of numbers to doubles until a math operation is performed,
to preserve precision. A literal jv_number will only need conversion to
double once, and will reuse the resultant double on subsequent
conversions.

Outputting literal jv_numbers preserves the original precision.

Add strong pthread requirement to manage contexts/allocations for
converting numbers between their decNumber, string, and double formats.
  • Loading branch information
Leonid S. Usov authored and wtlangford committed Oct 22, 2019
1 parent b6be13d commit cf4b48c
Show file tree
Hide file tree
Showing 21 changed files with 1,374 additions and 755 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ tests/*.trs
cscope.in.out
cscope.out
cscope.po.out
jq.dSYM
12 changes: 9 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ LIBJQ_SRC = src/builtin.c src/bytecode.c src/compile.c src/execute.c \
src/jq_test.c src/jv.c src/jv_alloc.c src/jv_aux.c \
src/jv_dtoa.c src/jv_file.c src/jv_parse.c src/jv_print.c \
src/jv_unicode.c src/linker.c src/locfile.c src/util.c \
src/decNumber/decContext.c src/decNumber/decNumber.c \
src/jv_dtoa_tsd.c \
${LIBJQ_INCS}

### C build options
Expand Down Expand Up @@ -186,9 +188,13 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
tests/modules/test_bind_order.jq \
tests/modules/test_bind_order0.jq \
tests/modules/test_bind_order1.jq \
tests/modules/test_bind_order2.jq tests/onig.supp \
tests/onig.test tests/optional.test tests/setup \
tests/torture/input0.json tests/utf8-truncate.jq
tests/modules/test_bind_order2.jq \
tests/onig.supp tests/local.supp \
tests/onig.test tests/setup tests/torture/input0.json \
tests/optional.test tests/optionaltest \
tests/utf8-truncate.jq tests/utf8test \
tests/base64.test tests/base64test \
tests/jq-f-test.sh tests/shtest

# README.md is expected in Github projects, good stuff in it, so we'll
# distribute it and install it with the package in the doc directory.
Expand Down
14 changes: 3 additions & 11 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,9 @@ AC_CHECK_MEMBER([struct tm.tm_gmtoff], [AC_DEFINE([HAVE_TM_TM_GMT_OFF],1,[Define
AC_CHECK_MEMBER([struct tm.__tm_gmtoff], [AC_DEFINE([HAVE_TM___TM_GMT_OFF],1,[Define to 1 if the system has the __tm_gmt_off field in struct tm])],
[], [[#include <time.h>]])

AC_ARG_ENABLE([pthread-tls],
[AC_HELP_STRING([--enable-pthread-tls],
[Enable use of pthread thread local storage])],
[],
[enable_pthread_tls=no])

if test $enable_pthread_tls = yes; then
AC_FIND_FUNC([pthread_key_create], [pthread], [#include <pthread.h>], [NULL, NULL])
AC_FIND_FUNC([pthread_once], [pthread], [#include <pthread.h>], [NULL, NULL])
AC_FIND_FUNC([atexit], [pthread], [#include <stdlib.h>], [NULL])
fi
AC_FIND_FUNC([pthread_key_create], [pthread], [#include <pthread.h>], [NULL, NULL])
AC_FIND_FUNC([pthread_once], [pthread], [#include <pthread.h>], [NULL, NULL])
AC_FIND_FUNC([atexit], [pthread], [#include <stdlib.h>], [NULL])

dnl libm math.h functions
AC_CHECK_MATH_FUNC(acos)
Expand Down
48 changes: 48 additions & 0 deletions docs/content/manual/v1.6/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,37 @@ sections:
program can be a useful way of formatting JSON output from,
say, `curl`.
An important point about the identity filter is that it
guarantees to preserve the literal decimal representation
of values. This is particularly important when dealing with numbers
which can't be losslessly converted to an IEEE754 double precision
representation.
jq doesn't truncate literal numbers to double precision unless an
arithmetic operation has to be performed on the number.
Comparisons are carried out over the untruncated big decimal
representation of the number.
jq will also try to maintain the original decimal precision of the provided
number literal. See below for examples.
examples:
- program: '.'
input: '"Hello, world!"'
output: ['"Hello, world!"']

- program: '. | tojson'
input: '12345678909876543212345'
output: ['"12345678909876543212345"']

- program: 'map([., . == 1]) | tojson'
input: '[1, 1.000, 1.0, 100e-2]'
output: ['"[[1,true],[1.000,true],[1.0,true],[1.00,true]]"']

- program: '. as $big | [$big, $big + 1] | map(. > 10000000000000000000000000000000)'
input: '10000000000000000000000000000001'
output: ['[true, false]']

- title: "Object Identifier-Index: `.foo`, `.foo.bar`"
body: |
Expand Down Expand Up @@ -512,6 +538,16 @@ sections:
expression that takes an input, ignores it, and returns 42
instead.
Numbers in jq are internally represented by their IEEE754 double
precision approximation. Any arithmetic operation with numbers,
whether they are literals or results of previous filters, will
produce a double precision floating point result.
However, when parsing a literal, jq will store the original literal
string. If no mutation is applied to this value then it will make it
to the output in its original form, even if conversion to double
would result in a loss of precision.
entries:
- title: "Array construction: `[]`"
body: |
Expand Down Expand Up @@ -630,6 +666,18 @@ sections:
try to add a string to an object you'll get an error message and
no result.
Please note that all numbers are converted to IEEE754 double precision
floating point representation. Arithmetic and logical operators work
with these converted doubles. Results of all such operations are also limited
to double precision.
The only exception to this behaviour is that jq keeps a snapshot of the original
number literal. When a number that was originally provided as a literal is never
mutated before the end of the program, it is printed to the output in its
original literal form. This also includes cases where the original literal
would be truncated when converted to an IEEE754 double precision floating point
number.
entries:
- title: "Addition: `+`"
body: |
Expand Down
29 changes: 23 additions & 6 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,11 @@ static jv f_plus(jq_state *jq, jv input, jv a, jv b) {
jv_free(b);
return a;
} else if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
return jv_number(jv_number_value(a) +
jv r = jv_number(jv_number_value(a) +
jv_number_value(b));
jv_free(a);
jv_free(b);
return r;
} else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) {
return jv_string_concat(a, b);
} else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) {
Expand Down Expand Up @@ -274,7 +277,10 @@ static jv f_rtrimstr(jq_state *jq, jv input, jv right) {
static jv f_minus(jq_state *jq, jv input, jv a, jv b) {
jv_free(input);
if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
return jv_number(jv_number_value(a) - jv_number_value(b));
jv r = jv_number(jv_number_value(a) - jv_number_value(b));
jv_free(a);
jv_free(b);
return r;
} else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) {
jv out = jv_array();
jv_array_foreach(a, i, x) {
Expand Down Expand Up @@ -302,7 +308,10 @@ static jv f_multiply(jq_state *jq, jv input, jv a, jv b) {
jv_kind bk = jv_get_kind(b);
jv_free(input);
if (ak == JV_KIND_NUMBER && bk == JV_KIND_NUMBER) {
return jv_number(jv_number_value(a) * jv_number_value(b));
jv r = jv_number(jv_number_value(a) * jv_number_value(b));
jv_free(a);
jv_free(b);
return r;
} else if ((ak == JV_KIND_STRING && bk == JV_KIND_NUMBER) ||
(ak == JV_KIND_NUMBER && bk == JV_KIND_STRING)) {
jv str = a;
Expand Down Expand Up @@ -336,7 +345,10 @@ static jv f_divide(jq_state *jq, jv input, jv a, jv b) {
if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
if (jv_number_value(b) == 0.0)
return type_error2(a, b, "cannot be divided because the divisor is zero");
return jv_number(jv_number_value(a) / jv_number_value(b));
jv r = jv_number(jv_number_value(a) / jv_number_value(b));
jv_free(a);
jv_free(b);
return r;
} else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) {
return jv_string_split(a, b);
} else {
Expand All @@ -349,7 +361,10 @@ static jv f_mod(jq_state *jq, jv input, jv a, jv b) {
if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
if ((intmax_t)jv_number_value(b) == 0)
return type_error2(a, b, "cannot be divided (remainder) because the divisor is zero");
return jv_number((intmax_t)jv_number_value(a) % (intmax_t)jv_number_value(b));
jv r = jv_number((intmax_t)jv_number_value(a) % (intmax_t)jv_number_value(b));
jv_free(a);
jv_free(b);
return r;
} else {
return type_error2(a, b, "cannot be divided (remainder)");
}
Expand Down Expand Up @@ -440,7 +455,9 @@ static jv f_length(jq_state *jq, jv input) {
} else if (jv_get_kind(input) == JV_KIND_STRING) {
return jv_number(jv_string_length_codepoints(input));
} else if (jv_get_kind(input) == JV_KIND_NUMBER) {
return jv_number(fabs(jv_number_value(input)));
jv r = jv_number(fabs(jv_number_value(input)));
jv_free(input);
return r;
} else if (jv_get_kind(input) == JV_KIND_NULL) {
jv_free(input);
return jv_number(0);
Expand Down
15 changes: 11 additions & 4 deletions src/execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,21 +509,25 @@ jv jq_next(jq_state *jq) {
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
jv max = stack_pop(jq);
if (raising) goto do_backtrack;
if (raising) {
jv_free(max);
goto do_backtrack;
}
if (jv_get_kind(*var) != JV_KIND_NUMBER ||
jv_get_kind(max) != JV_KIND_NUMBER) {
set_error(jq, jv_invalid_with_msg(jv_string_fmt("Range bounds must be numeric")));
jv_free(max);
goto do_backtrack;
} else if (jv_number_value(jv_copy(*var)) >= jv_number_value(jv_copy(max))) {
} else if (jv_number_value(*var) >= jv_number_value(max)) {
/* finished iterating */
jv_free(max);
goto do_backtrack;
} else {
jv curr = jv_copy(*var);
jv curr = *var;
*var = jv_number(jv_number_value(*var) + 1);

struct stack_pos spos = stack_get_pos(jq);
stack_push(jq, jv_copy(max));
stack_push(jq, max);
stack_save(jq, pc - 3, spos);

stack_push(jq, curr);
Expand Down Expand Up @@ -1010,6 +1014,9 @@ jq_state *jq_init(void) {
jq->attrs = jv_object();
jq->path = jv_null();
jq->value_at_path = jv_null();

jq->nomem_handler = NULL;
jq->nomem_handler_data = NULL;
return jq;
}

Expand Down
73 changes: 65 additions & 8 deletions src/jq_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,32 @@
#include "jq.h"

static void jv_test();
static void run_jq_tests(jv, int, FILE *);
static void run_jq_tests(jv, int, FILE *, int, int);


int jq_testsuite(jv libdirs, int verbose, int argc, char* argv[]) {
FILE *testdata = stdin;
int skip = -1;
int take = -1;
jv_test();
if (argc > 0) {
testdata = fopen(argv[0], "r");
if (!testdata) {
perror("fopen");
exit(1);
for(int i = 0; i < argc; i++) {
if (!strcmp(argv[i], "--skip")) {
skip = atoi(argv[i+1]);
i++;
} else if (!strcmp(argv[i], "--take")) {
take = atoi(argv[i+1]);
i++;
} else {
testdata = fopen(argv[i], "r");
if (!testdata) {
perror("fopen");
exit(1);
}
}
}
}
run_jq_tests(libdirs, verbose, testdata);
run_jq_tests(libdirs, verbose, testdata, skip, take);
return 0;
}

Expand Down Expand Up @@ -53,7 +65,7 @@ static void test_err_cb(void *data, jv e) {
jv_free(e);
}

static void run_jq_tests(jv lib_dirs, int verbose, FILE *testdata) {
static void run_jq_tests(jv lib_dirs, int verbose, FILE *testdata, int skip, int take) {
char prog[4096];
char buf[4096];
struct err_data err_msg;
Expand All @@ -63,6 +75,9 @@ static void run_jq_tests(jv lib_dirs, int verbose, FILE *testdata) {
int check_msg = 0;
jq_state *jq = NULL;

int tests_to_skip = skip;
int tests_to_take = take;

jq = jq_init();
assert(jq);
if (jv_get_kind(lib_dirs) == JV_KIND_NULL)
Expand All @@ -80,6 +95,34 @@ static void run_jq_tests(jv lib_dirs, int verbose, FILE *testdata) {
continue;
}
if (prog[strlen(prog)-1] == '\n') prog[strlen(prog)-1] = 0;

if (skip > 0) {
skip--;

// skip past test data
while (fgets(buf, sizeof(buf), testdata)) {
lineno++;
if (buf[0] == '\n' || (buf[0] == '\r' && buf[1] == '\n'))
break;
}

must_fail = 0;
check_msg = 0;

continue;
} else if (skip == 0) {
printf("Skipped %d tests\n", tests_to_skip);
skip = -1;
}

if (take > 0) {
take--;
} else if (take == 0) {
printf("Hit the number of tests limit (%d), breaking\n", tests_to_take);
take = -1;
break;
}

printf("Testing '%s' at line number %u\n", prog, lineno);
int pass = 1;
tests++;
Expand Down Expand Up @@ -179,7 +222,21 @@ static void run_jq_tests(jv lib_dirs, int verbose, FILE *testdata) {
passed+=pass;
}
jq_teardown(&jq);
printf("%d of %d tests passed (%d malformed)\n", passed,tests,invalid);

int total_skipped = tests_to_skip > 0 ? tests_to_skip : 0;

if (skip > 0) {
total_skipped = tests_to_skip - skip;
}

printf("%d of %d tests passed (%d malformed, %d skipped)\n",
passed, tests, invalid, total_skipped);

if (skip > 0) {
printf("WARN: skipped past the end of file, exiting with status 2\n");
exit(2);
}

if (passed != tests) exit(1);
}

Expand Down
Loading

0 comments on commit cf4b48c

Please sign in to comment.