Skip to content

Commit 9c905ce

Browse files
authored
maint: avoid duplicated boolean properties and bad script extensions (PCRE2Project#202)
`ucptest` was misbehaving and showing the wrong properties and finding the wrong characters.
1 parent 4678857 commit 9c905ce

File tree

8 files changed

+634
-782
lines changed

8 files changed

+634
-782
lines changed

maint/GenerateCommon.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ def getbpropslist():
139139
if re.match(pat, bplast) != None:
140140
break
141141
else:
142-
bplist.append(bplast)
142+
if bplast not in bplist:
143+
bplist.append(bplast)
143144

144145
file.close()
145146

@@ -204,7 +205,7 @@ def collect_property_names():
204205
if match_obj == None:
205206
continue
206207

207-
if match_obj.group(2) in bool_properties:
208+
if match_obj.group(2) != match_obj.group(1) and match_obj.group(2) in bool_properties:
208209
if match_obj.group(3) == None:
209210
abbreviations[match_obj.group(2)] = (match_obj.group(1),)
210211
else:
@@ -294,7 +295,7 @@ def open_output(default):
294295
try:
295296
file = open(output_name, "w")
296297
except IOError:
297-
print ("** Couldn't open %s" % output_name)
298+
print("** Couldn't open %s" % output_name)
298299
sys.exit(1)
299300

300301
script_name = sys.argv[0]

maint/GenerateTest26.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def to_string_char(ch_idx):
4040
input_file = open(output_directory + "testinput26", "w")
4141
output_file = open(output_directory + "testoutput26", "w")
4242
except IOError:
43-
print ("** Couldn't open output files")
43+
print("** Couldn't open output files")
4444
sys.exit(1)
4545

4646
write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")

maint/GenerateUcd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ def write_bitsets(list, item_size):
751751
size = len(records) * record_size
752752
stage1, stage2 = compress_table(table, block_size)
753753
size += get_tables_size(stage1, stage2)
754-
#print "/* block size %5d => %5d bytes */" % (block_size, size)
754+
#print("/* block size {:3d} => {:5d} bytes */".format(block_size, size))
755755
if size < min_size:
756756
min_size = size
757757
min_stage1, min_stage2 = stage1, stage2

maint/GenerateUcpTables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def stdnames(x):
114114

115115
for name in bool_properties:
116116
utt_table.append((stdname(name), name, 'PT_BOOL'))
117-
if name in abbreviations:
117+
if name in abbreviations:
118118
for abbrev in abbreviations[name]:
119119
utt_table.append((stdname(abbrev), name, 'PT_BOOL'))
120120

maint/ucptest.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,7 @@ if (scriptx != 0)
427427
if (bprops != 0)
428428
{
429429
const char *sep = "";
430-
const uint32_t *p = PRIV(ucd_boolprop_sets) +
431-
bprops * ucd_boolprop_sets_item_size;
430+
const uint32_t *p = PRIV(ucd_boolprop_sets) + bprops;
432431
printf(", [");
433432
for (int i = 0; i < ucp_Bprop_Count; i++)
434433
if (MAPBIT(p, i) != 0)
@@ -497,13 +496,13 @@ while (*s != 0)
497496
if (strcmp(CS name, "script") == 0 ||
498497
strcmp(CS name, "scriptx") == 0)
499498
{
499+
BOOL x = (name[6] == 'x');
500500
BOOL scriptx_not = FALSE;
501501
for (t = value; *t != 0; t++) *t = tolower(*t);
502502

503503
if (value[0] == '!')
504504
{
505-
if (name[6] == 'x') scriptx_not = TRUE;
506-
else script_not = TRUE;
505+
if (x) scriptx_not = TRUE; else script_not = TRUE;
507506
offset = 1;
508507
}
509508

@@ -514,7 +513,21 @@ while (*s != 0)
514513
PRIV(utt_names) + u->name_offset) == 0)
515514
{
516515
c = u->value;
517-
if (name[6] == 'x')
516+
if (x && !scriptx_not && u->type == PT_SC)
517+
{
518+
if (script < 0)
519+
{
520+
x = FALSE;
521+
script = -1;
522+
script_not = scriptx_not;
523+
}
524+
else if (!script_not)
525+
{
526+
printf("No characters found\n");
527+
return;
528+
}
529+
}
530+
if (x)
518531
{
519532
scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
520533
}
@@ -689,12 +702,15 @@ for (c = 0; c <= 0x10ffff; c++)
689702
/* Positive requirement */
690703
if (scriptx_list[i] >= 0)
691704
{
692-
if ((bits_scriptx[x] & (1u<<y)) != 0) found++;
705+
if (scriptx_list[i] == UCD_SCRIPT(c) ||
706+
((scriptx_list[i] < ucp_Unknown) &&
707+
(bits_scriptx[x] & (1u<<y)) != 0)) found++;
693708
}
694709
/* Negative requirement */
695710
else
696711
{
697-
if ((bits_scriptx[x] & (1u<<y)) == 0) found++;
712+
if ((-(scriptx_list[i]) < ucp_Unknown) &&
713+
(bits_scriptx[x] & (1u<<y)) == 0) found++;
698714
}
699715
}
700716

@@ -703,8 +719,7 @@ for (c = 0; c <= 0x10ffff; c++)
703719

704720
if (bprop_count > 0)
705721
{
706-
const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) +
707-
UCD_BPROPS(c) * ucd_boolprop_sets_item_size;
722+
const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) + UCD_BPROPS(c);
708723
unsigned int found = 0;
709724

710725
for (i = 0; i < bprop_count; i++)

maint/ucptestdata/testinput2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
find script Han
22
find type Pe script Common scriptx Hangul
3+
find script !latin scriptx sundanese
34
find type Sk
45
find type Pd
56
find gbreak LVT

maint/ucptestdata/testoutput1

Lines changed: 326 additions & 326 deletions
Large diffs are not rendered by default.

maint/ucptestdata/testoutput2

Lines changed: 276 additions & 441 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)