Skip to content
This repository was archived by the owner on Nov 9, 2017. It is now read-only.

Commit 5d23e13

Browse files
author
Junio C Hamano
committed
Refactor patch-id filtering out of git-cherry and git-format-patch.
This implements the patch-id computation and recording library, patch-ids.c, and rewrites the get_patch_ids() function used in cherry and format-patch to use it, so that they do not pollute the object namespace. Earlier code threw non-objects into the in-core object database, and hoped for not getting bitten by SHA-1 collisions. While it may be practically Ok, it still was an ugly hack. Signed-off-by: Junio C Hamano <[email protected]>
1 parent 199c45b commit 5d23e13

File tree

4 files changed

+228
-32
lines changed

4 files changed

+228
-32
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ LIB_H = \
283283
diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
284284
run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
285285
tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
286-
utf8.h reflog-walk.h
286+
utf8.h reflog-walk.h patch-ids.h
287287

288288
DIFF_OBJS = \
289289
diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -295,6 +295,7 @@ LIB_OBJS = \
295295
date.o diff-delta.o entry.o exec_cmd.o ident.o \
296296
interpolate.o \
297297
lockfile.o \
298+
patch-ids.o \
298299
object.o pack-check.o patch-delta.o path.o pkt-line.o sideband.o \
299300
reachable.o reflog-walk.o \
300301
quote.o read-cache.o refs.o run-command.o dir.o object-refs.o \

builtin-log.c

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "builtin.h"
1313
#include "tag.h"
1414
#include "reflog-walk.h"
15+
#include "patch-ids.h"
1516

1617
static int default_show_root = 1;
1718

@@ -333,25 +334,12 @@ static int reopen_stdout(struct commit *commit, int nr, int keep_subject)
333334

334335
}
335336

336-
static int get_patch_id(struct commit *commit, struct diff_options *options,
337-
unsigned char *sha1)
338-
{
339-
if (commit->parents)
340-
diff_tree_sha1(commit->parents->item->object.sha1,
341-
commit->object.sha1, "", options);
342-
else
343-
diff_root_tree_sha1(commit->object.sha1, "", options);
344-
diffcore_std(options);
345-
return diff_flush_patch_id(options, sha1);
346-
}
347-
348-
static void get_patch_ids(struct rev_info *rev, struct diff_options *options, const char *prefix)
337+
static void get_patch_ids(struct rev_info *rev, struct patch_ids *ids, const char *prefix)
349338
{
350339
struct rev_info check_rev;
351340
struct commit *commit;
352341
struct object *o1, *o2;
353342
unsigned flags1, flags2;
354-
unsigned char sha1[20];
355343

356344
if (rev->pending.nr != 2)
357345
die("Need exactly one range.");
@@ -364,10 +352,7 @@ static void get_patch_ids(struct rev_info *rev, struct diff_options *options, co
364352
if ((flags1 & UNINTERESTING) == (flags2 & UNINTERESTING))
365353
die("Not a range.");
366354

367-
diff_setup(options);
368-
options->recursive = 1;
369-
if (diff_setup_done(options) < 0)
370-
die("diff_setup_done failed");
355+
init_patch_ids(ids);
371356

372357
/* given a range a..b get all patch ids for b..a */
373358
init_revisions(&check_rev, prefix);
@@ -382,8 +367,7 @@ static void get_patch_ids(struct rev_info *rev, struct diff_options *options, co
382367
if (commit->parents && commit->parents->next)
383368
continue;
384369

385-
if (!get_patch_id(commit, options, sha1))
386-
created_object(sha1, xcalloc(1, sizeof(struct object)));
370+
add_commit_patch_id(commit, ids);
387371
}
388372

389373
/* reset for next revision walk */
@@ -421,7 +405,7 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix)
421405
int ignore_if_in_upstream = 0;
422406
int thread = 0;
423407
const char *in_reply_to = NULL;
424-
struct diff_options patch_id_opts;
408+
struct patch_ids ids;
425409
char *add_signoff = NULL;
426410
char message_id[1024];
427411
char ref_message_id[1024];
@@ -560,22 +544,19 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix)
560544
}
561545

562546
if (ignore_if_in_upstream)
563-
get_patch_ids(&rev, &patch_id_opts, prefix);
547+
get_patch_ids(&rev, &ids, prefix);
564548

565549
if (!use_stdout)
566550
realstdout = fdopen(dup(1), "w");
567551

568552
prepare_revision_walk(&rev);
569553
while ((commit = get_revision(&rev)) != NULL) {
570-
unsigned char sha1[20];
571-
572554
/* ignore merges */
573555
if (commit->parents && commit->parents->next)
574556
continue;
575557

576558
if (ignore_if_in_upstream &&
577-
!get_patch_id(commit, &patch_id_opts, sha1) &&
578-
lookup_object(sha1))
559+
has_commit_patch_id(commit, &ids))
579560
continue;
580561

581562
nr++;
@@ -630,6 +611,8 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix)
630611
fclose(stdout);
631612
}
632613
free(list);
614+
if (ignore_if_in_upstream)
615+
free_patch_ids(&ids);
633616
return 0;
634617
}
635618

@@ -652,7 +635,7 @@ static const char cherry_usage[] =
652635
int cmd_cherry(int argc, const char **argv, const char *prefix)
653636
{
654637
struct rev_info revs;
655-
struct diff_options patch_id_opts;
638+
struct patch_ids ids;
656639
struct commit *commit;
657640
struct commit_list *list = NULL;
658641
const char *upstream;
@@ -698,7 +681,7 @@ int cmd_cherry(int argc, const char **argv, const char *prefix)
698681
return 0;
699682
}
700683

701-
get_patch_ids(&revs, &patch_id_opts, prefix);
684+
get_patch_ids(&revs, &ids, prefix);
702685

703686
if (limit && add_pending_commit(limit, &revs, UNINTERESTING))
704687
die("Unknown commit %s", limit);
@@ -714,12 +697,10 @@ int cmd_cherry(int argc, const char **argv, const char *prefix)
714697
}
715698

716699
while (list) {
717-
unsigned char sha1[20];
718700
char sign = '+';
719701

720702
commit = list->item;
721-
if (!get_patch_id(commit, &patch_id_opts, sha1) &&
722-
lookup_object(sha1))
703+
if (has_commit_patch_id(commit, &ids))
723704
sign = '-';
724705

725706
if (verbose) {
@@ -737,5 +718,6 @@ int cmd_cherry(int argc, const char **argv, const char *prefix)
737718
list = list->next;
738719
}
739720

721+
free_patch_ids(&ids);
740722
return 0;
741723
}

patch-ids.c

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
#include "cache.h"
2+
#include "diff.h"
3+
#include "commit.h"
4+
#include "patch-ids.h"
5+
6+
static int commit_patch_id(struct commit *commit, struct diff_options *options,
7+
unsigned char *sha1)
8+
{
9+
if (commit->parents)
10+
diff_tree_sha1(commit->parents->item->object.sha1,
11+
commit->object.sha1, "", options);
12+
else
13+
diff_root_tree_sha1(commit->object.sha1, "", options);
14+
diffcore_std(options);
15+
return diff_flush_patch_id(options, sha1);
16+
}
17+
18+
static uint32_t take2(const unsigned char *id)
19+
{
20+
return ((id[0] << 8) | id[1]);
21+
}
22+
23+
/*
24+
* Conventional binary search loop looks like this:
25+
*
26+
* do {
27+
* int mi = (lo + hi) / 2;
28+
* int cmp = "entry pointed at by mi" minus "target";
29+
* if (!cmp)
30+
* return (mi is the wanted one)
31+
* if (cmp > 0)
32+
* hi = mi; "mi is larger than target"
33+
* else
34+
* lo = mi+1; "mi is smaller than target"
35+
* } while (lo < hi);
36+
*
37+
* The invariants are:
38+
*
39+
* - When entering the loop, lo points at a slot that is never
40+
* above the target (it could be at the target), hi points at a
41+
* slot that is guaranteed to be above the target (it can never
42+
* be at the target).
43+
*
44+
* - We find a point 'mi' between lo and hi (mi could be the same
45+
* as lo, but never can be the same as hi), and check if it hits
46+
* the target. There are three cases:
47+
*
48+
* - if it is a hit, we are happy.
49+
*
50+
* - if it is strictly higher than the target, we update hi with
51+
* it.
52+
*
53+
* - if it is strictly lower than the target, we update lo to be
54+
* one slot after it, because we allow lo to be at the target.
55+
*
56+
* When choosing 'mi', we do not have to take the "middle" but
57+
* anywhere in between lo and hi, as long as lo <= mi < hi is
58+
* satisfied. When we somehow know that the distance between the
59+
* target and lo is much shorter than the target and hi, we could
60+
* pick mi that is much closer to lo than the midway.
61+
*/
62+
static int patch_pos(struct patch_id **table, int nr, const unsigned char *id)
63+
{
64+
int hi = nr;
65+
int lo = 0;
66+
int mi = 0;
67+
68+
if (!nr)
69+
return -1;
70+
71+
if (nr != 1) {
72+
unsigned lov, hiv, miv, ofs;
73+
74+
for (ofs = 0; ofs < 18; ofs += 2) {
75+
lov = take2(table[0]->patch_id + ofs);
76+
hiv = take2(table[nr-1]->patch_id + ofs);
77+
miv = take2(id + ofs);
78+
if (miv < lov)
79+
return -1;
80+
if (hiv < miv)
81+
return -1 - nr;
82+
if (lov != hiv) {
83+
/*
84+
* At this point miv could be equal
85+
* to hiv (but id could still be higher);
86+
* the invariant of (mi < hi) should be
87+
* kept.
88+
*/
89+
mi = (nr-1) * (miv - lov) / (hiv - lov);
90+
if (lo <= mi && mi < hi)
91+
break;
92+
die("oops");
93+
}
94+
}
95+
if (18 <= ofs)
96+
die("cannot happen -- lo and hi are identical");
97+
}
98+
99+
do {
100+
int cmp;
101+
cmp = hashcmp(table[mi]->patch_id, id);
102+
if (!cmp)
103+
return mi;
104+
if (cmp > 0)
105+
hi = mi;
106+
else
107+
lo = mi + 1;
108+
mi = (hi + lo) / 2;
109+
} while (lo < hi);
110+
return -lo-1;
111+
}
112+
113+
#define BUCKET_SIZE 190 /* 190 * 21 = 3990, with slop close enough to 4K */
114+
struct patch_id_bucket {
115+
struct patch_id_bucket *next;
116+
int nr;
117+
struct patch_id bucket[BUCKET_SIZE];
118+
};
119+
120+
int init_patch_ids(struct patch_ids *ids)
121+
{
122+
memset(ids, 0, sizeof(*ids));
123+
diff_setup(&ids->diffopts);
124+
ids->diffopts.recursive = 1;
125+
if (diff_setup_done(&ids->diffopts) < 0)
126+
return error("diff_setup_done failed");
127+
return 0;
128+
}
129+
130+
int free_patch_ids(struct patch_ids *ids)
131+
{
132+
struct patch_id_bucket *next, *patches;
133+
134+
free(ids->table);
135+
for (patches = ids->patches; patches; patches = next) {
136+
next = patches->next;
137+
free(patches);
138+
}
139+
return 0;
140+
}
141+
142+
static struct patch_id *add_commit(struct commit *commit,
143+
struct patch_ids *ids,
144+
int no_add)
145+
{
146+
struct patch_id_bucket *bucket;
147+
struct patch_id *ent;
148+
unsigned char sha1[20];
149+
int pos;
150+
151+
if (commit_patch_id(commit, &ids->diffopts, sha1))
152+
return NULL;
153+
pos = patch_pos(ids->table, ids->nr, sha1);
154+
if (0 <= pos)
155+
return ids->table[pos];
156+
if (no_add)
157+
return NULL;
158+
159+
pos = -1 - pos;
160+
161+
bucket = ids->patches;
162+
if (!bucket || (BUCKET_SIZE <= bucket->nr)) {
163+
bucket = xcalloc(1, sizeof(*bucket));
164+
bucket->next = ids->patches;
165+
ids->patches = bucket;
166+
}
167+
ent = &bucket->bucket[bucket->nr++];
168+
hashcpy(ent->patch_id, sha1);
169+
170+
if (ids->alloc <= ids->nr) {
171+
ids->alloc = alloc_nr(ids->nr);
172+
ids->table = xrealloc(ids->table, sizeof(ent) * ids->alloc);
173+
}
174+
if (pos < ids->nr)
175+
memmove(ids->table + pos + 1, ids->table + pos,
176+
sizeof(ent) * (ids->nr - pos));
177+
ids->nr++;
178+
ids->table[pos] = ent;
179+
return ids->table[pos];
180+
}
181+
182+
struct patch_id *has_commit_patch_id(struct commit *commit,
183+
struct patch_ids *ids)
184+
{
185+
return add_commit(commit, ids, 1);
186+
}
187+
188+
struct patch_id *add_commit_patch_id(struct commit *commit,
189+
struct patch_ids *ids)
190+
{
191+
return add_commit(commit, ids, 0);
192+
}

patch-ids.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#ifndef PATCH_IDS_H
2+
#define PATCH_IDS_H
3+
4+
struct patch_id {
5+
unsigned char patch_id[20];
6+
char seen;
7+
};
8+
9+
struct patch_ids {
10+
struct diff_options diffopts;
11+
int nr, alloc;
12+
struct patch_id **table;
13+
struct patch_id_bucket *patches;
14+
};
15+
16+
int init_patch_ids(struct patch_ids *);
17+
int free_patch_ids(struct patch_ids *);
18+
struct patch_id *add_commit_patch_id(struct commit *, struct patch_ids *);
19+
struct patch_id *has_commit_patch_id(struct commit *, struct patch_ids *);
20+
21+
#endif /* PATCH_IDS_H */

0 commit comments

Comments
 (0)