Skip to content

Commit 03d0549

Browse files
author
fredrik.lundh
committed
needforspeed: added Py_MEMCPY macro (currently tuned for Visual C only),
and use it for string copy operations. This gives a 20% speedup on some string benchmarks. git-svn-id: http://svn.python.org/projects/python/trunk@46499 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 5eaa10f commit 03d0549

File tree

3 files changed

+60
-46
lines changed

3 files changed

+60
-46
lines changed

Include/pyport.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,27 @@ typedef Py_intptr_t Py_ssize_t;
174174
#define Py_LOCAL_INLINE(type) static type
175175
#endif
176176

177+
/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks
178+
* are often very short. While most platforms have highly optimized code for
179+
* large transfers, the setup costs for memcpy are often quite high. Py_MEMCPY
180+
* solves this by doing short copies "in line".
181+
*/
182+
183+
#if defined(_MSC_VER)
184+
#define Py_MEMCPY(target, source, length) do { \
185+
size_t i_, n_ = (length); \
186+
char *t_ = (void*) (target); \
187+
const char *s_ = (void*) (source); \
188+
if (n_ >= 16) \
189+
memcpy(t_, s_, n_); \
190+
else \
191+
for (i_ = 0; i_ < n_; i_++) \
192+
t_[i_] = s_[i_]; \
193+
} while (0)
194+
#else
195+
#define Py_MEMCPY memcpy
196+
#endif
197+
177198
#include <stdlib.h>
178199

179200
#include <math.h> /* Moved here from the math section, before extern "C" */

Include/unicodeobject.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -357,15 +357,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
357357
Py_UNICODE_ISDIGIT(ch) || \
358358
Py_UNICODE_ISNUMERIC(ch))
359359

360-
/* memcpy has a considerable setup overhead on many platforms; use a
361-
loop for short strings (the "16" below is pretty arbitrary) */
362-
#define Py_UNICODE_COPY(target, source, length) do\
363-
{Py_ssize_t i_; Py_UNICODE *t_ = (target); const Py_UNICODE *s_ = (source);\
364-
if (length > 16)\
365-
memcpy(t_, s_, (length)*sizeof(Py_UNICODE));\
366-
else\
367-
for (i_ = 0; i_ < (length); i_++) t_[i_] = s_[i_];\
368-
} while (0)
360+
#define Py_UNICODE_COPY(target, source, length) \
361+
Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
369362

370363
#define Py_UNICODE_FILL(target, value, length) do\
371364
{Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\

Objects/stringobject.c

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ static PyStringObject *nullstring;
2323
*/
2424
static PyObject *interned;
2525

26-
2726
/*
2827
For both PyString_FromString() and PyString_FromStringAndSize(), the
2928
parameter `size' denotes number of characters to allocate, not counting any
@@ -80,7 +79,7 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size)
8079
op->ob_shash = -1;
8180
op->ob_sstate = SSTATE_NOT_INTERNED;
8281
if (str != NULL)
83-
memcpy(op->ob_sval, str, size);
82+
Py_MEMCPY(op->ob_sval, str, size);
8483
op->ob_sval[size] = '\0';
8584
/* share short strings */
8685
if (size == 0) {
@@ -134,7 +133,7 @@ PyString_FromString(const char *str)
134133
PyObject_INIT_VAR(op, &PyString_Type, size);
135134
op->ob_shash = -1;
136135
op->ob_sstate = SSTATE_NOT_INTERNED;
137-
memcpy(op->ob_sval, str, size+1);
136+
Py_MEMCPY(op->ob_sval, str, size+1);
138137
/* share short strings */
139138
if (size == 0) {
140139
PyObject *t = (PyObject *)op;
@@ -162,7 +161,7 @@ PyString_FromFormatV(const char *format, va_list vargs)
162161
PyObject* string;
163162

164163
#ifdef VA_LIST_IS_ARRAY
165-
memcpy(count, vargs, sizeof(va_list));
164+
Py_MEMCPY(count, vargs, sizeof(va_list));
166165
#else
167166
#ifdef __va_copy
168167
__va_copy(count, vargs);
@@ -304,7 +303,7 @@ PyString_FromFormatV(const char *format, va_list vargs)
304303
i = strlen(p);
305304
if (n > 0 && i > n)
306305
i = n;
307-
memcpy(s, p, i);
306+
Py_MEMCPY(s, p, i);
308307
s += i;
309308
break;
310309
case 'p':
@@ -583,7 +582,7 @@ PyObject *PyString_DecodeEscape(const char *s,
583582
assert(PyString_Check(w));
584583
r = PyString_AS_STRING(w);
585584
rn = PyString_GET_SIZE(w);
586-
memcpy(p, r, rn);
585+
Py_MEMCPY(p, r, rn);
587586
p += rn;
588587
Py_DECREF(w);
589588
s = t;
@@ -967,8 +966,8 @@ string_concat(register PyStringObject *a, register PyObject *bb)
967966
PyObject_INIT_VAR(op, &PyString_Type, size);
968967
op->ob_shash = -1;
969968
op->ob_sstate = SSTATE_NOT_INTERNED;
970-
memcpy(op->ob_sval, a->ob_sval, a->ob_size);
971-
memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
969+
Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970+
Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
972971
op->ob_sval[size] = '\0';
973972
return (PyObject *) op;
974973
#undef b
@@ -1017,12 +1016,12 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
10171016
}
10181017
i = 0;
10191018
if (i < size) {
1020-
memcpy(op->ob_sval, a->ob_sval, a->ob_size);
1019+
Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
10211020
i = a->ob_size;
10221021
}
10231022
while (i < size) {
10241023
j = (i <= size-i) ? i : size-i;
1025-
memcpy(op->ob_sval+i, op->ob_sval, j);
1024+
Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
10261025
i += j;
10271026
}
10281027
return (PyObject *) op;
@@ -1808,10 +1807,10 @@ string_join(PyStringObject *self, PyObject *orig)
18081807
size_t n;
18091808
item = PySequence_Fast_GET_ITEM(seq, i);
18101809
n = PyString_GET_SIZE(item);
1811-
memcpy(p, PyString_AS_STRING(item), n);
1810+
Py_MEMCPY(p, PyString_AS_STRING(item), n);
18121811
p += n;
18131812
if (i < seqlen - 1) {
1814-
memcpy(p, sep, seplen);
1813+
Py_MEMCPY(p, sep, seplen);
18151814
p += seplen;
18161815
}
18171816
}
@@ -1851,7 +1850,6 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir)
18511850
Py_ssize_t sub_len;
18521851
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
18531852

1854-
/* XXX ssize_t i */
18551853
if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
18561854
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
18571855
return -2;
@@ -1865,6 +1863,8 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir)
18651863
(PyObject *)self, subobj, start, end, dir);
18661864
#endif
18671865
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1866+
/* XXX - the "expected a character buffer object" is pretty
1867+
confusing for a non-expert. Remap to something else? */
18681868
return -2;
18691869

18701870
if (dir > 0)
@@ -2131,7 +2131,7 @@ string_lower(PyStringObject *self)
21312131

21322132
s = PyString_AS_STRING(newobj);
21332133

2134-
memcpy(s, PyString_AS_STRING(self), n);
2134+
Py_MEMCPY(s, PyString_AS_STRING(self), n);
21352135

21362136
for (i = 0; i < n; i++) {
21372137
int c = Py_CHARMASK(s[i]);
@@ -2164,7 +2164,7 @@ string_upper(PyStringObject *self)
21642164

21652165
s = PyString_AS_STRING(newobj);
21662166

2167-
memcpy(s, PyString_AS_STRING(self), n);
2167+
Py_MEMCPY(s, PyString_AS_STRING(self), n);
21682168

21692169
for (i = 0; i < n; i++) {
21702170
int c = Py_CHARMASK(s[i]);
@@ -2615,18 +2615,18 @@ replace_interleave(PyStringObject *self,
26152615
/* TODO: special case single character, which doesn't need memcpy */
26162616

26172617
/* Lay the first one down (guaranteed this will occur) */
2618-
memcpy(result_s, to_s, to_len);
2618+
Py_MEMCPY(result_s, to_s, to_len);
26192619
result_s += to_len;
26202620
count -= 1;
26212621

26222622
for (i=0; i<count; i++) {
26232623
*result_s++ = *self_s++;
2624-
memcpy(result_s, to_s, to_len);
2624+
Py_MEMCPY(result_s, to_s, to_len);
26252625
result_s += to_len;
26262626
}
26272627

26282628
/* Copy the rest of the original string */
2629-
memcpy(result_s, self_s, self_len-i);
2629+
Py_MEMCPY(result_s, self_s, self_len-i);
26302630

26312631
return result;
26322632
}
@@ -2665,11 +2665,11 @@ replace_delete_single_character(PyStringObject *self,
26652665
next = findchar(start, end-start, from_c);
26662666
if (next == NULL)
26672667
break;
2668-
memcpy(result_s, start, next-start);
2668+
Py_MEMCPY(result_s, start, next-start);
26692669
result_s += (next-start);
26702670
start = next+1;
26712671
}
2672-
memcpy(result_s, start, end-start);
2672+
Py_MEMCPY(result_s, start, end-start);
26732673

26742674
return result;
26752675
}
@@ -2719,12 +2719,12 @@ replace_delete_substring(PyStringObject *self, PyStringObject *from,
27192719
break;
27202720
next = start + offset;
27212721

2722-
memcpy(result_s, start, next-start);
2722+
Py_MEMCPY(result_s, start, next-start);
27232723

27242724
result_s += (next-start);
27252725
start = next+from_len;
27262726
}
2727-
memcpy(result_s, start, end-start);
2727+
Py_MEMCPY(result_s, start, end-start);
27282728
return result;
27292729
}
27302730

@@ -2754,7 +2754,7 @@ replace_single_character_in_place(PyStringObject *self,
27542754
if (result == NULL)
27552755
return NULL;
27562756
result_s = PyString_AS_STRING(result);
2757-
memcpy(result_s, self_s, self_len);
2757+
Py_MEMCPY(result_s, self_s, self_len);
27582758

27592759
/* change everything in-place, starting with this one */
27602760
start = result_s + (next-self_s);
@@ -2808,12 +2808,12 @@ replace_substring_in_place(PyStringObject *self,
28082808
if (result == NULL)
28092809
return NULL;
28102810
result_s = PyString_AS_STRING(result);
2811-
memcpy(result_s, self_s, self_len);
2811+
Py_MEMCPY(result_s, self_s, self_len);
28122812

28132813

28142814
/* change everything in-place, starting with this one */
28152815
start = result_s + offset;
2816-
memcpy(start, to_s, from_len);
2816+
Py_MEMCPY(start, to_s, from_len);
28172817
start += from_len;
28182818
end = result_s + self_len;
28192819

@@ -2823,7 +2823,7 @@ replace_substring_in_place(PyStringObject *self,
28232823
0, end-start, FORWARD);
28242824
if (offset==-1)
28252825
break;
2826-
memcpy(start+offset, to_s, from_len);
2826+
Py_MEMCPY(start+offset, to_s, from_len);
28272827
start += offset+from_len;
28282828
}
28292829

@@ -2883,20 +2883,20 @@ replace_single_character(PyStringObject *self,
28832883

28842884
if (next == start) {
28852885
/* replace with the 'to' */
2886-
memcpy(result_s, to_s, to_len);
2886+
Py_MEMCPY(result_s, to_s, to_len);
28872887
result_s += to_len;
28882888
start += 1;
28892889
} else {
28902890
/* copy the unchanged old then the 'to' */
2891-
memcpy(result_s, start, next-start);
2891+
Py_MEMCPY(result_s, start, next-start);
28922892
result_s += (next-start);
2893-
memcpy(result_s, to_s, to_len);
2893+
Py_MEMCPY(result_s, to_s, to_len);
28942894
result_s += to_len;
28952895
start = next+1;
28962896
}
28972897
}
28982898
/* Copy the remainder of the string */
2899-
memcpy(result_s, start, end-start);
2899+
Py_MEMCPY(result_s, start, end-start);
29002900

29012901
return result;
29022902
}
@@ -2958,20 +2958,20 @@ replace_substring(PyStringObject *self,
29582958
next = start+offset;
29592959
if (next == start) {
29602960
/* replace with the 'to' */
2961-
memcpy(result_s, to_s, to_len);
2961+
Py_MEMCPY(result_s, to_s, to_len);
29622962
result_s += to_len;
29632963
start += from_len;
29642964
} else {
29652965
/* copy the unchanged old then the 'to' */
2966-
memcpy(result_s, start, next-start);
2966+
Py_MEMCPY(result_s, start, next-start);
29672967
result_s += (next-start);
2968-
memcpy(result_s, to_s, to_len);
2968+
Py_MEMCPY(result_s, to_s, to_len);
29692969
result_s += to_len;
29702970
start = next+from_len;
29712971
}
29722972
}
29732973
/* Copy the remainder of the string */
2974-
memcpy(result_s, start, end-start);
2974+
Py_MEMCPY(result_s, start, end-start);
29752975

29762976
return result;
29772977
}
@@ -3358,7 +3358,7 @@ pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
33583358
if (u) {
33593359
if (left)
33603360
memset(PyString_AS_STRING(u), fill, left);
3361-
memcpy(PyString_AS_STRING(u) + left,
3361+
Py_MEMCPY(PyString_AS_STRING(u) + left,
33623362
PyString_AS_STRING(self),
33633363
PyString_GET_SIZE(self));
33643364
if (right)
@@ -3896,7 +3896,7 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
38963896
n = PyString_GET_SIZE(tmp);
38973897
pnew = type->tp_alloc(type, n);
38983898
if (pnew != NULL) {
3899-
memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3899+
Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
39003900
((PyStringObject *)pnew)->ob_shash =
39013901
((PyStringObject *)tmp)->ob_shash;
39023902
((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
@@ -4792,7 +4792,7 @@ PyString_Format(PyObject *format, PyObject *args)
47924792
*res++ = *pbuf++;
47934793
}
47944794
}
4795-
memcpy(res, pbuf, len);
4795+
Py_MEMCPY(res, pbuf, len);
47964796
res += len;
47974797
rescnt -= len;
47984798
while (--width >= len) {

0 commit comments

Comments
 (0)