Commit fb660d7

modified CPU ccv_convnet_classify implementation and start to gather
data for CPU version
1 parent a0b477e · commit fb660d7

File tree

4 files changed: +121 -99 lines changed

bin/cnnclassify.c

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,7 @@ int main(int argc, char** argv)
 	chdir(argv[3]);
 	if(r)
 	{
-		ccv_convnet_t* convnet = ccv_convnet_read(1, argv[2]);
+		ccv_convnet_t* convnet = ccv_convnet_read(0, argv[2]);
 		int i, j, k = 0;
 		ccv_dense_matrix_t* images[32] = {
 			0
@@ -63,6 +63,7 @@ int main(int argc, char** argv)
 			assert(image != 0);
 			images[k % 32] = 0;
 			ccv_convnet_input_formation(convnet, image, images + (k % 32));
+			ccv_matrix_free(image);
 			++k;
 			if (k % 32 == 0)
 			{
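
Two fixes here: ccv_convnet_read(1, ...) becomes ccv_convnet_read(0, ...), which, going by the use_cwc_accel branch visible in ccv_convnet_classify below, keeps classification on the CPU path; and the source image is now freed once ccv_convnet_input_formation has produced the network input, plugging a per-image leak. For context, a minimal single-image driver in the same spirit (a sketch, not part of this commit; the CCV_IO flags and argument layout are assumptions):

#include <ccv.h>
#include <stdio.h>

int main(int argc, char** argv)
{
	/* argv[1]: image file, argv[2]: trained convnet file */
	ccv_convnet_t* convnet = ccv_convnet_read(0 /* CPU path, per this commit */, argv[2]);
	ccv_dense_matrix_t* image = 0;
	ccv_read(argv[1], &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
	ccv_dense_matrix_t* input = 0;
	ccv_convnet_input_formation(convnet, image, &input);
	ccv_matrix_free(image); /* source image no longer needed, as in the fix above */
	ccv_array_t* rank = 0;
	ccv_convnet_classify(convnet, &input, 1 /* symmetric */, &rank, 5 /* tops */, 1 /* batch */);
	int i;
	for (i = 0; i < rank->rnum; i++)
	{
		ccv_classification_t* c = (ccv_classification_t*)ccv_array_get(rank, i);
		printf("%d %f\n", c->id, c->confidence);
	}
	ccv_array_free(rank);
	ccv_matrix_free(input);
	ccv_convnet_free(convnet);
	return 0;
}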

lib/ccv_convnet.c

Lines changed: 116 additions & 96 deletions
@@ -12,7 +12,7 @@
 #endif
 #include "3rdparty/sqlite3/sqlite3.h"
 
-inline static void _ccv_convnet_layer_deduce_output_format(ccv_convnet_layer_t* layer, int input_rows, int input_cols, int* rows, int* cols, int* partition)
+inline static void _ccv_convnet_layer_derive_output(ccv_convnet_layer_t* layer, int input_rows, int input_cols, int* rows, int* cols, int* partition)
 {
 	assert(rows != 0 && cols != 0);
 	switch(layer->type)
@@ -138,7 +138,7 @@ int ccv_convnet_verify(ccv_convnet_t* convnet, int output)
 		ccv_convnet_layer_t* layer = convnet->layers + i;
 		if (i > 0 && (out_rows != layer->input.matrix.rows || out_cols != layer->input.matrix.cols))
 			return -1;
-		_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
+		_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
 	}
 	if (out_rows * out_cols != output)
 		return -1;
@@ -150,7 +150,7 @@ int ccv_convnet_verify(ccv_convnet_t* convnet, int output)
 static void _ccv_convnet_convolutional_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
 {
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int ch = layer->net.convolutional.channels;
 	int count = layer->net.convolutional.count;
 	int strides = layer->net.convolutional.strides;
@@ -236,7 +236,7 @@ static void _ccv_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* lay
 static void _ccv_convnet_rnorm_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, ccv_dense_matrix_t** denoms)
 {
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int size = layer->net.rnorm.size;
 	float kappa = layer->net.rnorm.kappa;
 	float alpha = layer->net.rnorm.alpha;
@@ -295,7 +295,7 @@ static void _ccv_convnet_rnorm_forward_propagate(ccv_convnet_layer_t* layer, ccv
 static void _ccv_convnet_max_pool_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
 {
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int size = layer->net.pool.size;
 	int strides = layer->net.pool.strides;
 	int border = layer->net.pool.border;
@@ -334,7 +334,7 @@ static void _ccv_convnet_max_pool_forward_propagate(ccv_convnet_layer_t* layer,
 static void _ccv_convnet_average_pool_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
 {
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int size = layer->net.pool.size;
 	int strides = layer->net.pool.strides;
 	int border = layer->net.pool.border;
@@ -492,138 +492,158 @@ void ccv_convnet_encode(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, ccv_dens
 #endif
 }
 
+// find the layer for scanning (it is the last convolutional layer)
+static int _ccv_convnet_find_scan(ccv_convnet_t* convnet)
+{
+	int i;
+	ccv_convnet_layer_t* layers = convnet->layers;
+	for (i = convnet->count - 1; i >= 0; i--)
+		if (layers[i].type == CCV_CONVNET_CONVOLUTIONAL)
+			return i;
+	return -1;
+}
+
+static int _ccv_convnet_derive_scale(ccv_convnet_t* convnet, int scan)
+{
+	int i, scale = 1;
+	for (i = scan; i >= 0; i--)
+	{
+		ccv_convnet_layer_t* layer = convnet->layers + i;
+		switch (layer->type)
+		{
+			case CCV_CONVNET_CONVOLUTIONAL:
+				scale *= layer->net.convolutional.strides;
+				break;
+			case CCV_CONVNET_MAX_POOL:
+			case CCV_CONVNET_AVERAGE_POOL:
+				scale *= layer->net.pool.strides;
+				break;
+		}
+	}
+	return scale;
+}
+
+static int _ccv_convnet_find_full_connect(ccv_convnet_t* convnet)
+{
+	int i;
+	for (i = 0; i < convnet->count; i++)
+		if (convnet->layers[i].type == CCV_CONVNET_FULL_CONNECT)
+			return i;
+	return -1;
+}
+
 void ccv_convnet_classify(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, int symmetric, ccv_array_t** ranks, int tops, int batch)
 {
 #ifdef HAVE_CUDA
 	if (convnet->use_cwc_accel)
 		cwc_convnet_classify(convnet, a, symmetric, ranks, tops, batch);
 	else {
 #endif
-	int i, j, k;
+	int i, j, k, t;
 	ccv_dense_matrix_t** b = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (convnet->count + 1));
+	int scan = _ccv_convnet_find_scan(convnet);
+	int scale = _ccv_convnet_derive_scale(convnet, scan);
+	int full_connect = _ccv_convnet_find_full_connect(convnet);
+	assert(scan >= 0 && scan < convnet->count);
+	assert(full_connect >= 0 && full_connect < convnet->count);
 	memset(b, 0, sizeof(ccv_dense_matrix_t*) * (convnet->count + 1));
-	int last = -1;
-	for (i = 0; i < convnet->count; i++)
-		// find the first full connect layer
-		if (convnet->layers[i].type == CCV_CONVNET_FULL_CONNECT)
-		{
-			last = i;
-			break;
-		}
-	int second = last;
-	assert(last >= 0);
-	for (i = last - 1; i >= 0; i--)
-		// find the last convolutional layer
-		if (convnet->layers[i].type == CCV_CONVNET_CONVOLUTIONAL)
-		{
-			last = i + 1;
-			break;
-		}
-	assert(last >= 0 && last < convnet->count);
 	for (i = 0; i < batch; i++)
 	{
 		assert(CCV_GET_CHANNEL(a[i]->type) == convnet->channels);
-		assert(a[i]->rows == convnet->input.height);
-		assert(a[i]->cols == convnet->input.width);
-		ccv_subtract(a[i], convnet->mean_activity, (ccv_matrix_t**)b, CCV_32F);
-		// doing the first few layers until the first full connect layer
-		int previous_rows = convnet->input.height;
-		int previous_cols = convnet->input.width;
-		for (j = 0; j < last; j++)
-		{
-			ccv_convnet_layer_t* layer = convnet->layers + j;
-			_ccv_convnet_layer_forward_propagate(layer, b[j], b + j + 1, 0);
-			int partition;
-			_ccv_convnet_layer_deduce_output_format(layer, previous_rows, previous_cols, &previous_rows, &previous_cols, &partition);
-			ccv_matrix_free(b[j]);
-			b[j] = 0;
-		}
-		int c = (!!symmetric + 1) * 5;
-		ccv_convnet_layer_t* start_layer = convnet->layers + last;
-		int d[5][2] = {
-			{(b[last]->cols - start_layer->input.matrix.cols) / 2, (b[last]->rows - start_layer->input.matrix.rows) / 2}, // center
-			{0, 0}, // left top corner
-			{b[last]->cols - start_layer->input.matrix.cols, 0}, // right top corner
-			{0, b[last]->rows - start_layer->input.matrix.rows}, // left bottom corner
-			{b[last]->cols - start_layer->input.matrix.cols, b[last]->rows - start_layer->input.matrix.rows}, // right bottom corner
-		};
-		ccv_dense_matrix_t* multi = ccv_dense_matrix_new(c, convnet->layers[second].input.node.count, CCV_32F | CCV_C1, 0, 0);
-		// for the last convolutional layer, we sample the layer at different locations (and horizontal mirrors), and average all of them
-		for (k = 0; k < 5; k++)
+		assert(a[i]->rows == convnet->input.height || a[i]->cols == convnet->input.width);
+		assert(a[i]->rows >= convnet->input.height && a[i]->cols >= convnet->input.width);
+		// find optimal rows and cols to slice to
+		int rows = convnet->rows + ((a[i]->rows - convnet->rows) / scale) * scale;
+		int cols = convnet->cols + ((a[i]->cols - convnet->cols) / scale) * scale;
+		assert(rows == convnet->input.height || cols == convnet->input.width);
+		assert(rows <= a[i]->rows && cols <= a[i]->cols);
+		ccv_dense_matrix_t* slice = 0;
+		ccv_slice(a[i], (ccv_matrix_t**)&slice, CCV_32F, (a[i]->rows - rows) / 2, (a[i]->cols - cols) / 2, rows, cols);
+		ccv_dense_matrix_t* mean_activity = 0;
+		// scale mean activity up to be subtractable (from this point on, the CPU implementation is an approximation of the GPU implementation)
+		ccv_resample(convnet->mean_activity, &mean_activity, 0, rows, cols, CCV_INTER_CUBIC);
+		ccv_subtract(slice, mean_activity, (ccv_matrix_t**)b, CCV_32F);
+		ccv_matrix_free(mean_activity);
+		ccv_matrix_free(slice);
+		// doing the first few layers until the first scan layer
+		int out_rows, out_cols, out_partition;
+		ccv_dense_matrix_t* c = ccv_dense_matrix_new(5 * (!!symmetric + 1), convnet->layers[full_connect].input.node.count, CCV_32F | CCV_C1, 0, 0);
+		for (t = 0; t <= !!symmetric; t++)
 		{
-			int x = d[k][0], y = d[k][1];
-			ccv_dense_matrix_t* input = 0;
-			ccv_slice(b[last], (ccv_matrix_t**)&input, CCV_32F, y, x, start_layer->input.matrix.rows, start_layer->input.matrix.cols);
-			ccv_dense_matrix_t* full = b[last];
-			b[last] = input;
-			b[second] = ccv_dense_matrix_new(convnet->layers[second].input.matrix.rows, convnet->layers[second].input.matrix.cols, CCV_NO_DATA_ALLOC | CCV_32F | convnet->layers[second].input.matrix.channels, multi->data.f32 + k * convnet->layers[second].input.node.count, 0);
-			for (j = last; j < second; j++)
+			rows = b[0]->rows, cols = b[0]->cols;
+			for (j = 0; j < scan + 1; j++)
 			{
 				ccv_convnet_layer_t* layer = convnet->layers + j;
+				_ccv_convnet_layer_derive_output(layer, rows, cols, &out_rows, &out_cols, &out_partition);
 				_ccv_convnet_layer_forward_propagate(layer, b[j], b + j + 1, 0);
-				if (j > last)
-				{
+				if (j > 0)
 					ccv_matrix_free(b[j]);
-					b[j] = 0;
-				}
+				rows = out_rows, cols = out_cols;
 			}
-			ccv_matrix_free(b[second]);
-			b[second] = 0;
-			if (symmetric)
+			int offsets[5][2] = {
+				{0, 0},
+				{cols - convnet->layers[scan + 1].input.matrix.cols, 0},
+				{(cols - convnet->layers[scan + 1].input.matrix.cols) / 2, (rows - convnet->layers[scan + 1].input.matrix.rows) / 2},
+				{0, rows - convnet->layers[scan + 1].input.matrix.rows},
+				{cols - convnet->layers[scan + 1].input.matrix.cols, rows - convnet->layers[scan + 1].input.matrix.rows},
+			};
+			for (k = 0; k < 5; k++)
 			{
-				ccv_flip(input, &input, 0, CCV_FLIP_X);
-				b[second] = ccv_dense_matrix_new(convnet->layers[second].input.matrix.rows, convnet->layers[second].input.matrix.cols, CCV_NO_DATA_ALLOC | CCV_32F | convnet->layers[second].input.matrix.channels, multi->data.f32 + (k + 5) * convnet->layers[second].input.node.count, 0);
-				// horizontal mirroring
-				for (j = last; j < second; j++)
+				ccv_dense_matrix_t* input = 0;
+				ccv_convnet_layer_t* layer = convnet->layers + scan + 1;
+				ccv_slice(b[scan + 1], (ccv_matrix_t**)&input, CCV_32F, offsets[k][1], offsets[k][0], layer->input.matrix.rows, layer->input.matrix.cols);
+				// copy the last layer for full connect compute
+				b[full_connect] = ccv_dense_matrix_new(convnet->layers[full_connect].input.matrix.rows, convnet->layers[full_connect].input.matrix.cols, CCV_NO_DATA_ALLOC | CCV_32F | convnet->layers[full_connect].input.matrix.channels, c->data.f32 + (t * 5 + k) * convnet->layers[full_connect].input.node.count, 0);
+				for (j = scan + 1; j < full_connect; j++)
 				{
-					ccv_convnet_layer_t* layer = convnet->layers + j;
-					_ccv_convnet_layer_forward_propagate(layer, b[j], b + j + 1, 0);
-					if (j > last)
-					{
+					layer = convnet->layers + j;
+					_ccv_convnet_layer_forward_propagate(layer, j > scan + 1 ? b[j] : input, b + j + 1, 0);
+					if (j > scan + 1)
 						ccv_matrix_free(b[j]);
-						b[j] = 0;
-					}
+					else
+						ccv_matrix_free(input);
 				}
-				ccv_matrix_free(b[second]);
-				b[second] = 0;
+				ccv_matrix_free(b[full_connect]);
+				// set it to 0
+				memset(b + scan + 2, 0, sizeof(ccv_dense_matrix_t*) * (full_connect - scan - 1));
 			}
-			ccv_matrix_free(input);
-			b[last] = full;
+			ccv_matrix_free(b[scan + 1]);
+			memset(b + 1, 0, sizeof(ccv_dense_matrix_t*) * (scan + 1));
+			ccv_flip(b[0], &b[0], 0, CCV_FLIP_X);
 		}
-		ccv_matrix_free(b[last]);
-		b[last] = 0;
-		b[second] = multi;
-		for (j = second; j < convnet->count; j++)
+		ccv_matrix_free(b[0]);
+		// now have everything in c, do the last full connect propagate
+		b[full_connect] = c;
+		for (j = full_connect; j < convnet->count; j++)
 		{
 			ccv_convnet_layer_t* layer = convnet->layers + j;
 			assert(layer->type == CCV_CONVNET_FULL_CONNECT);
 			_ccv_convnet_full_connect_forward_propagate_parallel(layer, b[j], b + j + 1);
 			ccv_matrix_free(b[j]);
-			b[j] = 0;
 		}
 		ccv_dense_matrix_t* softmax = 0;
 		_ccv_convnet_compute_softmax_parallel(b[convnet->count], &softmax, 0);
 		ccv_matrix_free(b[convnet->count]);
-		b[convnet->count] = 0;
 		ranks[i] = ccv_array_new(sizeof(ccv_classification_t), tops, 0);
 		float* r = softmax->data.f32;
 		assert(tops <= softmax->cols);
 		for (j = 0; j < tops; j++)
 		{
-			float maxr = -1;
-			int id = -1;
+			float max_val = -1;
+			int max_idx = -1;
 			for (k = 0; k < softmax->cols; k++)
-				if (r[k] >= 0 && r[k] > maxr)
-					maxr = r[k], id = k;
-			assert(id >= 0);
-			r[id] = -1;
+				if (r[k] >= 0 && r[k] > max_val)
+					max_val = r[k], max_idx = k;
+			assert(max_idx >= 0);
+			r[max_idx] = -1;
 			ccv_classification_t classification = {
-				.id = id,
-				.confidence = maxr / c,
+				.id = max_idx,
+				.confidence = max_val / ((!!symmetric + 1) * 5),
 			};
 			ccv_array_push(ranks[i], &classification);
 		}
 		ccv_matrix_free(softmax);
+		memset(b, 0, sizeof(ccv_dense_matrix_t*) * (convnet->count + 1));
 	}
 #ifdef HAVE_CUDA
 	}
@@ -641,7 +661,7 @@ static void _ccv_convnet_convolutional_backward_propagate(ccv_convnet_layer_t* l
 	// x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
 	// note that y (the output from forward prop) is not included because the full connect net is simple enough that we don't need it
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int ch = layer->net.convolutional.channels;
 	int count = layer->net.convolutional.count;
 	int strides = layer->net.convolutional.strides;
@@ -796,7 +816,7 @@ static void _ccv_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* la
 static void _ccv_convnet_rnorm_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* n, ccv_dense_matrix_t* m, ccv_dense_matrix_t* denoms, ccv_dense_matrix_t** b)
 {
 	int rows, cols, partition;
-	_ccv_convnet_layer_deduce_output_format(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
+	_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
 	int size = layer->net.rnorm.size;
 	float alpha = layer->net.rnorm.alpha;
 	float beta = layer->net.rnorm.beta;
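
The net effect of the rewrite: instead of requiring the input to match the network size exactly and cropping around the last convolutional layer's input, the CPU path now accepts any input at least as large as the network, center-slices it down to the nearest size on the stride grid of the scan layer (the last convolutional layer, with scale the product of all strides up to it), runs the layers up to the scan layer once per orientation, slices the five offsets (top-left, top-right, center, bottom-left, bottom-right) out of the scan output, and averages the 5 (or 10, with symmetric flips) softmax results by dividing the confidence by (!!symmetric + 1) * 5. A standalone sketch of the size-snapping arithmetic (the 225 network size and the cumulative stride of 16 are illustrative assumptions, not values from this commit):

#include <assert.h>
#include <stdio.h>

/* largest base + k * scale that still fits inside `actual`;
 * mirrors the rows/cols computation in ccv_convnet_classify */
static int snap_to_grid(int actual, int base, int scale)
{
	return base + ((actual - base) / scale) * scale;
}

int main(void)
{
	int scale = 16; /* assumed product of strides up to the scan layer */
	assert(snap_to_grid(225, 225, scale) == 225); /* exact fit stays put */
	assert(snap_to_grid(250, 225, scale) == 241); /* one stride step fits, 9 rows sliced off */
	assert(snap_to_grid(240, 225, scale) == 225); /* 15 extra rows are less than a step, dropped */
	printf("grid-snapping checks passed\n");
	return 0;
}

The (a[i]->rows - rows) / 2 and (a[i]->cols - cols) / 2 terms in the diff then center the slice, so whatever is dropped comes evenly off both borders.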

lib/ccv_icf.c

Lines changed: 3 additions & 2 deletions
@@ -20,8 +20,9 @@ const ccv_icf_param_t ccv_icf_default_params = {
 // provides a very crude approximation
 static inline float cbrt_5_f32(float f)
 {
-	unsigned int* p = (unsigned int*)(&f);
-	*p = *p / 3 + 709921077;
+	const uint32_t b1 = 709921077;
+	uint32_t* p = (uint32_t*)(&f);
+	*p = *p / 3 + b1;
 	return f;
 }
 
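The ccv_icf.c tweak is cosmetic: the magic constant gets a name and the pointer becomes the fixed-width uint32_t, but the trick is unchanged. Dividing the float's bit pattern by 3 and adding a bias roughly divides the exponent by 3, which is a crude cube root, the same family of hack as the fast inverse square root. A standalone sanity check (a sketch, not from the commit; test values are arbitrary, link with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

// copied from the diff: crude cube root via integer exponent manipulation
static inline float cbrt_5_f32(float f)
{
	const uint32_t b1 = 709921077;
	uint32_t* p = (uint32_t*)(&f);
	*p = *p / 3 + b1;
	return f;
}

int main(void)
{
	float xs[] = { 0.5f, 2.0f, 27.0f, 1000.0f };
	int i;
	for (i = 0; i < 4; i++)
		printf("x = %8.2f  approx = %8.4f  cbrtf = %8.4f\n", xs[i], cbrt_5_f32(xs[i]), cbrtf(xs[i]));
	return 0;
}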

samples/dex.png

91.5 KB
