1212#endif
1313#include "3rdparty/sqlite3/sqlite3.h"
1414
15- inline static void _ccv_convnet_layer_deduce_output_format (ccv_convnet_layer_t * layer , int input_rows , int input_cols , int * rows , int * cols , int * partition )
15+ inline static void _ccv_convnet_layer_derive_output (ccv_convnet_layer_t * layer , int input_rows , int input_cols , int * rows , int * cols , int * partition )
1616{
1717 assert (rows != 0 && cols != 0 );
1818 switch (layer -> type )
@@ -138,7 +138,7 @@ int ccv_convnet_verify(ccv_convnet_t* convnet, int output)
138138 ccv_convnet_layer_t * layer = convnet -> layers + i ;
139139 if (i > 0 && (out_rows != layer -> input .matrix .rows || out_cols != layer -> input .matrix .cols ))
140140 return -1 ;
141- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & out_rows , & out_cols , & out_partition );
141+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & out_rows , & out_cols , & out_partition );
142142 }
143143 if (out_rows * out_cols != output )
144144 return -1 ;
@@ -150,7 +150,7 @@ int ccv_convnet_verify(ccv_convnet_t* convnet, int output)
150150static void _ccv_convnet_convolutional_forward_propagate (ccv_convnet_layer_t * layer , ccv_dense_matrix_t * a , ccv_dense_matrix_t * * b )
151151{
152152 int rows , cols , partition ;
153- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
153+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
154154 int ch = layer -> net .convolutional .channels ;
155155 int count = layer -> net .convolutional .count ;
156156 int strides = layer -> net .convolutional .strides ;
@@ -236,7 +236,7 @@ static void _ccv_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* lay
236236static void _ccv_convnet_rnorm_forward_propagate (ccv_convnet_layer_t * layer , ccv_dense_matrix_t * a , ccv_dense_matrix_t * * b , ccv_dense_matrix_t * * denoms )
237237{
238238 int rows , cols , partition ;
239- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
239+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
240240 int size = layer -> net .rnorm .size ;
241241 float kappa = layer -> net .rnorm .kappa ;
242242 float alpha = layer -> net .rnorm .alpha ;
@@ -295,7 +295,7 @@ static void _ccv_convnet_rnorm_forward_propagate(ccv_convnet_layer_t* layer, ccv
295295static void _ccv_convnet_max_pool_forward_propagate (ccv_convnet_layer_t * layer , ccv_dense_matrix_t * a , ccv_dense_matrix_t * * b )
296296{
297297 int rows , cols , partition ;
298- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
298+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
299299 int size = layer -> net .pool .size ;
300300 int strides = layer -> net .pool .strides ;
301301 int border = layer -> net .pool .border ;
@@ -334,7 +334,7 @@ static void _ccv_convnet_max_pool_forward_propagate(ccv_convnet_layer_t* layer,
334334static void _ccv_convnet_average_pool_forward_propagate (ccv_convnet_layer_t * layer , ccv_dense_matrix_t * a , ccv_dense_matrix_t * * b )
335335{
336336 int rows , cols , partition ;
337- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
337+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
338338 int size = layer -> net .pool .size ;
339339 int strides = layer -> net .pool .strides ;
340340 int border = layer -> net .pool .border ;
@@ -492,138 +492,158 @@ void ccv_convnet_encode(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, ccv_dens
492492#endif
493493}
494494
495+ // find the layer for scanning (it is the last convolutional layer)
496+ static int _ccv_convnet_find_scan (ccv_convnet_t * convnet )
497+ {
498+ int i ;
499+ ccv_convnet_layer_t * layers = convnet -> layers ;
500+ for (i = convnet -> count - 1 ; i >= 0 ; i -- )
501+ if (layers [i ].type == CCV_CONVNET_CONVOLUTIONAL )
502+ return i ;
503+ return -1 ;
504+ }
505+
506+ static int _ccv_convnet_derive_scale (ccv_convnet_t * convnet , int scan )
507+ {
508+ int i , scale = 1 ;
509+ for (i = scan ; i >= 0 ; i -- )
510+ {
511+ ccv_convnet_layer_t * layer = convnet -> layers + i ;
512+ switch (layer -> type )
513+ {
514+ case CCV_CONVNET_CONVOLUTIONAL :
515+ scale *= layer -> net .convolutional .strides ;
516+ break ;
517+ case CCV_CONVNET_MAX_POOL :
518+ case CCV_CONVNET_AVERAGE_POOL :
519+ scale *= layer -> net .pool .strides ;
520+ break ;
521+ }
522+ }
523+ return scale ;
524+ }
525+
526+ static int _ccv_convnet_find_full_connect (ccv_convnet_t * convnet )
527+ {
528+ int i ;
529+ for (i = 0 ; i < convnet -> count ; i ++ )
530+ if (convnet -> layers [i ].type == CCV_CONVNET_FULL_CONNECT )
531+ return i ;
532+ return -1 ;
533+ }
534+
495535void ccv_convnet_classify (ccv_convnet_t * convnet , ccv_dense_matrix_t * * a , int symmetric , ccv_array_t * * ranks , int tops , int batch )
496536{
497537#ifdef HAVE_CUDA
498538 if (convnet -> use_cwc_accel )
499539 cwc_convnet_classify (convnet , a , symmetric , ranks , tops , batch );
500540 else {
501541#endif
502- int i , j , k ;
542+ int i , j , k , t ;
503543 ccv_dense_matrix_t * * b = (ccv_dense_matrix_t * * )alloca (sizeof (ccv_dense_matrix_t * ) * (convnet -> count + 1 ));
544+ int scan = _ccv_convnet_find_scan (convnet );
545+ int scale = _ccv_convnet_derive_scale (convnet , scan );
546+ int full_connect = _ccv_convnet_find_full_connect (convnet );
547+ assert (scan >= 0 && scan < convnet -> count );
548+ assert (full_connect >= 0 && full_connect < convnet -> count );
504549 memset (b , 0 , sizeof (ccv_dense_matrix_t * ) * (convnet -> count + 1 ));
505- int last = -1 ;
506- for (i = 0 ; i < convnet -> count ; i ++ )
507- // find the first full connect layer
508- if (convnet -> layers [i ].type == CCV_CONVNET_FULL_CONNECT )
509- {
510- last = i ;
511- break ;
512- }
513- int second = last ;
514- assert (last >= 0 );
515- for (i = last - 1 ; i >= 0 ; i -- )
516- // find the last convolutional layer
517- if (convnet -> layers [i ].type == CCV_CONVNET_CONVOLUTIONAL )
518- {
519- last = i + 1 ;
520- break ;
521- }
522- assert (last >= 0 && last < convnet -> count );
523550 for (i = 0 ; i < batch ; i ++ )
524551 {
525552 assert (CCV_GET_CHANNEL (a [i ]-> type ) == convnet -> channels );
526- assert (a [i ]-> rows == convnet -> input .height );
527- assert (a [i ]-> cols == convnet -> input .width );
528- ccv_subtract (a [i ], convnet -> mean_activity , (ccv_matrix_t * * )b , CCV_32F );
529- // doing the first few layers until the first full connect layer
530- int previous_rows = convnet -> input .height ;
531- int previous_cols = convnet -> input .width ;
532- for (j = 0 ; j < last ; j ++ )
533- {
534- ccv_convnet_layer_t * layer = convnet -> layers + j ;
535- _ccv_convnet_layer_forward_propagate (layer , b [j ], b + j + 1 , 0 );
536- int partition ;
537- _ccv_convnet_layer_deduce_output_format (layer , previous_rows , previous_cols , & previous_rows , & previous_cols , & partition );
538- ccv_matrix_free (b [j ]);
539- b [j ] = 0 ;
540- }
541- int c = (!!symmetric + 1 ) * 5 ;
542- ccv_convnet_layer_t * start_layer = convnet -> layers + last ;
543- int d [5 ][2 ] = {
544- {(b [last ]-> cols - start_layer -> input .matrix .cols ) / 2 , (b [last ]-> rows - start_layer -> input .matrix .rows ) / 2 }, // center
545- {0 , 0 }, // left top corner
546- {b [last ]-> cols - start_layer -> input .matrix .cols , 0 }, // right top corner
547- {0 , b [last ]-> rows - start_layer -> input .matrix .rows }, // left bottom corner
548- {b [last ]-> cols - start_layer -> input .matrix .cols , b [last ]-> rows - start_layer -> input .matrix .rows }, // right bottom corner
549- };
550- ccv_dense_matrix_t * multi = ccv_dense_matrix_new (c , convnet -> layers [second ].input .node .count , CCV_32F | CCV_C1 , 0 , 0 );
551- // for the last convolutional layer, we sample the layer at different locations (and horizontal mirrors), and average all of them
552- for (k = 0 ; k < 5 ; k ++ )
553+ assert (a [i ]-> rows == convnet -> input .height || a [i ]-> cols == convnet -> input .width );
554+ assert (a [i ]-> rows >= convnet -> input .height && a [i ]-> cols >= convnet -> input .width );
555+ // find optimal rows and cols to slice to
556+ int rows = convnet -> rows + ((a [i ]-> rows - convnet -> rows ) / scale ) * scale ;
557+ int cols = convnet -> cols + ((a [i ]-> cols - convnet -> cols ) / scale ) * scale ;
558+ assert (rows == convnet -> input .height || cols == convnet -> input .width );
559+ assert (rows <= a [i ]-> rows && cols <= a [i ]-> cols );
560+ ccv_dense_matrix_t * slice = 0 ;
561+ ccv_slice (a [i ], (ccv_matrix_t * * )& slice , CCV_32F , (a [i ]-> rows - rows ) / 2 , (a [i ]-> cols - cols ) / 2 , rows , cols );
562+ ccv_dense_matrix_t * mean_activity = 0 ;
563+ // scale mean activity up to be substractable (from this one, the CPU implementation is an approximation of GPU implementation)
564+ ccv_resample (convnet -> mean_activity , & mean_activity , 0 , rows , cols , CCV_INTER_CUBIC );
565+ ccv_subtract (slice , mean_activity , (ccv_matrix_t * * )b , CCV_32F );
566+ ccv_matrix_free (mean_activity );
567+ ccv_matrix_free (slice );
568+ // doing the first few layers until the first scan layer
569+ int out_rows , out_cols , out_partition ;
570+ ccv_dense_matrix_t * c = ccv_dense_matrix_new (5 * (!!symmetric + 1 ), convnet -> layers [full_connect ].input .node .count , CCV_32F | CCV_C1 , 0 , 0 );
571+ for (t = 0 ; t <= !!symmetric ; t ++ )
553572 {
554- int x = d [k ][0 ], y = d [k ][1 ];
555- ccv_dense_matrix_t * input = 0 ;
556- ccv_slice (b [last ], (ccv_matrix_t * * )& input , CCV_32F , y , x , start_layer -> input .matrix .rows , start_layer -> input .matrix .cols );
557- ccv_dense_matrix_t * full = b [last ];
558- b [last ] = input ;
559- b [second ] = ccv_dense_matrix_new (convnet -> layers [second ].input .matrix .rows , convnet -> layers [second ].input .matrix .cols , CCV_NO_DATA_ALLOC | CCV_32F | convnet -> layers [second ].input .matrix .channels , multi -> data .f32 + k * convnet -> layers [second ].input .node .count , 0 );
560- for (j = last ; j < second ; j ++ )
573+ rows = b [0 ]-> rows , cols = b [0 ]-> cols ;
574+ for (j = 0 ; j < scan + 1 ; j ++ )
561575 {
562576 ccv_convnet_layer_t * layer = convnet -> layers + j ;
577+ _ccv_convnet_layer_derive_output (layer , rows , cols , & out_rows , & out_cols , & out_partition );
563578 _ccv_convnet_layer_forward_propagate (layer , b [j ], b + j + 1 , 0 );
564- if (j > last )
565- {
579+ if (j > 0 )
566580 ccv_matrix_free (b [j ]);
567- b [j ] = 0 ;
568- }
581+ rows = out_rows , cols = out_cols ;
569582 }
570- ccv_matrix_free (b [second ]);
571- b [second ] = 0 ;
572- if (symmetric )
583+ int offsets [5 ][2 ] = {
584+ {0 , 0 },
585+ {cols - convnet -> layers [scan + 1 ].input .matrix .cols , 0 },
586+ {(cols - convnet -> layers [scan + 1 ].input .matrix .cols ) / 2 , (rows - convnet -> layers [scan + 1 ].input .matrix .rows ) / 2 },
587+ {0 , rows - convnet -> layers [scan + 1 ].input .matrix .rows },
588+ {cols - convnet -> layers [scan + 1 ].input .matrix .cols , rows - convnet -> layers [scan + 1 ].input .matrix .rows },
589+ };
590+ for (k = 0 ; k < 5 ; k ++ )
573591 {
574- ccv_flip (input , & input , 0 , CCV_FLIP_X );
575- b [second ] = ccv_dense_matrix_new (convnet -> layers [second ].input .matrix .rows , convnet -> layers [second ].input .matrix .cols , CCV_NO_DATA_ALLOC | CCV_32F | convnet -> layers [second ].input .matrix .channels , multi -> data .f32 + (k + 5 ) * convnet -> layers [second ].input .node .count , 0 );
576- // horizontal mirroring
577- for (j = last ; j < second ; j ++ )
592+ ccv_dense_matrix_t * input = 0 ;
593+ ccv_convnet_layer_t * layer = convnet -> layers + scan + 1 ;
594+ ccv_slice (b [scan + 1 ], (ccv_matrix_t * * )& input , CCV_32F , offsets [k ][1 ], offsets [k ][0 ], layer -> input .matrix .rows , layer -> input .matrix .cols );
595+ // copy the last layer for full connect compute
596+ b [full_connect ] = ccv_dense_matrix_new (convnet -> layers [full_connect ].input .matrix .rows , convnet -> layers [full_connect ].input .matrix .cols , CCV_NO_DATA_ALLOC | CCV_32F | convnet -> layers [full_connect ].input .matrix .channels , c -> data .f32 + (t * 5 + k ) * convnet -> layers [full_connect ].input .node .count , 0 );
597+ for (j = scan + 1 ; j < full_connect ; j ++ )
578598 {
579- ccv_convnet_layer_t * layer = convnet -> layers + j ;
580- _ccv_convnet_layer_forward_propagate (layer , b [j ], b + j + 1 , 0 );
581- if (j > last )
582- {
599+ layer = convnet -> layers + j ;
600+ _ccv_convnet_layer_forward_propagate (layer , j > scan + 1 ? b [j ] : input , b + j + 1 , 0 );
601+ if (j > scan + 1 )
583602 ccv_matrix_free (b [j ]);
584- b [ j ] = 0 ;
585- }
603+ else
604+ ccv_matrix_free ( input );
586605 }
587- ccv_matrix_free (b [second ]);
588- b [second ] = 0 ;
606+ ccv_matrix_free (b [full_connect ]);
607+ // set it to 0
608+ memset (b + scan + 2 , 0 , sizeof (ccv_dense_matrix_t * ) * (full_connect - scan - 1 ));
589609 }
590- ccv_matrix_free (input );
591- b [last ] = full ;
610+ ccv_matrix_free (b [scan + 1 ]);
611+ memset (b + 1 , 0 , sizeof (ccv_dense_matrix_t * ) * (scan + 1 ));
612+ ccv_flip (b [0 ], & b [0 ], 0 , CCV_FLIP_X );
592613 }
593- ccv_matrix_free (b [last ]);
594- b [ last ] = 0 ;
595- b [second ] = multi ;
596- for (j = second ; j < convnet -> count ; j ++ )
614+ ccv_matrix_free (b [0 ]);
615+ // now have everything in c, do the last full connect propagate
616+ b [full_connect ] = c ;
617+ for (j = full_connect ; j < convnet -> count ; j ++ )
597618 {
598619 ccv_convnet_layer_t * layer = convnet -> layers + j ;
599620 assert (layer -> type == CCV_CONVNET_FULL_CONNECT );
600621 _ccv_convnet_full_connect_forward_propagate_parallel (layer , b [j ], b + j + 1 );
601622 ccv_matrix_free (b [j ]);
602- b [j ] = 0 ;
603623 }
604624 ccv_dense_matrix_t * softmax = 0 ;
605625 _ccv_convnet_compute_softmax_parallel (b [convnet -> count ], & softmax , 0 );
606626 ccv_matrix_free (b [convnet -> count ]);
607- b [convnet -> count ] = 0 ;
608627 ranks [i ] = ccv_array_new (sizeof (ccv_classification_t ), tops , 0 );
609628 float * r = softmax -> data .f32 ;
610629 assert (tops <= softmax -> cols );
611630 for (j = 0 ; j < tops ; j ++ )
612631 {
613- float maxr = -1 ;
614- int id = -1 ;
632+ float max_val = -1 ;
633+ int max_idx = -1 ;
615634 for (k = 0 ; k < softmax -> cols ; k ++ )
616- if (r [k ] >= 0 && r [k ] > maxr )
617- maxr = r [k ], id = k ;
618- assert (id >= 0 );
619- r [id ] = -1 ;
635+ if (r [k ] >= 0 && r [k ] > max_val )
636+ max_val = r [k ], max_idx = k ;
637+ assert (max_idx >= 0 );
638+ r [max_idx ] = -1 ;
620639 ccv_classification_t classification = {
621- .id = id ,
622- .confidence = maxr / c ,
640+ .id = max_idx ,
641+ .confidence = max_val / ((!! symmetric + 1 ) * 5 ) ,
623642 };
624643 ccv_array_push (ranks [i ], & classification );
625644 }
626645 ccv_matrix_free (softmax );
646+ memset (b , 0 , sizeof (ccv_dense_matrix_t * ) * (convnet -> count + 1 ));
627647 }
628648#ifdef HAVE_CUDA
629649 }
@@ -641,7 +661,7 @@ static void _ccv_convnet_convolutional_backward_propagate(ccv_convnet_layer_t* l
641661 // x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
642662 // note that y (the output from forward prop) is not included because the full connect net is simple enough that we don't need it
643663 int rows , cols , partition ;
644- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
664+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
645665 int ch = layer -> net .convolutional .channels ;
646666 int count = layer -> net .convolutional .count ;
647667 int strides = layer -> net .convolutional .strides ;
@@ -796,7 +816,7 @@ static void _ccv_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* la
796816static void _ccv_convnet_rnorm_backward_propagate (ccv_convnet_layer_t * layer , ccv_dense_matrix_t * a , ccv_dense_matrix_t * n , ccv_dense_matrix_t * m , ccv_dense_matrix_t * denoms , ccv_dense_matrix_t * * b )
797817{
798818 int rows , cols , partition ;
799- _ccv_convnet_layer_deduce_output_format (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
819+ _ccv_convnet_layer_derive_output (layer , layer -> input .matrix .rows , layer -> input .matrix .cols , & rows , & cols , & partition );
800820 int size = layer -> net .rnorm .size ;
801821 float alpha = layer -> net .rnorm .alpha ;
802822 float beta = layer -> net .rnorm .beta ;