|
8 | 8 | #include <ATen/Parallel.h> |
9 | 9 | #include <ATen/core/DistributionsHelper.h> |
10 | 10 |
|
| 11 | +#include <c10/util/irange.h> |
| 12 | + |
11 | 13 | namespace at { namespace native { |
12 | 14 |
|
13 | 15 | static const double SELU_ALPHA = 1.6732632423543772848170429916717; |
@@ -453,12 +455,12 @@ void inline prelu_cpu_kernel_multi_weights( |
453 | 455 | scalar_t* weight_data = weight.data_ptr<scalar_t>(); |
454 | 456 |
|
455 | 457 | auto loop = [&](int64_t start, int64_t end) { |
456 | | - for (auto i = start; i < end; ++i) { |
| 458 | + for (const auto i : c10::irange(start, end)) { |
457 | 459 | int64_t offset = i * channel_size * input_stride1; |
458 | 460 | scalar_t* n_input_data = input_data + offset; |
459 | 461 | scalar_t* n_result_data = result_data + offset; |
460 | | - for (auto j = 0; j < channel_size; ++j) { |
461 | | - for (auto k = 0; k < input_stride1; ++k) { |
| 462 | + for (const auto j : c10::irange(channel_size)) { |
| 463 | + for (const auto k : c10::irange(input_stride1)) { |
462 | 464 | // to allow for compiler optimization, here splitting into two lines: |
463 | 465 | scalar_t w = (n_input_data[k] > 0) ? scalar_t(1) : weight_data[j]; |
464 | 466 | n_result_data[k] = w * n_input_data[k]; |
@@ -578,9 +580,9 @@ void inline prelu_cpu_backward_kernel_multi_weights( |
578 | 580 | auto weight_grad_collector_data = weight_grad_collector.data_ptr<scalar_t>(); |
579 | 581 |
|
580 | 582 | auto loop = [&](int64_t start, int64_t end) { |
581 | | - for (auto i = start; i < end; i++) { |
582 | | - for (auto j = 0; j < channel_size; j++) { |
583 | | - for (auto k = 0; k < input_stride1; k++) { |
| 583 | + for (const auto i : c10::irange(start, end)) { |
| 584 | + for (const auto j : c10::irange(channel_size)) { |
| 585 | + for (const auto k : c10::irange(input_stride1)) { |
584 | 586 | int64_t pos = i * input_stride0 + j * input_stride1 + k; |
585 | 587 | scalar_t weight_data_val = weight_data[j]; |
586 | 588 | scalar_t input_data_val = input_data[pos]; |
|
0 commit comments