PaddlePaddle · engineer1109 · Nov 22, 2023
@@ -89,8 +89,7 @@ class IoCopyHostToOpenCLCompute
   }
 #endif
   void PrepareForRun() override {
-    auto& param = Param<param_t>();
-    if (fp16_support_ && param.process_type != 2) {
+    if (fp16_support_) {
       VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
       auto& context = ctx_->As<OpenCLContext>();
       context.cl_context()->AddKernel(kernel_func_name_,
@@ -114,8 +113,7 @@ class IoCopyHostToOpenCLCompute
     VLOG(2) << "param.y->dims().size():" << param.y->dims().size();
     VLOG(2) << "param.y->dims():" << param.y->dims();
 #endif
-    if (fp16_support_ && param.x->precision() == PRECISION(kFloat) &&
-        param.process_type != 2) {
+    if (fp16_support_ && param.x->precision() == PRECISION(kFloat)) {
       std::unique_ptr<Tensor> precision_cast_t =
           std::unique_ptr<Tensor>(new Tensor);
       precision_cast_t->Resize(param.x->dims());
@@ -132,7 +130,6 @@ class IoCopyHostToOpenCLCompute
       kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
       auto kernel = context.cl_context()->GetKernel(kernel_key.str());
       size_t count = param.x->dims().production();
-
       auto* y_data = MUTABLE_BUFFER_GPU(param.y);
       int arg_idx = 0;
       cl_int status;
@@ -204,8 +201,7 @@ class IoCopykOpenCLToHostCompute
   }
 #endif
   void PrepareForRun() override {
-    auto& param = Param<param_t>();
-    if (fp16_support_ && param.process_type != 2) {
+    if (fp16_support_) {
       VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
       auto& context = ctx_->As<OpenCLContext>();
       context.cl_context()->AddKernel(kernel_func_name_,
@@ -244,7 +240,7 @@ class IoCopykOpenCLToHostCompute
     VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
 #endif
     if (fp16_support_ && param.x->precision() != PRECISION(kInt64) &&
-        param.x->precision() != PRECISION(kInt32) && param.process_type != 2) {
+        param.x->precision() != PRECISION(kInt32)) {
       mem_size = param.x->dims().production() * sizeof(float);
       std::unique_ptr<Tensor> precision_cast_t =
           std::unique_ptr<Tensor>(new Tensor);

@@ -46,7 +46,7 @@ class LayoutComputeBufferChwToImageDefault
       kernel_func_name_ = "buffer_to_image2d_with_pre255";
     }
     VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
-    if (param.process_type != 2 && fp16_support_) {
+    if (fp16_support_) {
       build_options_ += " -DMUTABLE_TYPE=half ";
     } else {
       build_options_ += " -DMUTABLE_TYPE=float ";
@@ -74,6 +74,10 @@ class LayoutComputeBufferChwToImageDefault
     } else {
       x_data = param.x->data<float, cl::Buffer>();
     }
+    // std::vector<float> temp(2);
+    // std::cout << x_data->get() << std::endl;
+    // TargetWrapperCL::MemcpySync(temp.data(), x_data, 8, IoDirection::DtoH);
+    // std::cout << "debug layout " << temp[0] << "/" << temp[1] << std::endl;
     auto x_dims = param.x->dims();
     auto image_shape = InitImageDimInfoWith(x_dims);
     auto* y_data = MUTABLE_DATA_GPU(
@@ -189,7 +193,7 @@ class LayoutComputeImageDefaultToBufferChw
     if (param.process_type == 1) {
       kernel_func_name_ = "image2d_to_buffer_with_post255";
     }
-    if (param.process_type != 2 && fp16_support_) {
+    if (fp16_support_) {
       build_options_ += " -DMUTABLE_TYPE=half ";
     } else {
       build_options_ += " -DMUTABLE_TYPE=float ";
@@ -217,10 +221,10 @@ class LayoutComputeImageDefaultToBufferChw
       y_data = param.y->mutable_data<uint8_t, cl::Buffer>(TARGET(kOpenCL));
       param.y->set_precision(PRECISION(kInt8));
     } else {
-      y_data = (fp16_support_ && param.process_type != 2)
+      y_data = (fp16_support_)
                    ? param.y->mutable_data<half_t, cl::Buffer>(TARGET(kOpenCL))
                    : param.y->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
-      if (fp16_support_ && param.process_type != 2)
+      if (fp16_support_)
         param.y->set_precision(PRECISION(kFP16));
       else
         param.y->set_precision(PRECISION(kFloat));
@@ -637,7 +641,7 @@ class LayoutComputeImageFolderToBufferChw
     if (x_dims.size() > 2) {
       kernel_func_name_ = "image2d_to_buffer";
     }
-    if (param.process_type != 2 && fp16_support_) {
+    if (fp16_support_) {
       build_options_ += " -DMUTABLE_TYPE=half ";
     } else {
       build_options_ += " -DMUTABLE_TYPE=float ";
@@ -672,11 +676,11 @@ class LayoutComputeImageFolderToBufferChw
       x_image_shape = folder_converter.InitImageDimInfoWith(x_dims);
     }
     auto* y_data =
-        (fp16_support_ && param.process_type != 2)
+        (fp16_support_)
             ? param.y->mutable_data<half_t, cl::Buffer>(TARGET(kOpenCL))
             : param.y->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
     auto* x_data = GET_DATA_GPU(param.x);
-    if (fp16_support_ && param.process_type != 2)
+    if (fp16_support_)
       param.y->set_precision(PRECISION(kFP16));
     else
       param.y->set_precision(PRECISION(kFloat));
@@ -787,7 +791,7 @@ class LayoutComputeBufferChwToImageFolder
     if (x_dims.size() > 2) {
       kernel_func_name_ = "buffer_to_image2d";
     }
-    if (param.process_type != 2 && fp16_support_) {
+    if (fp16_support_) {
       build_options_ += " -DMUTABLE_TYPE=half ";
     } else {
       build_options_ += " -DMUTABLE_TYPE=float ";