[AutoParallel] Add pad spmd rules (#68304)
* update

* Update backward.yaml

* Update backward.yaml

* Update backward.yaml
Xing-lil authored Sep 23, 2024
1 parent 72d0bba commit eb15b78
Showing 10 changed files with 255 additions and 1 deletion.
@@ -893,7 +893,7 @@ def GenDistBranch(args, op_info):
        # Tensor input
        else:
            TEMPLATE = """
-                dist_operand_attrs.push_back(GetTensorDistAttr({name}.type()));"""
+                dist_operand_attrs.push_back(GetTensorDistAttr({name}.dtype()));"""
            dist_branch_str += TEMPLATE.format(
                name=op_info.input_name_list[i]
            )
109 changes: 109 additions & 0 deletions paddle/phi/infermeta/spmd_rules/pad.cc
@@ -0,0 +1,109 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/infermeta/spmd_rules/pad.h"
#include <numeric>

#include "glog/logging.h"

#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
#include "paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h"
#include "paddle/phi/core/distributed/auto_parallel/utils.h"
#include "paddle/phi/infermeta/spmd_rules/rules.h"
#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
#include "paddle/phi/infermeta/spmd_rules/utils.h"

namespace phi {
namespace distributed {

SpmdInfo PadInferSpmd(const DistMetaTensor& x,
                      const std::vector<int>& paddings,
                      int pad_value) {
  auto x_shape = phi::vectorize(x.dims());
  int x_ndim = x_shape.size();
  auto x_dist_attr_src = x.dist_attr();
  std::vector<int64_t> x_dims_mapping = x_dist_attr_src.dims_mapping();
  PADDLE_ENFORCE_EQ(
      x_ndim,
      x_dims_mapping.size(),
      phi::errors::InvalidArgument("The Tensor X's rank [%d] and X's "
                                   "dims_mapping size [%d] are not matched.",
                                   x_ndim,
                                   x_dims_mapping.size()));
  std::vector<int64_t> dims_to_unshard;
  for (size_t i = 0; i < paddings.size(); i += 2) {
    if (paddings[i] != 0 || paddings[i + 1] != 0) {
      dims_to_unshard.push_back(i / 2);
    }
  }
  auto x_dist_attr = UnShardTensorDims(x_dist_attr_src, dims_to_unshard);
  TensorDistAttr out_dist_attr = CopyTensorDistAttrForOutput(x_dist_attr);
  out_dist_attr.set_dims_mapping(x_dist_attr.dims_mapping());

  VLOG(4) << "PadInferSpmd: X shape: [" << str_join(x_shape) << "]";
  VLOG(4) << "X dims_mapping: [" << str_join(x_dist_attr.dims_mapping())
          << "] Out dims_mapping: [" << str_join(out_dist_attr.dims_mapping())
          << "]";

  return {{x_dist_attr}, {out_dist_attr}};
}

SpmdInfo PadGradInferSpmd(const DistMetaTensor& x,
                          const DistMetaTensor& out,
                          const std::vector<int>& paddings,
                          int pad_value) {
  auto out_shape = phi::vectorize(out.dims());
  int out_ndim = out_shape.size();
  auto out_dist_attr_src = out.dist_attr();
  std::vector<int64_t> out_dims_mapping = out_dist_attr_src.dims_mapping();
  PADDLE_ENFORCE_EQ(
      out_ndim,
      out_dims_mapping.size(),
      phi::errors::InvalidArgument("The Tensor Out's rank [%d] and Out's "
                                   "dims_mapping size [%d] are not matched.",
                                   out_ndim,
                                   out_dims_mapping.size()));

  std::vector<int64_t> dims_to_unshard;
  for (size_t i = 0; i < paddings.size(); i += 2) {
    if (paddings[i] != 0 || paddings[i + 1] != 0) {
      dims_to_unshard.push_back(i / 2);
    }
  }
  auto out_dist_attr = UnShardTensorDims(out_dist_attr_src, dims_to_unshard);
  TensorDistAttr x_dist_attr = CopyTensorDistAttrForOutput(out_dist_attr);
  x_dist_attr.set_dims_mapping(out_dist_attr.dims_mapping());

  VLOG(4) << "PadGradInferSpmd: Out shape: [" << str_join(out_shape) << "]";
  VLOG(4) << "Out dims_mapping: [" << str_join(out_dist_attr.dims_mapping())
          << "] X dims_mapping: [" << str_join(x_dist_attr.dims_mapping())
          << "]";

  return {{x_dist_attr}, {out_dist_attr}};
}

SpmdInfo PadInferSpmdDynamic(const DistMetaTensor& x,
                             const std::vector<int>& paddings,
                             const Scalar& pad_value) {
  return PadInferSpmd(x, paddings, pad_value.to<int32_t>());
}

SpmdInfo PadGradInferSpmdDynamic(const DistMetaTensor& out_grad,
                                 const std::vector<int>& paddings,
                                 const Scalar& pad_value) {
  return PadInferSpmd(out_grad, paddings, pad_value.to<int32_t>());
}

} // namespace distributed
} // namespace phi
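
Both rules reduce to the same transformation: any tensor dimension that receives non-zero padding is forced to be replicated (its dims_mapping entry becomes -1), because padding a sharded dimension would insert the pad values on every shard rather than once at the global boundary; every other dimension keeps its sharding. A minimal Python sketch of that transformation (a standalone illustration only — pad_infer_dims_mapping is a hypothetical helper, not part of Paddle):

# Hypothetical helper mirroring what PadInferSpmd does to the input's
# dims_mapping. paddings is flattened as
# [dim0_low, dim0_high, dim1_low, dim1_high, ...].
def pad_infer_dims_mapping(dims_mapping, paddings):
    out = list(dims_mapping)
    for dim in range(len(paddings) // 2):
        # A dim with non-zero padding is un-sharded (mapped to -1).
        if paddings[2 * dim] != 0 or paddings[2 * dim + 1] != 0:
            out[dim] = -1
    return out


# Matches test_pad_rule.py below: dims 1 and 2 are padded, so only the
# sharding on dim 0 survives.
assert pad_infer_dims_mapping([0, 1, -1], [0, 0, 0, 1, 2, 3]) == [0, -1, -1]
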
41 changes: 41 additions & 0 deletions paddle/phi/infermeta/spmd_rules/pad.h
@@ -0,0 +1,41 @@
/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
#include "paddle/phi/core/distributed/type_defs.h"

namespace phi {
namespace distributed {

SpmdInfo PadInferSpmd(const DistMetaTensor& x,
                      const std::vector<int>& paddings,
                      int pad_value);

SpmdInfo PadGradInferSpmd(const DistMetaTensor& x,
                          const DistMetaTensor& out,
                          const std::vector<int>& paddings,
                          int pad_value);

SpmdInfo PadInferSpmdDynamic(const DistMetaTensor& x,
                             const std::vector<int>& paddings,
                             const Scalar& pad_value);

SpmdInfo PadGradInferSpmdDynamic(const DistMetaTensor& out_grad,
                                 const std::vector<int>& paddings,
                                 const Scalar& pad_value);
} // namespace distributed
} // namespace phi
5 changes: 5 additions & 0 deletions paddle/phi/infermeta/spmd_rules/rules.cc
@@ -696,5 +696,10 @@ PD_REGISTER_SPMD_RULE(
    PD_INFER_SPMD(phi::distributed::LogSumExpInferSpmd),
    PD_INFER_SPMD(phi::distributed::LogSumExpInferSpmdReverse));

// pad
PD_REGISTER_SPMD_RULE(pad,
                      PD_INFER_SPMD(phi::distributed::PadInferSpmd),
                      PD_INFER_SPMD(phi::distributed::PadGradInferSpmd));

} // namespace distributed
} // namespace phi
1 change: 1 addition & 0 deletions paddle/phi/infermeta/spmd_rules/rules.h
@@ -42,6 +42,7 @@ limitations under the License. */
#include "paddle/phi/infermeta/spmd_rules/numel.h"
#include "paddle/phi/infermeta/spmd_rules/one_hot.h"
#include "paddle/phi/infermeta/spmd_rules/optimizer.h"
#include "paddle/phi/infermeta/spmd_rules/pad.h"
#include "paddle/phi/infermeta/spmd_rules/pow.h"
#include "paddle/phi/infermeta/spmd_rules/reduction.h"
#include "paddle/phi/infermeta/spmd_rules/replicated.h"
1 change: 1 addition & 0 deletions paddle/phi/ops/yaml/backward.yaml
@@ -2223,6 +2223,7 @@
  infer_meta :
    func : UnchangedInferMeta
    param: [x]
    spmd_rule : PadGradInferSpmdDynamic
  kernel :
    func : pad_grad
    param: [out_grad, paddings, pad_value]
1 change: 1 addition & 0 deletions paddle/phi/ops/yaml/ops.yaml
@@ -3559,6 +3559,7 @@
  output : Tensor
  infer_meta :
    func : PadInferMeta
    spmd_rule : PadInferSpmdDynamic
  kernel :
    func : pad
  backward : pad_grad
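
With the spmd_rule entries wired into ops.yaml and backward.yaml above, the dynamic-graph pad API can propagate shardings instead of falling back to fully replicated tensors. A rough usage sketch of the intended behavior (it assumes the public auto-parallel API and a two-device launch, e.g. via paddle.distributed.launch; it mirrors TestCase5 in test_pad_op.py below and is not code from this PR):

import paddle
import paddle.distributed as dist

mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
x = paddle.ones([10, 10])
# Shard tensor dim 0 across the two devices in the mesh.
dist_x = dist.shard_tensor(x, mesh, [dist.Shard(0)])

# Pad only dim 1 by (2, 3); per PadInferSpmd, dim 0 receives no padding
# and can therefore stay sharded in the output.
out = paddle.nn.functional.pad(dist_x, [0, 0, 2, 3], mode='constant', value=1.0)
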
1 change: 1 addition & 0 deletions test/auto_parallel/spmd_rules/CMakeLists.txt
@@ -38,6 +38,7 @@ if(WITH_DISTRIBUTE)
  py_test_modules(test_unbind_rule MODULES test_unbind_rule)
  py_test_modules(test_stack_rule MODULES test_stack_rule)
  py_test_modules(test_gather_nd_rule MODULES test_gather_nd_rule)
  py_test_modules(test_pad_rule MODULES test_pad_rule)
  py_test_modules(test_fused_dropout_add_rule MODULES
                  test_fused_dropout_add_rule)
  py_test_modules(test_logsumexp_rule MODULES test_logsumexp_rule)
63 changes: 63 additions & 0 deletions test/auto_parallel/spmd_rules/test_pad_rule.py
@@ -0,0 +1,63 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from paddle.distributed.auto_parallel.static.dist_attribute import (
    DistTensorSpec,
    TensorDistAttr,
)
from paddle.distributed.fleet import auto
from paddle.framework import core


class TestPadSPMDRule(unittest.TestCase):
    def setUp(self):
        self.process_mesh = auto.ProcessMesh(mesh=[[0, 1, 2, 3], [4, 5, 6, 7]])
        self.shapes = [[8, 16, 16]]
        self.dim_mappings = [[0, 1, -1]]
        self.paddings = [0, 0, 0, 1, 2, 3]

    def build_inputs(self):
        inputs = []
        for shape, dim_mapping in zip(self.shapes, self.dim_mappings):
            tensor_dist_attr = TensorDistAttr()
            tensor_dist_attr.dims_mapping = dim_mapping
            tensor_dist_attr.process_mesh = self.process_mesh
            inputs.append(DistTensorSpec(shape, tensor_dist_attr))
        return inputs

    def test_infer_forward(self):
        inputs = self.build_inputs()
        rule = core.get_phi_spmd_rule("pad")
        infered_dist_attrs = rule.infer_forward(inputs, self.paddings, 0)

        infered_output_dist_attrs = infered_dist_attrs[1]
        self.assertEqual(len(infered_output_dist_attrs), 1)
        self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [0, -1, -1])

    def test_infer_backward(self):
        inputs = self.build_inputs()
        rule = core.get_phi_spmd_rule("pad")
        infered_dist_attrs = rule.infer_backward(
            inputs, inputs, self.paddings, 0
        )

        infered_input_dist_attrs = infered_dist_attrs[0]
        self.assertEqual(len(infered_input_dist_attrs), 1)
        self.assertEqual(infered_input_dist_attrs[0].dims_mapping, [0, -1, -1])


if __name__ == "__main__":
    unittest.main()
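
One detail worth noting when comparing the two tests: the spmd rule (and test_pad_rule.py above) takes paddings in the flattened [dim0_low, dim0_high, dim1_low, dim1_high, ...] layout, while test_pad_op.py below specifies them as per-dimension (low, high) pairs. A small sketch of the correspondence (flatten_paddings is a hypothetical helper for illustration only):

def flatten_paddings(pairs):
    # [(low0, high0), (low1, high1), ...] -> [low0, high0, low1, high1, ...]
    flat = []
    for low, high in pairs:
        flat.extend([low, high])
    return flat


# The paddings of TestCase5 below, in the form the spmd rule consumes.
assert flatten_paddings([(0, 0), (2, 3)]) == [0, 0, 2, 3]
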
32 changes: 32 additions & 0 deletions test/legacy_test/test_pad_op.py
@@ -24,6 +24,7 @@
from utils import static_guard

import paddle
import paddle.distributed as dist
from paddle.base import core
from paddle.framework import in_pir_mode
from paddle.pir_utils import test_with_pir_api
@@ -71,33 +72,62 @@ def test_check_grad_normal(self):
            check_prim=True,
            check_pir=True,
            check_prim_pir=True,
            check_auto_parallel=self.check_auto_parallel,
        )

    def initTestCase(self):
        self.shape = (16, 16)
        self.paddings = [(0, 1), (2, 3)]
        self.pad_value = 0.0
        self.check_auto_parallel = False


class TestCase1(TestPadOp):
    def initTestCase(self):
        self.shape = (2, 3, 4, 5)
        self.paddings = [(0, 1), (2, 3), (2, 1), (1, 1)]
        self.pad_value = 0.5
        self.check_auto_parallel = False


class TestCase2(TestPadOp):
    def initTestCase(self):
        self.shape = (5, 5, 5)
        self.paddings = [(0, 0), (0, 0), (1, 2)]
        self.pad_value = 1.0
        self.check_auto_parallel = False


class TestCase3(TestPadOp):
    def initTestCase(self):
        self.shape = 100
        self.paddings = [(0, 1)]
        self.pad_value = 0.9
        self.check_auto_parallel = False


class TestCase4(TestPadOp):
    def initTestCase(self):
        self.shape = (10, 10)
        self.paddings = [(0, 1), (2, 3)]
        self.pad_value = 1.0

        self.check_auto_parallel = True
        self.placements = {
            'X': [dist.Replicate()],
        }


class TestCase5(TestPadOp):
    def initTestCase(self):
        self.shape = (10, 10)
        self.paddings = [(0, 0), (2, 3)]
        self.pad_value = 1.0

        self.check_auto_parallel = True
        self.placements = {
            'X': [dist.Shard(0)],
        }


# ----------------Pad Fp16----------------
@@ -129,6 +159,8 @@ def test_check_grad_normal(self):
create_test_fp16(TestCase1)
create_test_fp16(TestCase2)
create_test_fp16(TestCase3)
create_test_fp16(TestCase4)
create_test_fp16(TestCase5)


class TestPadOpError(unittest.TestCase):
