Skip to content

Commit 5d5992d

Browse files
committed
add comment for aclnn_ops.h
1 parent 681ae74 commit 5d5992d

File tree

2 files changed

+362
-0
lines changed

2 files changed

+362
-0
lines changed

.clang_format

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
---
2+
Language: Cpp
3+
# BasedOnStyle: Google
4+
AccessModifierOffset: -1
5+
AlignAfterOpenBracket: Align
6+
AlignConsecutiveMacros: false
7+
AlignConsecutiveAssignments: false
8+
AlignConsecutiveDeclarations: false
9+
AlignEscapedNewlines: Left
10+
AlignOperands: true
11+
AlignTrailingComments: true
12+
AllowAllArgumentsOnNextLine: true
13+
AllowAllConstructorInitializersOnNextLine: true
14+
AllowAllParametersOfDeclarationOnNextLine: true
15+
AllowShortBlocksOnASingleLine: Never
16+
AllowShortCaseLabelsOnASingleLine: false
17+
AllowShortFunctionsOnASingleLine: All
18+
AllowShortLambdasOnASingleLine: All
19+
AllowShortIfStatementsOnASingleLine: WithoutElse
20+
AllowShortLoopsOnASingleLine: true
21+
AlwaysBreakAfterDefinitionReturnType: None
22+
AlwaysBreakAfterReturnType: None
23+
AlwaysBreakBeforeMultilineStrings: true
24+
AlwaysBreakTemplateDeclarations: Yes
25+
BinPackArguments: true
26+
BinPackParameters: true
27+
BraceWrapping:
28+
AfterCaseLabel: false
29+
AfterClass: false
30+
AfterControlStatement: false
31+
AfterEnum: false
32+
AfterFunction: false
33+
AfterNamespace: false
34+
AfterObjCDeclaration: false
35+
AfterStruct: false
36+
AfterUnion: false
37+
AfterExternBlock: false
38+
BeforeCatch: false
39+
BeforeElse: false
40+
IndentBraces: false
41+
SplitEmptyFunction: true
42+
SplitEmptyRecord: true
43+
SplitEmptyNamespace: true
44+
BreakBeforeBinaryOperators: None
45+
BreakBeforeBraces: Attach
46+
BreakBeforeInheritanceComma: false
47+
BreakInheritanceList: BeforeColon
48+
BreakBeforeTernaryOperators: true
49+
BreakConstructorInitializersBeforeComma: false
50+
BreakConstructorInitializers: BeforeColon
51+
BreakAfterJavaFieldAnnotations: false
52+
BreakStringLiterals: true
53+
ConstructorInitializerAllOnOneLineOrOnePerLine: true
54+
ConstructorInitializerIndentWidth: 4
55+
ContinuationIndentWidth: 4
56+
Cpp11BracedListStyle: true
57+
DeriveLineEnding: true
58+
DerivePointerAlignment: true
59+
DisableFormat: false
60+
ExperimentalAutoDetectBinPacking: false
61+
FixNamespaceComments: true
62+
ForEachMacros:
63+
- foreach
64+
- Q_FOREACH
65+
- BOOST_FOREACH
66+
IncludeBlocks: Regroup
67+
IncludeCategories:
68+
- Regex: '^<ext/.*\.h>'
69+
Priority: 2
70+
SortPriority: 0
71+
- Regex: '^<.*\.h>'
72+
Priority: 1
73+
SortPriority: 0
74+
- Regex: '^<.*'
75+
Priority: 2
76+
SortPriority: 0
77+
- Regex: '.*'
78+
Priority: 3
79+
SortPriority: 0
80+
IncludeIsMainRegex: '([-_](test|unittest))?$'
81+
IncludeIsMainSourceRegex: ''
82+
IndentCaseLabels: true
83+
IndentGotoLabels: true
84+
IndentPPDirectives: None
85+
IndentWidth: 4
86+
IndentWrappedFunctionNames: false
87+
JavaScriptQuotes: Leave
88+
JavaScriptWrapImports: true
89+
KeepEmptyLinesAtTheStartOfBlocks: false
90+
MacroBlockBegin: ''
91+
MacroBlockEnd: ''
92+
MaxEmptyLinesToKeep: 1
93+
NamespaceIndentation: None
94+
ObjCBinPackProtocolList: Never
95+
ObjCBlockIndentWidth: 2
96+
ObjCSpaceAfterProperty: false
97+
ObjCSpaceBeforeProtocolList: true
98+
PenaltyBreakAssignment: 2
99+
PenaltyBreakBeforeFirstCallParameter: 1
100+
PenaltyBreakComment: 300
101+
PenaltyBreakFirstLessLess: 120
102+
PenaltyBreakString: 1000
103+
PenaltyBreakTemplateDeclaration: 10
104+
PenaltyExcessCharacter: 1000000
105+
PenaltyReturnTypeOnItsOwnLine: 200
106+
PointerAlignment: Left
107+
RawStringFormats:
108+
- Language: Cpp
109+
Delimiters:
110+
- cc
111+
- CC
112+
- cpp
113+
- Cpp
114+
- CPP
115+
- 'c++'
116+
- 'C++'
117+
CanonicalDelimiter: ''
118+
BasedOnStyle: google
119+
- Language: TextProto
120+
Delimiters:
121+
- pb
122+
- PB
123+
- proto
124+
- PROTO
125+
EnclosingFunctions:
126+
- EqualsProto
127+
- EquivToProto
128+
- PARSE_PARTIAL_TEXT_PROTO
129+
- PARSE_TEST_PROTO
130+
- PARSE_TEXT_PROTO
131+
- ParseTextOrDie
132+
- ParseTextProtoOrDie
133+
CanonicalDelimiter: ''
134+
BasedOnStyle: google
135+
ReflowComments: true
136+
SortIncludes: true
137+
SortUsingDeclarations: true
138+
SpaceAfterCStyleCast: false
139+
SpaceAfterLogicalNot: false
140+
SpaceAfterTemplateKeyword: true
141+
SpaceBeforeAssignmentOperators: true
142+
SpaceBeforeCpp11BracedList: false
143+
SpaceBeforeCtorInitializerColon: true
144+
SpaceBeforeInheritanceColon: true
145+
SpaceBeforeParens: ControlStatements
146+
SpaceBeforeRangeBasedForLoopColon: true
147+
SpaceInEmptyBlock: false
148+
SpaceInEmptyParentheses: false
149+
SpacesBeforeTrailingComments: 2
150+
SpacesInAngles: false
151+
SpacesInConditionalStatement: false
152+
SpacesInContainerLiterals: true
153+
SpacesInCStyleCastParentheses: false
154+
SpacesInParentheses: false
155+
SpacesInSquareBrackets: false
156+
SpaceBeforeSquareBrackets: false
157+
Standard: Auto
158+
StatementMacros:
159+
- Q_UNUSED
160+
- QT_REQUIRE_VERSION
161+
TabWidth: 8
162+
UseCRLF: false
163+
UseTab: Never
164+
...

ggml/src/ggml-cann/aclnn_ops.h

+198
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,36 @@
11
#ifndef CANN_ACLNN_OPS
22
#define CANN_ACLNN_OPS
33

4+
/**
5+
* @file aclnn_ops.h
6+
* @brief This file declares the ggml operators implemented for the CANN
7+
* backend on top of aclnn (repeat, add, leaky ReLU, concat, arange, sqr,
8+
* clamp, scale, argsort, norm, group norm, softmax and others).
9+
* @author hipudding <[email protected]>
10+
* @author wangshuai09 <[email protected]>
11+
* @date July 15, 2024
12+
*
13+
* Copyright (c) 2023-2024 The ggml authors
14+
*
15+
* Permission is hereby granted, free of charge, to any person obtaining a copy
16+
* of this software and associated documentation files (the "Software"), to
17+
* deal in the Software without restriction, including without limitation the
18+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
19+
* sell copies of the Software, and to permit persons to whom the Software is
20+
* furnished to do so, subject to the following conditions:
21+
*
22+
* The above copyright notice and this permission notice shall be included in
23+
* all copies or substantial portions of the Software.
24+
*
25+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31+
* IN THE SOFTWARE.
32+
*/
33+
434
#include <aclnnop/aclnn_add.h>
535
#include <aclnnop/aclnn_arange.h>
636
#include <aclnnop/aclnn_argsort.h>
@@ -18,26 +48,194 @@
1848
#include "acl_tensor.h"
1949
#include "common.h"
2050

51+
/**
52+
* @brief Repeats a ggml tensor along each dimension to match the dimensions
53+
* of another tensor.
54+
*
55+
* @details This function repeats the elements of a source ggml tensor along
56+
* each dimension to create a destination tensor with the specified
57+
* dimensions. The operation is performed using the ACL backend and
58+
* executed asynchronously on the device.
59+
*
60+
* @param ctx The CANN context used for operations.
61+
* @param dst The ggml tensor representing the destination, whose op is
62+
* GGML_OP_REPEAT and specifies the desired dimensions.
63+
*/
2164
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
2265

66+
/**
67+
* @brief Adds two ggml tensors using the CANN backend.
68+
*
69+
* @details This function performs an element-wise addition of two tensors. In
70+
* case the tensors do not have the same shape, one or both tensors
71+
* will be broadcasted to match the shape of the other before the
72+
* addition is performed. The formula for the operation is given by:
73+
* \f[
74+
* \text{dst} = \text{acl\_src0} + \alpha \cdot \text{acl\_src1}
75+
* \f]
76+
*
77+
* @param ctx The CANN context used for operations.
78+
* @param dst The ggml tensor representing the destination, result of the
79+
* addition is stored at dst->data, and dst->op is GGML_OP_ADD
80+
*/
2381
void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
2482

83+
/**
84+
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
85+
* backend.
86+
*
87+
* @details This function computes the Leaky ReLU activation for each element of
88+
* the input tensor. The Leaky ReLU function allows a small gradient
89+
* when the unit is not active (i.e., when the input is negative). The
90+
* Leaky ReLU function is defined as:
91+
* \f[
92+
* \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0,
93+
* src)
94+
* \f]
95+
*
96+
* @param ctx The CANN context used for operations.
97+
* @param dst The destination tensor where the result of the Leaky ReLU
98+
* activation is stored, whose op is GGML_OP_LEAKY_RELU
99+
*/
25100
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
26101

102+
/**
103+
* @brief Concatenates multiple tensors along a specified dimension using the
104+
* CANN backend.
105+
*
106+
* @param ctx The CANN context used for operations.
107+
* @param tensorList A pointer to the list of tensors to be concatenated.
108+
* @param dst The destination tensor where the result of the
109+
* concatenation is stored. dst->op is GGML_OP_CONCAT.
110+
* @param concat_dim The dimension along which the tensors are concatenated.
111+
*
112+
* @attention tensorList length should be 2, and the dimension used for
113+
* concatenation defaults to 1.
114+
*/
27115
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
28116

117+
/**
118+
* @brief Generates a sequence of evenly spaced values within a specified
119+
* interval for a ggml tensor using the CANN backend.
120+
*
121+
* @details This function creates a sequence of numbers over a specified
122+
* interval, starting from `start`, ending before `stop`, and
123+
* incrementing by `step`. The sequence is stored in the destination
124+
* tensor `dst`.
125+
*
126+
* @param ctx The CANN context used for operations.
127+
* @param dst The destination tensor where the generated sequence will be stored.
128+
* `start`, `stop` and `step` are in dst->op_params and dst->op is
129+
* GGML_OP_ARANGE
130+
*/
29131
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
30132

133+
/**
134+
* @brief Computes the square of the elements of a ggml tensor using the CANN
135+
* backend.
136+
* @details The function sets the second source tensor of the destination
137+
* tensor `dst` to be equal to the first source tensor. This is
138+
* effectively squaring the elements since the multiplication becomes
139+
* `element * element`.
140+
* @param ctx The CANN context used for operations.
141+
* @param dst The destination tensor where the squared values will be stored,
142+
* with dst->op set to GGML_OP_SQR
143+
*/
31144
void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
32145

146+
/**
147+
* @brief Applies a clamp operation to the elements of a ggml tensor using the
148+
* CANN backend.
149+
*
150+
* @details This function clamps the elements of the input tensor `src` to a
151+
* specified range defined by `min` and `max` values. The result is
152+
* stored in the destination tensor `dst`. The operation is defined as:
153+
* \f[
154+
* y = \max(\min(x, max\_value), min\_value)
155+
* \f]
156+
* where `x` is an element of the input tensor, and `y` is the
157+
* corresponding element in the output tensor.
158+
* @param ctx The CANN context used for operations.
159+
* @param dst The destination tensor where the clamped values will be stored.
160+
* dst->op is GGML_OP_CLAMP; the `min` and `max` values are in dst->op_params.
161+
*/
33162
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
34163

164+
/**
165+
* @brief Scales the elements of a ggml tensor by a constant factor using the
166+
* CANN backend.
167+
*
168+
* @details This function multiplies each element of the input tensor `src` by
169+
* a scaling factor `scale`, storing the result in the destination
170+
* tensor `dst`. The operation is defined as:
171+
* \f[
172+
* dst = src \times scale
173+
* \f]
174+
*
175+
* @param ctx The CANN context used for operations.
176+
* @param dst The destination tensor where the scaled values will be stored.
177+
* dst->op is GGML_OP_SCALE and the scale value is in dst->op_params.
178+
*/
35179
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
36180

181+
/**
182+
* @brief Sorts the elements of a ggml tensor and returns the indices that
183+
* would sort the tensor using the CANN backend.
184+
*
185+
* @details This function performs an argsort operation on the input tensor
186+
* `src`. It sorts the elements of `src` in either ascending or
187+
* descending order, depending on the `GGML_SORT_ORDER_DESC`,
188+
* and returns the indices that would sort the original tensor.
189+
*
190+
* @param ctx The CANN context used for operations.
191+
* @param dst The destination tensor where the sorted indices will be stored.
192+
* dst->op is GGML_OP_ARGSORT
193+
*/
37194
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
38195

196+
/**
197+
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
198+
* backend.
199+
*
200+
* @details This function applies the Layer Normalization operation on the
201+
* input tensor `src` and stores the result in the destination tensor
202+
* `dst`. Layer Normalization normalizes the features at each sample in
203+
* a mini-batch independently. It is commonly used in neural networks
204+
* to normalize the activations of a layer by adjusting and scaling
205+
* the outputs. The operation is defined as:
206+
* \f[
207+
* \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\operatorname{Var}[x]+eps}}
208+
* \f] where Var is computed over dst->ne[0] by default and eps is taken from dst->op_params.
209+
*
210+
* @param ctx The CANN context used for operations.
211+
* @param dst The destination tensor where the normalized values will be stored.
212+
*/
39213
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
40214

215+
/**
216+
* @brief Computes the Group Normalization for a ggml tensor using the CANN
217+
* backend.
218+
*
219+
* @details This function applies the Group Normalization operation on the input
220+
* tensor `src` and stores the result in the destination tensor `dst`.
221+
* Group Normalization divides the channels into groups and normalizes
222+
* the features within each group across spatial locations.
223+
* It is commonly used in convolutional neural networks to improve
224+
* training stability and performance.
225+
*
226+
* @details
227+
* The function first creates ACL tensors from the ggml tensors `src` and `dst`. It then defines the epsilon value (`eps`)
228+
* and the number of groups (`n_groups`) for normalization. Variables for workspace allocation are initialized. The
229+
* function calculates the workspace size required for the Group Normalization operation using the
230+
* `aclnnGroupNormGetWorkspaceSize` function. If a workspace is needed, it allocates memory using the ggml pool allocator.
231+
* Temporary tensors `acl_mean_out` and `acl_rstd_out` are created for intermediate results. The Group Normalization
232+
* operation is executed using the `aclnnGroupNorm` function. Finally, the allocated resources are released.
233+
*
234+
* @param ctx The CANN context used for operations.
235+
* @param dst The destination tensor where the normalized values will be stored.
236+
*
237+
* @attention eps defaults to 1e-6f
238+
*/
41239
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
42240

43241
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);

0 commit comments

Comments
 (0)