-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathdpofrt_ilp64.f
219 lines (219 loc) · 8.08 KB
/
dpofrt_ilp64.f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
SUBROUTINE DPOFRT( UPLO, N, A, LDA, C, LDC, INFO )
*
* -- Iain Murray, January 2016. Derived in part from DPOTRF, which is a --
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER*8 INFO, LDA, LDC, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * )
DOUBLE PRECISION C( LDC, * )
* ..
*
* Purpose
* =======
*
* DPOFRT computes the reverse mode sensitivity of DPOTRF, the Cholesky
* factorization A of a real symmetric positive definite matrix X.
*
* The original factorization had the form
* X = A**T * A , if UPLO = 'U', or
* X = A * A**T, if UPLO = 'L',
* where A is an upper or lower triangular matrix respectively.
*
* The triangular matrix C provides sensitivities of a scalar cost
* function with respect to X, the output of the Cholesky algorithm A.
* This routine transforms C into a triangular matrix of sensitivities
* with respect to the input matrix X.
*
* This is the block version of the algorithm, calling Level 3 BLAS.
*
* Arguments
* =========
*
* UPLO (input) CHARACTER*1
* Specifies whether the upper or lower triangular part of the
* matrices A and C are to be used.
* = 'U': Upper triangular
* = 'L': Lower triangular
*
* N (input) INTEGER
* The order of the matrices A and C. N >= 0.
*
* A (input) DOUBLE PRECISION array, dimension (LDA,N)
* A contains a Cholesky decomposition of a matrix, usually from
* a previous call to DPOTRF. If UPLO = 'U', the leading n by n
* upper triangular part of A is used, and the strictly lower
* triangular part of A is not referenced. If UPLO = 'L', the
* leading n by n lower triangular part of A is used, and the
* strictly upper triangular part of A is not referenced.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,N).
*
* C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
*
* On exit, if INFO = 0, the sensitivity of the cost function
* with respect to the full matrix X = A**T *A or X = A*A**T.
* WARNING: In the current version, some of the elements in the
* unreferenced diagonal will be overwritten with zeros.
*
* LDC (input) INTEGER
* The leading dimension of the array C. LDC >= max(1,N).
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -k, the k-th argument had an illegal value
* or if INFO = -8, we hit a not-implemented error, NI_ERR
* > 0: if INFO = k, A( k, k ) was found to be -ve, and so
* cannot have been returned from DPOTRF.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
INTEGER*8 ONE_I, NI_ERR
PARAMETER ( ONE_I = 1, NI_ERR = -8 )
* ..
* .. Local Scalars ..
LOGICAL UPPER
INTEGER*8 J, JB, JL, NB, K, L
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER*8 ILAENV
EXTERNAL LSAME, ILAENV
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DSCAL, DGEMM, DPO2FT, DTRSM
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( ONE_I, N ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DPOFRT', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Determine the block size for this environment.
*
NB = ILAENV( ONE_I, 'DPOTRF', UPLO, N, -ONE_I, -ONE_I, -ONE_I )
IF( NB.LE.1 .OR. NB.GE.N ) THEN
*
* Use unblocked code.
*
CALL DPO2FT( UPLO, N, A, LDA, C, LDC, INFO )
ELSE
*
* Use blocked code.
*
IF( UPPER ) THEN
*
* Assume A is from a factorization X = A**T *A.
*
DO 10 JL = N-NB+1, -NB, -NB
* NOT IMPLEMENTED YET.
CALL XERBLA( 'DPOFRT', NI_ERR )
RETURN
10 CONTINUE
*
ELSE
*
* Assume A is from a factorization X = A*A**T.
*
DO 20 JL = N-NB+1, 1-NB, -NB
J = MAX( ONE_I, JL )
JB = NB - (J - JL)
IF( J+JB.LE.N ) THEN
* % Lb_B = Lb_B/L_T
* L_bar(J+JB:N, J:J+JB-1) = L_bar(J+JB:N, J:J+JB-1)/L(J:J+JB-1, J:J+JB-1);
CALL DTRSM( 'Right', 'Lower', 'No transpose',
$ 'Non-unit', N-J-JB+1, JB, ONE, A( J, J ),
$ LDA, C( J+JB, J ), LDC )
* % Lb_T -= tril(Lb_B'*L_B)
* L_bar(J:J+JB-1, J:J+JB-1) = L_bar(J:J+JB-1, J:J+JB-1) - tril(L_bar(J+JB:N, J:J+JB-1)'*L(J+JB:N, J:J+JB-1));
* % But omitting tril(.) and overwriting with stuff and then zeros later.
* WASTEFUL: only need bottom left triangle of answer.
CALL DGEMM( 'Transpose', 'No transpose', JB, JB,
$ N-J-JB+1, -ONE, C( J+JB, J ), LDC,
$ A( J+JB, J ), LDA, ONE, C( J, J ), LDC )
END IF
* % Lb_T = dpotf2_rev(L_T, Lb_T)
* L_bar(J:J+JB-1, J:J+JB-1) = dpotf2_rev_inplace(L(J:J+JB-1, J:J+JB-1), L_bar(J:J+JB-1, J:J+JB-1));
CALL DPO2FT( UPLO, JB, A( J, J ), LDA, C( J, J),
$ LDC, INFO )
IF( INFO.NE.0 )
$ GO TO 30
* Copy lower triangle into upper triangle for update of block C below,
* and temporarily double diagonal element
* There's probably a better way to do it...
DO 25 K = J+1, J+JB-1
CALL DCOPY ( JB-(K-J), C( K, K-1 ), ONE_I,
$ C( K-1, K ), LDC )
25 CONTINUE
IF( J.GT.1 ) THEN
CALL DSCAL( JB, 2.0D+0, C( J, J ), LDC+1 )
* % Lb_D -= Lb_B*L_C
* L_bar(J+JB:N, 1:J-1) = L_bar(J+JB:N, 1:J-1) - L_bar(J+JB:N, J:J+JB-1)*L(J:J+JB-1, 1:J-1);
CALL DGEMM( 'No transpose', 'No transpose', N-J-JB+1,
$ J-1, JB, -ONE, C( J+JB, J ), LDC,
$ A( J, 1 ), LDA, ONE, C( J+JB, 1 ), LDC )
* % Lb_C -= Lb_B'*L_D
* L_bar(J:J+JB-1, 1:J-1) = L_bar(J:J+JB-1, 1:J-1) - L_bar(J+JB:N, J:J+JB-1)'*L(J+JB:N, 1:J-1);
CALL DGEMM( 'Transpose', 'No transpose', JB, J-1,
$ N-J-JB+1, -ONE, C( J+JB, J ), LDC,
$ A( J+JB, 1 ), LDA, ONE, C( J, 1 ), LDC )
* % Lb_C -= (Lb_T + Lb_T')*C
* L_bar(J:J+JB-1, 1:J-1) = L_bar(J:J+JB-1, 1:J-1) - (L_bar(J:J+JB-1, J:J+JB-1) + L_bar(J:J+JB-1, J:J+JB-1)')*L(J:J+JB-1, 1:J-1);
CALL DGEMM( 'No transpose', 'No transpose', JB, J-1,
$ JB, -ONE, C( J, J ), LDC,
$ A( J, 1 ), LDA, ONE, C( J, 1 ), LDC )
* Rescale diagonal back again
CALL DSCAL( JB, 0.5D+0, C( J, J ), LDC+1 )
END IF
* Zero unwanted bits above diagonal
DO 26 K = J+1, J+JB-1
DO 27 L = 1, K-1
C( L, K ) = 0.0D+0
27 CONTINUE
26 CONTINUE
*
20 CONTINUE
END IF
END IF
GO TO 40
*
30 CONTINUE
INFO = INFO + J - 1
*
40 CONTINUE
RETURN
*
* End of DPOFRT
*
END