Bug fixes and enhancements for the linear solver.

- Modified forsolver.f90. - Updated and added new tests. - Modified fpm.toml. - Removed gfortran.rsp, ifort.rsp, ifx.rsp, nvfortran.rsp. - Added fpm.rsp. - Updated README.md
gha3mi · Dec 10, 2023 · f29a8be · f29a8be
1 parent 229b02f
commit f29a8be
Show file tree

Hide file tree

Showing 25 changed files with 219 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ forsolver = {git="https://github.com/gha3mi/forsolver.git"}
 
 ## Linear system solver
 ```fortran
-use forsolver, only : solve
+use forsolver, only: solve
 x = solve(A,b,method)
 ```
 available methods (optional):
@@ -51,7 +51,7 @@ available methods (optional):
 
 ## Nonlinear system solver
 ```fortran
-use forsolver, only : nlsolver
+use forsolver, only: nlsolver
 
 call nls%set_options(&
       lin_method,&
@@ -82,7 +82,7 @@ cs: complex step method
 
 ## Tests
 
-The `tests` directory contains test programs to verify the functionality of the `forsolver` module. To run the tests using `fpm`, you can use response files for specific compilers:
+The `tests` directory contains test programs to verify the functionality of the `forsolver` module. To run the tests using `fpm`:
 
 - For Intel Fortran Compiler (ifort):
 ```bash
@@ -113,7 +113,7 @@ fpm @gfortran
 program test1
 
    use kinds
-   use forsolver, only : solve
+   use forsolver, only: solve
 
    implicit none
 
@@ -158,7 +158,7 @@ program test3
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/fpm.rsp b/fpm.rsp
@@ -0,0 +1,9 @@
+@gfortran
+options test --compiler gfortran --flag "-Wno-line-truncation -Ofast -march=native -llapack -lblas"
+@ifort
+# options test --compiler ifort --flag "-Ofast -xHost -mtune=native -qopenmp -parallel -qmkl=parallel"
+options test --compiler ifort --flag "-Ofast -xHost -mtune=native -qopenmp -parallel -llapack -lblas"
+@ifx
+options test --compiler ifx --flag "-Ofast -xHost -mtune=native -qopenmp -fopenmp-target-do-concurrent -parallel -qmkl=parallel"
+@nvfortran
+options test --compiler nvfortran --flag "-O4 -mtune=native -stdpar=gpu,multicore -llapack"
diff --git a/fpm.toml b/fpm.toml
@@ -88,4 +88,19 @@ main       = "test13.f90"
 [[test]]
 name       = "test14"
 source-dir = "test"
-main       = "test14.f90"
+main       = "test14.f90"
+
+[[test]]
+name       = "test15"
+source-dir = "test"
+main       = "test15.f90"
+
+[[test]]
+name       = "test16"
+source-dir = "test"
+main       = "test16.f90"
+
+[[test]]
+name       = "test17"
+source-dir = "test"
+main       = "test17.f90"
diff --git a/gfortran.rsp b/gfortran.rsp
diff --git a/ifort.rsp b/ifort.rsp
diff --git a/ifx.rsp b/ifx.rsp
diff --git a/nvfortran.rsp b/nvfortran.rsp
diff --git a/src/forsolver.f90 b/src/forsolver.f90
@@ -57,20 +57,24 @@ pure function solver_lin(A, b, method) result(x)
       character(*), optional,                    intent(in) :: method
 
       ! outputs:
-      real(rk), dimension(size(A,2))                        :: x      ! solution matrix x
+      real(rk), dimension(max(1, size(A, 2))) :: x      ! solution matrix x
+
+      ! local variables
+      integer :: info ! result info
 
+      ! call solver
       if (present(method)) then
          select case (method)
           case ('gesv')
-            x = dgesv_rel(A, b)
+            call dgesv_rel(A, b, x, info)
           case ('gels')
-            x = dgels_rel(A, b)
+            call dgels_rel(A, b, x, info)
          end select
       else
          if (size(A,1)==size(A,2)) then
-            x = dgesv_rel(A, b)
+            call dgesv_rel(A, b, x, info)
          else
-            x = dgels_rel(A, b)
+            call dgels_rel(A, b, x, info)
          end if
       end if
 
@@ -80,68 +84,76 @@ end function solver_lin
 
    !===============================================================================
    !> author: Seyed Ali Ghasemi
-   pure function dgesv_rel(A, b) result(x)
+   pure subroutine dgesv_rel(A, b, x, info)
       ! inputs:
-      real(rk), dimension(:, :), contiguous, intent(in)  :: A    ! input matrix A
-      real(rk), dimension(:),    contiguous, intent(in)  :: b    ! right-hand side matrix b
+      real(rk), dimension(:, :), contiguous, intent(in) :: A    ! input matrix A
+      real(rk), dimension(:),    contiguous, intent(in) :: b    ! right-hand side matrix b
 
       ! outputs:
-      real(rk), dimension(size(A,2))                     :: x    ! solution matrix x
+      real(rk), dimension(max(1, size(A, 2))), intent(out) :: x    ! solution matrix x
+      integer,                                 intent(out) :: info ! result info
 
       ! local variables
-      integer                                  :: info ! result info
-      integer                                  :: n, lda, ldb
-      integer,  dimension(size(A, 2))          :: ipiv
-      real(rk), dimension(:,:), allocatable    :: a_copy
-      real(rk), dimension(:),   allocatable    :: b_copy
+      integer                               :: n, lda, ldb, nrhs
+      integer,  dimension(size(A, 2))       :: ipiv
+      real(rk), dimension(:,:), allocatable :: a_copy
+      real(rk), dimension(:,:), allocatable :: b_copy
 
       ! interface for dgels subroutine
       interface
          pure subroutine dgesv(fn, fnrhs, fa, flda, fipiv, fb, fldb, finfo)
             import rk
             integer,  intent(in)    :: fn, fnrhs, flda, fldb
-            real(rk), intent(inout) :: fa(flda,*), fb(fldb,*)
+            real(rk), intent(inout) :: fa(flda,fn), fb(fldb,fnrhs)
             integer,  intent(out)   :: finfo
             integer,  intent(out)   :: fipiv(fn)
          end subroutine dgesv
       end interface
 
-      b_copy = b
-      a_copy = a
-
       ! get dimensions
+      nrhs = 1 ! size(b, 2)
       n    = size(A, 2)
       lda  = max(1, n)
       ldb  = max(1, n)
 
+      ! copy the input matrices
+      a_copy = a
+      allocate(b_copy(ldb, nrhs))
+      b_copy(:, 1) = b
+
       ! call dgels subroutine
-      call dgesv(n, 1, a_copy, n, ipiv, b_copy, n, info)
+      call dgesv(n, nrhs, a_copy, lda, ipiv, b_copy, ldb, info)
 
       ! copy the solution matrix
-      x = b_copy
+      if (info == 0) then
+         x = b_copy(1:ldb, 1) ! nrhs = 1
+      else
+         error stop 'dgesv failed'
+      end if
 
-   end function dgesv_rel
+   end subroutine dgesv_rel
    !===============================================================================
 
 
    !===============================================================================
    !> author: Seyed Ali Ghasemi
    !> solves an overdetermined or underdetermined linear system using dgels.
-   pure function dgels_rel(A, b) result(x)
+   pure subroutine dgels_rel(A, b, x, info)
       ! inputs:
-      real(rk), dimension(:, :), contiguous, intent(in)  :: A    ! input matrix A
-      real(rk), dimension(:),    contiguous, intent(in)  :: b    ! right-hand side matrix b
+      real(rk), dimension(:, :), contiguous, intent(in) :: A    ! input matrix A
+      real(rk), dimension(:),    contiguous, intent(in) :: b    ! right-hand side matrix b
 
       ! outputs:
-      real(rk), dimension(size(A,2))                     :: x    ! solution matrix x
+      real(rk), dimension(max(1, size(A, 2))), intent(out) :: x    ! solution matrix x
+      integer,                                 intent(out) :: info ! result info
 
       ! local variables
-      integer                                  :: info ! result info
-      integer                                  :: m, n, lda, ldb, lwork
-      real(rk), allocatable                    :: work(:)
-      real(rk)                                 :: work1(1)
-      real(rk), dimension(:,:), allocatable    :: a_copy
-      real(rk), dimension(:),   allocatable    :: b_copy
+      character(1)                          :: trans
+      integer                               :: m, n, lda, ldb, lwork, nrhs
+      real(rk), allocatable                 :: work(:)
+      real(rk)                              :: work1(1)
+      real(rk), dimension(:,:), allocatable :: a_copy
+      real(rk), dimension(:,:), allocatable :: b_copy
 
       ! interface for dgels subroutine
       interface
@@ -155,31 +167,42 @@ pure subroutine dgels(ftrans, fm, fn, fnrhs, fa, flda, fb, fldb, fwork, flwork,
          end subroutine dgels
       end interface
 
-      a_copy = a
-      b_copy = b
+      ! 
+      trans = 'n'
 
       ! get dimensions
+      nrhs = 1 ! size(b, 2)
       m    = size(A, 1)
       n    = size(A, 2)
       lda  = max(1, m)
       ldb  = max(1, max(m, n))
 
+      ! copy the input matrices
+      a_copy = a
+      allocate(b_copy(ldb, nrhs))
+      b_copy(:, 1) = b
+
       ! calculate the optimal size of the work array
-      call dgels('n', m, n, 1, a_copy, lda, b_copy, ldb, work1, -1, info)
+      call dgels(trans, m, n, nrhs, a_copy, lda, b_copy, ldb, work1, -1, info)
 
       ! allocate work array
       lwork = nint(work1(1))
       allocate(work(lwork))
 
       ! call dgels subroutine
-      call dgels('n', m, n, 1, a_copy, lda, b_copy, ldb, work, lwork, info)
+      call dgels(trans, m, n, nrhs, a_copy, lda, b_copy, ldb, work, lwork, info)
 
       ! copy the solution matrix
-      x = b_copy
+      if (info == 0) then
+         if (trans == 'n') x = b_copy(1:n, 1) ! nrhs = 1
+         if (trans == 't') x = b_copy(1:m, 1) ! nrhs = 1
+      else
+         error stop 'dgels failed'
+      end if
 
       ! deallocate workspace
       deallocate(work)
-   end function dgels_rel
+   end subroutine dgels_rel
    !===============================================================================
 
 

diff --git a/test/test1.f90 b/test/test1.f90
@@ -1,7 +1,7 @@
 program test1
 
-   use :: kinds
-   use :: forsolver, only : solve
+   use kinds
+   use forsolver, only: solve
 
    implicit none
 

diff --git a/test/test10.f90 b/test/test10.f90
@@ -2,7 +2,7 @@ program test10
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/test/test11.f90 b/test/test11.f90
@@ -2,7 +2,7 @@ program test11
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/test/test12.f90 b/test/test12.f90
@@ -2,7 +2,7 @@ program test12
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/test/test13.f90 b/test/test13.f90
@@ -2,7 +2,7 @@ program test13
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/test/test14.f90 b/test/test14.f90
@@ -2,7 +2,7 @@ program test14
 
    use kinds
    use functions_module
-   use forsolver, only : nlsolver
+   use forsolver, only: nlsolver
 
    implicit none
 

diff --git a/test/test15.f90 b/test/test15.f90
@@ -0,0 +1,39 @@
+program test15
+
+   use kinds
+   use forsolver, only: solve
+
+   implicit none
+
+   real(rk), dimension(6,4) :: A
+   real(rk), dimension(6)   :: b
+   real(rk), dimension(4)   :: x, x_expected
+
+   ! set the matrix A
+   A(1,:) = [ 1.44_rk, -7.84_rk, -4.39_rk,  4.53_rk]
+   A(2,:) = [-9.96_rk, -0.28_rk, -3.24_rk,  3.83_rk]
+   A(3,:) = [-7.55_rk,  3.24_rk,  6.27_rk, -6.64_rk]
+   A(4,:) = [ 8.34_rk,  8.09_rk,  5.28_rk,  2.06_rk]
+   A(5,:) = [ 7.08_rk,  2.52_rk,  0.74_rk, -2.47_rk]
+   A(6,:) = [-5.45_rk, -5.70_rk, -1.19_rk,  4.70_rk]
+
+   ! set the right-hand side
+   b = [8.58_rk, 8.26_rk, 8.48_rk,-5.28_rk, 5.72_rk, 8.93_rk]
+
+   ! solve the system
+   X = solve(A, b) ! X = solve(A, b, method='gels')
+
+   ! expected result
+   x_expected(1) = -0.45063713541953410_rk
+   x_expected(2) = -0.84915021471399577_rk
+   x_expected(3) =  0.70661216240939595_rk
+   x_expected(4) =  0.12888575215577794_rk
+
+   ! check the result
+   if (norm2(X - x_expected) < 1e-6_rk) then
+      print'(a)', 'test15: passed'
+   else
+      print'(a)', 'test15: failed'
+   end if
+
+end program test15