2D FFT routines
c**************************************************************************
c Rearranging the data received in the proper order to be
c transformed or the data transformed in the proper order to be sent
subroutine rearrange (b, c, dir_procs, dir, blocksize, loc_trans)
implicit none
double complex b(*), c(*)
integer blocksize, dir_procs, loc_trans
character dir
integer i, j, bindex, cindex, trans_length, block_length
trans_length = dir_procs * blocksize
block_length = loc_trans * blocksize
if (dir .eq. 'F') then
do i = 1, dir_procs
do j = 1, loc_trans
bindex = (i - 1) * block_length + (j - 1) * blocksize + 1
cindex = (j - 1) * trans_length + (i - 1) * blocksize + 1
call zcopy (blocksize, b(bindex), 1, c(cindex), 1)
enddo
enddo
else
do i = 1, dir_procs
do j = 1, loc_trans
bindex = (i - 1) * block_length + (j - 1) * blocksize + 1
cindex = (j - 1) * trans_length + (i - 1) * blocksize + 1
call zcopy (blocksize, c(cindex), 1, b(bindex), 1)
enddo
enddo
endif
return
end
c**************************************************************************
c block packing for sends (before the FFT) in row-based algorithms
subroutine bpack (a, b, mp, nq, q, mpq)
implicit none
integer mp, nq, q, mpq
double complex a(mp,nq), b(*)
integer i, j
do i = 1, q
do j = 1, nq
call zcopy (mpq, a((i - 1) * mpq + 1, j), 1,
$ b((i - 1) * mpq * nq + (j - 1) * mpq + 1), 1)
enddo
enddo
return
end
c**************************************************************************
c block unpacking for receives (after the FFT) in row-based
c algorithms
subroutine bunpack (b, a, mp, nq, q, mpq)
implicit none
integer mp, nq, q, mpq
double complex a(mp,nq), b(*)
integer i, j
do i = 1, q
do j = 1, nq
call zcopy (mpq, b((i - 1) * mpq * nq + (j - 1) * mpq + 1),
$ 1, a((i - 1) * mpq + 1, j), 1)
enddo
enddo
return
end
c**************************************************************************
c packing for block-based algorithms
subroutine rpack (a, b, mp, nq, unit_stride)
implicit none
integer mp, nq
double complex a(mp,nq), b(*)
character unit_stride
integer i, j
if (unit_stride .eq. 'L') then
do j = 1, nq
do i = 1, mp
b((i - 1) * nq + j) = a(i, j)
enddo
enddo
else
do i = 1, mp
do j = 1, nq
b((i - 1) * nq + j) = a(i, j)
enddo
enddo
endif
return
end
c**************************************************************************
c unpacking for block-based algorithms
subroutine runpack (b, a, mp, nq, unit_stride)
implicit none
integer mp, nq
double complex a(mp,nq), b(*)
character unit_stride
integer i, j
if (unit_stride .eq. 'L') then
do i = 1, mp
do j = 1, nq
a(i, j) = b((i - 1) * nq + j)
enddo
enddo
else
do j = 1, nq
do i = 1, mp
a(i, j) = b((i - 1) * nq + j)
enddo
enddo
endif
return
end
c**************************************************************************
c Error checking routine
c call as check_error (MPI_COMM_WORLD, ierror, 'MPI function name')
c useless as MPICH handles errors internally
subroutine check_error (comm, ierror, funct)
implicit none
include 'mpif.h'
integer comm, ierror, lierror
character*20 funct
if (ierror .ne. 0) then
write (6,*) 'Error number ', ierror, ' in ', funct
write (6,*) 'Exiting'
call MPI_ABORT (comm, ierror, lierror)
stop
endif
return
end
c**************************************************************************
c Array initialization routine
subroutine initl (a, lda, mp, nq, value, myproc)
implicit none
c arguments
double complex a(*), value
integer lda, mp, nq, myproc
c local
integer i, j
do j = 1, nq
do i= 1, mp
a((j - 1) * lda + i) = value
enddo
enddo
if (myproc .eq. 0) a(1) = (1.0d0, 0.0d0)
return
end
c**************************************************************************
c Checking of results
subroutine fft_res (a, lda, mp, nq, value, myproc)
implicit none
c arguments
double complex a(*), value
integer lda, mp, nq, myproc
c local
integer i, j
do j = 1, nq
do i = 1, mp
if (a((j - 1) * lda + i) .ne. value) write (6,*)
$ 'In processor ',myproc, ' element(', i, ',', j, ') = ',
$ a((j - 1) * lda + i)
enddo
enddo
return
end
c**************************************************************************
c Initialization for the block-based algorithms
subroutine init_essl_2dfft (b, m, n, mpq, nqp, auxr, auxc, aux2,
$ naux)
implicit none
double complex b(*)
double precision auxr(*), auxc(*), aux2(*)
integer m, n, mpq, nqp, naux
call DCFT(1, b, 1, m, b, 1, m, m, nqp, 1, 1.0d0, auxc, naux, aux2,
$ naux);
call DCFT(1, b, 1, n, b, 1, n, n, mpq, 1, 1.0d0, auxr, naux, aux2,
$ naux);
return
end
c**************************************************************************
c Initialization for the row-based algorithms
subroutine init_row_essl_2dfft (b, m, n, mpq, nqp, auxr, auxc,
$ aux2, naux, transpose)
implicit none
double complex b(*)
double precision auxr(*), auxc(*), aux2(*)
integer m, n, mpq, nqp, naux
character transpose
call DCFT(1, b, 1, m, b, 1, m, m, nqp, 1, 1.0d0, auxc, naux, aux2,
$ naux);
if (transpose .eq. 'N') then
call DCFT(1, b, mpq, 1, b, mpq, 1, n, mpq, 1, 1.0d0, auxr,
$ naux, aux2, naux);
else
call DCFT(1, b, mpq, 1, b, 1, n, n, mpq, 1, 1.0d0, auxr,
$ naux, aux2, naux);
endif
return
end
c**************************************************************************
c block-based FFT routine
subroutine essl_2dfft (a, b, c, m, n, mp, nq, mpq, nqp, auxc,
$ auxr, aux2, naux, comm_row, comm_col, p, q, unit_stride)
implicit none
include 'mpif.h'
integer m, n, mp, nq, mpq, nqp, naux
integer comm_row, comm_col, p, q
double complex a(mp,nq), b(*), c(*)
double precision auxr(*), auxc(*), aux2(*)
character unit_stride
integer ierror, exsize
integer i, j, k, myproc, inc
if (p .ne. 1) then
exsize = mp * nqp
call MPI_ALLTOALL(a, exsize, MPI_DOUBLE_COMPLEX, b, exsize,
$ MPI_DOUBLE_COMPLEX, comm_col, ierror)
call rearrange (b, c, p, 'Forward', mp, nqp)
call DCFT(0, c, 1, m, c, 1, m, m, nqp, 1, 1.0d0, auxc, naux,
$ aux2,naux);
call rearrange (b, c, p, 'Backward', mp, nqp)
call MPI_ALLTOALL(b, exsize, MPI_DOUBLE_COMPLEX, a, exsize,
$ MPI_DOUBLE_COMPLEX, comm_col, ierror)
else
call DCFT(0, a, 1, m, a, 1, m, m, nqp, 1, 1.0d0, auxc, naux,
$ aux2,naux);
endif
c call MPI_BARRIER (MPI_COMM_WORLD, ierror)
exsize = nq * mpq
call rpack (a, b, mp, nq, unit_stride)
call MPI_ALLTOALL(b, exsize, MPI_DOUBLE_COMPLEX, c, exsize,
$ MPI_DOUBLE_COMPLEX, comm_row, ierror)
call rearrange (c, b, q, 'Forward', nq, mpq)
call DCFT(0, b, 1, n, b, 1, n, n, mpq, 1, 1.0d0, auxr, naux, aux2,
$ naux);
call rearrange (c, b, q, 'Backward', nq, mpq)
call MPI_ALLTOALL(c, exsize, MPI_DOUBLE_COMPLEX, b, exsize,
$ MPI_DOUBLE_COMPLEX, comm_row, ierror)
call runpack (b, a, mp, nq, unit_stride)
c call MPI_BARRIER (MPI_COMM_WORLD, ierror)
return
end
c**************************************************************************
c row-based FFT routine
subroutine row_essl_2dfft (a, b, c, m, n, mp, nq, mpq, nqp, auxc,
$ auxr, aux2, naux, comm_row, comm_col, p, q, y_orient)
implicit none
include 'mpif.h'
integer m, n, mp, nq, mpq, nqp, naux
integer comm_row, comm_col, p, q
double complex a(mp,nq), b(*), c(*)
double precision auxr(*), auxc(*), aux2(*)
character y_orient
integer ierror, exsize
integer i, j, k, myproc, inc
call DCFT(0, a, 1, m, a, 1, m, m, nqp, 1, 1.0d0, auxc, naux, aux2,
$ naux);
exsize = nq * mpq
call bpack (a, b, mp, nq, q, mpq)
call MPI_ALLTOALL(b, exsize, MPI_DOUBLE_COMPLEX, c, exsize,
$ MPI_DOUBLE_COMPLEX, comm_row, ierror)
if (y_orient .eq. 'N') then
call DCFT(0, c, mpq, 1, c, mpq, 1, n, mpq, 1, 1.0d0, auxr,
$ naux, aux2, naux);
call MPI_ALLTOALL(c, exsize, MPI_DOUBLE_COMPLEX, b, exsize,
$ MPI_DOUBLE_COMPLEX, comm_row, ierror)
call bunpack (b, a, mp, nq, q, mpq)
else
call DCFT(0, c, mpq, 1, b, 1, n, n, mpq, 1, 1.0d0, auxr,
$ naux, aux2, naux);
endif
c call MPI_BARRIER (MPI_COMM_WORLD, ierror)
return
end