program ftest
use qmxf03_module
implicit none
integer, parameter :: DP=KIND(1.0d0)
integer, parameter :: SP=KIND(1.0e0)
integer(4), parameter :: NDAT=3*4*16*16*16
integer(4), parameter :: NTRY=20
integer :: myrank,nndat
integer(4) :: i,j,iout,itry
character(len=256) :: fname
real(DP) :: et0,et1
integer(4) :: idata(NDAT)
integer(4), allocatable, target :: irecv(:,:)
type(C_PTR) :: recv_ptr
real(DP) :: ddata(NDAT),dtmp,dtmp2
real(SP) :: sdata(NDAT),stmp,stmp2
complex(DP) :: zdata(NDAT),szdata(NDAT),rzdata(NDAT),ztmp,ztmp2
complex(SP) :: cdata(NDAT),scdata(NDAT),rcdata(NDAT),ctmp,ctmp2
iout = 33
call qmx_init()
allocate(irecv(NDAT,0:qmx_get_rank_size()-1))
myrank = qmx_get_local_rank()
write(fname,'("fqmx.",I2.2)')myrank
open(iout,file=TRIM(fname),form='formatted',status='unknown')
write(iout,'("RANK=",I3)')myrank
call qmx_clock(et0)
do i=1,NDAT
call qmx_barrier()
enddo
call qmx_clock(et1)
et1 = et1 - et0
write(iout,'("qmx_barrier(",I2,"): ",I10," [called] ",ES14.6," [sec] ",F14.6," [usec/call]")') myrank,NDAT,et1,et1*1000*1000/NDAT
!==============================================
! SUMCAST
!==============================================
do itry=1,NTRY
nndat = MIN(2**(itry-1)+1,NDAT)
write(iout,'("TRY=",I9," NDAT=",I10)')itry,nndat
idata(:) = 0
sdata(:) = 0.0_SP
ddata(:) = 0.0_DP
cdata(:) = cmplx(0.0_SP,0.0_SP,kind=SP)
zdata(:) = cmplx(0.0_DP,0.0_DP,kind=DP)
do i=1,nndat
idata(i) = qmx_get_local_rank()*i
enddo
call qmx_sumcast_int(nndat,idata)
do i=1,nndat
if (ABS(idata(i)-6*i) /= 0) then
write(iout,'("qmx_sumcast_int(",I2,"): data[",I10,"] = ",I10,2X,I10)') myrank,i,idata(i),6*i
endif
enddo
do i=1,nndat
sdata(i) = REAL(qmx_get_local_rank()*i,kind=DP)*sqrt(2.0_SP)
enddo
call qmx_sumcast_float(nndat,sdata)
do i=1,nndat
stmp = REAL(i,kind=DP)*sqrt(2.0_SP)*6
stmp2 = sdata(i) - stmp
if (ABS(stmp2/stmp) > 100*EPSILON(1.0_SP)) then
write(iout,'("qmx_sumcast_float(",I2,"): data[",I10,"] = ",ES24.15,2X,ES24.15)')myrank,i,sdata(i),stmp
endif
enddo
do i=1,nndat
ddata(i) = REAL(qmx_get_local_rank()*i,kind=DP)*sqrt(2.0_DP)
enddo
call qmx_sumcast_double(nndat,ddata)
do i=1,nndat
dtmp = REAL(i,kind=DP)*sqrt(2.0_DP)*6
dtmp2 = ddata(i) - dtmp
if (ABS(dtmp2/dtmp) > 100*EPSILON(1.0_DP)) then
write(iout,'("qmx_sumcast_double(",I2,"): data[",I10,"] = ",ES24.15,2X,ES24.15)')myrank,i,ddata(i),dtmp
endif
enddo
do i=1,nndat
cdata(i) = CMPLX(REAL(qmx_get_local_rank()*i,kind=SP),qmx_get_local_rank()*REAL(i,kind=SP)**2,kind=SP)
enddo
call qmx_sumcast_cmplx(nndat,cdata)
do i=1,nndat
ctmp = CMPLX(REAL(i,kind=SP),REAL(i,kind=SP)**2,kind=SP)*6
ctmp2 = cdata(i) - ctmp
if (ABS(ctmp2/ctmp) > 100*EPSILON(1.0_SP)) then
write(iout,'("qmx_sumcast_cmplx(",I2,"): data[",I10,"] = ",2ES24.15,2X,2ES24.15)')myrank,i,cdata(i),ctmp
endif
enddo
do i=1,nndat
zdata(i) = CMPLX(REAL(qmx_get_local_rank()*i,kind=DP),qmx_get_local_rank()*REAL(i,kind=DP)**2,kind=DP)
enddo
call qmx_sumcast_dcmplx(nndat,zdata)
do i=1,nndat
ztmp = CMPLX(REAL(i,kind=DP),REAL(i,kind=DP)**2,kind=DP)*6
ztmp2 = zdata(i) - ztmp
if (ABS(ztmp2/ztmp) > 100*EPSILON(1.0_DP)) then
write(iout,'("qmx_sumcast_dcmplx(",I2,"): data[",I10,"] = ",2ES24.15,2X,2ES24.15)')myrank,i,zdata(i),ztmp
endif
enddo
enddo
!==============================================
! BCAST
!==============================================
do i=1,NDAT
sdata(i) = REAL(qmx_get_local_rank()*i,kind=SP)
enddo
call qmx_bcast_float(NDAT,sdata,2)
do i=1,NDAT
write(iout,'("qmx_bcast_float(",I2,"): data[",I10,"] = ",ES14.6)')myrank,i,sdata(i)/i
enddo
!==============================================
! all gather
!==============================================
do i=1,NDAT
idata(i) = qmx_get_local_rank()*i
enddo
call qmx_allgather_int(NDAT,idata,irecv)
do j=0,qmx_get_rank_size()-1
do i=1,NDAT
write(iout,'("qmx_allgather_int(",I2,"): data[",I10,",",I2,"] = ",I10)')myrank,i,j,irecv(i,j)/i
enddo
enddo
!==============================================
! SENDRECV
!==============================================
do i=1,NDAT
dtmp = REAL(qmx_get_local_rank()+1,kind=SP)
scdata(i) = cmplx(dtmp,1.0_SP/dtmp,kind=SP)*i
rcdata(i) = (0.0_SP,0.0_SP)
enddo
call qmx_clock(et0)
call qmx_sendrecv_cmplx(NDAT,scdata,qmx_get_2d_next_x_rank(),rcdata,qmx_get_2d_prev_x_rank())
call qmx_clock(et1)
et1 = et1 - et0
write(iout,'("qmx_sendrecv_cmplx(",I2,") ETIME=",F14.6," [sec] MBYTE=",F14.6," MB/s=",F14.6)') myrank,et1,real(8*NDAT)/1000/1000,real(8*NDAT)/1000/1000/et1
do i=1,NDAT
write(iout,'("qmx_sendrecv_cmplx(",I2,"): data[",I10,"] = ",2ES14.6)')myrank,i,rcdata(i)
enddo
do i=1,NDAT
dtmp = REAL(qmx_get_local_rank()+1,kind=DP)
szdata(i) = cmplx(dtmp,1.0_DP/dtmp,kind=DP)*i
rzdata(i) = (0.0_DP,0.0_DP)
enddo
call qmx_clock(et0)
call qmx_sendrecv_dcmplx(NDAT,szdata,qmx_get_2d_next_y_rank(),rzdata,qmx_get_2d_prev_y_rank())
call qmx_clock(et1)
et1 = et1 - et0
write(iout,'("qmx_sendrecv_dcmplx(",I2,") ETIME=",F14.6," [sec] MBYTE=",F14.6," MB/s=",F14.6)') myrank,et1,real(16*NDAT)/1000/1000,real(16*NDAT)/1000/1000/et1
do i=1,NDAT
write(iout,'("qmx_sendrecv_dcmplx(",I2,"): data[",I10,"] = ",4ES24.16)')myrank,i,rzdata(i)
enddo
call qmx_finalize()
close(iout)
stop
end program