[Mitgcm-support] memory & impldiff.F
mitgcm-support at dev.mitgcm.org
mitgcm-support at dev.mitgcm.org
Wed Jul 9 15:57:09 EDT 2003
Chris,
I recoded impldiff.F, which now looks identical in structure
to the old implicit_tr.F (I've appended both).
The result didn't look that good at first,
but after adding the following store directives to the inner loop
in ecco_main (related to CD scheme)
CADJ STORE cg2d_xnm1 = history, key = i
CADJ STORE unm1 = history, key = i
CADJ STORE uveld = history, key = i
CADJ STORE vnm1 = history, key = i
CADJ STORE vveld = history, key = i
I am able to get a clean TAMC protocol.
That means, we got rid of the following ones ("->")
(and did not incure new ones of comparable size instead)
->16257024 cadbev_
->16257024 cadbew_
->16257024 cadckm2_
->17805312 cadgam_
->17805312 cadgxnm1_
->17805312 cadgxnm3_
->32514048 cadgxnm2_
34126848 cadkappars_
34126848 cadkappart_
which still leaves us with the two kappa-monsters.
Total size is now 169828003
compared to prev. 309954975
I am still convinced that some of the store directives
in the inner loop of ecco_main will not be neccessary
with the new implementation of the scaling (c31+),
and may be further reduced with changed time stepping.
That's at least a start.
Patrick
___________________________________________________________________
Patrick Heimbach Phon: +1 / 617 / 253 - 5259
Massachusetts Institute of Technology Facs: +1 / 617 / 253 - 4464
EAPS, Room 54-1518
77 Massachusetts Avenue mailto:heimbach at mit.edu
Cambridge MA 02139, U.S.A. http://www.mit.edu/~heimbach/
-------------- next part --------------
C $Header: /u/gcmpack/models/MITgcmUV/model/src/impldiff.F,v 1.10 2000/06/12 13:38:50 heimbach Exp $
#include "CPP_OPTIONS.h"
C /==========================================================\
C | S/R IMPLDIFF |
C | o Solve implicit diffusion equation for vertical |
C | diffusivity. |
C \==========================================================/
SUBROUTINE IMPLDIFF_NEW( bi, bj, iMin, iMax, jMin, jMax,
I deltaTX,KappaRX,recip_hFac,
U gXNm1,
I myThid )
IMPLICIT NONE
C == Global data ==
#include "SIZE.h"
#include "DYNVARS.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#ifdef ALLOW_AUTODIFF_TAMC
#include "tamc_keys.h"
#endif
C == Routine Arguments ==
INTEGER bi,bj,iMin,iMax,jMin,jMax
_RL deltaTX
_RL KappaRX(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
_RS recip_hFac(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
_RL gXnm1(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
_RL gYnm1(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
INTEGER myThid
C == Local variables ==
INTEGER i,j,k
_RL a(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
_RL b(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
_RL c(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
_RL bet(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
_RL gam(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
#ifdef ALLOW_AUTODIFF_TAMC
INTEGER kkey
#endif
C-- Initialise
C-- Old aLower
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
a(i,j,1) = 0. _d 0
ENDDO
ENDDO
DO k=2,Nr
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
a(i,j,k) = -deltaTX*recip_hFac(i,j,k,bi,bj)*recip_drF(k)
& *KappaRX(i,j, k )*recip_drC( k )
ENDDO
ENDDO
ENDDO
C-- Old aUpper
DO k=1,Nr-1
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
c(i,j,k) = -deltaTX*recip_hFac(i,j,k,bi,bj)*recip_drF(k)
& *KappaRX(i,j,k+1)*recip_drC(k+1)
ENDDO
ENDDO
ENDDO
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
c(i,j,Nr) = 0. _d 0
ENDDO
ENDDO
C-- Old aCenter
DO k=1,Nr
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
b(i,j,k) = 1. _d 0 - c(i,j,k) - a(i,j,k)
ENDDO
ENDDO
ENDDO
C-- Old and new gam, bet are the same
DO k=1,Nr
DO j=1-Oly,sNy+Oly
DO i=1-Olx,sNx+Olx
bet(i,j,k) = 0. _d 0
gam(i,j,k) = 0. _d 0
ENDDO
ENDDO
ENDDO
C-- Only need do anything if Nr>1
IF (Nr.GT.1) THEN
k = 1
C-- Beginning of forward sweep (top level)
DO j=jMin,jMax
DO i=iMin,iMax
IF (b(i,j,1).NE.0.) bet(i,j,1) = 1. _d 0 / b(i,j,1)
ENDDO
ENDDO
ENDIF
C-- Middle of forward sweep
IF (Nr.GT.2) THEN
CADJ loop = sequential
DO k=2,Nr
#ifdef ALLOW_AUTODIFF_TAMC
kkey = (idkey-1)*(Nr-2) + k-1
#endif
DO j=jMin,jMax
DO i=iMin,iMax
gam(i,j,k) = c(i,j,k-1)*bet(i,j,k-1)
IF ( ( b(i,j,k) - a(i,j,k)*gam(i,j,k) ) .NE. 0.)
& bet(i,j,k) = 1. _d 0 / ( b(i,j,k) - a(i,j,k)*gam(i,j,k) )
ENDDO
ENDDO
ENDDO
ENDIF
DO j=jMin,jMax
DO i=iMin,iMax
gYNm1(i,j,1,bi,bj) = gXNm1(i,j,1,bi,bj)*bet(i,j,1)
ENDDO
ENDDO
DO k=2,Nr
DO j=jMin,jMax
DO i=iMin,iMax
gYnm1(i,j,k,bi,bj) = bet(i,j,k)*
& (gXnm1(i,j,k,bi,bj) - a(i,j,k)*gYnm1(i,j,k-1,bi,bj))
ENDDO
ENDDO
ENDDO
C-- Backward sweep
CADJ loop = sequential
DO k=Nr-1,1,-1
DO j=jMin,jMax
DO i=iMin,iMax
gYnm1(i,j,k,bi,bj)=gYnm1(i,j,k,bi,bj)
& -gam(i,j,k+1)*gYnm1(i,j,k+1,bi,bj)
ENDDO
ENDDO
ENDDO
DO k=1,Nr
DO j=jMin,jMax
DO i=iMin,iMax
gXnm1(i,j,k,bi,bj)=gYnm1(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
RETURN
END
-------------- next part --------------
C/-------------------------------------------------------------------\
C||| Procedure: IMPLCIT_DZZ_TRACER |||
C|||===============================================================|||
C||| Function: Does implcit vertical diffusion for tracer |||
C||| points. |||
C||| Comments: IMPLICIT_DZZ_TRACER takes an input field |||
C||| T = old and updates to hold T = new. So that |||
C||| new - dt.DZ{k.DZ{new}} = old (1) |||
C||| subject to a zero gradient bc at the upper |||
C||| and lower boundaries. |||
C||| (1) is viewed as a tridiagonal matrix equn |||
C||| A.new = old and is solved by LU decomp. |||
C||| followed by tridiagonal solve. |||
C\-------------------------------------------------------------------/
SUBROUTINE IMPLICIT_DZZ_TRACER (
U Tr, a2MomZ_Tr )
IMPLICIT NONE
C /--------------------------------------------------------------\
C | Global data |
C \--------------------------------------------------------------/
#include "SIZE.h"
#include "OPERATORS.h"
#include "GRID.h"
#include "PARAMS.h"
#include "MASKS.h"
C /--------------------------------------------------------------\
C | Routine arguments |
C |==============================================================|
C | Tr - Tracer field |
C | a2MomZ_Tr- XYZ map of viscosity at middepth between Tr points|
C | ( Pa**2/s ). Note: a2MomZ_Tr is always > 0. |
C | OperatingMode - Flag indicating different modes of operation |
C | 0: LU decomposition, no solving. |
C | 1: LU decomposition, and solve. |
C | 2: No LU decomposition, solve. |
C \--------------------------------------------------------------/
TYPEtracerFld( Tr )
TYPEtracerFld( a2MomZ_Tr)
C /--------------------------------------------------------------\
C | Local variables |
C |==============================================================|
C | Trmixed - Tracer field after mixing. |
C | K - Loop counters. |
C | aUpper - A operator element linking i,j,k and i,j,k-1. |
C | aCenter - A operator element on i,j,k. |
C | aLower - A operator element linking i,j,k and i,j,k+1. |
C | aNorm - Normalisation factor to improve numerical |
C | conditioning of A. |
C | gamma - Normalisation factor to improve numerical |
C | conditioning of RHS. |
C \--------------------------------------------------------------/
TYPEtracerFld( Trmixed )
TYPEtracerFld( aUpper )
TYPEtracerFld( aLower )
TYPEtracerFld( aCenter)
REAL aNorm
REAL gamma, factor
INTEGER K, num(3)
C
TYPEtracerFld( bet )
TYPEtracerFld( gam )
C Solve: aCenter.trMixed[K] - aUpper.trMixed[K-1] - aLower.trMixed[K+1] = Tr[K]
C using LU decomposition followed by tridiagonal solve.
C i.e Ax = b -> LUx = b
C solve Ly = b
C solve Ux = y
C LU factorisation
C Note: LU factorisation is only a function of geometry and a2MomZ_Tr.
C One can save factorisation between time steps if a2MomZ_Tr
C remains constant.
C aUpper refers to upper diagonal in Crouts method.
C aLower refers to lower diagonal in Crouts method.
C aCenter is stored as 1/(main diagonal) to avoid divides in the LU solve.
aLower = -pZFaceA*delT*pDdzOp*a2MomZ_Tr*pMask*_eoshift(pMask,_kxyz,-1)
aUpper = _eoshift(aLower,_kxyz,+1)
aLower = aLower*rPvol
aUpper = aUpper*rPvol
aCenter = 1.D0 - aLower - aUpper
c factor =1.e-3
c aLower = factor*(-pZFaceA*rPVol*delT*pDdzOp*a2MomZ_Tr*pMask*_eoshift(pMask,_kxyz,-1))
c aUpper = _eoshift(aLower,_kxyz,+1)
c aCenter = 1. - aLower - aUpper
#ifdef NORM
aNorm = MAXVAL(ABS(aCenter))
aNorm = MAX(aNorm, MAXVAL(ABS(aLower )) )
aNorm = MAX(aNorm, MAXVAL(ABS(aUpper )) )
IF ( aNorm .NE. 0. ) THEN
aCenter = aCenter/aNorm
aUpper = aUpper /aNorm
aLower = aLower /aNorm
ENDIF
gamma = MAXVAL(ABS(Tr))
IF ( gamma .NE. 0. ) THEN
Tr = Tr/gamma
ENDIF
#endif
bet(_i3(:,:,1)) = 1./aCenter(_i3(:,:,1))
gam(_i3(:,:,1)) = 0.
do k=2,nz
gam(_i3(:,:,k)) = aUpper(_i3(:,:,k-1))*bet(_i3(:,:,k-1))
bet(_i3(:,:,k)) = 1./(aCenter(_i3(:,:,k))
& - aLower(_i3(:,:,k))*gam(_i3(:,:,k)))
enddo
c gamma=minval(bet)
c num=minloc(bet)
c print *, 'minval, minloc' , gamma, num
Trmixed(_i3(:,:,1)) = tr(_i3(:,:,1))*bet(_i3(:,:,1))
do k=2,nz
Trmixed(_i3(:,:,k)) = (tr(_i3(:,:,k))
& -aLower(_i3(:,:,k))*Trmixed(_i3(:,:,k-1)))*bet(_i3(:,:,k))
enddo
CADJ loop = sequential
do k = nz-1,1,-1
Trmixed(_i3(:,:,k)) =
& Trmixed(_i3(:,:,k)) - gam(_i3(:,:,k+1))*Trmixed(_i3(:,:,k+1))
enddo
c Denormalise
#ifdef NORM
IF ( aNorm .NE. 0. ) THEN
Trmixed = Trmixed/aNorm
ENDIF
IF ( gamma .NE. 0. ) THEN
Trmixed = Trmixed*gamma
ENDIF
#endif
Tr = Trmixed
RETURN
END
More information about the MITgcm-support
mailing list