[Mitgcm-support] memory & impldiff.F

Wed Jul 9 15:57:09 EDT 2003

Chris,

I recoded impldiff.F, which now looks identical in structure
to the old implicit_tr.F (I've appended both).

The result didn't look that good at first,
but after adding the following store directives to the inner loop
in ecco_main (related to CD scheme)
CADJ STORE cg2d_xnm1 = history, key = i
CADJ STORE unm1      = history, key = i
CADJ STORE uveld     = history, key = i
CADJ STORE vnm1      = history, key = i
CADJ STORE vveld     = history, key = i
I am able to get a clean TAMC protocol.

That means, we got rid of the following ones ("->")
(and did not incure new ones of comparable size instead)
->16257024   cadbev_
->16257024   cadbew_
->16257024   cadckm2_
->17805312   cadgam_
->17805312   cadgxnm1_
->17805312   cadgxnm3_
->32514048   cadgxnm2_
34126848   cadkappars_
34126848   cadkappart_
which still leaves us with the two kappa-monsters.

Total size is now 169828003
compared to prev. 309954975

I am still convinced that some of the store directives
in the inner loop of ecco_main will not be neccessary
with the new implementation of the scaling (c31+),
and may be further reduced with changed time stepping.

That's at least a start.

Patrick
___________________________________________________________________
Patrick Heimbach                        Phon: +1 / 617 / 253 - 5259
Massachusetts Institute of Technology   Facs: +1 / 617 / 253 - 4464 
EAPS, Room 54-1518
77 Massachusetts Avenue                     mailto:heimbach at mit.edu
Cambridge MA 02139, U.S.A.            http://www.mit.edu/~heimbach/
-------------- next part --------------
C $Header: /u/gcmpack/models/MITgcmUV/model/src/impldiff.F,v 1.10 2000/06/12 13:38:50 heimbach Exp $

#include "CPP_OPTIONS.h"

C     /==========================================================\
C     | S/R IMPLDIFF                                             |
C     | o Solve implicit diffusion equation for vertical         |
C     |   diffusivity.                                           |
C     \==========================================================/
      SUBROUTINE IMPLDIFF_NEW( bi, bj, iMin, iMax, jMin, jMax,
     I                     deltaTX,KappaRX,recip_hFac,
     U                     gXNm1,
     I                     myThid )
      IMPLICIT NONE
C     == Global data ==
#include "SIZE.h"
#include "DYNVARS.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"

#ifdef ALLOW_AUTODIFF_TAMC
#include "tamc_keys.h"
#endif

C     == Routine Arguments ==
      INTEGER bi,bj,iMin,iMax,jMin,jMax
      _RL deltaTX
      _RL KappaRX(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
      _RS recip_hFac(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
      _RL gXnm1(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
      _RL gYnm1(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr,nSx,nSy)
      INTEGER myThid

C     == Local variables ==
      INTEGER i,j,k
      _RL a(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
      _RL b(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
      _RL c(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
      _RL bet(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)
      _RL gam(1-Olx:sNx+Olx,1-Oly:sNy+Oly,Nr)

#ifdef ALLOW_AUTODIFF_TAMC
      INTEGER kkey
#endif

C--   Initialise

C--   Old aLower
      DO j=1-Oly,sNy+Oly
       DO i=1-Olx,sNx+Olx
         a(i,j,1) = 0. _d 0 
       ENDDO
      ENDDO
      DO k=2,Nr
       DO j=1-Oly,sNy+Oly
        DO i=1-Olx,sNx+Olx
          a(i,j,k) = -deltaTX*recip_hFac(i,j,k,bi,bj)*recip_drF(k)
     &               *KappaRX(i,j, k )*recip_drC( k )
        ENDDO
       ENDDO
      ENDDO

C--   Old aUpper
      DO k=1,Nr-1
       DO j=1-Oly,sNy+Oly
        DO i=1-Olx,sNx+Olx
          c(i,j,k) = -deltaTX*recip_hFac(i,j,k,bi,bj)*recip_drF(k)
     &               *KappaRX(i,j,k+1)*recip_drC(k+1)
        ENDDO
       ENDDO
      ENDDO
      DO j=1-Oly,sNy+Oly
       DO i=1-Olx,sNx+Olx
         c(i,j,Nr) = 0. _d 0
       ENDDO
      ENDDO

C--   Old aCenter
      DO k=1,Nr
       DO j=1-Oly,sNy+Oly
        DO i=1-Olx,sNx+Olx
          b(i,j,k) = 1. _d 0 - c(i,j,k) - a(i,j,k)
        ENDDO
       ENDDO
      ENDDO

C--   Old and new gam, bet are the same
      DO k=1,Nr
       DO j=1-Oly,sNy+Oly
        DO i=1-Olx,sNx+Olx
          bet(i,j,k) = 0. _d 0
          gam(i,j,k) = 0. _d 0
        ENDDO
       ENDDO
      ENDDO

C--   Only need do anything if Nr>1
      IF (Nr.GT.1) THEN

       k = 1
C--    Beginning of forward sweep (top level)
       DO j=jMin,jMax
        DO i=iMin,iMax
         IF (b(i,j,1).NE.0.) bet(i,j,1) = 1. _d 0 / b(i,j,1)
        ENDDO
       ENDDO

      ENDIF

C--   Middle of forward sweep
      IF (Nr.GT.2) THEN

CADJ loop = sequential
       DO k=2,Nr

#ifdef ALLOW_AUTODIFF_TAMC
        kkey = (idkey-1)*(Nr-2) + k-1
#endif

        DO j=jMin,jMax
         DO i=iMin,iMax
          gam(i,j,k) = c(i,j,k-1)*bet(i,j,k-1)
          IF ( ( b(i,j,k) - a(i,j,k)*gam(i,j,k) ) .NE. 0.) 
     &        bet(i,j,k) = 1. _d 0 / ( b(i,j,k) - a(i,j,k)*gam(i,j,k) )
         ENDDO
        ENDDO

       ENDDO

      ENDIF

      DO j=jMin,jMax
       DO i=iMin,iMax
        gYNm1(i,j,1,bi,bj) = gXNm1(i,j,1,bi,bj)*bet(i,j,1)
       ENDDO
      ENDDO
      DO k=2,Nr
       DO j=jMin,jMax
        DO i=iMin,iMax
         gYnm1(i,j,k,bi,bj) = bet(i,j,k)*
     &        (gXnm1(i,j,k,bi,bj) - a(i,j,k)*gYnm1(i,j,k-1,bi,bj))
        ENDDO
       ENDDO
      ENDDO

C--    Backward sweep
CADJ loop = sequential
       DO k=Nr-1,1,-1
        DO j=jMin,jMax
         DO i=iMin,iMax
          gYnm1(i,j,k,bi,bj)=gYnm1(i,j,k,bi,bj)
     &              -gam(i,j,k+1)*gYnm1(i,j,k+1,bi,bj)
         ENDDO
        ENDDO
       ENDDO

       DO k=1,Nr
        DO j=jMin,jMax
         DO i=iMin,iMax
          gXnm1(i,j,k,bi,bj)=gYnm1(i,j,k,bi,bj)
         ENDDO
        ENDDO
       ENDDO

      RETURN
      END
-------------- next part --------------
C/-------------------------------------------------------------------\
C|||  Procedure: IMPLCIT_DZZ_TRACER                                |||
C|||===============================================================|||
C|||        Function: Does implcit vertical diffusion for tracer   |||
C|||                  points.                                      |||
C|||        Comments: IMPLICIT_DZZ_TRACER takes an input field     |||
C|||                  T = old and updates to hold T = new. So that |||
C|||                  new - dt.DZ{k.DZ{new}} = old  (1)            |||
C|||                  subject to a zero gradient bc at the upper   |||
C|||                  and lower boundaries.                        |||
C|||                  (1) is viewed as a tridiagonal matrix equn   |||
C|||                  A.new = old and is solved by LU decomp.      |||
C|||                  followed by tridiagonal solve.               |||
C\-------------------------------------------------------------------/
      SUBROUTINE IMPLICIT_DZZ_TRACER ( 
     U           Tr, a2MomZ_Tr )
      IMPLICIT NONE
C     /--------------------------------------------------------------\
C     | Global data                                                  |
C     \--------------------------------------------------------------/
#include "SIZE.h"
#include "OPERATORS.h"
#include "GRID.h"
#include "PARAMS.h"
#include "MASKS.h"
C     /--------------------------------------------------------------\
C     | Routine arguments                                            |
C     |==============================================================|
C     | Tr       - Tracer field                                      |
C     | a2MomZ_Tr- XYZ map of viscosity at middepth between Tr points|
C     |            ( Pa**2/s ). Note: a2MomZ_Tr is always > 0.       |
C     | OperatingMode - Flag indicating different modes of operation |
C     |                 0: LU decomposition, no solving.             |
C     |                 1: LU decomposition, and solve.              |
C     |                 2: No LU decomposition, solve.               |
C     \--------------------------------------------------------------/
      TYPEtracerFld( Tr       )
      TYPEtracerFld( a2MomZ_Tr)
C     /--------------------------------------------------------------\
C     | Local variables                                              |
C     |==============================================================|
C     | Trmixed  - Tracer field after mixing.                        |
C     | K       - Loop counters.                                     |
C     | aUpper  - A operator element linking i,j,k and i,j,k-1.      |
C     | aCenter - A operator element on i,j,k.                       |
C     | aLower  - A operator element linking i,j,k and i,j,k+1.      |
C     | aNorm   - Normalisation factor to improve numerical          |
C     |           conditioning of A.                                 |
C     | gamma   - Normalisation factor to improve numerical          |
C     |           conditioning of RHS.                               |
C     \--------------------------------------------------------------/
      TYPEtracerFld( Trmixed )
      TYPEtracerFld( aUpper )
      TYPEtracerFld( aLower )
      TYPEtracerFld( aCenter)
      REAL aNorm
      REAL gamma, factor
      INTEGER K, num(3)
C
      TYPEtracerFld( bet )
      TYPEtracerFld( gam )
C     Solve: aCenter.trMixed[K] - aUpper.trMixed[K-1] - aLower.trMixed[K+1] = Tr[K]
C            using LU decomposition followed by tridiagonal solve.    
C            i.e Ax = b -> LUx = b
C                   solve  Ly  = b
C                   solve   Ux = y

C     LU factorisation
C     Note: LU factorisation is only a function of geometry and a2MomZ_Tr. 
C           One can save factorisation between time steps if a2MomZ_Tr 
C           remains constant.
C           aUpper refers to upper diagonal in Crouts method.
C           aLower refers to lower diagonal in Crouts method.
C           aCenter is stored as 1/(main diagonal) to avoid divides in the LU solve.

       aLower  = -pZFaceA*delT*pDdzOp*a2MomZ_Tr*pMask*_eoshift(pMask,_kxyz,-1)
       aUpper  = _eoshift(aLower,_kxyz,+1)
       aLower  = aLower*rPvol
       aUpper  = aUpper*rPvol
       aCenter = 1.D0 - aLower - aUpper

c      factor =1.e-3
c      aLower  = factor*(-pZFaceA*rPVol*delT*pDdzOp*a2MomZ_Tr*pMask*_eoshift(pMask,_kxyz,-1))
c      aUpper  = _eoshift(aLower,_kxyz,+1)
c      aCenter = 1. - aLower - aUpper

#ifdef NORM
       aNorm   = MAXVAL(ABS(aCenter))
       aNorm   = MAX(aNorm, MAXVAL(ABS(aLower )) )
       aNorm   = MAX(aNorm, MAXVAL(ABS(aUpper )) )
       IF ( aNorm .NE. 0. ) THEN
        aCenter = aCenter/aNorm
        aUpper  = aUpper /aNorm
        aLower  = aLower /aNorm
       ENDIF
       gamma = MAXVAL(ABS(Tr))
       IF ( gamma .NE. 0. ) THEN
        Tr = Tr/gamma
       ENDIF
#endif

       bet(_i3(:,:,1)) = 1./aCenter(_i3(:,:,1))
       gam(_i3(:,:,1)) = 0.

       do k=2,nz
       gam(_i3(:,:,k)) = aUpper(_i3(:,:,k-1))*bet(_i3(:,:,k-1))
       bet(_i3(:,:,k)) = 1./(aCenter(_i3(:,:,k)) 
     &                 - aLower(_i3(:,:,k))*gam(_i3(:,:,k)))
       enddo

c      gamma=minval(bet)
c      num=minloc(bet)
c      print *, 'minval, minloc' , gamma, num

       Trmixed(_i3(:,:,1)) = tr(_i3(:,:,1))*bet(_i3(:,:,1))  
       do k=2,nz
       Trmixed(_i3(:,:,k))  = (tr(_i3(:,:,k))
     & -aLower(_i3(:,:,k))*Trmixed(_i3(:,:,k-1)))*bet(_i3(:,:,k))
       enddo

CADJ loop = sequential    
       do k = nz-1,1,-1
       Trmixed(_i3(:,:,k)) = 
     & Trmixed(_i3(:,:,k)) - gam(_i3(:,:,k+1))*Trmixed(_i3(:,:,k+1))   
       enddo

c     Denormalise
#ifdef NORM
      IF ( aNorm .NE. 0. ) THEN
       Trmixed = Trmixed/aNorm
      ENDIF
      IF ( gamma .NE. 0. ) THEN
       Trmixed = Trmixed*gamma
      ENDIF
#endif

      Tr = Trmixed

      RETURN
      END