#ifndef MS_SUA # include #endif #include #define STANDARD_ERROR 2 #define STANDARD_OUTPUT 1 #ifndef STUBMPI # include "mpi.h" #endif #include "rsl_lite.h" static int yp_curs, ym_curs, xp_curs, xm_curs ; RSL_LITE_INIT_PERIOD ( int * Fcomm0, int * shw0, int * n3dR0, int *n2dR0, int * typesizeR0 , int * n3dI0, int *n2dI0, int * typesizeI0 , int * n3dD0, int *n2dD0, int * typesizeD0 , int * n3dL0, int *n2dL0, int * typesizeL0 , int * me0, int * np0 , int * np_x0 , int * np_y0 , int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 ) { #ifndef STUBMPI int n3dR, n2dR, typesizeR ; int n3dI, n2dI, typesizeI ; int n3dD, n2dD, typesizeD ; int n3dL, n2dL, typesizeL ; int shw ; int me, np, np_x, np_y ; int ips , ipe , jps , jpe , kps , kpe ; int yp, ym, xp, xm ; int nbytes ; int coords[2] ; MPI_Comm comm, *comm0, dummy_comm ; comm0 = &dummy_comm ; *comm0 = MPI_Comm_f2c( *Fcomm0 ) ; shw = *shw0 ; n3dR = *n3dR0 ; n2dR = *n2dR0 ; typesizeR = *typesizeR0 ; n3dI = *n3dI0 ; n2dI = *n2dI0 ; typesizeI = *typesizeI0 ; n3dD = *n3dD0 ; n2dD = *n2dD0 ; typesizeD = *typesizeD0 ; n3dL = *n3dL0 ; n2dL = *n2dL0 ; typesizeL = *typesizeL0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ; ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ; /* This assumes that the topoology associated with the communicator is periodic the period routines should be called with "local_communicator_periodic", which is set up in module_dm.F for RSL_LITE. Registry generated code automatically does this (gen_comms.c for RSL_LITE). */ if ( np_y > 1 ) { nbytes = typesizeR*(ipe-ips+1+2*shw)*(shw+1)*(n3dR*(kpe-kps+1)+n2dR) + typesizeI*(ipe-ips+1+2*shw)*(shw+1)*(n3dI*(kpe-kps+1)+n2dI) + typesizeD*(ipe-ips+1+2*shw)*(shw+1)*(n3dD*(kpe-kps+1)+n2dD) + typesizeL*(ipe-ips+1+2*shw)*(shw+1)*(n3dL*(kpe-kps+1)+n2dL) ; MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ; if ( yp != MPI_PROC_NULL && coords[0] == np_y - 1 ) { /* process on top of mesh */ buffer_for_proc ( yp , nbytes, RSL_RECVBUF ) ; buffer_for_proc ( yp , nbytes, RSL_SENDBUF ) ; } if ( ym != MPI_PROC_NULL && coords[0] == 0 ) { /* process on bottom of mesh */ buffer_for_proc ( ym , nbytes, RSL_RECVBUF ) ; buffer_for_proc ( ym , nbytes, RSL_SENDBUF ) ; } } if ( np_x > 1 ) { nbytes = typesizeR*(jpe-jps+1+2*shw)*(shw+1)*(n3dR*(kpe-kps+1)+n2dR) + typesizeI*(jpe-jps+1+2*shw)*(shw+1)*(n3dI*(kpe-kps+1)+n2dI) + typesizeD*(jpe-jps+1+2*shw)*(shw+1)*(n3dD*(kpe-kps+1)+n2dD) + typesizeL*(jpe-jps+1+2*shw)*(shw+1)*(n3dL*(kpe-kps+1)+n2dL) ; MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ; if ( xm != MPI_PROC_NULL && coords[1] == np_x - 1 ) { /* process on right hand side of mesh */ buffer_for_proc ( xp , nbytes, RSL_RECVBUF ) ; buffer_for_proc ( xp , nbytes, RSL_SENDBUF ) ; } if ( xp != MPI_PROC_NULL && coords[1] == 0 ) { /* process on left hand side of mesh */ buffer_for_proc ( xm, nbytes, RSL_RECVBUF ) ; buffer_for_proc ( xm , nbytes, RSL_SENDBUF ) ; } } yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ; #endif } RSL_LITE_PACK_PERIOD ( int* Fcomm0, char * buf , int * shw0 , int * typesize0 , int * xy0 , int * pu0 , int * imemord , int * stag0 , int *me0, int * np0 , int * np_x0 , int * np_y0 , int * ids0 , int * ide0 , int * jds0 , int * jde0 , int * kds0 , int * kde0 , int * ims0 , int * ime0 , int * jms0 , int * jme0 , int * kms0 , int * kme0 , int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 ) { #ifndef STUBMPI int me, np, np_x, np_y ; int shw , typesize ; int ids , ide , jds , jde , kds , kde ; int ims , ime , jms , jme , kms , kme ; int ips , ipe , jps , jpe , kps , kpe ; int stag ; /* 0 not stag, 1 stag */ int xy ; /* y = 0 , x = 1 */ int pu ; /* pack = 0 , unpack = 1 */ register int i, j, k, t ; #ifdef crayx1 register int i2,i3,i4,i_offset; #endif char *p ; int the_buf ; int yp, ym, xp, xm ; int nbytes, ierr ; register int *pi, *qi ; int coords[2] ; int js, je, ks, ke, is, ie, wcount ; MPI_Comm comm, *comm0, dummy_comm ; comm0 = &dummy_comm ; *comm0 = MPI_Comm_f2c( *Fcomm0 ) ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ; stag = *stag0 ; shw = *shw0 ; typesize = *typesize0 ; ids = *ids0-1 ; ide = *ide0-1 ; jds = *jds0-1 ; jde = *jde0-1 ; kds = *kds0-1 ; kde = *kde0-1 ; ims = *ims0-1 ; ime = *ime0-1 ; jms = *jms0-1 ; jme = *jme0-1 ; kms = *kms0-1 ; kme = *kme0-1 ; ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ; xy = *xy0 ; pu = *pu0 ; #define RANGE(S1,E1,S2,E2,S3,E3,S4,E4) (((E1)-(S1)+1)*((E2)-(S2)+1)*((E3)-(S3)+1)*((E4)-(S4)+1)) #if 0 #define IMAX(A) (((A)>ids)?(A):ids) #define IMIN(A) (((A)jds)?(A):jds) #define JMIN(A) (((A) ide) since this will handle corner points for doubly periodic updates (he wrote hopefully) */ #define IMAX(A) (A) #define IMIN(A) (A) #define JMAX(A) (A) #define JMIN(A) (A) #endif the_buf = ( pu == 0 ) ? RSL_SENDBUF : RSL_RECVBUF ; if ( np_x > 1 && xy == 1 ) { /* exchange period in x dim */ MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ; if ( coords[1] == np_x - 1 ) { /* process on right hand edge of domain */ p = buffer_for_proc( xp , 0 , the_buf ) ; if ( pu == 0 ) { js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ; ks = kps ; ke = kpe ; is = ipe-shw ; ie = ipe-1 ; nbytes = buffer_size_for_proc( xp , the_buf ) ; if ( xp_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ipe-shw, ipe-1, 1, typesize ) > nbytes ) { #ifndef MS_SUA fprintf(stderr,"memory overwrite in rsl_lite_pack_period_x, right hand X to %d, %d > %d\n",xp, xp_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ipe-shw, ipe-1, 1, typesize ), nbytes ) ; #endif MPI_Abort(MPI_COMM_WORLD, 98) ; } if ( typesize == 8 ) { F_PACK_LINT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xp_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_PACK_INT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xp_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } else { js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ; ks = kps ; ke = kpe ; is = ipe ; ie = ipe+shw-1+stag ; if ( typesize == 8 ) { F_UNPACK_LINT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xp_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_UNPACK_INT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xp_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } } if ( coords[1] == 0 ) { /* process on left hand edge of domain */ p = buffer_for_proc( xm , 0 , the_buf ) ; if ( pu == 0 ) { js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ; ks = kps ; ke = kpe ; is = ips ; ie = ips+shw-1+stag ; nbytes = buffer_size_for_proc( xm , the_buf ) ; if ( xm_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ips, ips+shw-1+stag, 1, typesize ) > nbytes ) { #ifndef MS_SUA fprintf(stderr,"memory overwrite in rsl_lite_pack_period_x, left hand X to %d , %d > %d\n",xm, xm_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ips, ips+shw-1+stag, 1, typesize ), nbytes ) ; #endif MPI_Abort(MPI_COMM_WORLD, 98) ; } if ( typesize == 8 ) { F_PACK_LINT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xm_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_PACK_INT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xm_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } else { js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ; ks = kps ; ke = kpe ; is = ips-shw ; ie = ips-1 ; if ( typesize == 8 ) { F_UNPACK_LINT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xm_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_UNPACK_INT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; xm_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } } } if ( np_y > 1 && xy == 0 ) { /* exchange period in Y dim */ MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ; if ( coords[0] == np_y - 1 ) { /* process on top edge of domain */ p = buffer_for_proc( yp , 0 , the_buf ) ; if ( pu == 0 ) { is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ; ks = kps ; ke = kpe ; js = jpe-shw ; je = jpe-1 ; nbytes = buffer_size_for_proc( yp , the_buf ) ; if ( yp_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jpe-shw, jpe-1, 1, typesize ) > nbytes ) { #ifndef MS_SUA fprintf(stderr,"memory overwrite in rsl_lite_pack_period_y, right hand Y to %d, %d > %d\n",yp, yp_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jpe-shw, jpe-1, 1, typesize ), nbytes ) ; #endif MPI_Abort(MPI_COMM_WORLD, 98) ; } if ( typesize == 8 ) { F_PACK_LINT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; yp_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_PACK_INT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; yp_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } else { is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ; ks = kps ; ke = kpe ; js = jpe ; je = jpe+shw-1+stag ; if ( typesize == 8 ) { F_UNPACK_LINT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; yp_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_UNPACK_INT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; yp_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } } if ( coords[0] == 0 ) { /* process on bottom edge of domain */ p = buffer_for_proc( ym , 0 , the_buf ) ; if ( pu == 0 ) { is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ; ks = kps ; ke = kpe ; js = jps ; je = jps+shw-1+stag ; nbytes = buffer_size_for_proc( ym , the_buf ) ; if ( ym_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jps, jps+shw-1+stag, 1, typesize ) > nbytes ) { #ifndef MS_SUA fprintf(stderr,"memory overwrite in rsl_lite_pack_period_y, left hand Y to %d , %d > %d\n",xm, ym_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jps, jps+shw-1+stag, 1, typesize ), nbytes ) ; #endif MPI_Abort(MPI_COMM_WORLD, 98) ; } if ( typesize == 8 ) { F_PACK_LINT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; ym_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_PACK_INT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; ym_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } else { is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ; ks = kps ; ke = kpe ; js = jps-shw ; je = jps-1 ; if ( typesize == 8 ) { F_UNPACK_LINT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; ym_curs += wcount*typesize ; } else if ( typesize == 4 ) { F_UNPACK_INT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie, &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ; ym_curs += wcount*typesize ; } else { #ifndef MS_SUA fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ; #endif } } } } #endif } #ifndef STUBMPI static MPI_Request yp_recv, ym_recv, yp_send, ym_send ; static MPI_Request xp_recv, xm_recv, xp_send, xm_send ; #endif RSL_LITE_EXCH_PERIOD_X ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 ) { #ifndef STUBMPI int me, np, np_x, np_y ; int yp, ym, xp, xm, nbytes ; MPI_Status stat ; MPI_Comm comm, *comm0, dummy_comm ; int coords[2] ; comm0 = &dummy_comm ; *comm0 = MPI_Comm_f2c( *Fcomm0 ) ; #if 1 comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ; if ( np_x > 1 ) { MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ; if ( coords[1] == np_x - 1 ) { /* proc on right hand side of domain */ nbytes = buffer_size_for_proc( xp, RSL_RECVBUF ) ; MPI_Irecv ( buffer_for_proc( xp , xp_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, xp, me, comm, &xp_recv ) ; } if ( coords[1] == 0 ) { /* proc on left hand side of domain */ nbytes = buffer_size_for_proc( xm, RSL_RECVBUF ) ; MPI_Irecv ( buffer_for_proc( xm, xm_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, xm, me, comm, &xm_recv ) ; } if ( coords[1] == np_x - 1 ) { /* proc on right hand side of domain */ MPI_Isend ( buffer_for_proc( xp , 0, RSL_SENDBUF ), xp_curs, MPI_CHAR, xp, xp, comm, &xp_send ) ; } if ( coords[1] == 0 ) { /* proc on left hand side of domain */ MPI_Isend ( buffer_for_proc( xm, 0, RSL_SENDBUF ), xm_curs, MPI_CHAR, xm, xm, comm, &xm_send ) ; } if ( coords[1] == np_x - 1 ) MPI_Wait( &xp_recv, &stat ) ; if ( coords[1] == 0 ) MPI_Wait( &xm_recv, &stat ) ; if ( coords[1] == np_x - 1 ) MPI_Wait( &xp_send, &stat ) ; if ( coords[1] == 0 ) MPI_Wait( &xm_send, &stat ) ; } #else # ifndef MS_SUA fprintf(stderr,"RSL_LITE_EXCH_PERIOD_X disabled\n") ; # endif #endif yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ; #endif } RSL_LITE_EXCH_PERIOD_Y ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 ) { #ifndef STUBMPI int me, np, np_x, np_y ; int yp, ym, xp, xm, nbytes ; MPI_Status stat ; MPI_Comm comm, *comm0, dummy_comm ; int coords[2] ; comm0 = &dummy_comm ; *comm0 = MPI_Comm_f2c( *Fcomm0 ) ; #if 1 comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ; if ( np_y > 1 ) { MPI_Comm_rank( *comm0, &me ) ; MPI_Cart_coords( *comm0, me, 2, coords ) ; MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ; if ( coords[0] == np_y - 1 ) { /* proc on top of domain */ nbytes = buffer_size_for_proc( yp, RSL_RECVBUF ) ; MPI_Irecv ( buffer_for_proc( yp , yp_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, yp, me, comm, &yp_recv ) ; } if ( coords[0] == 0 ) { /* proc on bottom of domain */ nbytes = buffer_size_for_proc( ym, RSL_RECVBUF ) ; MPI_Irecv ( buffer_for_proc( ym, ym_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, ym, me, comm, &ym_recv ) ; } if ( coords[0] == np_y - 1 ) { /* proc on top of domain */ MPI_Isend ( buffer_for_proc( yp , 0, RSL_SENDBUF ), yp_curs, MPI_CHAR, yp, yp, comm, &yp_send ) ; } if ( coords[0] == 0 ) { /* proc on bottom of domain */ MPI_Isend ( buffer_for_proc( ym, 0, RSL_SENDBUF ), ym_curs, MPI_CHAR, ym, ym, comm, &ym_send ) ; } if ( coords[0] == np_y - 1 ) MPI_Wait( &yp_recv, &stat ) ; if ( coords[0] == 0 ) MPI_Wait( &ym_recv, &stat ) ; if ( coords[0] == np_y - 1 ) MPI_Wait( &yp_send, &stat ) ; if ( coords[0] == 0 ) MPI_Wait( &ym_send, &stat ) ; } #else # ifndef MS_SUA fprintf(stderr,"RSL_LITE_EXCH_PERIOD_Y disabled\n") ; # endif #endif yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ; #endif }