1 | subroutine trans_z2x ( np, comm, dir, r_wordsize, i_wordsize, memorder, & |
---|
2 | a, & |
---|
3 | sd1, ed1, sd2, ed2, sd3, ed3, & |
---|
4 | sp1, ep1, sp2, ep2, sp3, ep3, & |
---|
5 | sm1, em1, sm2, em2, sm3, em3, & |
---|
6 | ax, & |
---|
7 | sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & |
---|
8 | sm1x, em1x, sm2x, em2x, sm3x, em3x ) |
---|
9 | USE duplicate_of_driver_constants |
---|
10 | implicit none |
---|
11 | integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, & |
---|
12 | sp1, ep1, sp2, ep2, sp3, ep3, & |
---|
13 | sm1, em1, sm2, em2, sm3, em3, & |
---|
14 | sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & |
---|
15 | sm1x, em1x, sm2x, em2x, sm3x, em3x |
---|
16 | integer, intent(in) :: np, comm, r_wordsize, i_wordsize |
---|
17 | integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a |
---|
18 | integer, intent(in) :: memorder |
---|
19 | integer, dimension((ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: a |
---|
20 | integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax |
---|
21 | #ifndef STUBMPI |
---|
22 | include 'mpif.h' |
---|
23 | |
---|
24 | !local |
---|
25 | integer :: ids, ide, jds, jde, kds, kde, & |
---|
26 | ips, ipe, jps, jpe, kps, kpe, & |
---|
27 | ims, ime, jms, jme, kms, kme, & |
---|
28 | ipsx, ipex, jpsx, jpex, kpsx, kpex, & |
---|
29 | imsx, imex, jmsx, jmex, kmsx, kmex |
---|
30 | |
---|
31 | integer, dimension(0:(ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: zbuf |
---|
32 | integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf |
---|
33 | |
---|
34 | integer pencil(4), allpencils(4,np) |
---|
35 | integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np) |
---|
36 | integer allsendcnts(np+2,np), is(np), ie(np), ks(np),ke(np) |
---|
37 | integer sendcurs(np), recvcurs(np) |
---|
38 | integer i,j,k,p,sc,sp,rp,yp,zp,curs,zbufsz,cells,nkcells,ivectype,ierr |
---|
39 | |
---|
40 | SELECT CASE ( memorder ) |
---|
41 | CASE ( DATA_ORDER_XYZ ) |
---|
42 | ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3 |
---|
43 | ips = sp1 ; ipe = ep1 ; jps = sp2 ; jpe = ep2 ; kps = sp3 ; kpe = ep3 |
---|
44 | ims = sm1 ; ime = em1 ; jms = sm2 ; jme = em2 ; kms = sm3 ; kme = em3 |
---|
45 | ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x |
---|
46 | imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x |
---|
47 | CASE ( DATA_ORDER_YXZ ) |
---|
48 | ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3 |
---|
49 | ips = sp2 ; ipe = ep2 ; jps = sp1 ; jpe = ep1 ; kps = sp3 ; kpe = ep3 |
---|
50 | ims = sm2 ; ime = em2 ; jms = sm1 ; jme = em1 ; kms = sm3 ; kme = em3 |
---|
51 | ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x |
---|
52 | imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x |
---|
53 | CASE ( DATA_ORDER_XZY ) |
---|
54 | ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2 |
---|
55 | ips = sp1 ; ipe = ep1 ; jps = sp3 ; jpe = ep3 ; kps = sp2 ; kpe = ep2 |
---|
56 | ims = sm1 ; ime = em1 ; jms = sm3 ; jme = em3 ; kms = sm2 ; kme = em2 |
---|
57 | ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x |
---|
58 | imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x |
---|
59 | CASE ( DATA_ORDER_YZX ) |
---|
60 | ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2 |
---|
61 | ips = sp3 ; ipe = ep3 ; jps = sp1 ; jpe = ep1 ; kps = sp2 ; kpe = ep2 |
---|
62 | ims = sm3 ; ime = em3 ; jms = sm1 ; jme = em1 ; kms = sm2 ; kme = em2 |
---|
63 | ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x |
---|
64 | imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x |
---|
65 | CASE ( DATA_ORDER_ZXY ) |
---|
66 | ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1 |
---|
67 | ips = sp2 ; ipe = ep2 ; jps = sp3 ; jpe = ep3 ; kps = sp1 ; kpe = ep1 |
---|
68 | ims = sm2 ; ime = em2 ; jms = sm3 ; jme = em3 ; kms = sm1 ; kme = em1 |
---|
69 | ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x |
---|
70 | imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x |
---|
71 | CASE ( DATA_ORDER_ZYX ) |
---|
72 | ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1 |
---|
73 | ips = sp3 ; ipe = ep3 ; jps = sp2 ; jpe = ep2 ; kps = sp1 ; kpe = ep1 |
---|
74 | ims = sm3 ; ime = em3 ; jms = sm2 ; jme = em2 ; kms = sm1 ; kme = em1 |
---|
75 | ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x |
---|
76 | imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x |
---|
77 | END SELECT |
---|
78 | |
---|
79 | sendcnts = 0 ; recvcnts = 0 |
---|
80 | |
---|
81 | xbuf = 0 |
---|
82 | zbuf = 0 |
---|
83 | |
---|
84 | ! work out send/recv sizes to each processor in X dimension |
---|
85 | pencil(1) = ips |
---|
86 | pencil(2) = ipe |
---|
87 | pencil(3) = kpsx |
---|
88 | pencil(4) = kpex |
---|
89 | call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr ) |
---|
90 | do p = 1, np |
---|
91 | is(p) = allpencils(1,p) |
---|
92 | ie(p) = allpencils(2,p) |
---|
93 | ks(p) = allpencils(3,p) |
---|
94 | ke(p) = allpencils(4,p) |
---|
95 | enddo |
---|
96 | ! pack send buffer |
---|
97 | sendcurs = 0 |
---|
98 | sdispls = 0 |
---|
99 | sc = 0 |
---|
100 | do p = 1, np |
---|
101 | if ( r_wordsize .eq. i_wordsize ) then |
---|
102 | if ( dir .eq. 1 ) then |
---|
103 | call f_pack_int ( a, zbuf(sc), memorder, & |
---|
104 | & jps, jpe, ks(p), ke(p), ips, ipe, & |
---|
105 | & jms, jme, kms, kme, ims, ime, sendcurs(p) ) |
---|
106 | else |
---|
107 | call f_pack_int ( ax, xbuf(sc), memorder, & |
---|
108 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
109 | & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) |
---|
110 | endif |
---|
111 | else if ( r_wordsize .eq. 8 ) THEN |
---|
112 | if ( dir .eq. 1 ) then |
---|
113 | call f_pack_lint ( a, zbuf(sc), memorder, & |
---|
114 | & jps, jpe, ks(p), ke(p), ips, ipe, & |
---|
115 | & jms, jme, kms, kme, ims, ime, sendcurs(p) ) |
---|
116 | else |
---|
117 | call f_pack_lint ( ax, xbuf(sc), memorder, & |
---|
118 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
119 | & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) |
---|
120 | endif |
---|
121 | sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize)) |
---|
122 | else |
---|
123 | write(0,*)'RSL_LITE internal error: type size mismatch ',__FILE__,__LINE__ |
---|
124 | call mpi_abort(ierr) |
---|
125 | endif |
---|
126 | sc = sc + sendcurs(p) |
---|
127 | sendcnts(p) = sendcurs(p) |
---|
128 | if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1) |
---|
129 | enddo |
---|
130 | ! work out receive counts and displs |
---|
131 | rdispls = 0 |
---|
132 | recvcnts = 0 |
---|
133 | do p = 1, np |
---|
134 | if ( dir .eq. 1 ) then |
---|
135 | recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize)) |
---|
136 | else |
---|
137 | recvcnts(p) = (ke(p)-ks(p)+1)*(ipe-ips+1)*(jpe-jps+1) * max(1,(r_wordsize/i_wordsize)) |
---|
138 | endif |
---|
139 | if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1) |
---|
140 | enddo |
---|
141 | ! do the transpose |
---|
142 | if ( dir .eq. 1 ) then |
---|
143 | call mpi_alltoallv(zbuf, sendcnts, sdispls, MPI_INTEGER, & |
---|
144 | xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) |
---|
145 | else |
---|
146 | call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, & |
---|
147 | zbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) |
---|
148 | endif |
---|
149 | ! unpack |
---|
150 | do p = 1, np |
---|
151 | if ( r_wordsize .eq. i_wordsize ) then |
---|
152 | if ( dir .eq. 1 ) then |
---|
153 | call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, & |
---|
154 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
155 | & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) |
---|
156 | else |
---|
157 | call f_unpack_int ( zbuf(rdispls(p)), a, memorder, & |
---|
158 | & jps, jpe, ks(p), ke(p), ips, ipe, & |
---|
159 | & jms, jme, kms, kme, ims, ime, curs ) |
---|
160 | endif |
---|
161 | else if ( r_wordsize .eq. 8 ) THEN |
---|
162 | if ( dir .eq. 1 ) then |
---|
163 | call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, & |
---|
164 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
165 | & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) |
---|
166 | else |
---|
167 | call f_unpack_lint ( zbuf(rdispls(p)), a, memorder, & |
---|
168 | & jps, jpe, ks(p), ke(p), ips, ipe, & |
---|
169 | & jms, jme, kms, kme, ims, ime, curs ) |
---|
170 | endif |
---|
171 | else |
---|
172 | write(0,*)'RSL_LITE internal error: type size mismatch ',__FILE__,__LINE__ |
---|
173 | call mpi_abort(ierr) |
---|
174 | endif |
---|
175 | enddo |
---|
176 | #endif |
---|
177 | return |
---|
178 | end subroutine trans_z2x |
---|
179 | |
---|
180 | subroutine trans_x2y ( np, comm, dir, r_wordsize, i_wordsize, memorder, & |
---|
181 | ax, & |
---|
182 | sd1, ed1, sd2, ed2, sd3, ed3, & |
---|
183 | sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & |
---|
184 | sm1x, em1x, sm2x, em2x, sm3x, em3x, & |
---|
185 | ay, & |
---|
186 | sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, & |
---|
187 | sm1y, em1y, sm2y, em2y, sm3y, em3y ) |
---|
188 | USE duplicate_of_driver_constants |
---|
189 | implicit none |
---|
190 | integer, intent(in) :: memorder |
---|
191 | integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, & |
---|
192 | sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & |
---|
193 | sm1x, em1x, sm2x, em2x, sm3x, em3x, & |
---|
194 | sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, & |
---|
195 | sm1y, em1y, sm2y, em2y, sm3y, em3y |
---|
196 | |
---|
197 | integer, intent(in) :: np, comm, r_wordsize, i_wordsize |
---|
198 | integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a |
---|
199 | integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax |
---|
200 | integer, dimension((ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ay |
---|
201 | #ifndef STUBMPI |
---|
202 | include 'mpif.h' |
---|
203 | |
---|
204 | integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf |
---|
205 | integer, dimension(0:(ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ybuf |
---|
206 | |
---|
207 | !local |
---|
208 | integer ids, ide, jds, jde, kds, kde, & |
---|
209 | ipsx, ipex, jpsx, jpex, kpsx, kpex, & |
---|
210 | imsx, imex, jmsx, jmex, kmsx, kmex, & |
---|
211 | ipsy, ipey, jpsy, jpey, kpsy, kpey, & |
---|
212 | imsy, imey, jmsy, jmey, kmsy, kmey |
---|
213 | integer pencil(4), allpencils(4,np) |
---|
214 | integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np) |
---|
215 | integer allsendcnts(np+2,np), is(np), ie(np), js(np), je(np) |
---|
216 | integer sendcurs(np), recvcurs(np) |
---|
217 | integer i,j,k,p,sc,sp,rp,yp,zp,curs,xbufsz,cells,nkcells,ivectype,ierr |
---|
218 | |
---|
219 | SELECT CASE ( memorder ) |
---|
220 | CASE ( DATA_ORDER_XYZ ) |
---|
221 | ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3 |
---|
222 | ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x |
---|
223 | imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x |
---|
224 | ipsy = sp1y ; ipey = ep1y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp3y ; kpey = ep3y |
---|
225 | imsy = sm1y ; imey = em1y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm3y ; kmey = em3y |
---|
226 | CASE ( DATA_ORDER_YXZ ) |
---|
227 | ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3 |
---|
228 | ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x |
---|
229 | imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x |
---|
230 | ipsy = sp2y ; ipey = ep2y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp3y ; kpey = ep3y |
---|
231 | imsy = sm2y ; imey = em2y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm3y ; kmey = em3y |
---|
232 | CASE ( DATA_ORDER_XZY ) |
---|
233 | ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2 |
---|
234 | ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x |
---|
235 | imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x |
---|
236 | ipsy = sp1y ; ipey = ep1y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp2y ; kpey = ep2y |
---|
237 | imsy = sm1y ; imey = em1y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm2y ; kmey = em2y |
---|
238 | CASE ( DATA_ORDER_YZX ) |
---|
239 | ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2 |
---|
240 | ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x |
---|
241 | imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x |
---|
242 | ipsy = sp3y ; ipey = ep3y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp2y ; kpey = ep2y |
---|
243 | imsy = sm3y ; imey = em3y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm2y ; kmey = em2y |
---|
244 | CASE ( DATA_ORDER_ZXY ) |
---|
245 | ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1 |
---|
246 | ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x |
---|
247 | imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x |
---|
248 | ipsy = sp2y ; ipey = ep2y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp1y ; kpey = ep1y |
---|
249 | imsy = sm2y ; imey = em2y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm1y ; kmey = em1y |
---|
250 | CASE ( DATA_ORDER_ZYX ) |
---|
251 | ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1 |
---|
252 | ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x |
---|
253 | imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x |
---|
254 | ipsy = sp3y ; ipey = ep3y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp1y ; kpey = ep1y |
---|
255 | imsy = sm3y ; imey = em3y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm1y ; kmey = em1y |
---|
256 | END SELECT |
---|
257 | |
---|
258 | sendcnts = 0 ; recvcnts = 0 |
---|
259 | |
---|
260 | xbuf = 0 |
---|
261 | ybuf = 0 |
---|
262 | |
---|
263 | ! work out send/recv sizes to each processor in X dimension |
---|
264 | pencil(1) = jpsx |
---|
265 | pencil(2) = jpex |
---|
266 | pencil(3) = ipsy |
---|
267 | pencil(4) = ipey |
---|
268 | |
---|
269 | call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr ) |
---|
270 | do p = 1, np |
---|
271 | js(p) = allpencils(1,p) |
---|
272 | je(p) = allpencils(2,p) |
---|
273 | is(p) = allpencils(3,p) |
---|
274 | ie(p) = allpencils(4,p) |
---|
275 | enddo |
---|
276 | |
---|
277 | |
---|
278 | ! pack send buffer |
---|
279 | sendcurs = 0 |
---|
280 | sdispls = 0 |
---|
281 | sc = 0 |
---|
282 | do p = 1, np |
---|
283 | if ( r_wordsize .eq. i_wordsize ) then |
---|
284 | if ( dir .eq. 1 ) then |
---|
285 | call f_pack_int ( ax, xbuf(sc), memorder, & |
---|
286 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
287 | & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) |
---|
288 | else |
---|
289 | call f_pack_int ( ay, ybuf(sc), memorder, & |
---|
290 | & js(p), je(p), kpsy, kpey, ipsy, ipey, & |
---|
291 | & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) ) |
---|
292 | endif |
---|
293 | else if ( r_wordsize .eq. 8 ) THEN |
---|
294 | if ( dir .eq. 1 ) then |
---|
295 | call f_pack_lint ( ax, xbuf(sc), memorder, & |
---|
296 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
297 | & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) |
---|
298 | else |
---|
299 | call f_pack_lint ( ay, ybuf(sc), memorder, & |
---|
300 | & js(p), je(p), kpsy, kpey, ipsy, ipey, & |
---|
301 | & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) ) |
---|
302 | endif |
---|
303 | sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize)) |
---|
304 | else |
---|
305 | write(0,*)'RSL_LITE internal error: type size mismatch ',__FILE__,__LINE__ |
---|
306 | call mpi_abort(ierr) |
---|
307 | endif |
---|
308 | sc = sc + sendcurs(p) |
---|
309 | sendcnts(p) = sendcurs(p) |
---|
310 | if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1) |
---|
311 | enddo |
---|
312 | |
---|
313 | ! work out receive counts and displs |
---|
314 | rdispls = 0 |
---|
315 | recvcnts = 0 |
---|
316 | do p = 1, np |
---|
317 | if ( dir .eq. 1 ) then |
---|
318 | recvcnts(p) = (je(p)-js(p)+1)*(kpey-kpsy+1)*(ipey-ipsy+1) * max(1,(r_wordsize/i_wordsize)) |
---|
319 | else |
---|
320 | recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize)) |
---|
321 | endif |
---|
322 | if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1) |
---|
323 | enddo |
---|
324 | |
---|
325 | ! do the transpose |
---|
326 | if ( dir .eq. 1 ) then |
---|
327 | call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, & |
---|
328 | ybuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) |
---|
329 | else |
---|
330 | call mpi_alltoallv(ybuf, sendcnts, sdispls, MPI_INTEGER, & |
---|
331 | xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) |
---|
332 | endif |
---|
333 | ! unpack |
---|
334 | do p = 1, np |
---|
335 | if ( r_wordsize .eq. i_wordsize ) then |
---|
336 | if ( dir .eq. 1 ) then |
---|
337 | call f_unpack_int ( ybuf(rdispls(p)), ay, memorder, & |
---|
338 | & js(p), je(p), kpsy, kpey, ipsy, ipey, & |
---|
339 | & jmsy, jmey, kmsy, kmey, imsy, imey, curs ) |
---|
340 | else |
---|
341 | call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, & |
---|
342 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
343 | & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) |
---|
344 | endif |
---|
345 | else if ( r_wordsize .eq. 8 ) THEN |
---|
346 | if ( dir .eq. 1 ) then |
---|
347 | call f_unpack_lint ( ybuf(rdispls(p)), ay, memorder, & |
---|
348 | & js(p), je(p), kpsy, kpey, ipsy, ipey, & |
---|
349 | & jmsy, jmey, kmsy, kmey, imsy, imey, curs ) |
---|
350 | else |
---|
351 | call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, & |
---|
352 | & jpsx, jpex, kpsx, kpex, is(p), ie(p), & |
---|
353 | & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) |
---|
354 | endif |
---|
355 | else |
---|
356 | write(0,*)'RSL_LITE internal error: type size mismatch ',__FILE__,__LINE__ |
---|
357 | call mpi_abort(ierr) |
---|
358 | endif |
---|
359 | enddo |
---|
360 | #endif |
---|
361 | return |
---|
362 | end subroutine trans_x2y |
---|
363 | |
---|