SUBROUTINE GSTATS(KNUM,KSWITCH)

!**** *GSTATS*  - Gather timing statistics

!     PURPOSE.
!     --------
!       To gather timings for subsequent output by routine STATS_OUTPUT.
!       Each call starts, stops, suspends or resumes the timer of one
!       event and accumulates the results in the arrays of YOMGSTATS.
!       Nothing is done when LSTATS is false or when the caller is not
!       thread 1.

!**   INTERFACE.
!     ----------
!       *CALL* *GSTATS(KNUM,KSWITCH)

!        EXPLICIT ARGUMENTS
!        --------------------
!        KNUM    - timing event number, 0..JPMAXSTAT (for list of already
!                  defined events see routine STATS_OUTPUT)
!        KSWITCH - KSWITCH=0 - switch on timer
!                  KSWITCH=1 - switch off timer
!                  KSWITCH=2 - suspend timer
!                  KSWITCH=3 - resume timer

!        IMPLICIT ARGUMENTS
!        --------------------
!        Module YOMGSTATS (counters, accumulators, labels and switches)

!     METHOD.
!     -------
!        NCALLS(KNUM) is incremented on every start/stop call, so an odd
!        value means "event running" and an even value "event stopped".
!        Any call that does not fit this state aborts through ABOR1.

!     EXTERNALS.   USER_CLOCK  - timing routine
!     ----------   DR_HOOK     - instrumentation (only if LHOOK)
!                  GETRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG
!                              - memory/paging queries for memory tracing
!                  ABOR1       - called on inconsistent arguments
!                  (MPL_BARRIER synchronization is currently disabled,
!                   see the "!J" lines below)

!     REFERENCE.
!     ----------
!        ECMWF Research Department documentation of the IFS

!     AUTHOR.
!     -------
!        Mats Hamrud ECMWF

!     MODIFICATIONS.
!     --------------
!        ORIGINAL : 98-11-15
!        D.Salmond: 02-02-25  Return if not master thread when called from a
!                             parallel region.
!        J.Hague:   03-06-11  Memory tracing (for NSTATS_MEM MPI tasks)
!     ------------------------------------------------------------------

USE PARKIND1  ,ONLY : JPIM     ,JPRB     ,JPIB
USE YOMHOOK   ,ONLY : LHOOK,   DR_HOOK

USE YOMGSTATS
!USE MPL_MODULE  ! MPL 28.11.08
USE YOMOML

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KNUM
INTEGER(KIND=JPIM),INTENT(IN) :: KSWITCH

INTEGER(KIND=JPIM) :: IMOD,ICALL
! Values of the memory counters at the previous memory-trace sample
INTEGER(KIND=JPIM),SAVE :: IIMEM, IIPAG, IIMEMC
INTEGER(KIND=JPIB) :: IMEM, IMEMH, IMEMS, IMEMC, IPAG, INUM
INTEGER(KIND=JPIB) :: GETRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG
EXTERNAL GETRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG
REAL(KIND=JPRB) :: ZTIMED,ZCLOCK,ZTIME,ZTCPU,ZVCPU
! Clock value at the last call that updated the "parallel" reference time
REAL(KIND=JPRB),SAVE :: ZLAST_PAR_TIME
! First-call flags (initialisation implies SAVE; stated explicitly here)
LOGICAL,SAVE :: LLFIRST=.TRUE.
LOGICAL,SAVE :: LLMFIRST=.TRUE.
CHARACTER(LEN=32), SAVE :: CCDESC_DRHOOK(JPMAXSTAT)
CHARACTER(LEN=32), SAVE :: CCDESC_BARR(JPMAXSTAT)

INTEGER(KIND=JPIM),SAVE :: NUM_THREADS
! Dr.Hook handles: they must keep their value between the start call
! (KSWITCH=0) and the matching end call (KSWITCH=1), hence SAVE.
REAL(KIND=JPRB),ALLOCATABLE,SAVE :: ZHOOK_HANDLE(:)
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_COMMS, ZHOOK_HANDLE_COMMS1
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_TRANS
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_BARR
CHARACTER(LEN=4) :: CC

INTERFACE
#include "user_clock.h"
END INTERFACE

!     ------------------------------------------------------------------

! write(0,*) "GSTATS:LSTATS,JPMAXSTAT,LGSTATS_LABEL,KNUM=",LSTATS,JPMAXSTAT,LGSTATS_LABEL,KNUM

! Statistics switched off: nothing to do.
IF (.NOT.LSTATS) RETURN

IF (.NOT.ALLOCATED(ZHOOK_HANDLE)) THEN
  NUM_THREADS=OML_MAX_THREADS()
  ALLOCATE(ZHOOK_HANDLE(NUM_THREADS))
ENDIF

! (Re)build the Dr.Hook labels whenever LGSTATS_LABEL has been raised.
IF (LGSTATS_LABEL) THEN
  DO INUM=1,JPMAXSTAT
    WRITE(CC,'(I4)')INUM
    CCDESC_BARR(INUM)='>BAR-'//CCDESC(INUM)(1:21)//'('//CC//')'
!   write(6,*) inum,cctype(inum)
    IF (CCTYPE(INUM) == "TRS" .OR. CCTYPE(INUM) == 'MP-' .OR. &
     &  CCTYPE(INUM) == 'MPL' .OR. CCTYPE(INUM) == 'BAR' .OR. &
     &  CCTYPE(INUM) == 'OMP') THEN
      CCDESC_DRHOOK(INUM)='>'//CCTYPE(INUM)//'-'//CCDESC(INUM)(1:21)//'('//CC//')'
    ENDIF
  ENDDO
  LGSTATS_LABEL=.FALSE.
ENDIF

!     ------------------------------------------------------------------

!J IF((KNUM > 1000 .AND.KNUM < 2001).AND.(.NOT.LSTATS_OMP))GOTO 99999
!J IF((KNUM > 500 .AND.KNUM < 1001).AND.(.NOT.LSTATS_COMMS))GOTO 99999

! Only thread 1 gathers statistics.
IF (OML_MY_THREAD() > 1) RETURN

! Validate the event number before it is used as an array index below
! (this check used to come after the first indexed accesses).
IF (KNUM < 0 .OR. KNUM > JPMAXSTAT) CALL ABOR1('GSTATS')

!J IF(KNUM/=0) THEN
!J   IF(LSYNCSTATS .AND.(KSWITCH==0.OR. KSWITCH==2)) THEN
!J     IF(.NOT.OML_IN_PARALLEL().AND. KNUM < 500 )THEN
!J       IF(LHOOK)CALL DR_HOOK(CCDESC_BARR(KNUM),0,ZHOOK_HANDLE_BARR)
!J       CALL MPL_BARRIER(CDSTRING='GSTATS:')
!J       IF(LHOOK)CALL DR_HOOK(CCDESC_BARR(KNUM),1,ZHOOK_HANDLE_BARR)
!J     ENDIF
!J   ENDIF
!J ENDIF

! Forward start/stop events of the instrumented types to Dr.Hook.
IF (LHOOK .AND. (KSWITCH == 0 .OR. KSWITCH == 1)) THEN
! write(0,*) "KNUM,SWITCH=",KNUM,KSWITCH
! write(0,*) "CCTYPE=",CCTYPE(KNUM)
! write(0,*) "CCDESC_DRHOOK=",CCDESC_DRHOOK(KNUM)
  IF (CCTYPE(KNUM) == "TRS") THEN
    CALL DR_HOOK(CCDESC_DRHOOK(KNUM),KSWITCH,ZHOOK_HANDLE_TRANS)
  ELSEIF (CCTYPE(KNUM) == 'MP-') THEN
    CALL DR_HOOK(CCDESC_DRHOOK(KNUM),KSWITCH,ZHOOK_HANDLE_COMMS)
  ELSEIF (CCTYPE(KNUM) == 'MPL' .AND. KNUM /= 682) THEN
    CALL DR_HOOK(CCDESC_DRHOOK(KNUM),KSWITCH,ZHOOK_HANDLE_COMMS1)
  ELSEIF (CCTYPE(KNUM) == 'OMP') THEN
!   MPL 1.12.08 : passing the whole ZHOOK_HANDLE array failed to compile
!   because the handle argument (PKEY) is a scalar.  The unsaved local
!   ZTIME was used instead, but its value is not kept between the start
!   and the end call.  Only thread 1 reaches this point, so the saved
!   scalar element ZHOOK_HANDLE(1) is used as the handle.
    CALL DR_HOOK(CCDESC_DRHOOK(KNUM),KSWITCH,ZHOOK_HANDLE(1))
  ELSEIF (CCTYPE(KNUM) == 'BAR') THEN
    CALL DR_HOOK(CCDESC_DRHOOK(KNUM),KSWITCH,ZHOOK_HANDLE_BARR)
  ENDIF
ENDIF

! Sample the clocks once per call; CPU times only when requested.
CALL USER_CLOCK(PELAPSED_TIME=ZCLOCK)
IF (LSTATSCPU .OR. KNUM == 0) THEN
  CALL USER_CLOCK(PTOTAL_CP=ZTCPU,PVECTOR_CP=ZVCPU)
ELSE
  ZTCPU = 0.0_JPRB
  ZVCPU = 0.0_JPRB
ENDIF

! One-off initialisation of all accumulators on the very first call.
IF (LLFIRST) THEN

! write(0,*) "JPMAXSTAT:2=",JPMAXSTAT

  NCALLS(:) = 0
  TIMESUM(:) = 0.0_JPRB
  TIMESQSUM(:) = 0.0_JPRB
  TIMEMAX(:) = 0.0_JPRB
  TIMESUMB(:) = 0.0_JPRB
  TTCPUSUM(:) = 0.0_JPRB
  TVCPUSUM(:) = 0.0_JPRB
  TIMELCALL(:) = ZCLOCK
  CCDESC=""
  CCTYPE=""
  NTMEM = 0
  NTMEM(:,5) = 99999999
  IIMEM=0
  IIPAG=0
  IIMEMC=0
  TIME_LAST_CALL = ZCLOCK
  ZLAST_PAR_TIME=ZCLOCK
  LLFIRST = .FALSE.
ENDIF

IF (KSWITCH == 0 .OR. KSWITCH == 1) THEN
  NCALLS(KNUM) = NCALLS(KNUM)+1
ENDIF

! Consistency check: start/suspend/resume need a running event (odd
! count, the start call itself having just been counted), stop needs an
! even count.  Any other KSWITCH value also ends up here.
IMOD = MOD(NCALLS(KNUM),2)
IF (.NOT.(((KSWITCH == 0 .OR. KSWITCH == 2 .OR. KSWITCH == 3) .AND. IMOD == 1) .OR. &
 &        (KSWITCH == 1 .AND. IMOD == 0))) THEN
  WRITE(JPERR,*) 'KNUM,KSWITCH,IMOD,NCALLS(KNUM)',&
   &KNUM,KSWITCH,IMOD,NCALLS(KNUM)
  CALL ABOR1('GSTATS')
ENDIF

SELECT CASE (KSWITCH)

CASE (0)
! Start timing event
! NOTE(review): event 500 is measured against ZLAST_PAR_TIME here, but
! ZLAST_PAR_TIME is only updated for KNUM > 500 (or 102/103) at the end
! of the routine - confirm that this boundary is intended.
  IF (KNUM < 500) THEN
    ZTIMED = ZCLOCK-TIME_LAST_CALL
  ELSE
    ZTIMED = ZCLOCK - ZLAST_PAR_TIME
  ENDIF
  TIMESUMB(KNUM) = TIMESUMB(KNUM)+ZTIMED
  THISTIME(KNUM) = 0.0_JPRB
  TIMELCALL(KNUM) = ZCLOCK
  TTCPULCALL(KNUM) = ZTCPU
  TVCPULCALL(KNUM) = ZVCPU
  THISTCPU(KNUM) = 0.0_JPRB
  THISVCPU(KNUM) = 0.0_JPRB
  IF (MYPROC_STATS <= NSTATS_MEM .AND. MYPROC_STATS /= 0) THEN
!   Memory trace "before" the event.  IMEM must be sampled here: it is
!   compared, printed and stored below (this assignment used to be
!   commented out, leaving IMEM undefined in this branch).
    IMEM = GETRSS()/1024
    IPAG = GETPAG()
    IMEMH = GETHWM()/1024
    IMEMS = GETSTK()/1024
    IMEMC = 0
    IF (LSTATS_ALLOC) IMEMC = GETCURHEAP()/1024
    IF (IMEM > IIMEM.OR.IPAG > IIPAG.OR.(LSTATS_ALLOC.AND.(IMEMC.NE.IIMEMC))) THEN
      IF (LLMFIRST) THEN
!       Explanatory banner, printed once before the first trace line.
        WRITE(0,*) ".---------------------------------------------------------"
        WRITE(0,*) "| Memory trace details"
        WRITE(0,*) "| --------------------"
        WRITE(0,*) "| Memory examined at each GSTATS call if NSTATS_MEM>0."
        WRITE(0,*) "| Header for each trace line is:"
        WRITE(0,*) "|"
        WRITE(0,*) "| RSS_INC: Increase in RSS_MAX (KB)"
        WRITE(0,*) "| RSS_MAX: Maximum real working set so far (KB)"
        WRITE(0,*) "| HEAP_MX: High Water Mark for heap so far (KB)"
        WRITE(0,*) "| STK: Current Stack usage (KB)"
        WRITE(0,*) "| PGS: Page faults w I/O since last trace line"
        WRITE(0,*) "| CALL: Number of gstats call"
        WRITE(0,*) "| HEAP: Current malloc'd total (KB)"
        WRITE(0,*) "|"
        WRITE(0,*) "| Trace line written for NSTATS_MEM MPI tasks if RSS_MAX"
        WRITE(0,*) "| RSS_MAX increases, PGS>0, or HEAP changed"
        WRITE(0,*) "| (if LSTATS_ALLOC=.TRUE.)"
        WRITE(0,*) "`---------------------------------------------------------"
        WRITE(0,*) ""
        WRITE(0,'(A10,A5,21X,A7,2A8,A7,A5,A5,A8)') &
         & "MEMORY "," KNUM","RSS_INC"," RSS_MAX"," HEAP_MX"," STK", &
         & " PGS"," CALL"," HEAP"
        LLMFIRST=.FALSE.
      ENDIF
      WRITE(0,'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
       & "MEMORY bfr",KNUM,CCDESC(KNUM),IMEM-IIMEM,IMEM,IMEMH,IMEMS, &
       & IPAG-IIPAG,(NCALLS(KNUM)+1)/2,IMEMC
    ENDIF
!   Remember the RSS at event start; the stop call subtracts it.
    NTMEM(KNUM,2)=IMEM
    IIMEM=IMEM
    IIPAG=IPAG
    IIMEMC=IMEMC
  ENDIF

CASE (1)
! Finish timing event
  ZTIME = THISTIME(KNUM)+(ZCLOCK-TIMELCALL(KNUM))
  TIMESUM(KNUM) = TIMESUM(KNUM)+ZTIME
  TIMESQSUM(KNUM) = TIMESQSUM(KNUM)+ZTIME**2
  TIMEMAX(KNUM) = MAX(TIMEMAX(KNUM),ZTIME)
  TTCPUSUM(KNUM) = TTCPUSUM(KNUM)+THISTCPU(KNUM)+ZTCPU-TTCPULCALL(KNUM)
  TVCPUSUM(KNUM) = TVCPUSUM(KNUM)+THISVCPU(KNUM)+ZVCPU-TVCPULCALL(KNUM)
  IF (MYPROC_STATS <= NSTATS_MEM .AND. MYPROC_STATS /= 0) THEN
!   Memory trace "after" the event.
    IMEM = GETRSS()/1024
    IPAG = GETPAG()
    IMEMH = GETHWM()/1024
    IMEMS = GETSTK()/1024
    IMEMC = 0
    IF (LSTATS_ALLOC) IMEMC = GETCURHEAP()/1024
    IF (IMEM > IIMEM.OR.IPAG > IIPAG.OR.(LSTATS_ALLOC.AND.(IMEMC.NE.IIMEMC))) THEN
      WRITE(0,'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
       & "MEMORY aft ",KNUM,CCDESC(KNUM),IMEM-IIMEM,IMEM,IMEMH,IMEMS, &
       & IPAG-IIPAG,NCALLS(KNUM)/2,IMEMC
    ENDIF
    IIMEM=IMEM
    IIPAG=IPAG
    IIMEMC=IMEMC
!   From here on IMEM is the RSS change over this event.  NTMEM columns
!   as used in this routine: 1 = largest change, 2 = RSS at event start,
!   3 = NCALLS value at the largest change, 4 = sum of changes,
!   5 = smallest change.
    IMEM=IMEM-NTMEM(KNUM,2)
    NTMEM(KNUM,4)=NTMEM(KNUM,4)+IMEM
    IF (IMEM > NTMEM(KNUM,1)) THEN
      NTMEM(KNUM,1)=IMEM
      NTMEM(KNUM,3)=NCALLS(KNUM)
    ENDIF
    IF (IMEM < NTMEM(KNUM,5)) NTMEM(KNUM,5)=IMEM
  ENDIF

CASE (2)
! Suspend timing event
  ZTIMED = ZCLOCK-TIMELCALL(KNUM)
  THISTIME(KNUM) = THISTIME(KNUM)+ZTIMED
  THISTCPU(KNUM) = THISTCPU(KNUM)+ZTCPU-TTCPULCALL(KNUM)
  THISVCPU(KNUM) = THISVCPU(KNUM)+ZVCPU-TVCPULCALL(KNUM)

CASE (3)
! Resume timing event
  TIMELCALL(KNUM) = ZCLOCK
  TTCPULCALL(KNUM) = ZTCPU
  TVCPULCALL(KNUM) = ZVCPU

END SELECT

TIME_LAST_CALL = ZCLOCK
IF (KNUM > 500 .OR. KNUM == 102 .OR. KNUM == 103) ZLAST_PAR_TIME = ZCLOCK

! Trace stats: record the first NTRACE_STATS calls (time and encoded
! switch/event pair).
NCALLS_TOTAL = NCALLS_TOTAL+1
IF (LTRACE_STATS .AND. NCALLS_TOTAL <= NTRACE_STATS) THEN
  ICALL = NCALLS_TOTAL
  TIME_TRACE(ICALL) = ZCLOCK
  NCALL_TRACE(ICALL) = (JPMAXSTAT+1)*KSWITCH+KNUM
ENDIF

END SUBROUTINE GSTATS