LMDZ
gstats.F90
Go to the documentation of this file.
SUBROUTINE gstats(KNUM,KSWITCH)

!**** *GSTATS* - Gather timing statistics

!     PURPOSE.
!     --------
!       To gather timings for subsequent output by routine STATS_OUTPUT

!**   INTERFACE.
!     ----------
!       *CALL* *GSTATS(KNUM,KSWITCH)

!     EXPLICIT ARGUMENTS
!     --------------------
!       KNUM    - timing event number (for list of already defined events
!                 see routine STATS_OUTPUT); must lie in 0..JPMAXSTAT,
!                 otherwise ABOR1 is called
!       KSWITCH - KSWITCH=0 - switch on timer
!                 KSWITCH=1 - switch off timer
!                 KSWITCH=2 - suspend timer
!                 KSWITCH=3 - resume timer
!                 Any other value, or a call that breaks the on/off
!                 pairing of an event, ends in ABOR1.

!     IMPLICIT ARGUMENTS
!     --------------------
!       Module YOMGSTATS (counters, accumulated timings, control switches)

!     METHOD.
!     -------
!       Nothing is done unless LSTATS is set, and only the thread for
!       which OML_MY_THREAD() <= 1 records anything.  NCALLS(KNUM) is
!       incremented on every switch on/off, so an odd value means the
!       event is currently running; this parity is used to validate the
!       call sequence.

!     EXTERNALS.   USER_CLOCK - timing routine
!     ----------   DR_HOOK    - optional instrumentation (when LHOOK)
!                  GETRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG - memory probes
!                  ABOR1      - abort on inconsistent use

!     REFERENCE.
!     ----------
!       ECMWF Research Department documentation of the IFS

!     AUTHOR.
!     -------
!       Mats Hamrud ECMWF

!     MODIFICATIONS.
!     --------------
!       ORIGINAL : 98-11-15
!       D.Salmond: 02-02-25 Return if not master thread when called from a
!                           parallel region.
!       J.Hague:   03-06-11 Memory tracing (for NSTATS_MEM MPI tasks)
!     ------------------------------------------------------------------

USE parkind1 ,ONLY : jpim ,jprb ,jpib
USE yomhook ,ONLY : lhook, dr_hook

USE yomgstats
!USE MPL_MODULE ! MPL 28.11.08
USE yomoml

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KNUM
INTEGER(KIND=JPIM),INTENT(IN) :: KSWITCH

INTEGER(KIND=JPIM) :: IMOD,ICALL
INTEGER(KIND=JPIM),SAVE :: IIMEM, IIPAG, IIMEMC
INTEGER(KIND=JPIB) :: IMEM, IMEMH, IMEMS, IMEMC, IPAG, INUM
INTEGER(KIND=JPIB) :: GETRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG
EXTERNAL getrss, gethwm, getstk, getcurheap, getpag
REAL(KIND=JPRB) :: ZTIMED,ZCLOCK,ZTIME,ZTCPU,ZVCPU
REAL(KIND=JPRB),SAVE :: ZLAST_PAR_TIME
! One-shot flags: first call of the routine / first memory trace line.
LOGICAL,SAVE :: LLFIRST=.true.
LOGICAL,SAVE :: LLMFIRST=.true.
CHARACTER(LEN=32), SAVE :: CCDESC_DRHOOK(jpmaxstat)
CHARACTER(LEN=32), SAVE :: CCDESC_BARR(jpmaxstat)

INTEGER(KIND=JPIM),SAVE :: NUM_THREADS
REAL(KIND=JPRB),ALLOCATABLE,SAVE :: ZHOOK_HANDLE(:)
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_COMMS, ZHOOK_HANDLE_COMMS1
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_TRANS
REAL(KIND=JPRB),SAVE :: ZHOOK_HANDLE_BARR
CHARACTER(LEN=4) :: CC

INTERFACE
#include "user_clock.h"
END INTERFACE

! write(0,*) "GSTATS:LSTATS,JPMAXSTAT,LGSTATS_LABEL,KNUM=",LSTATS,JPMAXSTAT,LGSTATS_LABEL,KNUM

IF (lstats) THEN

  IF (.NOT.ALLOCATED(zhook_handle)) THEN
    num_threads = oml_max_threads()
    ALLOCATE(zhook_handle(num_threads))
  ENDIF

! Build the 32-character Dr.Hook labels:
! prefix + first 21 characters of the description + '(event number)'.
  IF (lgstats_label) THEN
    DO inum = 1, jpmaxstat
      WRITE(cc,'(I4)') inum
      ccdesc_barr(inum) = '>BAR-'//ccdesc(inum)(1:21)//'('//cc//')'
!     write(6,*) inum,cctype(inum)
      IF (cctype(inum).EQ."TRS" .OR. cctype(inum).EQ.'MP-' .OR. &
       &  cctype(inum).EQ.'MPL' .OR. cctype(inum).EQ.'BAR' .OR. &
       &  cctype(inum).EQ.'OMP') THEN
        ccdesc_drhook(inum) = '>'//cctype(inum)//'-'//ccdesc(inum)(1:21)//'('//cc//')'
      ENDIF
    ENDDO
!   Clear the request so the tables are built once per request instead
!   of on every call.
!   NOTE(review): the reviewed listing had a one-line gap in its
!   numbering here; confirm this reset matches the reference source.
    lgstats_label = .false.
  ENDIF
! ------------------------------------------------------------------

!J IF((KNUM > 1000 .AND.KNUM < 2001).AND.(.NOT.LSTATS_OMP))GOTO 99999
!J IF((KNUM > 500 .AND.KNUM < 1001).AND.(.NOT.LSTATS_COMMS))GOTO 99999

! Only the first thread gathers statistics.
  IF (oml_my_thread() > 1) RETURN

! Validate the event number BEFORE it is used as an array index
! (it indexes CCTYPE/CCDESC_DRHOOK just below).
  IF (knum < 0 .OR. knum > jpmaxstat) CALL abor1('GSTATS')

!J IF(KNUM/=0) THEN
!J IF(LSYNCSTATS .AND.(KSWITCH==0.OR. KSWITCH==2)) THEN
!J IF(.NOT.OML_IN_PARALLEL().AND. KNUM < 500 )THEN
!J IF(LHOOK)CALL DR_HOOK(CCDESC_BARR(KNUM),0,ZHOOK_HANDLE_BARR)
!J CALL MPL_BARRIER(CDSTRING='GSTATS:')
!J IF(LHOOK)CALL DR_HOOK(CCDESC_BARR(KNUM),1,ZHOOK_HANDLE_BARR)
!J ENDIF
!J ENDIF
!J ENDIF

! Forward switch on/off events to Dr.Hook, one saved handle per event
! category so the handle set at switch-on is still there at switch-off.
  IF (lhook .AND. (kswitch == 0 .OR. kswitch == 1)) THEN
!   write(0,*) "KNUM,SWITCH=",KNUM,KSWITCH
!   write(0,*) "CCTYPE=",CCTYPE(KNUM)
!   write(0,*) "CCDESC_DRHOOK=",CCDESC_DRHOOK(KNUM)
    SELECT CASE (cctype(knum))
    CASE ("TRS")
      CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_trans)
    CASE ('MP-')
      CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_comms)
    CASE ('MPL')
!     Event 682 is deliberately not forwarded.
      IF (knum.NE.682) THEN
        CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_comms1)
      ENDIF
    CASE ('OMP')
!     MPL 1.12.08: passing the whole ZHOOK_HANDLE array does not compile
!     because DR_HOOK expects a scalar key (PKEY).  Pass the element of
!     the only thread that reaches this point; being SAVEd, it survives
!     between switch-on and switch-off (a scratch local does not).
      CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle(1))
    CASE ('BAR')
      CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_barr)
    END SELECT
  ENDIF

! Wall clock always; CPU counters only when requested or for event 0.
  CALL user_clock(pelapsed_time=zclock)
  IF (lstatscpu .OR. knum == 0) THEN
    CALL user_clock(ptotal_cp=ztcpu,pvector_cp=zvcpu)
  ELSE
    ztcpu = 0.0_jprb
    zvcpu = 0.0_jprb
  ENDIF

! First call: reset every accumulator and reference time.
  IF (llfirst) THEN
!   write(0,*) "JPMAXSTAT:2=",JPMAXSTAT
    ncalls(:) = 0
    timesum(:) = 0.0_jprb
    timesqsum(:) = 0.0_jprb
    timemax(:) = 0.0_jprb
    timesumb(:) = 0.0_jprb
    ttcpusum(:) = 0.0_jprb
    tvcpusum(:) = 0.0_jprb
    timelcall(:) = zclock
    ccdesc=""
    cctype=""
    ntmem = 0
    ntmem(:,5) = 99999999
    iimem=0
    iipag=0
    iimemc=0
    time_last_call = zclock
    zlast_par_time=zclock
    llfirst = .false.
  ENDIF

! Check the call sequence: after counting this call, NCALLS must be odd
! for on/suspend/resume (event running) and even for off.
  IF (kswitch == 0 .OR. kswitch == 1) THEN
    ncalls(knum) = ncalls(knum)+1
  ENDIF
  imod = mod(ncalls(knum),2)
  IF (.NOT.((kswitch == 0 .AND. imod == 1) .OR. &
   &        (kswitch == 2 .AND. imod == 1) .OR. &
   &        (kswitch == 3 .AND. imod == 1) .OR. &
   &        (kswitch == 1 .AND. imod == 0))) THEN
    WRITE(jperr,*) 'KNUM,KSWITCH,IMOD,NCALLS(KNUM)',&
     &knum,kswitch,imod,ncalls(knum)
    CALL abor1('GSTATS')
  ENDIF

  SELECT CASE (kswitch)

  CASE (0)
!   Start timing event
    IF (knum < 500) THEN
      ztimed = zclock-time_last_call
    ELSE
      ztimed = zclock - zlast_par_time
    ENDIF
    timesumb(knum) = timesumb(knum)+ztimed
    thistime(knum) = 0.0_jprb
    timelcall(knum) = zclock
    ttcpulcall(knum) = ztcpu
    tvcpulcall(knum) = zvcpu
    thistcpu(knum) = 0.0_jprb
    thisvcpu(knum) = 0.0_jprb
    IF (myproc_stats.LE.nstats_mem .AND. myproc_stats.NE.0) THEN
!     Sample the resident set size here as well: IMEM is compared,
!     printed and stored below and would otherwise be undefined.
      imem = getrss()/1024
      ipag = getpag()
      imemh = gethwm()/1024
      imems = getstk()/1024
      imemc = 0
      IF (lstats_alloc) imemc = getcurheap()/1024
      IF (imem > iimem .OR. ipag > iipag .OR. &
       &  (lstats_alloc .AND. (imemc.NE.iimemc))) THEN
        IF (llmfirst) THEN
          WRITE(0,*) ".---------------------------------------------------------"
          WRITE(0,*) "| Memory trace details"
          WRITE(0,*) "| --------------------"
          WRITE(0,*) "| Memory examined at each GSTATS call if NSTATS_MEM>0."
          WRITE(0,*) "| Header for each trace line is:"
          WRITE(0,*) "|"
          WRITE(0,*) "| RSS_INC: Increase in RSS_MAX (KB)"
          WRITE(0,*) "| RSS_MAX: Maximum real working set so far (KB)"
          WRITE(0,*) "| HEAP_MX: High Water Mark for heap so far (KB)"
          WRITE(0,*) "| STK: Current Stack usage (KB)"
          WRITE(0,*) "| PGS: Page faults w I/O since last trace line"
          WRITE(0,*) "| CALL: Number of gstats call"
          WRITE(0,*) "| HEAP: Current malloc'd total (KB)"
          WRITE(0,*) "|"
          WRITE(0,*) "| Trace line written for NSTATS_MEM MPI tasks if RSS_MAX"
          WRITE(0,*) "| RSS_MAX increases, PGS>0, or HEAP changed"
          WRITE(0,*) "| (if LTATS_ALLOC=.TRUE.)"
          WRITE(0,*) "`---------------------------------------------------------"
          WRITE(0,*) ""
          WRITE(0,'(A10,A5,21X,A7,2A8,A7,A5,A5,A8)') &
           & "MEMORY "," KNUM","RSS_INC"," RSS_MAX"," HEAP_MX"," STK", &
           & " PGS"," CALL"," HEAP"
          llmfirst=.false.
        ENDIF
        WRITE(0,'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
         & "MEMORY bfr",knum,ccdesc(knum),imem-iimem,imem,imemh,imems, &
         & ipag-iipag,(ncalls(knum)+1)/2,imemc
      ENDIF
!     Remember the RSS at event start; the switch-off branch subtracts it.
      ntmem(knum,2)=imem
      iimem=imem
      iipag=ipag
      iimemc=imemc
    ENDIF

  CASE (1)
!   Finish timing event
    ztime = thistime(knum)+(zclock-timelcall(knum))
    timesum(knum) = timesum(knum)+ztime
    timesqsum(knum) = timesqsum(knum)+ztime**2
    timemax(knum) = max(timemax(knum),ztime)
    ttcpusum(knum) = ttcpusum(knum)+thistcpu(knum)+ztcpu-ttcpulcall(knum)
    tvcpusum(knum) = tvcpusum(knum)+thisvcpu(knum)+zvcpu-tvcpulcall(knum)
    IF (myproc_stats.LE.nstats_mem .AND. myproc_stats.NE.0) THEN
      imem = getrss()/1024
      ipag = getpag()
      imemh = gethwm()/1024
      imems = getstk()/1024
      imemc = 0
      IF (lstats_alloc) imemc = getcurheap()/1024
      IF (imem > iimem .OR. ipag > iipag .OR. &
       &  (lstats_alloc .AND. (imemc.NE.iimemc))) THEN
        WRITE(0,'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
         & "MEMORY aft ",knum,ccdesc(knum),imem-iimem,imem,imemh,imems, &
         & ipag-iipag,ncalls(knum)/2,imemc
      ENDIF
      iimem=imem
      iipag=ipag
      iimemc=imemc
!     From here on IMEM is the RSS change over this event.
!     NTMEM columns: 1 = largest change, 3 = NCALLS value at that
!     maximum, 4 = running sum of changes, 5 = smallest change.
      imem=imem-ntmem(knum,2)
      ntmem(knum,4)=ntmem(knum,4)+imem
      IF (imem > ntmem(knum,1)) THEN
        ntmem(knum,1)=imem
        ntmem(knum,3)=ncalls(knum)
      ENDIF
      IF (imem < ntmem(knum,5)) ntmem(knum,5)=imem
    ENDIF

  CASE (2)
!   Suspend timing event
    ztimed = zclock-timelcall(knum)
    thistime(knum) = thistime(knum)+ztimed
    thistcpu(knum) = thistcpu(knum)+ztcpu-ttcpulcall(knum)
    thisvcpu(knum) = thisvcpu(knum)+zvcpu-tvcpulcall(knum)

  CASE (3)
!   Resume timing event
    timelcall(knum) = zclock
    ttcpulcall(knum) = ztcpu
    tvcpulcall(knum) = zvcpu

  END SELECT

  time_last_call = zclock
  IF (knum > 500 .OR. knum == 102 .OR. knum == 103) zlast_par_time = zclock

! Trace stats
! Advance the global call counter so every traced call gets its own
! slot in the trace arrays.
! NOTE(review): the reviewed listing had a one-line gap in its numbering
! here; confirm this increment matches the reference source.
  ncalls_total = ncalls_total+1
  IF (ltrace_stats .AND. ncalls_total <= ntrace_stats) THEN
    icall = ncalls_total
    time_trace(icall) = zclock
    ncall_trace(icall) = (jpmaxstat+1)*kswitch+knum
  ENDIF

ENDIF

END SUBROUTINE gstats
integer, parameter jpib
Definition: parkind1.F90:14
real(kind=jprb), dimension(0:jpmaxstat) thisvcpu
Definition: yomgstats.F90:78
real(kind=jprb), dimension(0:jpmaxstat) ttcpulcall
Definition: yomgstats.F90:79
integer(kind=jpim), dimension(0:jpmaxstat, 5) ntmem
Definition: yomgstats.F90:95
integer(kind=jpim) myproc_stats
Definition: yomgstats.F90:92
real(kind=jprb), dimension(0:jpmaxstat) ttcpusum
Definition: yomgstats.F90:74
integer(kind=jpim) function, public oml_max_threads()
Definition: yomoml.F90:199
real(kind=jprb), dimension(0:jpmaxstat) tvcpulcall
Definition: yomgstats.F90:80
real(kind=jprb) time_last_call
Definition: yomgstats.F90:81
real(kind=jprb), dimension(0:jpmaxstat) timesum
Definition: yomgstats.F90:69
real(kind=jprb), dimension(0:jpmaxstat) timemax
Definition: yomgstats.F90:71
real(kind=jprb), dimension(0:jpmaxstat) tvcpusum
Definition: yomgstats.F90:75
subroutine abor1(CDTEXT)
Definition: abor1.F90:2
integer(kind=jpim) nstats_mem
Definition: yomgstats.F90:96
subroutine user_clock(PELAPSED_TIME, PELAPSED_TIME_SINCE, PVECTOR_CP, PTOTAL_CP)
Definition: user_clock.F90:2
integer(kind=jpim), parameter jpmaxstat
Definition: yomgstats.F90:61
real(kind=jprb), dimension(:), allocatable time_trace
Definition: yomgstats.F90:84
integer(kind=jpim) ntrace_stats
Definition: yomgstats.F90:64
integer(kind=jpim), dimension(:), allocatable ncall_trace
Definition: yomgstats.F90:67
!$Id itapm1 ENDIF!IM on interpole les champs sur les niveaux STD de pression!IM a chaque pas de temps de la physique c!positionnement de l argument logique a false c!pour ne pas recalculer deux fois la meme chose!c!a cet effet un appel a plevel_new a ete deplace c!a la fin de la serie d appels c!la boucle DO nlevSTD a ete internalisee c!dans d ou la creation de cette routine c c!CALL false
Definition: calcul_STDlev.h:26
logical lstats_alloc
Definition: yomgstats.F90:53
subroutine gstats(KNUM, KSWITCH)
Definition: gstats.F90:2
real(kind=jprb), dimension(0:jpmaxstat) timesqsum
Definition: yomgstats.F90:70
integer, parameter jprb
Definition: parkind1.F90:31
real(kind=jprb), dimension(0:jpmaxstat) timesumb
Definition: yomgstats.F90:72
logical ltrace_stats
Definition: yomgstats.F90:58
character *3, dimension(0:jpmaxstat) cctype
Definition: yomgstats.F90:89
logical lstatscpu
Definition: yomgstats.F90:54
character *50, dimension(0:jpmaxstat) ccdesc
Definition: yomgstats.F90:88
!$Id itapm1 ENDIF!IM on interpole les champs sur les niveaux STD de pression!IM a chaque pas de temps de la physique c!positionnement de l argument logique a false c!pour ne pas recalculer deux fois la meme chose!c!a cet effet un appel a plevel_new a ete deplace c!a la fin de la serie d appels c!la boucle DO nlevSTD a ete internalisee c!dans d ou la creation de cette routine c c!CALL ulevSTD CALL &zphi philevSTD CALL &zx_rh rhlevSTD!DO klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon klev DO klon du jour ou toutes les read_climoz CALL true
integer(kind=jpim), parameter jperr
Definition: yomgstats.F90:85
logical lhook
Definition: yomhook.F90:12
real(kind=jprb), dimension(0:jpmaxstat) thistcpu
Definition: yomgstats.F90:77
integer(kind=jpim) ncalls_total
Definition: yomgstats.F90:66
subroutine dr_hook(CDNAME, KSWITCH, PKEY)
Definition: yomhook.F90:17
Definition: yomoml.F90:1
integer, parameter jpim
Definition: parkind1.F90:13
logical lgstats_label
Definition: yomgstats.F90:59
integer(kind=jpim), dimension(0:jpmaxstat) ncalls
Definition: yomgstats.F90:65
integer(kind=jpim) function, public oml_my_thread()
Definition: yomoml.F90:192
logical lstats
Definition: yomgstats.F90:49
real(kind=jprb), dimension(0:jpmaxstat) thistime
Definition: yomgstats.F90:76
real(kind=jprb), dimension(0:jpmaxstat) timelcall
Definition: yomgstats.F90:73