#!/bin/bash
#
# Log-checking daemon: prologue.
# Loads the production environment, defines the directory layout and the
# tunables, makes sure the bookkeeping files exist, writes the start-up line
# to the daemon log and seeds the control value read into TIME.
#

# Load the production environment; everything it prints is discarded.
source /gpfs/wizard/flight/production/installed/set_pam_env.sh >/dev/null 2>&1

# PAM_BIN is not assigned in this file - presumably the sourced script sets
# it. Without it the daemon leaves immediately.
if [ -z "$PAM_BIN" ]; then
  exit
fi

# Report mail recipients (used as To: and Cc: when the report is sent).
# NOTE(review): MAIL is also a variable bash itself gives a meaning to
# (mailbox path) - confirm that overriding it here is intended.
export MAIL=Emiliano.Mocchiutti@ts.infn.it
#export CCMAIL=Emiliano.Mocchiutti@ts.infn.it
export CCMAIL=boezio@ts.infn.it

# Directory layout.
export PRODDIR=/gpfs/wizard/flight/production/
export WRKDIR=$PRODDIR/deamons/wrkdir/
export LOGDIR=$PRODDIR/logs/

# Tunables.
MAXNTRY=4        # compared against the per-file attempt counter
WAITFOR=10800    # seconds a log must be older than before it is examined

# Bookkeeping files, all kept in the working directory.
LOG=$WRKDIR/log.log                # daemon log
SENDMAIL=$WRKDIR/mail.log          # pending report mail body
MFILE=$WRKDIR/mailtime.ref         # its timestamp is used to pace the mails
RRBUSY=$WRKDIR/rrlist.busy         # busy flag for rr.rrlist
YODABUSY=$WRKDIR/yodalist.busy     # busy flag for yoda.yodalist
DVBUSY=$WRKDIR/dvlist.busy         # busy flag for dv.dvlist

# Create whatever is missing (and refresh the timestamps of the rest).
touch "$LOG" "$RRBUSY" "$YODABUSY" "$DVBUSY" "$MFILE" "$SENDMAIL"

echo " DEAMON STARTED AT $(date)" >> "$LOG"

# Seed the control file; its last line is read back into TIME.
echo "600" > "$WRKDIR/log.step"
#echo "-1" > $WRKDIR/log.step
TIME=$(tail -n 1 "$WRKDIR/log.step")
54 |
|
55 |
while [ $TIME -gt 0 ]; do |
56 |
|
#
# RAWREADER OUTPUT
#
# Examine every RawReader log ($LOGDIR/rr.<digit>*.log) whose timestamp is
# more than $WAITFOR seconds old:
#   * attempt counter above $MAXNTRY  -> log appended to the report mail and
#                                        moved to rr/bad/
#   * "finished, exiting..." missing  -> counter stored in the .retry file,
#                                        log moved to rr/bad/ and the entry in
#                                        rr.rrlist renamed with _rrfailed
#   * "finished, exiting..." present  -> log (and .retry file) moved to rr/good/
#
RULE="################################################################################"

ls "$LOGDIR"/rr.[0-9]*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Logs younger than $WAITFOR seconds are left for a later pass.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  PAMNAME0=$(basename "$file" .log)
  # NOTE(review): the dot in "rr." is an unescaped regex wildcard and the
  # substitution is global and unanchored - harmless only as long as the rest
  # of the name never contains "rr" followed by another character.
  PAMNAME1=$(echo "$PAMNAME0" | sed s/rr.//g).pam
  RETRY=$LOGDIR/rr.$PAMNAME1.retry

  # Current attempt number: last value stored in the .retry file plus one.
  NTRY=0
  if [ -f "$RETRY" ]; then
    NTRY=$(tail -n 1 "$RETRY")
    NTRY=$(( NTRY + 1 ))
  fi

  if [ "$NTRY" -gt "$MAXNTRY" ]; then

    # Too many attempts: give up on this file and report it.
    if grep -qi "finished, exiting..." "$file"; then
      ERROR="no way to process file $file"
      SUBJECT="Subject Logchecking deamon RR: $ERROR "
    else
      ERROR="no exiting signature"
      SUBJECT="Subject Logchecking deamon RR: $ERROR in file $file"
    fi

    {
      echo "$RULE"
      echo "$SUBJECT"
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"

    echo "NTRY is $NTRY " >> "$LOG"
    echo " File $file, error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"

    # Fix: the suffix used to be the literal text "NTRY" (missing "$"), so
    # every log given up on was written to the same name in rr/bad/.
    mv -f "$file" "$LOGDIR/rr/bad/$FILE.$NTRY"

  else

    if grep -qi "finished, exiting..." "$file"; then

      echo "NTRY is $NTRY " >> "$LOG"
      echo " File $file is good, moving log to good dir at $(date)" >> "$LOG"

      if [ -f "$RETRY" ]; then
        mv -f "$RETRY" "$LOGDIR/rr/good/"
      fi

      # Pick the first suffix not already used in the good directory.
      while [ -f "$LOGDIR/rr/good/$FILE.$NTRY" ]; do
        NTRY=$(( NTRY + 1 ))
      done
      mv -f "$file" "$LOGDIR/rr/good/$FILE.$NTRY"

      ### RNAME0=`basename $file .log`.pam
      ### RNAME=`echo $RNAME0 | sed 's/rr.//g'`
      ### rm -f $PRODDIR/preRawreader/$RNAME

    else

      # No completion signature: remember the attempt and park the log.
      echo "$NTRY" > "$RETRY"
      echo "NTRY is $NTRY " >> "$LOG"
      echo " RR on $file aborted move in bad dir at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/rr/bad/$FILE.$NTRY"

      # Wait for the rr.rrlist busy flag to clear, then raise it.
      # NOTE(review): check-then-set on a plain file is not atomic.
      LOCKVAL=$(tail -n 1 "$RRBUSY")
      if [ -z "$LOCKVAL" ]; then
        echo "0" > "$RRBUSY"
        LOCKVAL=$(tail -n 1 "$RRBUSY")
      fi
      while [ "$LOCKVAL" -eq 1 ]; do
        echo " RR LIST FILE IS BUSY $(date)" >> "$LOG"
        sleep 1
        LOCKVAL=$(tail -n 1 "$RRBUSY")
      done
      echo "1" > "$RRBUSY"

      echo " Force resubmitting of job for file $PAMNAME1 " >> "$LOG"
      echo " Rename entry $PAMNAME1 to ${PAMNAME1}_rrfailed in rr.rrlist at $(date)" >> "$LOG"

      # Rewrite the list through a temporary file, keeping the previous
      # content appended to rr.rrlist.old.
      sed "s/$PAMNAME1/${PAMNAME1}_rrfailed/g" "$WRKDIR/rr.rrlist" 1> "$WRKDIR/rr.rrlist.tmp"
      cat "$WRKDIR/rr.rrlist" >> "$WRKDIR/rr.rrlist.old"
      mv -f "$WRKDIR/rr.rrlist.tmp" "$WRKDIR/rr.rrlist"

      echo "0" > "$RRBUSY"

    fi

    # Both branches above move the log away, so it is still here only if
    # that mv did not succeed; after 43200 s this is reported by mail.
    if [ -f "$file" ] && [ "$DELTA" -gt 43200 ]; then
      echo "NTRY is $NTRY " >> "$LOG"
      echo " Warning! Very old RR job in $file send mail at $(date)" >> "$LOG"
      ERROR="RR job in unknown status,"
      {
        echo "$RULE"
        echo "Subject Logchecking deamon RR: $ERROR file $file"
        echo "$RULE"
        cat "$file"
      } >> "$SENDMAIL"
    fi

  fi

done
234 |
|
#
# RAWREADER LSF JOBS
#
# Same bookkeeping as for the RawReader output, applied to the LSF job logs
# ($LOGDIR/rr.lsf.*.log). Success is recognised by the text
# "successfully completed"; the .retry file is rr.<name>.pam.retry and a
# failed entry is renamed with the _failed suffix in rr.rrlist.
#
RULE="################################################################################"

ls "$LOGDIR"/rr.lsf.*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Only logs older than $WAITFOR seconds are examined.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  PAMNAME0=$(basename "$file" .log)
  PAMNAME1=$(echo "$PAMNAME0" | sed s/rr.lsf.//g).pam
  RETRY=$LOGDIR/rr.$PAMNAME1.retry

  # Current attempt number: last stored value plus one, 0 without .retry file.
  NTRY=0
  if [ -f "$RETRY" ]; then
    NTRY=$(tail -n 1 "$RETRY")
    NTRY=$(( NTRY + 1 ))
  fi

  if [ "$NTRY" -gt "$MAXNTRY" ]; then

    # Attempt limit exceeded: report by mail and park the log.
    ERROR="no way to process file $file"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon RR: $ERROR "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " File $file, error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/rr/bad/$FILE.$NTRY"

  elif grep -qi "successfully completed" "$file"; then

    echo "NTRY is $NTRY " >> "$LOG"
    echo " LSF job $file done move in good dir at $(date)" >> "$LOG"

    # First suffix not yet taken in the good directory.
    while [ -f "$LOGDIR/rr/good/$FILE.$NTRY" ]; do
      NTRY=$(( NTRY + 1 ))
    done
    mv -f "$file" "$LOGDIR/rr/good/$FILE.$NTRY"

    if [ -f "$RETRY" ]; then
      mv -f "$RETRY" "$LOGDIR/rr/good/"
    fi

  else

    # Job did not complete: store the attempt number and park the log.
    echo "$NTRY" > "$RETRY"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " LSF job $file aborted move in bad dir at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/rr/bad/$FILE.$NTRY"

    # Wait until the busy flag of rr.rrlist is not 1, then set it.
    LOCKVAL=$(tail -n 1 "$RRBUSY")
    if [ -z "$LOCKVAL" ]; then
      echo "0" > "$RRBUSY"
      LOCKVAL=$(tail -n 1 "$RRBUSY")
    fi
    while [ "$LOCKVAL" -eq 1 ]; do
      echo " RR LIST FILE IS BUSY $(date)" >> "$LOG"
      sleep 1
      LOCKVAL=$(tail -n 1 "$RRBUSY")
    done
    echo "1" > "$RRBUSY"

    echo " Force resubmitting of job for file $PAMNAME1 " >> "$LOG"
    echo " Rename entry $PAMNAME1 to ${PAMNAME1}_failed in rr.rrlist at $(date)" >> "$LOG"

    # Rewrite the list via a temporary file; old content goes to rr.rrlist.old.
    sed "s/$PAMNAME1/${PAMNAME1}_failed/g" "$WRKDIR/rr.rrlist" 1> "$WRKDIR/rr.rrlist.tmp"
    cat "$WRKDIR/rr.rrlist" >> "$WRKDIR/rr.rrlist.old"
    mv -f "$WRKDIR/rr.rrlist.tmp" "$WRKDIR/rr.rrlist"

    echo "0" > "$RRBUSY"

  fi

  # The log is still in place only if the mv above did not succeed; once it
  # is older than 43200 s this is reported by mail.
  if [ -f "$file" ] && [ "$DELTA" -gt 43200 ]; then
    echo "NTRY is $NTRY " >> "$LOG"
    echo " Warning! Very old LSF job in $file send mail at $(date)" >> "$LOG"
    ERROR="LSF job in unknown status,"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon RR: $ERROR file $file "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
  fi

done
385 |
|
#
# YODA OUTPUT
#
# Examine every YODA log ($LOGDIR/yoda.<digit>*.log) whose timestamp is more
# than $WAITFOR seconds old. Before each file the control value in
# $WRKDIR/log.step is re-read: a value <= 0 ends the daemon, a value of 1
# pauses it (re-checked every 120 s).
#   * attempt counter above $MAXNTRY  -> log appended to the report mail and
#                                        moved to yoda/bad/
#   * "finished, exiting..." missing  -> counter stored, log moved to
#                                        yoda/bad/, entry in yoda.yodalist
#                                        renamed with _yodafailed
#   * "finished, exiting..." present  -> log moved to yoda/good/, the
#                                        postRawreader file removed and the
#                                        level0 products moved to level0/good/
#
RULE="################################################################################"

ls "$LOGDIR"/yoda.[0-9]*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  # Termination request?
  TIME=$(tail -n 1 "$WRKDIR/log.step")
  if [ "$TIME" -le 0 ]; then
    echo " EXIT DEAMON, RECEIVED TERM SIGNAL $(date)" >> "$LOG"
    exit
  fi

  # Pause request: stay here for as long as the control value is 1.
  while [ "$TIME" -eq 1 ]; do
    TIME=$(tail -n 1 "$WRKDIR/log.step")
    echo " TIME = 1! DEAMON PAUSE $(date)" >> "$LOG"
    sleep 120
  done

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Logs younger than $WAITFOR seconds are left for a later pass.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  PAMNAME0=$(basename "$file" .log)
  # NOTE(review): unescaped dot, global and unanchored substitution - any
  # "yoda" followed by one character is removed wherever it appears.
  PAMNAME1=$(echo "$PAMNAME0" | sed s/yoda.//g).pam
  RETRY=$LOGDIR/yoda.${PAMNAME1}.retry

  # Current attempt number: last value stored in the .retry file plus one.
  NTRY=0
  if [ -f "$RETRY" ]; then
    NTRY=$(tail -n 1 "$RETRY")
    NTRY=$(( NTRY + 1 ))
  fi

  if [ "$NTRY" -gt "$MAXNTRY" ]; then

    # Too many attempts: give up on this file and report it.
    if grep -qi "finished, exiting..." "$file"; then
      ERROR="no way to process file $file"
      SUBJECT="Subject Logchecking deamon YODA: $ERROR "
    else
      ERROR="no exiting signature"
      SUBJECT="Subject Logchecking deamon YODA: $ERROR in file $file "
    fi

    {
      echo "$RULE"
      echo "$SUBJECT"
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"

    echo "NTRY is $NTRY " >> "$LOG"
    echo " File $file, YODA error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"

    # Fix: one of the two branches used the literal suffix "NTRY" (missing
    # "$"); both now use the attempt number.
    mv -f "$file" "$LOGDIR/yoda/bad/${FILE}.$NTRY"

  else

    if grep -qi "finished, exiting..." "$file"; then

      echo "NTRY is $NTRY " >> "$LOG"
      echo " YODA File $file is good, moving log to good dir at $(date)" >> "$LOG"

      if [ -f "$RETRY" ]; then
        mv -f "$RETRY" "$LOGDIR/yoda/good/"
      fi

      # Pick the first suffix not already used in the good directory.
      while [ -f "$LOGDIR/yoda/good/$FILE.$NTRY" ]; do
        NTRY=$(( NTRY + 1 ))
      done
      mv -f "$file" "$LOGDIR/yoda/good/${FILE}.$NTRY"

      NFILE=$PRODDIR/postRawreader/$PAMNAME1

      #
      # HERE WE REMOVE POSTRAWREADER FROM DISK
      #
      echo " REMOVING FILE $NFILE FROM DISK " >> "$LOG" 2>&1
      rm -f "$NFILE" >> "$LOG" 2>&1

      # Move the level0 products of this file; any error text from mv (for
      # instance a missing ...Corrupted.dat) ends up in the daemon log.
      STEM=$(basename "$NFILE" .pam)
      YFILE=$PRODDIR/level0/$STEM.root
      echo " MOVING FILE $YFILE TO $PRODDIR/level0/good/ " >> "$LOG" 2>&1
      mv -f "$YFILE" "$PRODDIR/level0/good/" >> "$LOG" 2>&1
      mv -f "$PRODDIR/level0/${STEM}Corrupted.dat" "$PRODDIR/level0/good/" >> "$LOG" 2>&1

    else

      # No completion signature: remember the attempt and park the log.
      echo "$NTRY" > "$RETRY"
      echo "NTRY is $NTRY " >> "$LOG"
      echo " YODA on $file aborted move in bad dir at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/yoda/bad/$FILE.$NTRY"

      # Wait for the yoda.yodalist busy flag to clear, then raise it.
      # NOTE(review): check-then-set on a plain file is not atomic.
      LOCKVAL=$(tail -n 1 "$YODABUSY")
      if [ -z "$LOCKVAL" ]; then
        echo "0" > "$YODABUSY"
        LOCKVAL=$(tail -n 1 "$YODABUSY")
      fi
      while [ "$LOCKVAL" -eq 1 ]; do
        echo " YODA LIST FILE IS BUSY $(date)" >> "$LOG"
        sleep 1
        LOCKVAL=$(tail -n 1 "$YODABUSY")
      done
      echo "1" > "$YODABUSY"

      echo " Force resubmitting of job for file $PAMNAME1 " >> "$LOG"
      echo " Rename entry $PAMNAME1 to ${PAMNAME1}_yodafailed in yoda.yodalist at $(date)" >> "$LOG"

      # Rewrite the list through a temporary file, keeping the previous
      # content appended to yoda.yodalist.old.
      sed "s/$PAMNAME1/${PAMNAME1}_yodafailed/g" "$WRKDIR/yoda.yodalist" 1> "$WRKDIR/yoda.yodalist.tmp"
      cat "$WRKDIR/yoda.yodalist" >> "$WRKDIR/yoda.yodalist.old"
      mv -f "$WRKDIR/yoda.yodalist.tmp" "$WRKDIR/yoda.yodalist"

      echo "0" > "$YODABUSY"

    fi

    # Both branches above move the log away, so it is still here only if
    # that mv did not succeed; after 43200 s this is reported by mail.
    if [ -f "$file" ] && [ "$DELTA" -gt 43200 ]; then
      echo "NTRY is $NTRY " >> "$LOG"
      echo " Warning! Very old YODA job in $file send mail at $(date)" >> "$LOG"
      ERROR="YODA job in unknown status,"
      {
        echo "$RULE"
        echo "Subject Logchecking deamon YODA: $ERROR file $file"
        echo "$RULE"
        cat "$file"
      } >> "$SENDMAIL"
    fi

  fi

done
601 |
|
#
# YODA LSF JOBS
#
# Bookkeeping for the YODA LSF job logs ($LOGDIR/yoda.lsf.*.log). Success is
# recognised by the text "successfully completed"; the .retry file is
# yoda.<name>.pam.retry and a failed entry is renamed with the _yfailed
# suffix in yoda.yodalist.
#
RULE="################################################################################"

ls "$LOGDIR"/yoda.lsf.*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Only logs older than $WAITFOR seconds are examined.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  PAMNAME0=$(basename "$file" .log)
  PAMNAME1=$(echo "$PAMNAME0" | sed s/yoda.lsf.//g).pam
  RETRY=$LOGDIR/yoda.${PAMNAME1}.retry

  # Current attempt number: last stored value plus one, 0 without .retry file.
  NTRY=0
  if [ -f "$RETRY" ]; then
    NTRY=$(tail -n 1 "$RETRY")
    NTRY=$(( NTRY + 1 ))
  fi

  if [ "$NTRY" -gt "$MAXNTRY" ]; then

    # Attempt limit exceeded: report by mail and park the log.
    ERROR="no way to process file $file"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon YODA: $ERROR "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " YODA File $file, error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/yoda/bad/${FILE}.$NTRY"

  elif grep -qi "successfully completed" "$file"; then

    echo "NTRY is $NTRY " >> "$LOG"
    echo " YODA GRID job $file done move in good dir at $(date)" >> "$LOG"

    # First suffix not yet taken in the good directory.
    while [ -f "$LOGDIR/yoda/good/$FILE.$NTRY" ]; do
      NTRY=$(( NTRY + 1 ))
    done
    mv -f "$file" "$LOGDIR/yoda/good/${FILE}.$NTRY"

    if [ -f "$RETRY" ]; then
      mv -f "$RETRY" "$LOGDIR/yoda/good/"
    fi

  else

    # Job did not complete: store the attempt number and park the log.
    echo "$NTRY" > "$RETRY"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " YODA LSF job $file aborted move in bad dir at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/yoda/bad/${FILE}.$NTRY"

    # Wait until the busy flag of yoda.yodalist is not 1, then set it.
    LOCKVAL=$(tail -n 1 "$YODABUSY")
    if [ -z "$LOCKVAL" ]; then
      echo "0" > "$YODABUSY"
      LOCKVAL=$(tail -n 1 "$YODABUSY")
    fi
    while [ "$LOCKVAL" -eq 1 ]; do
      echo " YODA LIST FILE IS BUSY $(date)" >> "$LOG"
      sleep 1
      LOCKVAL=$(tail -n 1 "$YODABUSY")
    done
    echo "1" > "$YODABUSY"

    echo " Force resubmitting of job for file $PAMNAME1 " >> "$LOG"
    echo " Rename entry $PAMNAME1 to ${PAMNAME1}_yfailed in yoda.yodalist at $(date)" >> "$LOG"

    # Rewrite the list via a temporary file; old content goes to
    # yoda.yodalist.old.
    sed "s/$PAMNAME1/${PAMNAME1}_yfailed/g" "$WRKDIR/yoda.yodalist" 1> "$WRKDIR/yoda.yodalist.tmp"
    cat "$WRKDIR/yoda.yodalist" >> "$WRKDIR/yoda.yodalist.old"
    mv -f "$WRKDIR/yoda.yodalist.tmp" "$WRKDIR/yoda.yodalist"

    echo "0" > "$YODABUSY"

  fi

  # The log is still in place only if the mv above did not succeed; once it
  # is older than 43200 s this is reported by mail.
  if [ -f "$file" ] && [ "$DELTA" -gt 43200 ]; then
    echo "NTRY is $NTRY " >> "$LOG"
    echo " Warning! Very old YODA LSF job in $file send mail at $(date)" >> "$LOG"
    ERROR="LSF job in unknown status,"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon YODA: $ERROR file $file "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
  fi

done
752 |
|
#
# YodaProfiler OUTPUT
#
# Sort the YodaProfiler logs ($LOGDIR/profiler.<digit>*.log) older than
# $WAITFOR seconds. A log goes to profiler/bad/ (and into the report mail)
# when it lacks the "finished, exiting..." line or contains "error (";
# otherwise it goes to profiler/good/. No retry counting is done here.
# Before each file the control value in $WRKDIR/log.step is re-read:
# <= 0 ends the daemon, 1 pauses it.
#
RULE="################################################################################"

ls "$LOGDIR"/profiler.[0-9]*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  # Termination request?
  TIME=$(tail -n 1 "$WRKDIR/log.step")
  if [ "$TIME" -le 0 ]; then
    echo " EXIT DEAMON, RECEIVED TERM SIGNAL $(date)" >> "$LOG"
    exit
  fi

  # Pause request: loop while the control value stays at 1.
  while [ "$TIME" -eq 1 ]; do
    TIME=$(tail -n 1 "$WRKDIR/log.step")
    echo " TIME = 1! DEAMON PAUSE $(date)" >> "$LOG"
    sleep 120
  done

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Only logs older than $WAITFOR seconds are examined.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  # Not used further down in this section.
  PAMNAME0=$(basename "$file" .log)
  PAMNAME1=$(echo "$PAMNAME0" | sed s/profiler.//g).pam

  if ! grep -qi "finished, exiting..." "$file"; then

    # The run never reached its final message.
    ERROR="no exiting signature"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon YodaProfiler: $ERROR in file $file "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
    echo " File $file, YodaProfiler error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/profiler/bad/$FILE"

  else

    # Finished, but did it print any error line?
    FOUND=$(grep -i "error (" "$file")

    if [ -n "$FOUND" ]; then
      ERROR=$FOUND
      {
        echo "$RULE"
        echo "Subject Logchecking deamon YodaProfiler: \"$ERROR\" in file $file "
        echo "$RULE"
        cat "$file"
      } >> "$SENDMAIL"
      echo " File $file, YodaProfiler error \"$ERROR\" move in bad dir and sending mail at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/profiler/bad/$FILE"
    else
      echo " YodaProfiler File $file is good, moving log to good dir at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/profiler/good/$FILE"
    fi

  fi

done
849 |
|
850 |
|
851 |
|
#
# DARTHVADER LSF JOBS
#
# Bookkeeping for the DarthVader LSF job logs ($LOGDIR/dv.lsf.*.log) older
# than $WAITFOR seconds. Success is recognised by the text
# "successfully completed"; the .retry file is dv.<name>.pam.retry. On
# failure the entry in dv.dvlist is rewritten with a leading "-".
#
RULE="################################################################################"

ls "$LOGDIR"/dv.lsf.*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Only logs older than $WAITFOR seconds are examined.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  PAMNAME0=$(basename "$file" .log)
  # NOTE(review): unescaped dots, global and unanchored substitution.
  PAMNAME1=$(echo "$PAMNAME0" | sed s/dv.lsf.//g).pam
  RETRY=$LOGDIR/dv.${PAMNAME1}.retry

  # Current attempt number: last stored value plus one, 0 without .retry file.
  NTRY=0
  if [ -f "$RETRY" ]; then
    NTRY=$(tail -n 1 "$RETRY")
    NTRY=$(( NTRY + 1 ))
  fi

  if [ "$NTRY" -gt "$MAXNTRY" ]; then

    # Attempt limit exceeded: report by mail and park the log.
    ERROR="no way to process file $file"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon DarthVader: $ERROR "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " DarthVader File $file, error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/dv/bad/${FILE}.$NTRY"

  elif grep -qi "successfully completed" "$file"; then

    echo "NTRY is $NTRY " >> "$LOG"
    echo " DV GRID job $file done move in good dir at $(date)" >> "$LOG"

    # First suffix not yet taken in the good directory.
    while [ -f "$LOGDIR/dv/good/$FILE.$NTRY" ]; do
      NTRY=$(( NTRY + 1 ))
    done
    mv -f "$file" "$LOGDIR/dv/good/${FILE}.$NTRY"

    if [ -f "$RETRY" ]; then
      mv -f "$RETRY" "$LOGDIR/dv/good/"
    fi

  else

    # Job did not complete: store the attempt number and park the log.
    echo "$NTRY" > "$RETRY"
    echo "NTRY is $NTRY " >> "$LOG"
    echo " DarthVader LSF job $file aborted move in bad dir at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/dv/bad/${FILE}.$NTRY"

    # Wait until the busy flag of dv.dvlist is not 1, then set it.
    # NOTE(review): check-then-set on a plain file is not atomic.
    LOCKVAL=$(tail -n 1 "$DVBUSY")
    if [ -z "$LOCKVAL" ]; then
      echo "0" > "$DVBUSY"
      LOCKVAL=$(tail -n 1 "$DVBUSY")
    fi
    while [ "$LOCKVAL" -eq 1 ]; do
      echo " DV LIST FILE IS BUSY $(date)" >> "$LOG"
      sleep 1
      LOCKVAL=$(tail -n 1 "$DVBUSY")
    done
    echo "1" > "$DVBUSY"

    echo " Force resubmitting of job for file $PAMNAME1 " >> "$LOG"
    # Fix: the message used to announce a "_yfailed" suffix, while the sed
    # below actually prefixes the entry with "-"; the log now says what is
    # really written to dv.dvlist.
    echo " Rename entry $PAMNAME1 to -${PAMNAME1} in dv.dvlist at $(date)" >> "$LOG"

    # Rewrite the list via a temporary file; old content goes to dv.dvlist.old.
    sed "s/$PAMNAME1/-${PAMNAME1}/g" "$WRKDIR/dv.dvlist" 1> "$WRKDIR/dv.dvlist.tmp"
    cat "$WRKDIR/dv.dvlist" >> "$WRKDIR/dv.dvlist.old"
    mv -f "$WRKDIR/dv.dvlist.tmp" "$WRKDIR/dv.dvlist"

    echo "0" > "$DVBUSY"

  fi

  # The log is still in place only if the mv above did not succeed; once it
  # is older than 43200 s this is reported by mail.
  if [ -f "$file" ] && [ "$DELTA" -gt 43200 ]; then
    echo "NTRY is $NTRY " >> "$LOG"
    echo " Warning! Very old DarthVader LSF job in $file send mail at $(date)" >> "$LOG"
    ERROR="LSF job in unknown status,"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon DarthVader: $ERROR file $file "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
  fi

done
1002 |
|
#
# DarthVader OUTPUT
#
# Sort the DarthVader logs ($LOGDIR/dv.<digit>*.log) older than $WAITFOR
# seconds. A log goes to dv/bad/ (and into the report mail) when it lacks the
# "finished, exiting..." line or contains "error ("; otherwise it goes to
# dv/good/. No retry counting is done here. Before each file the control
# value in $WRKDIR/log.step is re-read: <= 0 ends the daemon, 1 pauses it.
#
RULE="################################################################################"

ls "$LOGDIR"/dv.[0-9]*.log 1> "$WRKDIR/log.list" 2>/dev/null

for file in $(cat "$WRKDIR/log.list"); do

  # Termination request?
  TIME=$(tail -n 1 "$WRKDIR/log.step")
  if [ "$TIME" -le 0 ]; then
    echo " EXIT DEAMON, RECEIVED TERM SIGNAL $(date)" >> "$LOG"
    exit
  fi

  # Pause request: loop while the control value stays at 1.
  while [ "$TIME" -eq 1 ]; do
    TIME=$(tail -n 1 "$WRKDIR/log.step")
    echo " TIME = 1! DEAMON PAUSE $(date)" >> "$LOG"
    sleep 120
  done

  FILE=$(basename "$file")
  NOW=$(date +%s)
  CTIME=$(date --reference="$file" +%s)
  DELTA=$(( NOW - CTIME ))

  # Only logs older than $WAITFOR seconds are examined.
  [ "$DELTA" -gt "$WAITFOR" ] || continue

  # Not used further down in this section.
  PAMNAME0=$(basename "$file" .log)
  PAMNAME1=$(echo "$PAMNAME0" | sed s/dv.//g).pam

  if ! grep -qi "finished, exiting..." "$file"; then

    # The run never reached its final message.
    ERROR="no exiting signature"
    {
      echo "$RULE"
      echo "Subject Logchecking deamon DarthVader: $ERROR in file $file "
      echo "$RULE"
      cat "$file"
    } >> "$SENDMAIL"
    echo " File $file, DarthVader error $ERROR move in bad dir and sending mail at $(date)" >> "$LOG"
    mv -f "$file" "$LOGDIR/dv/bad/$FILE"

  else

    # Finished, but did it print any error line?
    FOUND=$(grep -i "error (" "$file")

    if [ -n "$FOUND" ]; then
      ERROR=$FOUND
      {
        echo "$RULE"
        echo "Subject Logchecking deamon DarthVader: \"$ERROR\" in file $file "
        echo "$RULE"
        cat "$file"
      } >> "$SENDMAIL"
      echo " File $file, DarthVader error \"$ERROR\" move in bad dir and sending mail at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/dv/bad/$FILE"
    else
      echo " DarthVader File $file is good, moving log to good dir at $(date)" >> "$LOG"
      mv -f "$file" "$LOGDIR/dv/good/$FILE"
    fi

  fi

done
1099 |
|
#
# HERE I HAVE TO MERGE L2 files and MOVE GOOD FILES IN THE GOOD DIRECTORY
#

#
# END, SEND MAIL WHEN IT IS THE CASE
#
# The timestamp of $MFILE paces the reports: once it is more than 43200 s
# old it is refreshed and, if the pending mail body is non-empty, the body
# is mailed (only when it contains a "subject" line) and then reset.
#
NOW=$(date +%s)
CTIME=$(date --reference="$MFILE" +%s)
DELTA=$(( NOW - CTIME ))

if [ "$DELTA" -gt 43200 ]; then
  touch "$MFILE"
  if [ -s "$SENDMAIL" ]; then
    if grep -qi subject "$SENDMAIL"; then
      echo " Sending report mail at $(date)" >> "$LOG"
      mail -s "Logchecking deamon report at $(date)" -c "$CCMAIL" "$MAIL" < "$SENDMAIL"
    fi
    # Start over with an empty mail body, sent or not.
    rm -f "$SENDMAIL"
    touch "$SENDMAIL"
  fi
fi

# Re-read the control value: <= 0 ends the daemon, 1 pauses it, anything
# else is the number of seconds to sleep before the next pass.
TIME=$(tail -n 1 "$WRKDIR/log.step")
if [ "$TIME" -le 0 ]; then
  echo " EXIT DEAMON, RECEIVED TERM SIGNAL $(date)" >> "$LOG"
  exit
fi

while [ "$TIME" -eq 1 ]; do
  TIME=$(tail -n 1 "$WRKDIR/log.step")
  echo " TIME = 1! DEAMON PAUSE $(date)" >> "$LOG"
  sleep 120
done

sleep "$TIME"

# Fresh value for the test at the top of the main loop.
TIME=$(tail -n 1 "$WRKDIR/log.step")
1167 |
|
1168 |
done; |
1169 |
|
# Reached once the main loop's "$TIME -gt 0" test no longer holds: leave a
# final timestamped line in the daemon log.
echo " EXIT DEAMON, RECEIVED TERM SIGNAL $(date)" >> "$LOG"