#!/bin/bash
#
# This script can be used to send you an email once all of the jobs in
# your job array are finished. 
#
# The script creates a process id file for each of the jobs started on 
# a network filesystem and sends an email once all of the process id 
# files are gone.
#
# The script must be run before and after the job you are executing.
# Here is an example of a PBS job file that calls the monitoring script:
#
# #!/bin/bash
# #PBS -l nodes=1:ppn=1
# #PBS -l walltime=00:10:00
# #PBS -m a -M akrevl@cs.stanford.edu
# 
# /lfs/local/0/x/qjobmon.sh
# /lfs/local/0/x/python myjob.py
# /lfs/local/0/x/qjobmon.sh

# Email address defaults to your_csid@cs.stanford.edu; feel free to
# change it.
ADDR="$USER@cs.stanford.edu"

# Directory (on the network filesystem) that holds one marker file per
# started job. It must already exist; this script never creates it.
PIDDIR="/dfs/hulk/0/qjobmon"
if [ ! -d "$PIDDIR" ]; then
  echo "qjobmon: marker directory $PIDDIR does not exist" >&2
  exit 1
fi

# Without a job id the marker path would collapse to $PIDDIR itself, so
# refuse to continue instead of silently doing nothing useful.
if [ -z "${PBS_JOBID:-}" ]; then
  echo "qjobmon: PBS_JOBID is not set; run this from inside a PBS job" >&2
  exit 1
fi

# Numeric job id: everything before the first '.' or '[' of $PBS_JOBID,
# so that all tasks of one job array share the same JOBID.
JOBID="${PBS_JOBID%%.*}"
JOBID="${JOBID%%\[*}"

# Marker file for this particular task. The name may contain '[' and ']',
# which is why every expansion of it below is quoted.
PIDFILE="$PIDDIR/$PBS_JOBID"

# First invocation (marker absent): create the marker before the job runs.
if [ ! -f "$PIDFILE" ]; then
  if ! touch -- "$PIDFILE"; then
    echo "qjobmon: cannot create marker $PIDFILE" >&2
    exit 1
  fi
  exit 0
fi

# Second invocation (marker present): the job is done. Take a per-job lock;
# mkdir either creates the directory or fails, so only one task at a time
# gets past this loop.
LOCKDIR="$PIDDIR/lock.$JOBID"
while ! mkdir "$LOCKDIR"; do
  sleep 1
done
# Safety net: release the lock even if the script exits early below, so the
# remaining tasks of the array are not left spinning on a stale lock.
trap 'rm -r -- "$LOCKDIR"' EXIT

rm -- "$PIDFILE"

# Look for markers that still belong to this job. Match the id exactly, or
# the id followed by '.' or '[', so that job 123 does not pick up markers
# of job 1234.
# NOTE(review): a marker only exists once a task has called this script, so
# tasks that have not started yet are invisible here -- confirm whether an
# early mail is acceptable in that situation.
RJOBS="$(find "$PIDDIR/" -type f \
  \( -name "$JOBID" -o -name "$JOBID.*" -o -name "$JOBID\[*" \) -print)"
if [ -z "$RJOBS" ]; then
  MSG="Your job $JOBID has finished."
  SUBJECT="[ilcluster] Job $JOBID finished."
  # Feed the body through a pipe; no scratch file in the marker directory.
  printf '%s\n' "$MSG" | mail -s "$SUBJECT" "$ADDR"
fi

# Remove the lock (and disarm the safety net so it is not removed twice).
trap - EXIT
rm -r -- "$LOCKDIR"
