#!/bin/bash
# usage: ./copy.sh <# workers> <web|tw|fb>
# Directory trees that are searched (symlinks followed) for "<content>*.rar"
# archives; they are listed in this order when the work list is built.
LOC1="/dfs/scratch1/dataset.rocky/spinn3r/F5"
LOC2="/dfs/scratch0/dataset/spinn3r/F5"
LOC3="/dfs/scratch0/dataset/spinn3r/D"
LOC4="/dfs/scratch0/dataset/spinn3r/E"

# Format a duration given in seconds as a compact human-readable string.
# Globals:
#   ntime (written) - the result, e.g. "1d 2h 3m 4s". Day, hour and minute
#                     parts are left out when they are zero; the seconds
#                     part is always present.
# Arguments:
#   $1 - duration in whole seconds
function displaytime {
  local T=$1
  local D=$((T/60/60/24))
  local H=$((T/60/60%24))
  local M=$((T/60%60))
  local S=$((T%60))
  ntime=""
  # Use arithmetic tests: inside [[ ]] the > operator compares strings
  # lexicographically, which is not a numeric comparison.
  if (( D > 0 )); then ntime="${ntime}${D}d "; fi
  if (( H > 0 )); then ntime="${ntime}${H}h "; fi
  if (( M > 0 )); then ntime="${ntime}${M}m "; fi
  ntime="${ntime}${S}s"
}

# Print the command-line usage summary (six lines) to stdout.
function printhelp {
  printf 'Usage:\n'
  printf '\t%s\n' './copy.sh <NUMBER-OF-WORKERS> <CONTENT-TYPE>'
  printf 'Parameters:\n'
  printf '\t%s\t%s\n' \
    '<NUMBER-OF-WORKERS> ...' \
    'the limit on the number of parallel workers in range [1, 100]'
  printf '\t\t\t\t%s\n' \
    'Notice: for number of workers > 100 java logging does not work!'
  printf '\t%s\t%s\n' \
    '<CONTENT-TYPE> ........' \
    'should be one of: web, tw, fb'
}

# check for arguments
# Every failed check prints an error line plus the usage text and exits with
# status 1.
if [ "$#" -lt 2 ]; then
  echo "ERROR: No enough arguments supplied!"
  printhelp
  exit 1
fi

# First argument: an optionally signed run of decimal digits.
re='^-?[0-9]+$'
if ! [[ $1 =~ $re ]]; then
  echo "ERROR: First argument should be a number!"
  printhelp
  exit 1
fi

# The range test is phrased positively and negated as a whole, so that a
# value `[` cannot evaluate (for example an integer too large for the shell)
# is rejected instead of slipping through because both comparisons errored.
if ! { [ "$1" -ge 1 ] && [ "$1" -le 100 ]; } 2>/dev/null; then
  echo "ERROR: Number of workers should be in range [1, 100]!"
  printhelp
  exit 1
fi

# Second argument: quoted so that an empty or multi-word value is compared
# as a string rather than making `[` fail and the check pass by accident.
if [ "$2" != "web" ] && [ "$2" != "tw" ] && [ "$2" != "fb" ]; then
  echo "ERROR: Content should be one of web, tw, fb!"
  printhelp
  exit 1
fi
# end check

# settings
# Root directory of the per-archive log files.
OUTFOLDER="log"
# Directory the archives are copied into before a worker is started on them;
# the path contains the short host name (HOSTNAME up to the first dot).
TMP="/lfs/${HOSTNAME%%.*}/0/niko"
WORKERS=$1
CONTENT=$2
# Newline-separated list of matching archives. Each location is listed and
# sorted on its own, so the order is: all of LOC1, then LOC2, LOC3, LOC4.
ALL_FILES=$(
  for loc in "$LOC1" "$LOC2" "$LOC3" "$LOC4"; do
    find -L "$loc" -name "$CONTENT*.rar" | sort
  done
)
#debug
#ALL_FILES=$(find -L "$LOC1" -name "$CONTENT*.rar" | sort | head -n 15)
# Count the lines of the list gathered above instead of walking all four
# trees a second time; an empty list yields 0.
NUM_ALL_FILES=$(printf '%s' "$ALL_FILES" | grep -c '')

# main
# Walk the archive list. For every archive whose log does not record a
# success, copy it into $TMP and start ./handle_one.sh on the copy in the
# background, keeping the number of handle_one.sh processes below $WORKERS.

# Print the number of processes whose command line matches handle_one.sh.
# The count covers every such process visible to pgrep, not only the
# children of this shell.
function count_workers {
  pgrep -f handle_one.sh | wc -l
}

start=$(date +%s)
i=0

# ALL_FILES holds one path per line; reading it line by line keeps paths
# intact that contain blanks or glob characters.
while IFS= read -r FILE; do
  # An empty list still yields one empty line from the here-string.
  [[ -n $FILE ]] || continue

  BASE=${FILE##*/}
  LOG_FILE=${BASE%.rar}.log
  # The seven characters following the first '-' of the log name.
  YEAR_MONTH="${LOG_FILE#*-}"
  YEAR_MONTH="${YEAR_MONTH:0:7}"
  LOG_FILE_PATH=$OUTFOLDER/$CONTENT/$YEAR_MONTH/$LOG_FILE

  mkdir -p "$OUTFOLDER/$CONTENT/$YEAR_MONTH"

  # Skip archives whose log contains exactly one __SUCCESS__ line.
  if [ -f "$LOG_FILE_PATH" ] \
    && [ "$(grep -c "__SUCCESS__" "$LOG_FILE_PATH")" -eq 1 ]; then
    printf '__skip__\t%s\n' "$LOG_FILE"
  else
    printf '__run___\t%s\n' "$LOG_FILE"

    # copy file
    # The trailing slash makes cp fail when $TMP is not a directory instead
    # of creating a plain file with that name. A failed copy skips the
    # archive; without a success log it is picked up again on the next run.
    if ! cp -- "$FILE" "$TMP/"; then
      printf '__error__\tcannot copy %s to %s\n' "$FILE" "$TMP" >&2
      continue
    fi

    # check how many workers are running
    # if all running sleep until someone is done
    running=$(count_workers)
    while [ "$running" -ge "$WORKERS" ]; do
      sleep 1
      running=$(count_workers)
    done

    # run new worker for this file
    # stdin is tied to /dev/null so the worker can never consume the file
    # list this loop is reading from.
    ./handle_one.sh "$TMP/$BASE" < /dev/null &> "$LOG_FILE_PATH" &

    # progress handling
    i=$((i+1))
    now=$(date +%s)
    runtime=$((now-start))
    displaytime "$runtime"
    printf '__debug__\t#workers=%s\tfiles=%s/%s\truntime=%s\n' \
      "$(count_workers)" "$i" "$NUM_ALL_FILES" "$ntime"
  fi
done <<< "$ALL_FILES"

# Block until every background worker started above has finished.
wait
now=$(date +%s)
runtime=$((now-start))
displaytime "$runtime"
printf '__debug__\t#workers=%s\tcopied files=%s/%s\truntime=%s\n' \
  "$(count_workers)" "$i" "$NUM_ALL_FILES" "$ntime"
