#!/bin/sh
#
# This script is run as root at the end of /var/lib/hepix/context/epilog.sh
# or at a late stage of the init process.
#
#  Andrew.McNab@cern.ch - November 2015
#  Modified: A.Tsaregorodtsev - April 2016
#

# Make sure the output directory exists up front: the log redirection of the
# main subshell below, the heartbeat file and the shutdown messages all live
# in /etc/joboutputs.
mkdir -p /etc/joboutputs

(

date --utc +"%Y-%m-%d %H:%M:%S %Z vm-bootstrap vm-bootstrap Start bootstrap on $(hostname)"

# Keep the whole command line as one space-joined string; it is handed on
# (word-split again) to vm-pilot further down.  "$*" is used because the
# result of "$@" inside an assignment is unspecified for POSIX sh.
RAWARGS="$*"

# Parse the --name=value options given to vm-bootstrap into shell variables.
# Arguments: the positional parameters of the script.
# Sets:      DIRAC_SITE, VM_UUID, CE_NAME, VO, RUNNING_POD, VERSION, PROJECT,
#            SETUP, CS_SERVERS, NUMBER_OF_PROCESSORS, WHOLE_NODE, REQUIRED_TAG
#            (each one only when the matching option is present).
# Unknown options are ignored; when an option is repeated the last one wins.
_vm_bootstrap_parse_args() {
  for _vm_arg in "$@"
  do
    case "$_vm_arg" in
      --dirac-site=*)
        DIRAC_SITE="${_vm_arg#*=}"
        ;;
      --vm-uuid=*)
        # Keep only the part in front of the first '#'
        VM_UUID="${_vm_arg#*=}"
        VM_UUID="${VM_UUID%%#*}"
        ;;
      --ce-name=*)
        CE_NAME="${_vm_arg#*=}"
        ;;
      --vo=*)
        VO="${_vm_arg#*=}"
        ;;
      --running-pod=*)
        RUNNING_POD="${_vm_arg#*=}"
        ;;
      --release-version=*)
        VERSION="${_vm_arg#*=}"
        ;;
      --release-project=*)
        PROJECT="${_vm_arg#*=}"
        ;;
      --setup=*)
        SETUP="${_vm_arg#*=}"
        ;;
      --cs-servers=*)
        CS_SERVERS="${_vm_arg#*=}"
        ;;
      --number-of-processors=*)
        NUMBER_OF_PROCESSORS="${_vm_arg#*=}"
        ;;
      --whole-node=*)
        WHOLE_NODE="${_vm_arg#*=}"
        ;;
      --required-tag=*)
        REQUIRED_TAG="${_vm_arg#*=}"
        ;;
      *)
        # unknown option
        ;;
    esac
  done
}

_vm_bootstrap_parse_args "$@"

# We might be running from cvmfs or from /var/spool/checkout, so resolve the
# real directory holding this script.  Everything is quoted so that a path
# containing whitespace survives intact.
CONTEXTDIR=$(readlink -f -- "$(dirname -- "$0")")
export CONTEXTDIR
date --utc +"%Y-%m-%d %H:%M:%S %Z vm-bootstrap CONTEXTDIR=$CONTEXTDIR"

# Get the utility function definitions (presumably the vm_* helpers that are
# called below - they are not defined in this file)
. "$CONTEXTDIR/vm-bootstrap-functions"

# Install the ACPI power action; per the original note it creates a
# shutdown_message if an ACPI shutdown signal is received
cp -f "$CONTEXTDIR/power.sh" /etc/acpi/actions/power.sh

# Suppress annoying mails
vm_disable_mail

#########################################################################
# No UUID came in via --vm-uuid: ask the OpenStack metadata service for it
if [ -z "$VM_UUID" ]; then
  VM_UUID=$(python -c 'import requests ; print requests.get("http://169.254.169.254/openstack/2013-10-17/meta_data.json").json()["uuid"]')
  export VM_UUID
fi

# From here on nothing on this machine may talk to the metadata service on
# port 80 any more, so user processes cannot read it later
/sbin/iptables -A OUTPUT -d 169.254.169.254 -p tcp --dport 80 -j DROP

##########################################################################
# Get $MACHINEFEATURES, $JOBFEATURES, and $JOBOUTPUTS if any
if [ -f /etc/profile.d/mjf.sh ]; then
  . /etc/profile.d/mjf.sh
  # Read the remote syslog target published as $MACHINEFEATURES/syslog;
  # empty when the key is missing or cannot be read
  machine_syslog=$(python -c "import urllib ; print urllib.urlopen('$MACHINEFEATURES/syslog').read().strip()")
  if [ -n "$machine_syslog" ]; then
    # Forward all messages to that target.  (The variable name used here was
    # previously misspelt, which produced a rule with an empty target.)
    echo "*.* @$machine_syslog" > /etc/rsyslog.d/vm.conf
    /sbin/service rsyslog restart
  fi
fi

###########################################################################
# /scratch: mount point for the big 40GB+ logical partition
mkdir -p /scratch
vm_mount_scratch_disk

# World-writable with the sticky bit so anyone can create directories there;
# the same applies to the tmp subdirectory meant for TMPDIR
chmod ugo+rwxt /scratch
mkdir -p /scratch/tmp
chmod ugo+rwxt /scratch/tmp

# boost-python is required by the DIRAC externals
yum -y install boost-python

# Bring up CVMFS
vm_cvmfs_bootstrap

# /dev/shm is needed for POSIX semaphores and missing in CernVM 3:
# mount it ourselves when it is not already a mount point
if ! mountpoint /dev/shm ; then
  mount /dev/shm
  chmod ugo+rwxt /dev/shm
fi

# Print the /etc/cron.d entry that refreshes /etc/joboutputs/vm-heartbeat every
# 10 minutes and uploads it with curl.
# Arguments: $1 - base URL of the joboutputs area (no trailing slash needed)
# Outputs:   two lines on stdout (RANDOM_DELAY setting and the cron line)
# printf '%s\n' is used instead of 'echo -e': the latter is not portable under
# /bin/sh and would also interpret backslash sequences inside the URL.
_vm_heartbeat_upload_cron() {
  printf '%s\n' \
    'RANDOM_DELAY=9' \
    "*/10 * * * * root echo \`cut -f1-3 -d' ' /proc/loadavg\` \`cat /proc/uptime\` >/etc/joboutputs/vm-heartbeat ; /usr/bin/curl --capath /etc/grid-security/certificates --cert /root/hostkey.pem --location --upload-file /etc/joboutputs/vm-heartbeat $1/vm-heartbeat >/tmp/curl.log 2>&1"
}

# Seed the heartbeat file with zeros until the first cron run overwrites it
echo 0.0 0.0 0.0 0.0 0.0 > /etc/joboutputs/vm-heartbeat

case "$JOBOUTPUTS" in
  http://*|https://*)
    # Joboutputs is a web location: upload the initial heartbeat now and let
    # cron refresh and upload it every 10 minutes
    /usr/bin/curl --capath /etc/grid-security/certificates --cert /root/hostkey.pem --location --upload-file /etc/joboutputs/vm-heartbeat "$JOBOUTPUTS/vm-heartbeat"
    _vm_heartbeat_upload_cron "$JOBOUTPUTS" >/etc/cron.d/vm-heartbeat
    ;;
  *)
    # No web location: vm-heartbeat is only rewritten locally, every 5 minutes
    echo '*/5 * * * * root echo `cut -f1-3 -d" " /proc/loadavg` `cat /proc/uptime` >/etc/joboutputs/vm-heartbeat' >/etc/cron.d/vm-heartbeat
    ;;
esac

# When there is no CernVM 2 swapfile (/var/swap), swap on the logical
# partition instead (not possible on the CernVM 3 aufs filesystem)
if ! [ -f /var/swap ] ; then
  swap_file=/scratch/swapfile
  # Per the original note, fallocate can be used iff /scratch is ext4
  fallocate -l 4g $swap_file
  chmod 0600 $swap_file
  mkswap $swap_file
  swapon $swap_file
fi

# Keep swapping to a minimum
sysctl vm.swappiness=1

# With cvmfs configured, replace the local grid-security directory by a link
# to the copy in cvmfs so that the CA certificates come from there
if [ -d /etc/cvmfs ]; then
  rm -Rf /etc/grid-security
  ln -sf /cvmfs/grid.cern.ch/etc/grid-security /etc/grid-security
fi

# Don't want to be doing this at 4 or 5am every day!
rm -f /etc/cron.daily/mlocate.cron

# Avoid age-old sudo problem: append the two global defaults to sudoers
printf '%s\n' 'Defaults !requiretty' 'Defaults visiblepw' >>/etc/sudoers

# Run save-payload-logs from cron every 5 minutes, discarding its stderr
chmod +x $CONTEXTDIR/save-payload-logs
printf '%s\n' "*/5 * * * * root $CONTEXTDIR/save-payload-logs 2>/dev/null" >/etc/cron.d/save-payload-logs

# Number of job slots (exported): 1 by default; when $MACHINEFEATURES is set
# the value published as log_cores is taken instead, and if that comes back
# empty the processor entries in /proc/cpuinfo are counted
NUM_JOB_SLOTS=1
export NUM_JOB_SLOTS
if [ -n "$MACHINEFEATURES" ]; then
  NUM_JOB_SLOTS=$(python -c "import urllib ; print urllib.urlopen('$MACHINEFEATURES/log_cores').read().strip()")
fi
if [ -z "$NUM_JOB_SLOTS" ] ; then
  NUM_JOB_SLOTS=$(grep '^processor' /proc/cpuinfo | wc -l)
fi

##############################################################################
# One DIRAC installation shared by all the pilots
date --utc +"%Y-%m-%d %H:%M:%S %Z vm-bootstrap install DIRAC in /opt/dirac"
# NOTE(review): the arguments are passed unquoted, so an empty value makes the
# following ones shift one position to the left - check what
# vm_dirac_install expects before changing this.
vm_dirac_install $PROJECT $VERSION $SETUP $DIRAC_SITE $CE_NAME $CS_SERVERS $VO

##############################################################################
# Launch the VM Monitoring Agent when it is shipped in the context directory
monitor_agent=$CONTEXTDIR/vm-monitor-agent
if [ -f $monitor_agent ]; then
  chmod +x $monitor_agent
  $monitor_agent
fi

##############################################################################
# Prepare the pilot scripts and log how many pilots are about to be started
chmod +x $CONTEXTDIR/vm-pilot $CONTEXTDIR/parse-jobagent-log

date --utc +"%Y-%m-%d %H:%M:%S %Z Starting $NUM_JOB_SLOTS pilots"

# Create the accounts for each job slot and run one pilot per slot in the
# background; slot numbers are formatted as two digits (00, 01, ...).
n=0
# NOTE(review): this assignment forces a single pilot regardless of the
# NUM_JOB_SLOTS value in effect before this point, and the hs06 value written
# below is therefore multiplied by 1.  Kept as is because it may be
# deliberate (one pilot per VM) - confirm.
NUM_JOB_SLOTS=1
while [ "$n" -lt "$NUM_JOB_SLOTS" ]
do
  nn=$(printf '%02d' "$n")

  # Create the account to run the Pilot (plt)
  /usr/sbin/useradd -b /scratch "plt$nn"

  # The background subshell started further down inherits this directory
  cd "/scratch/plt$nn"

  # NOTE(review): /root/hostkey.pem is installed under both names, hostkey.pem
  # and hostcert.pem; presumably it holds certificate and key in a single
  # file - confirm.
  mkdir -p "/scratch/plt$nn/etc/grid-security"
  cp /root/hostkey.pem "/scratch/plt$nn/etc/grid-security"
  chmod 0600 "/scratch/plt$nn/etc/grid-security/hostkey.pem"

  cp /root/hostkey.pem "/scratch/plt$nn/etc/grid-security/hostcert.pem"
  chmod 0600 "/scratch/plt$nn/etc/grid-security/hostcert.pem"

  chmod 0755 "/scratch/plt$nn"
  # owner:group is the POSIX spelling; owner.group is a deprecated extension
  chown -R "plt$nn:plt$nn" "/scratch/plt$nn"

  # This can be removed when PilotCommands.py is fixed!
  mkdir -p "/home/plt$nn/certs"
  ln -sf "/scratch/plt$nn/etc/grid-security/"* "/home/plt$nn/certs"

  # add pltNNp00 account for the Payload (p00) that pltNN can sudo to
  /usr/sbin/useradd -m -b /scratch "plt${nn}p00"
  /usr/sbin/usermod -G "plt${nn}p00" "plt${nn}"
  echo "Defaults>plt${nn}p00 !requiretty"         >>/etc/sudoers
  echo "Defaults>plt${nn}p00 visiblepw"           >>/etc/sudoers
  echo "Defaults>plt${nn}p00 !env_reset"          >>/etc/sudoers
  echo "plt$nn ALL = (plt${nn}p00) NOPASSWD: ALL" >>/etc/sudoers

  # One subprocess per job slot
  (
    # Now run the pilot script, possibly with the IaaS metadata updated VM_UUID.
    # $RAWARGS is intentionally unquoted: it is split back into the original
    # command line options.
    /usr/bin/sudo -n -u "plt$nn" "$CONTEXTDIR/vm-pilot" $RAWARGS --pilot-number="$nn" >>"/etc/joboutputs/vm-pilot.$nn.log" 2>&1

    # Last shutdown_message to be written is used as overall result
    "$CONTEXTDIR/parse-jobagent-log" "/etc/joboutputs/vm-pilot.$nn.log" > "/etc/joboutputs/shutdown_message_$nn"
    cp -f "/etc/joboutputs/shutdown_message_$nn" /etc/joboutputs/shutdown_message

    # Take the CPUNormalizationFactor value from the pilot configuration
    oneHS06=$(grep ' *CPUNormalizationFactor = ' "/scratch/plt$nn/pilot.cfg" | sed 's/ *CPUNormalizationFactor = //')
    if [ "$oneHS06" ] ; then
     # This is a quick hack but not bad as all measurements will have run at roughly the same time.
     echo $oneHS06 \* $NUM_JOB_SLOTS | bc > /etc/joboutputs/hs06
    fi

  ) > "/etc/joboutputs/vm-bootstrap.$nn.log" 2>&1 &

  n=$((n + 1))
done

# Wait for ALL subprocesses to finish
wait

# Save system logs
cp -f /var/log/boot.log /var/log/dmesg /var/log/secure /var/log/messages* /etc/cvmfs/site.conf /etc/cvmfs/default.* /etc/joboutputs

# Save payload logs
"$CONTEXTDIR/save-payload-logs"

# Upload every regular file in /etc/joboutputs: to $JOBOUTPUTS when that is an
# http(s) URL, and always to the depo.gridpp.ac.uk area for this VM.
if [ -n "$JOBOUTPUTS" ]; then
(
  # Give up rather than upload whatever happens to be in the current directory
  cd /etc/joboutputs || exit 1

  # Both values are the same for every file, so work them out once
  case "$JOBOUTPUTS" in
    http://*|https://*) upload_to_joboutputs=yes ;;
    *)                  upload_to_joboutputs=no ;;
  esac
  depo_subject=$(openssl x509 -in /root/hostkey.pem -noout -subject | sed 's/^subject=.*CN=//')
  # HOSTNAME is set by bash but not by every /bin/sh, hence the fallback
  depo_url="https://depo.gridpp.ac.uk/$depo_subject/$CE_NAME/${HOSTNAME:-$(hostname)}/$VM_UUID/"

  for log_file in *
  do
    # Skip directories and the literal '*' left by an empty directory
    [ -f "$log_file" ] || continue

    if [ "$upload_to_joboutputs" = yes ] ; then
      curl --capath /etc/grid-security/certificates --cert /root/hostkey.pem --location --upload-file "$log_file" "$JOBOUTPUTS/"
    fi
    curl --capath /etc/grid-security/certificates --cert /root/hostkey.pem --location --upload-file "$log_file" "$depo_url"
  done
)
fi

# NOTE(review): this unconditional exit ends the enclosing subshell right
# here, so none of the shutdown steps below is ever reached.  Presumably the
# VM is halted by some other component - confirm that this is intended, and
# if so consider removing the dead code.
exit

# Try conventional shutdown; the timestamped line records it in the log first
date --utc +'%Y-%m-%d %H:%M:%S %Z vm-bootstrap Run /sbin/shutdown -h now'
/sbin/shutdown -h now
# Give the shutdown a minute before falling back
sleep 60

# Fallback: write 'o' to the SysRq trigger (the original comment calls this an
# instant reboot; 'o' is presumably the power-off request - confirm)
date --utc +'%Y-%m-%d %H:%M:%S %Z vm-bootstrap Run echo o > /proc/sysrq-trigger'
echo o > /proc/sysrq-trigger

# Sleep here if neither worked
sleep 1234567890
) >>/etc/joboutputs/vm-bootstrap.log 2>&1
