#!/bin/sh
#  Copyright (C) 2000-2009, Parallels, Inc. All rights reserved.
#  Copyright (C) 2006-2007 Dmitry V. Levin <ldv@altlinux.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#
#
# chkconfig: - 96 04
# description: OpenVZ startup script.
#
### BEGIN INIT INFO
# Provides: vz
# Required-Start: $network $remote_fs
# Required-Stop: $network $remote_fs
# Should-Start: sshd vzeventd
# Should-Stop: sshd vzeventd
# Default-Start: 2 3 5
# Default-Stop: 0 1 6
# Short-Description: OpenVZ startup script
# Description: OpenVZ startup script.
### END INIT INFO

# NOTE(review): presumably consumed by /etc/init.d/functions to disable its
# compatibility layer; nothing in this script reads it - confirm.
WITHOUT_RC_COMPAT=1

# Source function library.
. /etc/init.d/functions

# Fixed paths and names used throughout the script.
CONFIG_DIR="/etc/vz/conf"
VZCONF="/etc/vz/vz.conf"
VZREBOOTDIR=/var/lib/vz/vzreboot
LOCKFILE=/var/lock/subsys/vz
PROC_VZ_VESTAT=/proc/vz/vestat
VZDEV=venet0
# Module lists start out empty; they are only filled in below when
# MODULES_DISABLED is not set to a "yes" value.
PRELOAD_MODULES=
MODULES=
MODULES_OTHER=
NET_MODULES=
IPT_MODULES=

# SourceIfNotEmpty, is_no and is_yes come from the init function library
# sourced above. Exit quietly (status 0) when the global config cannot be
# sourced or when VIRTUOZZO is set to a "no" value.
SourceIfNotEmpty "$VZCONF" || exit 0
is_no "$VIRTUOZZO" && exit 0

# Build the kernel module lists. These assignments happen after $VZCONF was
# sourced, so they override same-named values coming from that file.
if ! is_yes "$MODULES_DISABLED"; then
	PRELOAD_MODULES="af_packet"
	MODULES="vzmon vzdquota vzdev"
	CPT_MODULES="vzcpt vzrst"
	MODULES_OTHER="vzcompat ${CPT_MODULES}"
	VNET_MODULES="vznetdev vznet"
	VETH_MODULES="vzethdev"
	NET_MODULES="${VNET_MODULES} ${VETH_MODULES}"
	# The watchdog module is optional and only added on request.
	is_yes "$VZWDOG" && MODULES="${MODULES} vzwdog"
	# Fall back to IPTABLES when IPTABLES_MODULES is empty.
	test -z "$IPTABLES_MODULES" && IPTABLES_MODULES="$IPTABLES"
	IPT_MODULES="ip_tables ${IPTABLES_MODULES} xt_tcpudp"
	is_yes "$IPV6" && IPT_MODULES="$IPT_MODULES $IP6TABLES"
	VZFS_MODULES="simfs"
	PLOOP_MODULES="ploop pfmt_ploop1 pfmt_raw pio_direct pio_nfs"
fi

# Set by get_veinfo() to /proc/vz/veinfo when that file exists.
VEINFO=
# Exit status of the script; updated by the action handlers below.
RETVAL=0
# Number of VEs handled in parallel on stop.
# When left empty, get_parallel() computes it as 'num_cpu * 4'.
PARALLEL=
# Run from the root directory for the rest of the script.
cd /

# Print a notice when /proc/vz is missing, i.e. the running kernel does not
# look like an OpenVZ kernel. Purely informational: always returns 0.
check_vzkernel()
{
	[ -d /proc/vz ] && return 0
	echo "Running kernel is not an OpenVZ kernel"
}

# Split the running kernel release (uname -r) into the globals
# KERNEL_MAJOR, KERNEL_MINOR and KERNEL_PATCHLEVEL.
# The result is cached: nothing happens when KERNEL_MAJOR is already set.
get_kernel_version()
{
	if [ -n "$KERNEL_MAJOR" ]; then
		return
	fi

	local release base rest
	release=$(uname -r)
	# Keep only the part before the first '-' or '+' (e.g. "2.6.32").
	base=${release%%[-+]*}

	# First dot-separated component.
	KERNEL_MAJOR=${base%%.*}
	case "$base" in
		*.*) rest=${base#*.} ;;
		*) rest= ;;
	esac

	# Second component.
	KERNEL_MINOR=${rest%%.*}
	case "$rest" in
		*.*) rest=${rest#*.} ;;
		*) rest= ;;
	esac

	# Third component; anything after a further dot is ignored.
	KERNEL_PATCHLEVEL=${rest%%.*}
}

# Verify that the running kernel was built with the options OpenVZ needs.
#
# Arguments: $1 - optional path to a gzip-compressed kernel config
#                 (defaults to /proc/config.gz)
# Returns:   0 when the config is unreadable (nothing to check) or when
#            all required options are present.
# Exits:     with status 1 after listing every missing option.
check_kernel_config()
{
	local config_gz="${1:-/proc/config.gz}"
	test -r "$config_gz" || return 0

	local conf opt err=0
	local opt_must="SIM_FS VE VE_CALLS VZ_GENCALLS"
	local major minor patch

	get_kernel_version
	# Keep only the leading digits so that odd release strings cannot
	# break the numeric comparisons below; empty components count as 0.
	major=${KERNEL_MAJOR%%[!0-9]*}
	minor=${KERNEL_MINOR%%[!0-9]*}
	patch=${KERNEL_PATCHLEVEL%%[!0-9]*}
	: "${major:=0}" "${minor:=0}" "${patch:=0}"

	# VZ_DEV must be set on 2.6 kernels past patchlevel 9 and on every
	# later series. The major number has to take part in the comparison:
	# looking at minor/patchlevel alone misses e.g. 3.10.0.
	# NOTE(review): the 2.6.x boundary is kept as the original test had
	# it (patchlevel strictly greater than 9) - confirm whether 2.6.9
	# itself should be included.
	if [ "$major" -gt 2 ] ||
	   { [ "$major" -eq 2 ] && [ "$minor" -gt 6 ]; } ||
	   { [ "$major" -eq 2 ] && [ "$minor" -eq 6 ] && [ "$patch" -gt 9 ]; }
	then
		opt_must="${opt_must} VZ_DEV"
	fi
#	local opt_rec="SCHED_VCPU FAIRSCHED VZ_QUOTA VZ_QUOTA_UGID VE_NETDEV VE_ETHDEV
#			VE_IPTABLES VZ_CHECKPOINT VZ_WDOG"

	# Active settings only: comment and blank lines are dropped.
	conf="$(zcat "$config_gz" 2>/dev/null | grep -E -v '^#|^$')"

	for opt in $opt_must; do
		# Anchor on the full option name; a bare "VE=" would also be
		# satisfied by unrelated options that merely end in "VE".
		if ! echo "$conf" 2>/dev/null | grep -q "^CONFIG_$opt="; then
			echo "ERROR: Missing kernel config option: CONFIG_$opt"
			err=1
		fi
	done
	if [ $err != 0 ]; then
		printf %s "Please recompile your kernel."
		failure "Please recompile your kernel."
		echo
		exit 1
	fi
}

# Mount the named cgroup hierarchies under /proc/vz. A hierarchy is only
# mounted when its directory exists; mount error output is discarded.
mount_cgroups()
{
	local hier
	for hier in beancounter container fairsched ; do
		[ -d /proc/vz/$hier ] || continue
		mount -t cgroup $hier /proc/vz/$hier -o name=$hier 2>/dev/null
	done
}

# Unmount the cgroup hierarchies under /proc/vz. Every hierarchy is tried
# unconditionally; umount error output is discarded. The return status is
# that of the last umount call.
umount_cgroups()
{
	local hier
	for hier in beancounter container fairsched ; do
		umount /proc/vz/${hier} 2>/dev/null
	done
}

# Make sure PARALLEL holds the number of VEs to process at once.
# A non-empty PARALLEL is left alone; otherwise it becomes four times
# the number of cpuN lines in /proc/stat (at least 4).
get_parallel()
{
	[ -z "$PARALLEL" ] || return 0
	local nprocs
	# The pattern is quoted so the shell cannot expand it as a glob, and
	# grep -E replaces the obsolescent egrep.
	nprocs=$(grep -Ecs '^cpu[0-9]+' /proc/stat)
	# Unreadable /proc/stat or a zero count: assume a single CPU.
	[ "$nprocs" -gt 0 ] 2>/dev/null || nprocs=1
	PARALLEL=$((nprocs * 4))
}

# Detect the kernel's VE information interface.
# Sets VEINFO to /proc/vz/veinfo when that file exists. Returns 1 only when
# neither that file nor $PROC_VZ_VESTAT is present, 0 otherwise (VEINFO is
# left untouched when only $PROC_VZ_VESTAT exists).
get_veinfo()
{
	if [ -f /proc/vz/veinfo ]; then
		VEINFO=/proc/vz/veinfo
		return 0
	fi
	if [ ! -f $PROC_VZ_VESTAT ]; then
		return 1
	fi
	return 0
}

# Succeed when the subsystem lock file exists and get_veinfo succeeds.
is_running()
{
	if [ ! -f "$LOCKFILE" ]; then
		return 1
	fi
	get_veinfo
}

# Switch the default CT sample config in $VZCONF from 'basic' to
# 'vswap-256m' on kernels whose /proc/vz/version starts with a number >= 42.
# Does nothing when /proc/vz/version lacks 'test' or 'stab', or when
# CONFIGFILE in $VZCONF already starts with "vswap".
# The only call visible in this script (in locked_start) is commented out.
modify_vzconf()
{
	# For vswap-enabled kernel (like RHEL6 042test or greater),
	# use vswap config for CT. Don't do it if CONFIGFILE= is modified

	# Only kernels whose version string contains 'test' or 'stab' qualify.
	egrep -q 'test|stab' /proc/vz/version 2>/dev/null || return 0

	local cfg_old='basic' cfg_new='vswap'
	# Leading run of digits of the version string (e.g. 042 from 042stab...).
	local kv=$(expr $(cat /proc/vz/version) : '\(^[[:digit:]]*\)')
	if test $kv -ge 42; then
		if ! grep -q "CONFIGFILE=\"$cfg_new" $VZCONF; then
			# Edits $VZCONF in place, on CONFIGFILE= lines only.
			action "WARNING! Changing configuration: Set $cfg_new in $VZCONF" sed -i "/^CONFIGFILE=/s/$cfg_old/$cfg_new-256m/" $VZCONF
		fi
	fi
}

# Report whether OpenVZ is running.
# Returns 0 when running, 3 when stopped.
status()
{
	check_vzkernel

	if ! is_running; then
		echo "OpenVZ is stopped."
		return 3
	fi

	echo "OpenVZ is running."
	return 0
}

# Load each kernel module given as an argument.
# Modules already listed by lsmod, and modules unknown to modinfo, are
# skipped silently.
# Returns: 0 when every attempted load succeeded, otherwise the status of
#          the first load that failed (later modules are still tried).
load_modules()
{
	local mod rc=0 mod_rc
	for mod; do
		! lsmod | grep -qs "^$mod[[:space:]]" || continue
		modinfo "$mod" >/dev/null 2>&1 || continue
		action "Loading module $mod:" modprobe "$mod"
		# Capture the status right away; any further command would
		# overwrite $?. Only the first failure is remembered.
		mod_rc=$?
		[ "$rc" -ne 0 ] || rc=$mod_rc
	done
	return $rc
}

# Unload each kernel module given as an argument, skipping modules that
# lsmod does not list. A module that cannot be removed is reported with
# "passed" rather than as a hard failure.
# Returns: 0 when every attempted removal succeeded, otherwise the status
#          of the first removal that failed.
# NOTE(review): $STRING is not set anywhere in this script; presumably it
# comes from the sourced function library - confirm.
unload_modules()
{
	local mod rc=0 mod_rc
	for mod; do
		lsmod | grep -qs "^$mod[[:space:]]" || continue
		printf %s "Unloading module $mod: "
		modprobe -r "$mod" >/dev/null 2>&1
		mod_rc=$?
		# Explicit if/else: with "a && b || c" the failure branch would
		# also run whenever the success reporter itself returned non-zero.
		if [ "$mod_rc" -eq 0 ]; then
			success "$STRING"
		else
			passed "$STRING"
			# Remember only the first failure.
			[ "$rc" -ne 0 ] || rc=$mod_rc
		fi
		echo
	done
	return $rc
}

# Prepare host-side networking for containers: load the network modules,
# apply sysctl defaults and bring up the $VZDEV interface.
# Returns early with 0 when $VZDEV is already up, and with a non-zero
# status when module loading fails, when VEINFO stays empty after
# get_veinfo, or when bringing up / addressing the interface fails.
setup_net()
{
	load_modules ${NET_MODULES} || return
	# Nothing to do when the interface is already up. The check uses
	# $VZDEV, like every other command in this function, instead of a
	# hard-coded interface name.
	if ip addr list | grep -qs "${VZDEV}:.*UP"; then
		return 0
	fi
	get_veinfo
	[ -n "$VEINFO" ] || return
	# Only the first sysctl runs under "action"; the remaining ones are
	# chained with && and run directly, each only if the previous command
	# succeeded. The status of the chain is not checked.
	action "Configuring sysctl:" \
		sysctl -w net.ipv4.conf.default.proxy_arp=0 && \
		sysctl -w net.ipv4.conf.default.send_redirects=1 && \
		sysctl -w net.ipv4.conf.all.send_redirects=0 && \
		sysctl -w net.bridge.bridge-nf-call-iptables=1

	action "Bringing up interface $VZDEV:" \
		ip link set ${VZDEV} up || return
	action "Assigning IPv4 address to interface $VZDEV:" \
		ip addr add 0.0.0.0/0 dev ${VZDEV} || return
	# Turn on global IPv4 forwarding when it is off for $VZDEV.
	if [ "$(sysctl -n net.ipv4.conf.$VZDEV.forwarding)" = 0 ]; then
		action "Enabling IPv4 packet forwarding:" \
			sysctl -w net.ipv4.ip_forward=1
	fi
	action "Configuring interface $VZDEV:" \
		sysctl -w net.ipv4.conf.${VZDEV}.send_redirects=0

	if is_yes "$IPV6"; then
		# Assign the link-local address only when IPv6 is enabled on
		# the interface (-e: ignore an unknown sysctl key).
		if [ "$(sysctl -n -e net.ipv6.conf.$VZDEV.disable_ipv6)" = 0 ]; then
			action "Assigning IPv6 address to interface $VZDEV:" \
				ip -6 addr add fe80::1/128 dev ${VZDEV} || return
		fi
		action "Configuring sysctl for ipv6:" \
			sysctl -w net.bridge.bridge-nf-call-ip6tables=1
	fi
}

# Configure the host (VE0): mount the cgroup hierarchies, apply the
# cpuunits weight and, when ${CONFIG_DIR}/0.conf asks for it with
# ONBOOT=yes, reset the node UB resources.
# Returns: non-zero only when vzctl fails; a missing or empty 0.conf, or
#          one without ONBOOT=yes, is not an error.
setup_ve0()
{
	mount_cgroups
	if test -z "${VE0CPUUNITS}"; then
		printf "VE0CPUUNITS is not set in %s; using value of 1000" "${VZCONF}"
		passed VE0CPUUNITS
		# Finish the status line, as the other printf/status pairs in
		# this script do.
		echo
		VE0CPUUNITS=1000
	fi
	action "Configuring cpuunits limit for VE0 to $VE0CPUUNITS:" \
		vzctl set 0 --cpuunits ${VE0CPUUNITS} || return
	# The VE0 config file is optional: its absence must not abort the
	# whole service start, so return success explicitly.
	test -s "${CONFIG_DIR}/0.conf" || return 0
	# Matches ONBOOT=yes and ONBOOT="yes" at the start of a line.
	grep -Eqs '^ONBOOT=("yes"|yes)' "${CONFIG_DIR}/0.conf" || return 0
	action "Configuring node UB resources:" \
		vzctl set 0 --reset_ub
}

# Start containers at boot: those recorded in $VZREBOOTDIR (unless their
# onboot value is "no") plus every container with onboot "yes", in the
# order produced by "vzlist -s-bootorder". With VZFASTBOOT and DISK_QUOTA
# enabled, a container whose quota is found still running is started with
# "vzquota on --nocheck" first and restarted once all others are up.
start_ves()
{
	local veid velist need_restart= entry

	# CTs that were running before a reboot. The names are collected
	# with a glob rather than by parsing ls output, and nothing is
	# touched when VZREBOOTDIR is empty (the rm would otherwise expand
	# to files directly under /).
	velist=
	if [ -n "$VZREBOOTDIR" ]; then
		for entry in "$VZREBOOTDIR"/*; do
			[ -e "$entry" ] || continue
			velist="$velist ${entry##*/}"
		done
		rm -f "$VZREBOOTDIR"/*
	fi
	# ... and not have ONBOOT=no
	test -n "$velist" && velist=$(vzlist -aH -octid,onboot $velist |
			awk '$2 != "no" {print $1}')
	# ... plus ones with ONBOOT=yes
	velist=$(echo "$velist"; vzlist -aH -octid,onboot |
			awk '$2 == "yes" {print $1}')
	# Then sort by bootorder
	test -n "$velist" && velist=$(vzlist -aH -octid -s-bootorder $velist)
	sysctl -q -w net.ipv4.route.src_check=0
	for veid in ${velist}; do
		[ "$veid" != "0" ] || continue
		if is_yes "$VZFASTBOOT" && is_yes "$DISK_QUOTA"; then
			printf "Preparing VE %s quota:" ${veid}
			vzquota stat ${veid} >/dev/null 2>&1
			# Only vzquota exit status 6 triggers the fast-boot path.
			if [ $? -eq 6 ]; then
				if vzquota show ${veid} 2>&1 |
				   grep -qs "vzquota : (warning) Quota is running" >/dev/null 2>&1; then
					vzquota on ${veid} --nocheck >/dev/null 2>&1
					need_restart="${need_restart} ${veid}"
				fi
			fi
			# Pass the real container ID instead of a literal "%s",
			# and finish the status line.
			success "Preparing VE ${veid} quota"
			echo
		fi
		[ -z "$VZREBOOTDIR" ] || rm -f "$VZREBOOTDIR/$veid"
		action "Starting VE $veid:" \
			vzctl start ${veid} --skip-fsck
	done
	# Second pass: restart the containers started with --nocheck quota.
	for veid in ${need_restart}; do
		action "Stopping VE $veid:" \
			vzctl stop ${veid}
		action "Starting VE $veid:" \
			vzctl start ${veid}
	done
}

# Suspend and/or stop every running container, then unmount the VE areas
# and turn off the remaining quotas.
# Each running container is first recorded in $VZREBOOTDIR so that
# start_ves can bring it back after a reboot. Containers are processed in
# batches of at most $PARALLEL concurrent vzctl invocations, and every
# stage is attempted three times over whatever "vzlist" still reports.
# Stages: "suspend" then "stop" when VE_STOP_MODE is empty or "suspend",
# otherwise only "stop".
ve_stop()
{
	local veid velist i iter pid pids msg stage stages

	if ! get_veinfo; then
		return
	fi

	# Pre-stop stage. Nothing under VZREBOOTDIR is touched when the
	# variable is empty (the paths would otherwise resolve under /).
	[ -z "$VZREBOOTDIR" ] || rm -f "$VZREBOOTDIR"/*
	velist=$(vzlist -1 2>/dev/null)
	for veid in $velist; do
		# Equalize cpuunits for all CTs
		vzctl set $veid --cpuunits 2000 --cpulimit 0 >/dev/null 2>&1
		# Save to vzreboot list
		[ -z "$VZREBOOTDIR" ] || touch "$VZREBOOTDIR/$veid"
	done

	get_parallel
	stages="stop"
	if [ -z "${VE_STOP_MODE}" ] || [ "$VE_STOP_MODE" = "suspend" ]; then
		stages="suspend stop"
	fi
	for stage in $stages; do
		case $stage in
			suspend)
				msg='Suspending VE'
				;;
			stop)
				msg='Shutting down VE'
				;;
		esac
		for i in 0 1 2; do
			iter=0
			pids=
			velist=$(vzlist -H -o ctid -sbootorder 2>/dev/null)
			for veid in $velist; do
				echo "$msg $veid"
				if [ "$stage" = "stop" ]; then
					# Unset cpulimit for CT to stop fast
					vzctl set $veid --cpulimit 0 >/dev/null 2>&1
				fi
				vzctl --skiplock $stage $veid >/dev/null 2>&1 &
				pids="$pids $!"
				iter=$(($iter+1))
				# Drain the batch once PARALLEL jobs are in flight
				# (-ge: with -gt one job too many would be started).
				if [ ${iter} -ge ${PARALLEL} ]; then
					for pid in ${pids}; do
						wait ${pid}
					done
					pids=
					iter=0
				fi
			done
			# Wait for the last, possibly partial, batch.
			for pid in $pids; do
				wait $pid
			done
		done
	done

	UnmountFilesystems 3 5 \
		'($3=="simfs") {print $2}' \
		"Unmounting VE area" \
		"Unmounting VE area (retry)"

	# Check readability first: a failing input redirection would print
	# a shell error before any stderr redirection takes effect.
	if [ -r /proc/vz/vzquota ]; then
		for veid in $(awk -F: '/^[0-9]+:/{print $1}' /proc/vz/vzquota 2>/dev/null); do
			action "Turning off quota for VE $veid:" \
				vzquota off ${veid}
		done
	fi
}

# Acquire the lock file named by $1.
# The PID of this shell is written to a temporary file next to the lock,
# which is then hard-linked to the lock name (ln fails when the name
# already exists). A lock whose recorded PID is not a positive number or
# does not answer "kill -0" is treated as stale and taken over.
# Arguments: $1 - path of the lock file
# Returns:   0 when the lock was acquired, 1 otherwise.
lockfile()
{
	local tempfile lockfile="${1}" holder rc=1

	tempfile="$(mktemp "${lockfile}.XXXXXX")" || return 1
	if ! echo $$ > "${tempfile}"; then
		# Do not leave the temporary file behind on a failed write.
		rm -f "${tempfile}"
		return 1
	fi

	if ln "${tempfile}" "${lockfile}" >/dev/null 2>&1; then
		rc=0
	else
		holder=$(cat "${lockfile}" 2>/dev/null)
		# Accept only a plain positive number as a PID; 0 or a
		# negative value would make kill address process groups.
		case "${holder}" in
			''|*[!0-9]*) holder= ;;
		esac
		if [ -n "${holder}" ] && [ "${holder}" -gt 0 ] &&
		   kill -0 "${holder}" >/dev/null 2>&1; then
			# The owner is alive: the lock is really held.
			rc=1
		elif ln -f "${tempfile}" "${lockfile}"; then
			# Stale lock: replace it with ours.
			rc=0
		fi
	fi
	rm -f "${tempfile}"
	return $rc
}

# Bring the OpenVZ subsystem up. Called by start() after the service lock
# has been taken.
# Sequence: load modules, create /dev/vzctl if missing, apply OOM settings,
# set up networking and VE0, start the containers, then create $LOCKFILE.
# RETVAL is updated after each checked step; the function returns early
# with that status when a checked step fails.
locked_start()
{
	local veid velist msg

	# The subsystem lock file already exists: report and do nothing.
	if [ -f "$LOCKFILE" ]; then
		msg_already_running vz
		passed "OpenVZ startup"
		echo
		# $? here is the status of the echo just above.
		RETVAL=$?
		return $RETVAL
	fi

	# Results of these two calls are not checked.
	load_modules ${IPT_MODULES}
	unload_modules ${PRELOAD_MODULES}

	# Loading the preload and core modules is checked.
	load_modules ${PRELOAD_MODULES} ${MODULES}
	RETVAL=$?
	if [ $RETVAL -ne 0 ]; then
		return $RETVAL
	fi

	# The remaining module groups are loaded without checking the result.
	load_modules ${MODULES_OTHER} ${VZFS_MODULES} ${PLOOP_MODULES}

	if [ ! -e /dev/vzctl ]; then
		# On most modern distros udev will create a device for you,
		# while on the old distros /dev/vzctl comes with vzctl rpm.
		# So the below mknod call is probably not needed at all.
		action "Creating vzctl device:" \
			mknod -m 600 /dev/vzctl c 126 0
		RETVAL=$?
		if [ $RETVAL -ne 0 ]; then
			return $RETVAL
		fi
	fi

	# Feed the OOM group configuration to the kernel when it offers the
	# interface. Success is reported regardless of the result of cat.
	if [ -f /proc/vz/oom_score_adj ]; then
		echo -n "Applying OOM adjustments:"
		cat /etc/vz/oom-groups.conf > /proc/vz/oom_score_adj
		echo_success
		echo
	fi

	setup_net
	RETVAL=$?
	if [ $RETVAL -ne 0 ]; then
		return $RETVAL
	fi

	setup_ve0
	RETVAL=$?
	if [ $RETVAL -ne 0 ]; then
		return $RETVAL
	fi

# Altering the config from an init script is undesirable, so this stays off:
#	modify_vzconf
	start_ves

	# Mark the subsystem as started; the status of touch is what this
	# function returns on the normal path.
	touch "$LOCKFILE"
}

# Shut the OpenVZ subsystem down. Called by stop() after the service lock
# has been taken. Stops the containers, unmounts the cgroup hierarchies,
# takes $VZDEV down, unloads all module groups and finally removes
# $LOCKFILE. The return status is that of the final rm.
locked_stop()
{
	local modules

	ve_stop
	umount_cgroups
	action "Shutting down interface $VZDEV:" ip link set ${VZDEV} down

	# Same unload order as a single argument list: other, core, preload,
	# iptables, network, filesystem and ploop modules.
	modules="${MODULES_OTHER} ${MODULES} ${PRELOAD_MODULES}"
	modules="${modules} ${IPT_MODULES} ${NET_MODULES}"
	modules="${modules} ${VZFS_MODULES} ${PLOOP_MODULES}"
	unload_modules ${modules}

	rm -f "$LOCKFILE"
}

# "start" action: check the kernel, take the service lock
# (${LOCKFILE}_lock) and run locked_start under it.
# Sets RETVAL and returns the same value: 1 when the lock cannot be
# taken, otherwise the status of locked_start.
start()
{
	check_vzkernel
	check_kernel_config

	if ! lockfile ${LOCKFILE}_lock; then
		printf %s "OpenVZ is locked"
		failure "OpenVZ startup"
		echo
		# Report the failure explicitly: $? at this point would be
		# the status of the echo above, i.e. always 0.
		RETVAL=1
		return $RETVAL
	fi

	locked_start
	RETVAL=$?
	rm -f ${LOCKFILE}_lock
	return $RETVAL
}

# "stop" action: refuse to run inside a container, take the service lock
# (${LOCKFILE}_lock) and run locked_stop under it.
# Sets RETVAL and returns the same value: 1 when running inside a
# container or when the lock cannot be taken, otherwise the status of
# locked_stop.
stop()
{
	# Avoid stop action inside a CT, check we are in CT0: the
	# beancounters file must contain an entry for ID 0.
	if test -r /proc/user_beancounters; then
		if ! grep -Eq '^[[:space:]]*0:[[:space:]]' /proc/user_beancounters; then
			printf %s "Inside OpenVZ"
			failure "Looks like we are inside a container!"
			echo
			# Explicit failure status; $? here would be the
			# status of the echo above, i.e. always 0.
			RETVAL=1
			return $RETVAL
		fi
	fi
	if ! lockfile ${LOCKFILE}_lock; then
		printf %s "OpenVZ is locked"
		failure "OpenVZ stop"
		echo
		RETVAL=1
		return $RETVAL
	fi
	locked_stop
	RETVAL=$?
	rm -f ${LOCKFILE}_lock
	return $RETVAL
}

# "restart" action: run stop, then start. start is run regardless of how
# stop ended; RETVAL ends up holding whatever start assigned last.
restart()
{
	stop
	start
}

# Dispatch on the requested action. The handlers record their result in
# RETVAL, which becomes the exit status of the script.
case "$1" in
	start|stop|restart)
		# The action name is also the name of its handler function.
		"$1"
		;;
	status)
		status
		RETVAL=$?
		;;
	condstop)
		# Stop only when the subsystem lock file exists.
		[ ! -e "$LOCKFILE" ] || stop
		;;
	condrestart)
		# Restart only when the subsystem lock file exists.
		[ ! -e "$LOCKFILE" ] || restart
		;;
	*)
		msg_usage "${0##*/} {start|stop|status|restart|condstop|condrestart}"
		RETVAL=1
		;;
esac

exit $RETVAL

