#!/bin/bash -eu
#
# Copyright (C) 2012-2017  Alexey Gladkov <gladkov.alexey@gmail.com>
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

# Force the C locale for this script and for every child process.
# A plain assignment only reaches children when the variable already
# happens to be in the environment, so the variables are exported
# explicitly: sort and join below must agree on collation.
export LANG=C
export LANGUAGE=C
export LC_ALL=C

. shell-error

abortfile=
# abort MESSAGE...
#
# Report a fatal error. Before handing the message to fatal (not defined
# in this file; presumably provided by shell-error and expected to print
# the message and exit), create the marker file named by $abortfile so
# that the top-level process can detect a failure that happened in a
# child process or a background job.
#
# The marker is best effort: if $abortfile is empty or cannot be created,
# the error message must still be reported, so a failing redirection is
# not allowed to stop the function under `set -e'.
abort()
{
	[ -z "$abortfile" ] || : >"$abortfile" || :
	fatal "$*"
}

# Manifest generation mode.
#
# When INITRD_COPY_INSIDE is set, the script was re-executed by itself
# (through xargs) and only has to print one manifest line per argument:
#
#   <type> TAB <destination> TAB <source> TAB <symlink target> NEWLINE
#
# where <type> is one of l, d, f, b, c, p, s. The settings come from the
# INITRD_COPY_* environment variables exported by the top-level process.
if [ -n "${INITRD_COPY_INSIDE-}" ]; then
	abortfile="${INITRD_COPY_ABORTFILE:-}"
	prefix="${INITRD_COPY_PREFIX:-}"
	destdir="${INITRD_COPY_DESTDIR:-}"

	for source; do
		# The manifest is tab separated and newline terminated, so neither
		# character may appear inside a field.
		case "$source" in
			*$'\n'*)
				abort "unable to generate manifest for '$source': filename contains a newline symbol"
				;;
			*$'\t'*)
				abort "unable to generate manifest for '$source': filename contains a tab symbol"
				;;
		esac

		symlink=
		if [ -h "$source" ]; then
			type=l
			symlink="$(readlink -n "$source")" ||
				abort "unable to read symbolic link: $source"
			if [ -n "$symlink" ]; then
				case "$symlink" in
					*$'\n'*)
						abort "unable to generate manifest for '$source': symlink contains a newline symbol"
						;;
					*$'\t'*)
						abort "unable to generate manifest for '$source': symlink contains a tab symbol"
						;;
				esac
				# The prefix is quoted so that it is removed literally and
				# not interpreted as a glob pattern.
				symlink="${symlink#"$prefix"}"
			fi
		elif [ -d "$source" ]; then
			type=d
		elif [ -f "$source" ]; then
			type=f
		elif [ -b "$source" ]; then
			type=b
		elif [ -c "$source" ]; then
			type=c
		elif [ -p "$source" ]; then
			type=p
		elif [ -S "$source" ]; then
			type=s
		else
			abort "unable to generate manifest for: $source"
		fi

		# Strip a leading destdir and then a leading prefix (both taken
		# literally) and re-root the result under destdir.
		destination="${source#"$destdir"}"
		destination="${destination#"$prefix"}"
		destination="$destdir/${destination#/}"

		printf '%s\t%s\t%s\t%s\n' "$type" "$destination" "$source" "$symlink"
	done
	exit
fi

. shell-signal

# Remember the field separators in effect at start-up; IFS is later
# replaced by a single tab and shebang_file needs the original value.
ORIG_IFS=$IFS

# Option defaults: everything is off/empty until the command line says
# otherwise.
prefix='' destdir='' dryrun='' force='' logfile=''

# Print the program name, version and copyright notice to stdout, then
# terminate the script.
print_version()
{
	printf '%s\n' \
		"$PROG version 2.9.0" \
		'Written by Alexey Gladkov.' \
		'' \
		'Copyright (C) 2012-2017  Alexey Gladkov <gladkov.alexey@gmail.com>' \
		'This is free software; see the source for copying conditions.  There is NO' \
		'warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.'
	exit
}

# Print the usage summary and the list of options to stdout, then
# terminate the script.
#
# The here-document uses `<<-', so its lines (and the terminator) must be
# indented with tabs only.
show_help()
{
	cat <<-EOF
	Usage: $PROG [<option>] <destdir> [directory|filename]...

	Utility allows to copy files and directories along with their dependencies
	into a specified destination directory.

	This utility follows symbolic links and binary dependencies and copies them
	along with the specified files.

	Options:
	   --dry-run                 don't do anything;
	   -f, --force               overwrite destination file if exists;
	   -D, --debug               print more information about file dependencies;
	   -l, --log=FILE            write log about what was copied;
	   -r, --remove-prefix=STR   ignore prefix in path;
	   -v, --verbose             print a message for each action;
	   -V, --version             output version information and exit;
	   -h, --help                display this help and exit.

	Report bugs to authors.

	EOF
	exit
}

# show_usage [MESSAGE...]
#
# Report an optional error message through message (not defined in this
# file), point the user at --help on stderr and exit with status 1.
show_usage()
{
	if [ -n "$*" ]; then
		message "$*"
	fi
	printf '%s\n' "Try \`$PROG --help' for more information." >&2
	exit 1
}

workdir=
# Cleanup handler: let every background job finish first, then remove
# the temporary working directory if one has been created.
exit_handler()
{
	wait
	if [ -n "$workdir" ]; then
		rm -rf -- "$workdir"
	fi
}

debug=
# debug MESSAGE...
#
# Pass the arguments to message (not defined in this file) when the
# $debug variable is non-empty; otherwise do nothing and succeed.
debug()
{
	if [ -z "$debug" ]; then
		return 0
	fi
	message "$@"
}


# file_exists PATH
#
# Succeed when PATH exists; a symbolic link counts as existing even if
# its target does not. Otherwise call abort with an error message.
file_exists()
{
	if [ -h "$1" ] || [ -e "$1" ]; then
		return 0
	fi
	abort "error: $1: No such file or directory"
}

# glob PATH...
#
# Return success when the first argument names an existing file or a
# symbolic link (dangling or not). Called with an unquoted pattern, it
# tells whether the pattern matched anything: an unmatched pattern is
# passed through literally and names nothing.
glob()
{
	if [ -e "$1" ]; then
		return 0
	fi
	[ -h "$1" ]
}

# canonicalize_file VARNAME PATH
#
# Normalize PATH textually (no filesystem access, symlinks are not
# resolved) and store the result in the variable named VARNAME:
#   - a leading `./' is replaced by $PWD, a leading `../' by the parent
#     of $PWD;
#   - `//' and `/./' are collapsed, `/../' and a trailing `/..' remove
#     the preceding component;
#   - a trailing `/' is removed, except for the root directory itself.
#
# The work is done in a helper with long, unlikely variable names so that
# the locals here cannot shadow the caller's VARNAME.
canonicalize_file() {
	local __canonicalize_file_name="$1" __canonicalize_file_result
	__canonicalize_file() {
		local fn="$1" r='' t=''
		[ -n "${fn%%./*}" ] ||
			fn="$PWD/${fn##./}"
		[ -n "${fn%%../*}" ] ||
			fn="${PWD%/*}/${fn#../}"
		# Apply one rewrite per iteration until the path stops changing.
		while [ "$fn" != "$r" ]; do
			r="$fn"
			if [ -z "$fn" ]; then
				# Everything was consumed (e.g. `/a/..'): that is the root.
				fn=/
			elif [ -z "${fn##*//*}" ]; then
				fn="${fn%%//*}/${fn#*//}"
			elif [ -z "${fn##*/./*}" ]; then
				fn="${fn%%/./*}/${fn#*/./}"
			elif [ -z "${fn##*/../*}" ]; then
				t="${fn%%/../*}"
				t="${t%/*}"
				fn="$t/${fn#*/../}"
			elif [ -z "${fn##*/..}" ]; then
				fn="${fn%/*/..}"
			elif [ "$fn" != / ] && [ -z "${fn##*/}" ]; then
				# Never strip the slash of `/' itself: that would empty the
				# path and the loop would flip between `' and `/' forever.
				fn="${fn%/}"
			fi
		done
		__canonicalize_file_result="$fn"
	}
	__canonicalize_file "$2"
	printf -v "$__canonicalize_file_name" '%s' "$__canonicalize_file_result"
}

# print_canonicalize PATH
#
# Write the canonicalized form of PATH (see canonicalize_file) to stdout,
# terminated by a NUL byte.
print_canonicalize()
{
	local resolved=
	canonicalize_file resolved "$1"
	printf '%s\0' "$resolved"
}

# expand_directory DIR
#
# Print every entry below DIR (DIR itself excluded) to stdout, each
# terminated by a NUL byte. Calls abort when find fails.
expand_directory()
{
	if ! find "$1" -mindepth 1 -print0; then
		abort "unable to list directory: $1"
	fi
}

binary_file()
{
	{
		ldd "$1" ||
			abort "unable to print shared object dependencies: $1"
	} |
	while read -r f; do
		f="${f% (0x*)}"
		f="${f##* => }"
		f="${f##[!/]*}"

		[ -n "$f" ] ||
			continue

		debug "binary file $1 -> $f"
		print_canonicalize "$f"
	done
}

# shebang_file FILE
#
# Inspect the first line of FILE. If it is a `#!' line, print the
# interpreter (canonicalized, NUL-terminated). For `#!/usr/bin/env PROG'
# the location of PROG in $PATH is printed as well.
#
# Errors are reported with abort rather than fatal: this function runs in
# a background job, and only the abort marker lets the top-level process
# notice that the job failed.
shebang_file()
{
	local f="$1"
	shift

	local line=
	# read returns non-zero when the first line has no trailing newline
	# (or the file cannot be read); keep whatever was read instead of
	# letting `set -e' terminate the job silently.
	read -r line < "$f" || [ -n "$line" ] ||
		return 0

	[ -n "$line" ] && [ -z "${line##\#\!*}" ] ||
		return 0

	line="${line##\#\!}"
	line="${line%$'\r'}"

	# Split the interpreter line on the original separators; the global
	# IFS has been replaced by a single tab.
	local IFS="$ORIG_IFS"
	set -- $line

	[ "$#" != 0 ] ||
		return 0

	local exe="$1"

	[ -z "${exe##/*}" ] ||
		abort "interpreter must be an absolute pathname: $f: $*"

	[ -x "$exe" ] ||
		abort "interpreter must be executable: $f: $*"

	case "$#,$exe" in
		2,/usr/bin/env)
			# `env PROG': the real interpreter is looked up in $PATH.
			local arg=
			arg="$(type -P "$2")"
			debug "script file $f -> $arg"
			print_canonicalize "$arg"
			;;
		[12],*)
			;;
		*,/usr/bin/env)
			abort "too many arguments: $f: $line"
			;;
	esac
	debug "script file $f -> $exe"
	print_canonicalize "$exe"
}

# Parse the command line with getopt(1) and re-inject the normalized
# argument list. Short options are listed without separators: a comma in
# the short-option string would itself be accepted as an option
# character. Long options are comma separated, as getopt expects.
TEMP="$(getopt -n "$PROG" -o 'fl:r:hDvV' -l 'dry-run,force,log:,remove-prefix:,help,debug,verbose,version' -- "$@")" ||
	show_usage
eval set -- "$TEMP"

while :; do
	case "$1" in
		-r|--remove-prefix) shift
			prefix="$1"
			;;
		-l|--log) shift
			logfile="$1"
			;;
		--dry-run)
			# Used as a command prefix: `:' turns the command into a no-op.
			dryrun=:
			;;
		-D|--debug)
			# Doubles as the -v flag handed to mkdir, ln, cp and chmod.
			debug=-v
			;;
		-f|--force)
			# Doubles as the -f flag handed to ln and cp.
			force=-f
			;;
		-h|--help)
			show_help
			;;
		-v|--verbose)
			# NOTE(review): presumably read by the verbose function, which
			# is not defined in this file - confirm against shell-error.
			verbose=-v
			;;
		-V|--version)
			print_version
			;;
		--) shift
			break
			;;
		*)
			fatal "Unknown option: $1"
			;;
	esac
	shift
done

# The first positional argument is the destination directory; it must
# already exist (readlink -e).
if [ "$#" -eq 0 ]; then
	show_usage "More arguments required"
fi

destdir="$(readlink -ev "$1")"
shift

# Private scratch directory, removed by exit_handler on exit.
set_cleanup_handler exit_handler
workdir="$(mktemp -dt "$PROG.XXXXXXXXX")"

abortfile="$workdir/abort"
manifest="$workdir/manifest"
new_files="$workdir/new"
processed_files="$workdir/done"
mimetypes="$workdir/mime"

# Settings for the re-executed copies of this script that generate
# manifest lines (manifest generation mode).
INITRD_COPY_PREFIX="$prefix"
INITRD_COPY_DESTDIR="$destdir"
INITRD_COPY_ABORTFILE="$abortfile"
INITRD_COPY_INSIDE=1
export INITRD_COPY_PREFIX INITRD_COPY_DESTDIR INITRD_COPY_ABORTFILE INITRD_COPY_INSIDE

# Manifest fields are tab separated, so from here on only a tab splits
# words and `read' fields.
IFS=$'\t'
export IFS

: >"$processed_files"
: >"$new_files"

# Seed the manifest with the files named on the command line. Without
# --force, a non-directory that is already present below destdir is
# skipped.
for n in "$@"; do
	if [ -z "$force" ] && [ ! -d "$n" ]; then
		if [ -h "$destdir/$n" ] || [ -e "$destdir/$n" ]; then
			continue
		fi
	fi
	file_exists "$n"
	print_canonicalize "$n"
done |
	xargs -0r "$0" > "$manifest"

if [ -e "$abortfile" ]; then
	exit 1
fi

# Prefer the bundled file-type classifier; fall back to file(1).
if [ -x "/usr/libexec/make-initrd/mi-file" ]; then
	file_bin="/usr/libexec/make-initrd/mi-file"
else
	file_bin="file"
fi

# Upper bound on the number of background jobs started per batch.
max_procs="${INITRD_COPY_PROCS:-7}"

# Dependency discovery loop.
#
# Every pass takes the manifest entries whose source has not been
# processed yet, finds what they depend on (directory contents, symlink
# targets, shared libraries, script interpreters), and appends manifest
# entries for those dependencies. The loop ends when a pass finds no
# unprocessed entries or discovers no new paths.
while :; do
	# Sort manifest before join. We do not need to sort the processed files
	# because we get them from this file.
	sort -k3,3 -uo "$manifest" "$manifest"

	# Get the list of unprocessed files: manifest lines whose source
	# (field 3) has no match in the processed list, printed as
	# <type> TAB <source> TAB <symlink>.
	join -1 1 -2 3 -v 2 -t '	' -o 2.1,2.3,2.4 "$processed_files" "$manifest" >"$new_files"

	# Nothing to do.
	[ -s "$new_files" ] ||
		break

	# Classify regular files: feed the source path of every `f' entry to
	# the classifier. Its output is consumed below one line per `f' entry,
	# in the same order.
	sed -n -e 's/^f	\([^	]\+\)	.*/\1/p' "$new_files" |
		"$file_bin" -nbhf - > "$mimetypes"

	# fd 3: unprocessed entries, fd 4: their classifications.
	exec 3<"$new_files" 4<"$mimetypes"

	eof=
	while [ -z "$eof" ]; do
		# Start a batch of at most $max_procs background jobs; job number
		# $i appends the paths it discovers to "$workdir/list.$i".
		i=0
		while [ $i -lt $max_procs ] && [ -z "$eof" ]; do
			# Some job (or child process) has called abort: stop reading.
			if [ -e "$abortfile" ]; then
				eof=1
				break
			fi

			read -r type source symlink <&3 || eof=1

			[ -n "$type" ] ||
				break

			# Add file to processed files (stdout of this loop is merged
			# into $processed_files below).
			printf '%s\n' "$source"

			case "$type" in
				d)
					expand_directory "$source" >> "$workdir/list.$i" &
					i=$(($i+1))
					;;
				l)
					# Resolve a relative target against the directory that
					# contains the link. Handled in the foreground, so it
					# does not use up a job slot.
					target="$symlink"
					[ -z "${target%%/*}" ] ||
						target="${source%/*}/$target"
					if [ ! -e "$target" ]; then
						message "Warning: symlink \`$source' points to the nonexistent \`$target'"
					else
						print_canonicalize "$target" >> "$workdir/list.$i"
					fi
					;;
				f)
					# Next classification line belongs to this file.
					read -r mime <&4 ||:
					case "$mime" in
						*'ELF '*' dynamically linked'*)
							binary_file  "$source" >> "$workdir/list.$i" &
							i=$(($i+1))
							;;
						*'script text'*)
							shebang_file "$source" >> "$workdir/list.$i" &
							i=$(($i+1))
							;;
					esac
					;;
			esac
		done
		# Let the whole batch finish before starting the next one.
		wait
	done |
		sort -mo "$processed_files" "$processed_files" -

	[ ! -e "$abortfile" ] ||
		exit 1

	# No job produced a list file: nothing new was discovered.
	glob "$workdir/list".* ||
		break

	# Generate manifest entries for the newly discovered paths by running
	# this script again (manifest generation mode).
	sort -zu "$workdir/list".* |
		xargs -0r "$0" >> "$manifest"

	rm -f -- "$workdir/list".*
done

# Add a directory entry for every ancestor (below destdir) of every
# manifest entry, so that the copy loop can create the directories with
# a plain mkdir. The new entries go to a separate file first and are
# merged into the manifest by the sort below; sort reads all of its input
# before it writes the output file, so reading and writing "$manifest"
# in one command is safe here.
# shellcheck disable=SC2094
while read -r type destination _; do
	# destdir is removed literally, not as a glob pattern.
	destination="${destination#"$destdir"}"

	# A directory entry needs its own path created; every other type
	# (including FIFOs, type `p') only needs its parent directories.
	case "$type" in
		d)
			;;
		*)
			destination="${destination%/*}"
			;;
	esac

	while [ -n "$destination" ]; do
		printf 'd\t%s\t\t\n' "$destdir/${destination#/}"
		destination="${destination%/*}"
	done
done < "$manifest" > "$manifest.dirs"

sort -u -d -k1,2 -o "$manifest" "$manifest" "$manifest.dirs"

# With SHOW_MANIFEST set in the environment, print the final manifest
# and stop without copying anything.
[ -z "${SHOW_MANIFEST-}" ] || {
	cat "$manifest"
	exit
}

# Create everything listed in the manifest below destdir.
#
# $dryrun, $debug and $force are deliberately left unquoted: an empty
# value must disappear from the command line, and a set $dryrun (`:')
# turns the whole command into a no-op.
while read -r type destination source symlink; do
	case "$type" in
		d)
			if [ -d "$destination" ]; then
				verbose "Directory \`$destination' already exists"
				continue
			fi
			verbose "Make new directory \`$destination'"
			$dryrun mkdir $debug -- "$destination" >&2
			;;
		l)
			if [ -z "$force" ] && [ -h "$destination" ]; then
				verbose "Symbolic link \`$destination' already exists"
				continue
			fi
			if [ -z "$force" ] && [ -e "$destination" ]; then
				verbose "Destination \`$destination' already exists"
				continue
			fi
			verbose "Create symbolic link \`$destination' -> \`$symlink'"
			$dryrun ln $debug $force -s -- "$symlink" "$destination" >&2
			;;
		b|c|f|p|s)
			# FIFOs (`p') are copied like the other special files.
			if [ -z "$force" ] && [ -e "$destination" ]; then
				verbose "File \`$destination' already exists"
				continue
			fi
			verbose "Copy file \`$destination'"
			$dryrun cp $debug $force -a --remove-destination -- "$source" "$destination" >&2
			;;
	esac
	# Log what was created, with the path shown relative to /initramfs.
	# destdir is removed literally, not as a glob pattern.
	[ -n "$dryrun" ] || [ -z "$logfile" ] ||
		printf '%s\t%s\t%s\t%s\n' "$type" "/initramfs/${destination#"$destdir"/}" "$source" "$symlink" >>"$logfile"
done < "$manifest"

# Give every copied directory the permissions of its source directory.
# Directories generated only as parents have an empty source and are
# skipped. Non-directory entries are skipped with `continue': block and
# character devices (`b', `c') sort before `d' in the manifest, so
# stopping at the first non-directory line would skip every directory.
while read -r type destination source _; do
	[ "$type" = "d" ] ||
		continue
	[ -d "$source" ] ||
		continue
	$dryrun chmod $debug --reference="$source" -- "$destination" >&2
done < "$manifest"

# Normal end of the script; nothing below this line is ever executed.
exit

# dependency builder requires it.
# The names below are the external utilities this script runs; they are
# listed as bare commands after `exit' only so that a dependency scanner
# can see them.
chmod; cp; ln; mkdir; sed; sort; join; xargs;
