#!/bin/sh -efu

# Copyright (C) 2025 Paul Wolneykien <manowar@altlinux.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

#shellcheck disable=SC2002

# The shebang passes -efu, but interpreter options given there are lost
# when the script is started as "sh <script>".  Set them explicitly too:
#   -e  stop at the first failing command (the "exit 1" inside the
#       piped while-loop only ends the whole script because of this),
#   -f  disable pathname expansion,
#   -u  treat the expansion of an unset variable as an error.
set -efu

# Force byte-wise collation so sort(1) and comm(1) agree on the order.
# See https://www.gnu.org/software/coreutils/faq/coreutils-faq.html#Sort-does-not-sort-in-normal-order_0021.
export LC_ALL=C

# Program name used in messages, and the version shown by --version.
PROG=${0##*/}
VERSION='0.4.6'

# Print the usage text and exit.
#
# Arguments: $1 - exit status (optional, defaults to 0).  With status 0
#                 the text is written to stdout, otherwise to stderr.
# Globals:   PROG (read).
# This function never returns: it always ends with "exit".
usage()
{
    # Use the same default as the final "exit" so that a call without
    # arguments does not trip over "set -u".
    [ "${1:-0}" = 0 ] || exec >&2
    cat <<EOF
Usage: $PROG [ options ] base.chksum update.chksum

Options:

  -O OUTDIR, --outdir=OUTDIR    place the resulting files in OUTDIR
                                instead of placing them next to the
                                base.chksum file;

  -f, --force    overwrite the existing per-task *.chksum files;

  -r RFILE, --report-to=RFILE   report statistics to RFILE;

  -v, --verbose    be verbose;

  -V, --version    print program version and exit;

  -h, --help    show this text and exit.

Report bugs to https://bugzilla.altlinux.org/.
EOF
    exit "${1:-0}"
}

# Let getopt(1) normalise the command line (short and long options),
# then re-load the normalised words as the positional parameters.
TEMP="$(getopt -n "$PROG" -o O:fr:vVh -l outdir:,force,report-to:,verbose,version,help -- "$@")" || usage 1
eval set -- "$TEMP"

# Option values; an empty string means "not given".
outdir=
force=
verbose=
report_to=

# Consume the options one by one.  getopt always terminates them with
# "--", after which only the non-option arguments are left.
while :; do
    case "$1" in
	--)
	    shift
	    break
	    ;;
	-O|--outdir)
	    outdir="$2"
	    shift
	    ;;
	-r|--report-to)
	    report_to="$2"
	    shift
	    ;;
	-f|--force)
	    force=y
	    ;;
	-v|--verbose)
	    verbose=y
	    ;;
	-V|--version)
	    printf '%s\n' \
		"$VERSION 2025" \
		'This program is free software; you can redistribute it and/or modify' \
		'it under the terms of the GNU General Public License as published by' \
		'the Free Software Foundation; either version 2 of the License, or' \
		'(at your option) any later version.'
	    exit 0
	    ;;
	-h|--help)
	    usage 0
	    ;;
	*)
	    echo "$PROG: unrecognized option: $1" >&2
	    usage 1
	    ;;
    esac
    # Drop the option word just handled (its argument, if any, was
    # already dropped in the corresponding arm).
    shift
done

# Exactly two non-option arguments are required: the base and the
# update checksum files.
if [ $# -ne 2 ]; then
    usage 1
fi

base="$1"
update="$2"

# Both files are read several times further on, always at the head of
# a pipeline whose exit status is the one of its last command.  Without
# this check an unreadable file would only produce a grep/cat message:
# a missing update file would end in "No updates found" with exit
# status 0, and a missing base file would make every package look new.
for input in "$base" "$update"; do
    if [ ! -r "$input" ] || [ -d "$input" ]; then
	echo "$PROG: cannot read checksum file: $input" >&2
	exit 1
    fi
done
[ -z "$verbose" ] || echo "Comparing checksum files: $base ? $update..." >&2

# Temporary working directory; stays empty until mktemp succeeds so
# that an early exit does not try to remove anything.
workdir=

# EXIT handler: remove the working directory, unless DEBUG is set to
# something other than 0, in which case the directory is kept and its
# path is reported on stderr.
cleanup()
{
    if ! [ "${DEBUG:-0}" -eq 0 ]; then
	echo "DEBUG: Workdir: $workdir" >&2
	return
    fi
    [ -z "$workdir" ] || rm -rf "$workdir"
}

trap 'cleanup' EXIT
workdir="$(mktemp -d --tmpdir "$PROG.XXXX")"

# A package descriptor line looks like "<file>.rpm/<disttag>@<digits>";
# presumably it is produced with an rpm query format like this one
# (TODO confirm against the generator of the .chksum files):
#QF='%{name}-%{version}-%{release}.%{arch}.rpm/%{disttag}@%{buildtime}\n'
simple_syntax='^[^#[:space:]]\+\.rpm/[^@/]\+@[0-9]\+$'

# base.rpmlist:   the unique descriptors found in the base file.
grep "$simple_syntax" "$base" |
    sort -u >"$workdir"/base.rpmlist

# update.rpmlist: the unique descriptors of the update file which are
#                 not present in the base list.
grep "$simple_syntax" "$update" |
    sort -u |
    comm -13 "$workdir"/base.rpmlist - >"$workdir"/update.rpmlist

# Nothing new in the update file: there is nothing to split.
if ! [ -s "$workdir"/update.rpmlist ]; then
    if [ -n "$verbose" ]; then
	echo "No updates found." >&2
    fi
    exit 0
fi

# Stricter descriptor syntax: the disttag must either be the literal
# "(none)" or have the form <prefix>[+.]<digits>(.<digits>)+ .
base_syntax='^[^#[:space:]]\+\.rpm/\([^@/+.]\+[+.][0-9]\+\(\.[0-9]\+\)\+\|(none)\)@[0-9]\+$'

# Verify that every line of a descriptor list matches $base_syntax.
#
# Arguments: $1 - name of the list used in the error message
#                 ("base" or "update");
#            $2 - path of the list file.
# Globals:   base_syntax (read).
# Outputs:   on failure, the error header followed by the offending
#            lines, all on stderr.
# Returns:   1 if at least one line does not match, 0 otherwise.
check_descriptors()
{
    if grep -qv "$base_syntax" "$2"; then
	echo "ERROR: Unexpected package descriptors found in the $1 list:" >&2
	# The offending lines are part of the diagnostic: keep them on
	# stderr next to the header instead of leaking them to stdout.
	grep -v "$base_syntax" "$2" >&2
	return 1
    fi
}

check_descriptors base "$workdir"/base.rpmlist || exit 1
check_descriptors update "$workdir"/update.rpmlist || exit 1

# Descriptors with a <prefix>[+.]<task>.<n>... disttag: emit "#<task>",
# i.e. the first number of the disttag.
tasknum_sed='s,^[^#[:space:]]\+\.rpm/[^@/+.]\+[+.]\([0-9]\+\)\(\.[0-9]\+\)\+@[0-9]\+$,#\1,p'

# Descriptors with the "(none)" disttag have no task number: use the
# first 5 digits of the buildtime as a "task" number and emit "@<digits>".
buildtime_sed='s,^[^#[:space:]]\+\.rpm/(none)@\([0-9]\{5\}\)[0-9]\+$,@\1,p'

# task.update.rpmlist: first all the "#<task>" entries, then all the
# "@<buildtime prefix>" entries (one entry per matching descriptor).
{
    sed -n -e "$tasknum_sed" "$workdir"/update.rpmlist
    sed -n -e "$buildtime_sed" "$workdir"/update.rpmlist
} >"$workdir"/task.update.rpmlist

# Derive the naming scheme of the per-task files from the base file
# path: <t_dir>/<t_basename>.<task><t_suf>, where <t_suf> is the
# ".chksum..." tail of the base file name (empty if there is none) and
# <t_dir> is "." when the base path has no directory part.
case "$base" in
    */*) t_dir="${base%/*}" ;;
    *) t_dir='.' ;;
esac
t_suf="${base##*/}"
t_basename="${t_suf%.chksum*}"
t_suf="${t_suf#"$t_basename"}"

# For every distinct task, copy its records from the update file to
# a separate file and also append them to newtasks.chksum.
sort -u "$workdir"/task.update.rpmlist |
    while read -r tasknum; do
	if [ -n "$verbose" ]; then
	    echo "Extracting checksum updates for task $tasknum..." >&2
	fi

	# "#NNNNN" entries are task numbers: drop the '#' prefix.  The
	# "@NNNNN" buildtime pseudo-tasks keep their '@' prefix.
	tasknum="${tasknum#\#}"

	# With --outdir the file goes to that directory, otherwise it
	# is placed next to the base file.
	if [ -n "$outdir" ]; then
	    taskfile="${outdir%/}/$t_basename.$tasknum$t_suf"
	else
	    taskfile="$t_dir/$t_basename.$tasknum$t_suf"
	fi

	# Refuse to overwrite an existing file unless --force is given.
	if [ -e "$taskfile" ] && [ -z "$force" ]; then
	    echo "ERROR: Task checksum file $taskfile already exists!" >&2
	    echo "Use -f option to overwrite it." >&2
	    exit 1
	fi

	# Pattern of the descriptor lines which start a record of this
	# task.
	case "$tasknum" in
	    @*)
		header="^[^#[:space:]]\\+\\.rpm\\/(none)${tasknum}[0-9]\\+\$"
		;;
	    *)
		header="^[^#[:space:]]\\+\\.rpm\\/[^@/+.]\\+[+.]$tasknum\\(\\.[0-9]\\+\\)\\+@[0-9]\\+\$"
		;;
	esac

	# A record runs from its descriptor line to the next empty line.
	sed -n -e "/$header/,/^\$/ p" "$update" >"$taskfile"

	cat "$taskfile" >>"$workdir"/newtasks.chksum
    done

# The extraction loop appends every per-task file to newtasks.chksum.
# Since the update list was not empty, a missing file means that no
# task was extracted at all.
[ -e "$workdir"/newtasks.chksum ] || {
    echo "BUG! No per-task .chksum files generated!" >&2
    exit 1
}

if [ -n "$verbose" ]; then
    echo "Verifying the sorted checksums..." >&2
fi

# Filter: read a .chksum stream on stdin and print each record as
# a single line on stdout, with the record lines joined by '|':
#
#     <descriptor>|<line>|<line>...
#
# A record starts at a descriptor line (<file>.rpm/<disttag>@<digits>)
# and ends at the next empty line.  Lines outside of records are
# dropped, and so is a trailing record not closed by an empty line.
inline_chksums() {
    # Keep the descriptor pattern in $1: the positional parameters are
    # local to the function, so no global variable is needed.
    set -- '^[^#[:space:]]\+\.rpm\/[^@/]\+@[0-9]\+$'

    # Descriptor line: start a new record in the hold space.
    # Any other line:  append it to the hold space.
    # Empty line:      fetch the record, trim the separators at both
    #                  ends, join the lines with '|' and print it.
    sed -n \
	-e "/$1/,/^\$/ {" \
	-e "    /$1/ { h; d }" \
	-e '    H' \
	-e '    /^$/ { g; s/^\n//; s/\n$//; s/\n/|/g; p }' \
	-e '}'
}

# One line per record, sorted, so that comm(1) and diff(1) can be used:
#   base.sorted      the records of the base file;
#   update.sorted    the records of the update file missing from base;
#   newtasks.sorted  the extracted per-task records missing from base.
cat "$base" |
    inline_chksums |
    sort >"$workdir"/base.sorted
cat "$update" |
    inline_chksums |
    sort |
    comm -13 "$workdir"/base.sorted - >"$workdir"/update.sorted
cat "$workdir"/newtasks.chksum |
    inline_chksums |
    sort |
    comm -13 "$workdir"/base.sorted - >"$workdir"/newtasks.sorted

# The per-task files must carry exactly the records which are new in
# the update file; any difference is shown on stderr.
diff "$workdir"/update.sorted "$workdir"/newtasks.sorted >&2 || {
    echo "BUG! Some checksum data was not processed." >&2
    exit 1
}

# Cross-check by counting:
#   u_cnt  all the records of the update file;
#   t_cnt  the new records found in the per-task files;
#   b_cnt  the records of the update file also present in the base.
u_cnt="$(cat "$update" | inline_chksums | wc -l)"
t_cnt="$(wc -l <"$workdir"/newtasks.sorted)"
b_cnt="$(cat "$update" | inline_chksums | sort | comm -12 "$workdir"/base.sorted - | wc -l)"

[ "$((b_cnt + t_cnt))" -eq "$u_cnt" ] || {
    echo "BUG! Some checksum data was not processed (total count mismatch)." >&2
    exit 1
}

# Print the one-line statistics summary on stderr: how many records of
# the update file were already in the base file and how many went to
# the per-task files, both also as a percentage of the total.
#
# Globals: base, t_suf, b_cnt, t_cnt, u_cnt (read);
#          b_pct, t_pct (written).
print_stats()
{
    if [ "$u_cnt" -gt 0 ]; then
	b_pct="$(((100 * b_cnt) / u_cnt))"
	t_pct="$(((100 * t_cnt) / u_cnt))"
    else
	# The total is 0 when the update file holds no complete record
	# (e.g. a single record without the closing empty line): report
	# 0% instead of failing on a division by zero.
	b_pct=0
	t_pct=0
    fi
    echo "$b_cnt pkgs in $base + $t_cnt pkgs in ${base%"$t_suf"}.*$t_suf ($b_pct% + $t_pct%)." >&2
}

# The summary is shown in verbose mode, or written to the report file
# when --report-to is given.
if [ -n "$verbose" ] || [ -n "$report_to" ]; then
    (
	# In the subshell only: send stderr to the report file.
	[ -z "$report_to" ] || exec 2>"$report_to"
	print_stats
    )
fi
