#!/bin/sh -fu
#
# Copyright (C) 2019  Denis Medvedev.
# Copyright (C) 2024  Paul Wolneykien.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

. /bin/shell-source

# Built-in defaults.  They are assigned before the configuration files
# are sourced further below, so a configuration file may override them.

# Exported to the environment (together with LANG) before the service
# starts reading the journal.
SYSTEMD_COLORS=0
# File that is locked with flock(1) and filled with the service PID;
# an empty value disables the PID file handling.
PIDFILE=/run/nagwad.pid
# Owner and group assigned to every registered event file.
LOG_USER=nagios
LOG_GROUP=nagwad
# Directory holding nagwad.conf; also the default location of the
# filters and of the hook directories below.
CONFDIR=/etc/nagwad
# Directory searched (non-recursively) for *.regexp and *.sed filters.
FILTERDIR="$CONFDIR"
# Executables run before an event is registered (their output is
# appended to the message) and after it has been registered.
POSTFILTERS="$CONFDIR/filter-event.d"
POSTPROCESS="$CONFDIR/process-event.d"
# Base directory of the event files; a per-boot subdirectory named
# after the boot ID is appended to it later on.
LOGDIR=/var/log/nagwad
# Age in days after which old per-boot directories are removed;
# empty (or zero) disables the clean-up.
MAXAGE=
# Program run by the "check" and "status" commands.
CHECK_SCRIPT=/usr/lib/nagios/plugins/check_nagwad

# Exit status with which a post-filter requests the event to be skipped.
NAGWAD_SKIP_EVENT=3

PROG="${0##*/}"
VERSION="0.11.10"

# Print the usage summary and terminate the program.
#
# Arguments: $1 - exit status (optional, defaults to 0).  With a
#                 non-zero status the text goes to stderr, otherwise
#                 to stdout.
# Globals:   PROG (read).
# Returns:   never; always exits with the given status.
usage()
{
    # Take the default here so that a bare "usage" call does not trip
    # over the unset $1 when the script runs with "-u".
    local rc="${1:-0}"

    [ "$rc" = 0 ] || exec >&2
    cat <<EOF
Usage: $PROG [options] [command]

To run as a system service use:
  $PROG --service

To list registered events use the "list" command:
  $PROG [list]

To view one particular event use the "show" command:
  $PROG [show] EVENT-ID

To mark an event as fixed use the "fix" command:
  $PROG fix EVENT-ID [EVENT-ID ...]

To check all or some events use the "check" command:
  $PROG check [FILTER]

To check for the "nagwad" service status use the "status" command:
  $PROG status

To cleanup old events use the "cleanup-old" command:
  $PROG cleanup-old [DAYS]

To run in the test mode use:
  $PROG --test

To run with alternative configuration file use:
  $PROG -c CONF | --config=CONF
EOF
    exit "$rc"
}

# Remove per-boot event directories that are older than a number of days.
#
# Arguments: $1 - maximum age in days (optional, defaults to $MAXAGE).
#                 An empty or zero value disables the clean-up.
# Globals:   MAXAGE, LOGDIR_BASE, LOGDIR (read).
# Outputs:   progress messages on stderr.
# Returns:   0 when the clean-up ran or is disabled,
#            1 when the age is not a non-negative integer.
cleanup_old() {
    local maxage="${1:-${MAXAGE:-}}"

    # Refuse anything but digits up front: otherwise the numeric test
    # below only prints a cryptic error and the request is silently
    # ignored.
    case "$maxage" in
        *[!0-9]*)
            echo "Invalid maximum age (days): $maxage" >&2
            return 1
            ;;
    esac

    [ "${maxage:-0}" -gt 0 ] || return 0

    echo "Removing old signal files..." >&2
    find "$LOGDIR_BASE" \
         -mindepth 1 -maxdepth 1 \
         -type d \
         -ctime "+$maxage" | \
        while IFS= read -r olddir; do
            # The directory of the current boot is never removed.
            if [ "$olddir" = "$LOGDIR" ]; then
                echo "...not deleting $olddir (current)" >&2
            else
                echo "...deleting $olddir" >&2
                rm -rf -- "$olddir"
            fi
        done
    echo "...done removing old signal files." >&2
}

# Print the events of the current boot that are not marked as fixed,
# one per line, as "<modification time> <file name>".
#
# Globals:   LOGDIR (read).
# Returns:   0 when $LOGDIR does not exist, otherwise the status of find.
list_events() {
    if [ -e "$LOGDIR" ]; then
        find "$LOGDIR" -mindepth 2 -maxdepth 2 -type f \
             ! -name '*.FIXED' -printf '%T+ %f\n'
    fi
}

# Look up the file(s) of an event of the current boot.
#
# Arguments: $1 - event ID: either a complete event file name or the
#                 middle part of a "<prefix>.<ID>.<suffix>" file name.
#                 It is handed to find's -name test, i.e. it is
#                 treated as a pattern.
# Globals:   LOGDIR (read).
# Outputs:   the path of every matching file, one per line.
# Returns:   0 when at least one file matched, 1 otherwise (including
#            the case of $LOGDIR not existing).
find_event() {
    local id="$1"
    local found=

    # Without this guard a missing directory makes find fail, which
    # must not be mistaken for a successful lookup.
    [ -d "$LOGDIR" ] || return 1

    found="$(find "$LOGDIR" -mindepth 2 -maxdepth 2 -type f \
                  \( -name "$id" -o -name "*.$id.*" \) -print)"
    [ -n "$found" ] || return 1

    printf '%s\n' "$found"
}

# Same as find_event, but complain on stderr when nothing was found.
#
# Arguments: $1 - event ID (passed on to find_event).
# Outputs:   whatever find_event prints; an error message on stderr
#            when the lookup fails.
# Returns:   0 when the event was found, 1 otherwise.
find_event_or_error() {
    find_event "$1" && return 0

    echo "Event $1 not found." >&2
    return 1
}

# Print the contents of an event.
#
# Arguments: $1 - event ID (see find_event_or_error).
# Outputs:   the contents of every matching event file.
# Returns:   the status of find_event_or_error when nothing was found,
#            otherwise 0 or the status of the last failed cat.
show_event() {
    local files= file= rc=0

    files="$(find_event_or_error "$1")" || return $?

    # An ID may match more than one file (the lookup prints one path
    # per line), so each path is handled separately instead of handing
    # the whole list to cat as a single file name.
    while IFS= read -r file; do
        cat -- "$file" || rc=$?
    done <<EOF
$files
EOF

    return $rc
}

# Mark an event as fixed by renaming its file so that the last
# dot-separated component of the name becomes "FIXED".
#
# Arguments: $1 - event ID (see find_event_or_error).
# Returns:   the status of find_event_or_error when nothing was found,
#            otherwise 0 or the status of the last failed mv.
fix_event() {
    local files= file= rc=0

    files="$(find_event_or_error "$1")" || return $?

    # An ID may match more than one file (the lookup prints one path
    # per line); every match is renamed on its own.
    while IFS= read -r file; do
        case "$file" in
            *.FIXED)
                # Already marked: renaming the file onto itself would
                # only make mv fail.
                continue
                ;;
        esac
        mv -- "$file" "${file%.*}.FIXED" || rc=$?
    done <<EOF
$files
EOF

    return $rc
}

# Run the check script with the given arguments.
#
# Arguments: passed on to $CHECK_SCRIPT unchanged.
# Globals:   CHECK_SCRIPT (read).
# Returns:   always 0; the status of the check script is discarded.
check_filter() {
    "$CHECK_SCRIPT" "$@" || return 0
}

# Run check_filter for every directory found directly under $LOGDIR,
# prefixing each result with "<directory name>: ".
#
# Globals:   LOGDIR (read).
check_all_filters() {
    find "$LOGDIR" -mindepth 1 -maxdepth 1 -type d | \
        while read -r filter_dir; do
            filter_name="${filter_dir##*/}"
            echo -n "$filter_name: "
            check_filter "$filter_name"
        done
}

# Mode flags, set while the options are parsed:
# run_service - non-empty when --service or --test was given;
# test_mode   - non-empty when --test was given.
run_service=
test_mode=

# Refuse to run a command together with --service or --test.
#
# Returns:   0 when no service mode was requested; otherwise prints an
#            error and calls "usage 1".
check_run_service() {
    [ -n "$run_service" ] || return 0

    echo "Incompatible with --service and --test." >&2
    usage 1
}

# Let getopt(1) normalize the command line, then consume the options
# one by one; whatever follows the "--" marker is left in "$@" as the
# command and its arguments.
TEMP="$(getopt -n "$PROG" -o c:hV -l service,test,config:,help,version -- "$@")" || usage 1
eval set -- "$TEMP"

config=
while true; do
    case "$1" in
        --)
            shift
            break
            ;;
        -h|--help)
            usage 0
            ;;
        -V|--version)
            cat <<EOF
$VERSION 2024
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
EOF
            exit 0
            ;;
        -c|--config)
            # The option value is the next word; drop the option name
            # here, the value is dropped by the common shift below.
            config="$2"
            shift
            ;;
        --service)
            run_service=1
            ;;
        --test)
            # The test mode is a variant of the service mode.
            run_service=1
            test_mode=1
            ;;
        *)
            message "$PROG: unrecognized option: $1" >&2
            usage 1
            ;;
    esac
    shift
done

# Load the configuration: either the single file named on the command
# line, or the default files (each of them only if it exists).
case "$config" in
    '')
        source_if_exists "$CONFDIR"/nagwad.conf
        source_if_exists /etc/sysconfig/nagwad
        ;;
    *)
        echo "Reading configuration from $config..." >&2
        . "$config" || exit $?
        ;;
esac

# Determine the boot ID.  In the test mode a non-empty $TEST_BOOTID
# takes precedence; otherwise the kernel is asked.  When the kernel
# value is not available the test mode falls back to a fixed ID, while
# the normal mode gives up.
boot_id=
[ -z "$test_mode" ] || boot_id="${TEST_BOOTID:-}"
[ -n "$boot_id" ] || boot_id="$(cat /proc/sys/kernel/random/boot_id 2>/dev/null)"

if [ -z "$boot_id" ]; then
    [ -n "$test_mode" ] || {
        echo "Unable to get the current boot ID!" >&2
        exit 1
    }
    boot_id='tEsTm0dE'
fi

# From here on LOGDIR names the directory of the current boot, while
# LOGDIR_BASE keeps the configured base directory.
LOGDIR_BASE="$LOGDIR"
LOGDIR="${LOGDIR_BASE%/}/$boot_id"

# Command mode.  Every command is refused when --service or --test was
# given (check_run_service), and every branch terminates the program.
# A first word that is not a known command is taken as an event ID to
# show.
if [ $# -gt 0 ]; then
    case "$1" in
        list)
            check_run_service || exit $?
            list_events
            exit $?
            ;;
        show)
            [ $# = 2 ] || usage 1
            check_run_service || exit $?
            show_event "$2"
            exit $?
            ;;
        fix)
            [ $# -ge 2 ] || usage 1
            check_run_service || exit $?
            shift
            # Stop at the first event that can not be fixed.
            for event_id in "$@"; do
                fix_event "$event_id" || exit $?
            done
            exit $?
            ;;
        check)
            check_run_service || exit $?
            if [ $# = 2 ]; then
                check_filter "$2"
            else
                check_all_filters
            fi
            exit $?
            ;;
        status)
            check_run_service || exit $?
            check_filter
            exit $?
            ;;
        cleanup-old)
            check_run_service || exit $?
            cleanup_old "${2:-}"
            exit $?
            ;;
        *)
            check_run_service || exit $?
            show_event "$1"
            exit $?
            ;;
    esac
fi

# Neither a command nor a service mode: list the events.
[ -n "$run_service" ] || {
    list_events
    exit $?
}

## Service

# Report which boot ID is in use.
case "$test_mode" in
    '')
        echo "Boot ID: $boot_id" >&2
        ;;
    *)
        echo "Using test mode boot id: $boot_id." >&2
        ;;
esac

LANG=C
export LANG
export SYSTEMD_COLORS

# Take an exclusive, non-blocking lock on the PID file (kept open on
# descriptor 4) and store our PID in it.  An empty $PIDFILE disables
# this step.
if [ -n "$PIDFILE" ]; then
    exec 4>>"$PIDFILE"
    flock -x -n 4 || {
        echo "ERROR: $PIDFILE can not be locked!" >&2
        exit 1
    }
    truncate -s0 "$PIDFILE"
    echo $$ >&4
fi

# Scratch directory for the generated sed scripts.  It stays empty
# until mktemp has succeeded, so that cleanup() never acts on a bogus
# path.
workdir=

# Remove the scratch directory and the PID file.  Installed as the
# EXIT handler below.
cleanup()
{
    [ -z "$workdir" ] || rm -rf -- "$workdir"
    [ -z "$PIDFILE" ] || rm -f -- "$PIDFILE"
}
# The handler is installed before the directory is created, so the PID
# file is removed even when mktemp fails.
trap 'cleanup' EXIT

# Without a scratch directory the paths built from "$workdir" later on
# would point into the root directory, so give up right away.
workdir="$(mktemp -d --tmpdir nagwad.XXXX)" || {
    echo "ERROR: unable to create a temporary working directory!" >&2
    exit 1
}

# Collect the filters from $FILTERDIR into "$workdir/filters" as sed
# scripts and print their names.
#
# Every line of a NAME.regexp file becomes a sed command of the form
#     /REGEXP/ s//NAME:CRITICAL:&/p
# where a line that does not start with "^" gets "^.*" prepended first;
# empty lines produce nothing.  NAME.sed files are copied as they are.
#
# Globals:   FILTERDIR, workdir (read).
# Outputs:   the filter names (file names without the last extension),
#            separated by spaces, on a single line.
find_filters()
{
    mkdir "$workdir/filters"

    # "IFS= read -r" keeps backslashes and surrounding whitespace of
    # the paths intact.
    find "$FILTERDIR" -maxdepth 1 ! -type d -name '*.regexp' | \
        while IFS= read -r f; do
            name="${f##*/}"; name="${name%.*}"
            sed -n -e 's/^[^^]/^.*&/' \
                   -e "s,^\\^.*\$,/&/ s//$name:CRITICAL:\\&/p,p" \
                "$f" >"$workdir/filters/$name.sed"
        done

    find "$FILTERDIR" -maxdepth 1 ! -type d -name '*.sed' -exec cp '{}' "$workdir/filters/" \;

    # Strip the extension from every name and join the names with spaces.
    ls "$workdir/filters" | sed -n -e 's/\.[^.]\+$//; H' -e '$ { g; s/^\n//; s/\n/ /g; p }'
}

# Run the executables found in $POSTFILTERS, in glob order, each with
# the given arguments.
#
# Arguments: passed on to every post-filter unchanged.
# Globals:   POSTFILTERS (read), NAGWAD_SKIP_EVENT (read, exported to
#            the post-filters).
# Outputs:   whatever the post-filters print; a message on stderr for
#            every post-filter that fails.
# Returns:   $NAGWAD_SKIP_EVENT as soon as a post-filter exits with
#            that status (the remaining ones are not run), 0 otherwise.
postfilter_event()
{
    # A subshell keeps the re-enabled globbing, the export and the
    # early exit local to this function.
    (
        set +f
        export NAGWAD_SKIP_EVENT

        for script in "$POSTFILTERS"/*; do
            # Don't run *.rpm* and *~ scripts.
            case "$script" in
                *.rpm*| *~)
                    continue
                    ;;
            esac

            [ -f "$script" ] || [ -h "$script" ] || continue
            [ -x "$script" ] || continue

            "$script" "$@" && continue
            rc=$?

            [ $rc -ne $NAGWAD_SKIP_EVENT ] || exit $rc
            echo "Post-filter $script returned error: $rc" >&2
        done
    )
}

# Run the executables found in $POSTPROCESS, in glob order, each with
# the given arguments.  A failing script is reported on stderr and
# does not stop the remaining ones.
#
# Arguments: passed on to every script unchanged.
# Globals:   POSTPROCESS (read).
postprocess_event()
{
    # A subshell keeps the re-enabled globbing local to this function.
    (
        set +f

        for handler in "$POSTPROCESS"/*; do
            # Don't run *.rpm* and *~ scripts.
            case "$handler" in
                *.rpm*| *~)
                    continue
                    ;;
            esac

            [ -f "$handler" ] || [ -h "$handler" ] || continue
            [ -x "$handler" ] || continue

            "$handler" "$@" && continue
            echo "Postprocess script $handler returned error: $?" >&2
        done
    )
}

# Register one event unless it is already known.
#
# The event file is "$LOGDIR/<filter>/<filter>.<hash>.<status>", where
# <hash> is the MD5 sum of the "<status>: <message>" line built from
# the trimmed message.  Nothing is done when that file, or its
# ".FIXED" counterpart, already exists.
#
# Arguments: $1 - filter name; $2 - status; the rest - the message.
# Globals:   LOGDIR, LOG_USER, LOG_GROUP (read).
# Returns:   0 when the event is already known or was skipped by a
#            post-filter; otherwise the status of postprocess_event.
#            Exits with 1 when the hash can not be calculated.
process_event()
{
    local filter="$1"
    local status="$2"
    shift 2
    local message="$*"

    # Trim leading and trailing spaces:
    message="${message#"${message%%[! ]*}"}"
    message="${message%"${message##*[! ]}"}"

    local event_dir="$LOGDIR/$filter"
    mkdir -p "$event_dir"

    local hash=
    hash="$(printf '%s: %s\n' "$status" "$message" | md5sum | head -c 32)"

    if [ -z "$hash" ]; then
        echo "BUG! Failed to calculate message hash!" >&2
        exit 1
    fi

    local event_file="$event_dir/$filter.$hash.$status"

    # Event already registered or fixed.
    [ ! -e "$event_file" ] || return 0
    [ ! -e "$event_dir/$filter.$hash.FIXED" ] || return 0

    # A failing post-filter run means the event is to be skipped;
    # otherwise its output (if any) is appended to the message.
    local addon=
    addon="$(postfilter_event "$filter" "$status" "$message")" || return 0

    if [ -n "$addon" ]; then
        message="$message $(echo -n "$addon" | tr '\n' ' ')"
    fi

    printf '%s: %s\n' "$status" "$message" >"$event_file"

    chown "$LOG_USER" "$event_file"
    chgrp "$LOG_GROUP" "$event_file"
    chmod 0440 "$event_file"

    postprocess_event "$filter" "$status" "$message"
}

# Run MAXAGE cleaner if not in test mode
[ -n "$test_mode" ] || cleanup_old

## Main

# Prepare the filter scripts; without any filter there is nothing to
# watch for, which is not treated as an error.
filters="$(find_filters)"

case "$filters" in
    '')
        echo "No filters found. Try to put some *.regexp files in the $FILTERDIR." >&2
        exit 0
        ;;
    *)
        echo "Run with the following filters: $filters" >&2
        ;;
esac

mkdir -p "$LOGDIR"
echo "Write logs to $LOGDIR" >&2

# Name under which our own messages are expected in the journal: the
# unit reported by ps(1) for this process with the ".service" suffix
# removed, or the program name when ps reports no *.service unit.
UNIT="$(ps -o unit= $$ 2>/dev/null)"
if [ "${UNIT%.service}" != "$UNIT" ]; then
    UNIT="${UNIT%.service}"
else
    UNIT="$PROG"
fi

# Build the combined sed script: first the rule dropping our own
# messages, then every filter in the order given by $filters.
{
    cat <<EOF
# Skip messages from nagwad itself:
/^[^[:space:]]\\+[[:space:]]\\+[^[:space:]]\\+[[:space:]]\\+$UNIT\\[/ d
EOF

    # $filters is a space-separated list, hence the unquoted expansion.
    for f in $filters; do
        echo "# Filter $f.sed:"
        cat "$workdir/filters/$f.sed"
    done
} >"$workdir/all.sed"

# Produce the stream of log messages to be filtered: standard input in
# the test mode, otherwise the journal of the current boot, followed
# as it grows.
#
# Globals:   test_mode (read); JOURNAL_TAIL (read) - number of already
#            logged lines to start with, "all" when unset or empty.
read_messages() {
    if [ -n "$test_mode" ]; then
        echo "Test mode. Reading messages from standard input..." >&2
        cat
        return
    fi

    journalctl -o short-iso -n ${JOURNAL_TAIL:-all} -b -f
}

# Run the main filter for all patterns
#
# The messages are piped through the combined sed script; as the
# script runs with "-n", only the lines printed explicitly by a filter
# reach the loop.  Each such line is split at the first two colons
# into the filter name and the status, the remainder (including any
# further colons) being the message.
read_messages | \
    sed -u -n -f "$workdir/all.sed" | (
        # IFS is changed inside this subshell only.  Note that it is a
        # plain assignment, not a prefix of "read", so it is also in
        # effect while process_event runs.
        IFS=:
        while read -r filter status message; do
            process_event "$filter" "$status" "$message"
        done
    )
