#!/usr/bin/perl -w

use 5.012;
use strict;
use warnings;
use lib 'lib';

use Cwd;
use Getopt::Long;
use Parallel::ForkManager;
use File::Path qw(make_path remove_tree);
use File::Basename;
use Gear::Remotes::Tags;
use Gear::Remotes::GitUrl;
#$Gear::Remotes::Tags::verbose=3;

my $MAX_PROCESSES = 32;
my $TMPDIR=$ENV{'TMPDIR'};
my $workdir;
my $verbose=0;
my $help;

my $opt_keep_tree=0;
my $opt_warn_ahead=0;
my $opt_log_ahead=0;
my $opt_log_notags=0;
my $opt_bad_urls_file;
my $opt_input_mode3;
my $timestamp_expire = 3600*24*30;
my $timestamp_volatility = 3600*24*10;

my $watch_error_const='99999999';
my $plugin_mode=0;
$plugin_mode=1 if $0 =~ /-plugin/;
if (!$plugin_mode) {
    $opt_keep_tree=1;
    $opt_log_ahead=1;
    $opt_log_notags=1;
    #$opt_warn_ahead=1;
    $TMPDIR=getcwd();
}

my $result = GetOptions (
    'quiet'=> sub {$verbose=0},
    "verbose+"  => \$verbose,
    "h|help"  => \$help,
    "tmpdir=s"  => \$TMPDIR,
    "workdir=s"  => \$workdir,
    "j|jobs=i"  => \$MAX_PROCESSES,
    "bad-urls-file=s" => \$opt_bad_urls_file,
    "keep-tree!"  => \$opt_keep_tree,
    "log-notags!"  => \$opt_log_notags,
    "log-ahead!"  => \$opt_log_ahead,
    "warn-ahead|warn_ahead!" => \$opt_warn_ahead,
    "input3" => \$opt_input_mode3,
    "input4" => sub {$opt_input_mode3=0},
);

if ($verbose) {
    if ($plugin_mode) {
	warn 'info: entered plugin mode: ',$0,"\n";
    } else {
	warn 'info: entered standalone mode: ',$0,"\n";
    }
}

$workdir //= $TMPDIR."/BATCH-WATCH-WORKDIR";
my $workdir_out=$workdir.'/out';
my $workdir_notags=$workdir.'/notags';
my $workdir_ahead=$workdir.'/ahead';
my $workdir_fail=$workdir.'/failed';

my (%IGNORE_URL, %IGNORE_TIME);

&read_ignore_file($opt_bad_urls_file) if $opt_bad_urls_file;

remove_tree($workdir);
make_path($workdir_out, $workdir_fail);
make_path($workdir_notags) if $opt_log_notags;
make_path($workdir_ahead) if $opt_log_ahead;

my $pm = new Parallel::ForkManager($MAX_PROCESSES);
while (<>) {
    chomp;
    next if /^\s*$/ or /^\s*#/;
    my ($name, $version, $url, $vcs);
    if ($opt_input_mode3) {
	($name, $version, $vcs) = split(/\t/);
    } else { # input mode 4
	($name, $version, $url, $vcs) = split(/\t/);
    }
    $vcs = &Gear::Remotes::GitUrl::emptify_no_url($vcs);
    $url = &Gear::Remotes::GitUrl::emptify_no_url($url) if defined($url);
    # explicitly disabled
    next if defined($vcs) and $vcs eq '0';
    next if not $vcs and not $url;
    next if $vcs and not &Gear::Remotes::GitUrl::is_git_like_url($vcs);
    next if $url and not &Gear::Remotes::GitUrl::is_git_like_url($url);
    my $watch_url=$vcs ? $vcs : $url;

    my $bad_url=$IGNORE_URL{$name};
    if ($bad_url) {
	if ($watch_url eq $bad_url) {
	    warn 'info: ignored cached bad url in ',$name,'-',$version,' ',$watch_url,"\n" if $verbose;
	    next;
	} else {
	    # url changed, no more bad
	    delete $IGNORE_URL{$name};
	    delete $IGNORE_TIME{$name};
	}
    }

    warn 'quering ',$name,'-',$version,' ',$watch_url,"\n" if $verbose;

    # Forks and returns the pid for the child:
    my $pid = $pm->start();
    next if $pid;
    my ($version_cmp, $oldversion, $newversion, $target)=&batch_tagwatch_versions($name, $version, $watch_url);
    my $watch_message;
    if ($version_cmp>1) {
	if ($verbose) {
	    warn 'WARN: ',$name,'-',$version,' is ahead of ',$newversion,"\n" if $verbose;
	}
	if ($opt_warn_ahead) {
	    # todo: -1?
	    $watch_message ="$name\t$watch_error_const\t".&_watch_msg($target,$watch_url).
		"+ERROR:version_${oldversion}_is_ahead_of_remote_repository_$newversion\n";
	}
	&log_at($workdir_ahead.'/'.$name) if $opt_log_ahead;
    } elsif ($version_cmp==0) {
	# Repository is up to date
	if ($verbose) {
	    warn 'info: ',$name,'-',$version,' is up to date',"\n" if $verbose;
	}
    } else {
	if ($plugin_mode) {
	    # for watch.altlinux.org
	    $watch_message = "$name\t$newversion\t".&_watch_msg($target,$watch_url)."\n";
	} else {
	    $watch_message = "$name\t$oldversion\t$newversion\t".&_watch_msg($target,$watch_url)."\n";
	}
	if ($verbose) {
	    warn 'info: ',$name,'-',$version,' has update ',$newversion,' from tag ',$target,"\n" if $verbose;
	}
    }
    if ($watch_message) {
	open (my $fn, '>', $workdir_out.'/'.$name) || die "can't open ",$workdir_out,'/',$name,': ',$!;
	print $fn $watch_message;
	close ($fn);
    }
    $pm->finish(); # Terminates the child process
}
print STDERR 'info: waiting for children...',"\n" if $verbose;

$pm->wait_all_children;
$pm->DESTROY;

print STDERR 'info: waiting done',"\n" if $verbose;

if ($opt_bad_urls_file) {
    &merge_ignore_with_workdir_fail();
    &write_ignore_file($opt_bad_urls_file);
}

# remove dirs if empty
system("rmdir $workdir_fail $workdir_out 2>/dev/null");
system("rmdir $workdir_ahead 2>/dev/null") if $opt_log_ahead;
system("rmdir $workdir_notags 2>/dev/null") if $opt_log_notags;
&print_dir($workdir_out) if -d $workdir_out;
remove_tree($workdir) unless $opt_keep_tree;

sub batch_tagwatch_versions {
    my ($name, $version, $url) = @_;
    my @extra_args;
    if ($version and $version !~ /20[0123]\d(?:\d\d\d\d)?(?:\b|$)/) {
	push @extra_args, '-filter_out_timestamps' => 1;
    }
    my ($tag_pairs_ref, $exit_code)=&Gear::Remotes::Tags::ls_remote_git_tags($url, @extra_args);
    unless ($tag_pairs_ref) {
	warn 'ERROR: ',$name, ': failed ',$url,"\n" if $verbose;
	&log_at($workdir_fail.'/'.$name, $url);
	return 0;
    }
    if (not @$tag_pairs_ref) {
	warn 'WARN: ',$name,': no acceptable tags found in ',$url,"\n" if $verbose;
	&log_at($workdir_notags.'/'.$name) if $opt_log_notags;
	return 0;
    }
    return &Gear::Remotes::Tags::cmp_tag_versions($version, $tag_pairs_ref);
}

# На github можно ссылаться прямо на версию (example):
# https://github.com/thesofproject/sof-bin/releases/tag/v1.9.2
sub _watch_msg {
    my ($tag,$url)=@_;
    $url =~ s!^https?://!https://!;
    $url =~ s,/$,,;
    if ($url=~m!^https://(?:[^\./]+\.)?github\.[^/]+/(?:[^#?]+/)?[^/#?]+$!) {
	return $url.'/releases/tag/'.$tag;
    # gitlab:
    #https://gitlab.freedesktop.org/bolt/bolt/-/releases#0.9.1
    #https://gitlab.com/cutecom/cutecom/-/releases#v0.51.0
    } elsif ($url=~m!^https://gitlab\.[^/]+/(?:[^#?]+/)?[^/#?]+$!) {
	return $url.'/-/releases#'.$tag;
    # cgit:
    #https://cgit.freedesktop.org/xorg/app/appres/tree/?h=appres-1.0.5
    } elsif ($url=~m!^https://cgit\.freedesktop\.org/xorg/app/([^/#?]+)$!) {
	return $url.'/tree/?h='.$1.'-'.$tag;
    }
    return 'tag+git:'.$tag.'+'.$url;
}

sub log_at {
    my $file = shift;
    if (0 == @_) {
	system('touch', $file) && die "can't touch ",$file,': ',$!;
    }else {
	open (my $fn, '>', $file) || die "Can't write to ",$file,': ',$!;
	print $fn @_;
	close ($fn);
    }
}

sub print_dir {
    my $dir = shift;
    opendir(my $dh, $dir) || die "Can't open $dir: $!";
    while (my $fname = readdir $dh) {
	next if substr($fname,0,1) eq '.';
	my $file="$dir/$fname";
	next unless -f $file;
	open (my $fn, '<', $file) || die "Can't read $file: $!";
	print <$fn>;
	close ($fn);
    }
    closedir $dh;
}

sub read_ignore_file {
    my $file = shift;
    return unless -f $file;
    open (my $fn, '<', $file) || die "Can't read $file: $!";
    while (<$fn>) {
	chomp;
	next if /^\s*$/ or /^\s*#/;
	my ($name, $timestamp, $url) = split(/\t/);
	$IGNORE_URL{$name} =$url;
	$IGNORE_TIME{$name}=$timestamp;
    }
    close ($fn);
    warn "info: loaded --bad-urls-file ",$file,' with ',scalar keys(%IGNORE_URL),' entries.',"\n" if $verbose;
}

sub merge_ignore_with_workdir_fail {
    my $dir = $workdir_fail;
    opendir(my $dh, $dir) || die "Can't open $dir: $!";
    while (my $fname = readdir $dh) {
	next if substr($fname,0,1) eq '.';
	my $file="$dir/$fname";
	next unless -f $file;
	my @stat = stat($file);
	$IGNORE_TIME{$fname}=$stat[9];
	open (my $fn, '<', $file) || die "Can't read $file: $!";
	my $url = <$fn>;
	chomp $url;
	$IGNORE_URL{$fname} =$url;
	close ($fn);
	#warn "read bad-url ",$file,"\n" if $verbose;
    }
    closedir $dh;
}

sub write_ignore_file {
    my $file = shift;
    my $time = time();
    open (my $fn, '>', $file) || die "Can't write to ",$file,': ',$!;
    foreach my $name (sort {$a cmp $b} keys(%IGNORE_URL)) {
	my $timestamp = $IGNORE_TIME{$name};
	my $url = $IGNORE_URL{$name};
	unless ($time - $timestamp > $timestamp_expire + rand($timestamp_volatility)) {
	    print $fn $name,"\t",$timestamp,"\t",$url,"\n";
	}
    }
    close ($fn);
    warn "info: saved --bad-urls-file ",$file,' with ',scalar keys(%IGNORE_URL),' entries.',"\n" if $verbose;
}

__END__

=head1	NAME

gr-batch-watch-standalone - batch check a pkgset for remote git update

=head1	SYNOPSIS

B<gr-batch-watch-standalone>
[B<-q>]
[B<--quiet>]
[B<-v>]
[B<--verbose>]
[B<--help>]
[B<-j> I<num>]
[B<--jobs> I<num>]
[B<--tmpdir> I<dir>]
[B<--workdir> I<dir>]
[B<--bad-urls-file> I<file>]
[B<--keep-tree>]
[B<--no-keep-tree>]
[B<--log-notags>]
[B<--no-log-notags>]
[B<--log-ahead>]
[B<--no-log-ahead>]
[B<--warn-ahead>]
[B<--no-warn-ahead>]
[B<--input3>]
[B<--input4>]
I<input file>]

=head1	DESCRIPTION

B<gr-batch-watch-standalone> is used to batch check a set of packages for
remote git update. The set of packages is defined in STDIN or input file.
It output a list of packages that have an update.

B<gr-batch-watch-standalone> (standalone mode) by default also creates
and keeps its own WORKDIR in current directory for user to check for
different warnings and failures later.

B<repocop-watch-batch-git-plugin> (watch plugin mode) is a symlink of
B<gr-batch-watch-standalone> with another set of defaults intended for
automatic execution at watch.altlinux.org. It has different output format
and removes its WORKDIR after run.

=head1	INPUT

There are two input modes: default 4-part mode and 3-part mode.

 The 3-part mode input consists of 3 columns:
 <name><tab><version><tab><vcs url>

 The 4-part mode input consists of 4 columns:
 <name><tab><version><tab><web url><tab><vcs url>

I<vcs url> is assumed to be content of the VCS: tag, an URL that can be used
in git clone or git ls-remote command.
I<web url> is assumed to be content of the URL: tag, an URL that is intended
for human use, but sometimes coincidently can be used as I<web url> too.
I<vcs url> takes priority over I<web url>.

Packages are filtered and only those who has urls that was recognized as
possible git repository urls are processed, other are silently rejected.
Use gr-batch-watch-filter-rejected(1) to view rejected packages and their urls.

Use gr-batch-check-url(1) to check a specific url.

=head1	OUTPUT

B<gr-batch-watch-standalone> (standalone mode) by default use 4 column format
 <name><tab><old version><tab><new version><tab><message>

B<repocop-watch-batch-git-plugin> (watch plugin mode) by default use 3 columns
 <name><tab><new version><tab><message>

I<message> here is a message that contains the new version tag and remote git
url or some warning.

=head1	WORKDIR

 WORKDIR/out

Logs packages with update found.

 WORKDIR/fail

Logs packages whose urls failed to connect.

 WORKDIR/notags

Logs packages whose remote git repositories do not have git version tags.
Those repositories either have no tags at all or have no tags in format
([vV]\.?)?\d([\.\d]*)(-rel|-release)?, for example, They can have tag
FOO_1_3_2, but not tag v1.3.2 or 1.3.2 recognized by this system.

 WORKDIR/ahead

Logs packages whose version is ahead of discoverd in remote repository.

=head1	OPTIONS

=over

=item	B<-j> I<num>, B<--jobs> I<num>

Number of parallel jobs. Default is 32.

=item	B<--tmpdir> I<dir>

TMPDIR location. For watch plugin mode only

=item	B<--workdir> I<dir>

WORKDIR name and location. See WORKDIR

=item	B<--bad-urls-file> I<file>

File to cache bad urls information. Bad urls will not be quered
each time but only if package change its urls or until the expiration time (30+ days).
expiration time is slightly randomized to prevent DoS.

=item	B<--keep-tree>, B<--no-keep-tree>

Whether to keep WORKDIR after run.

=item	B<--log-notags>, B<--no-log-notags>

Whether to log to WORKDIR/notags.

=item	B<--log-ahead>, B<--no-log-ahead>

Whether to log to WORKDIR/ahead.

=item	B<--warn-ahead>, B<--no-warn-ahead>

Whether to warn about ahead versions in output.

=item	B<--input3>, B<--input4>

Set 3 column / 4 column input. See INPUT

=item	B<-v|--verbose>

Verbose. Prints extra information.

=item	B<-q|--quiet>

Quiet. Print no warnings.

=item	B<-h|--help>

Display this help and exit.

=back

=head1	AUTHOR

Written by Igor Vlasenko <viy@altlinux.org>.

=head1	COPYING

Copyright (c) 2021 Igor Vlasenko, ALT Linux Team.

This is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any later version.

=cut

