#!/usr/bin/perl

### TEST to create SAME DESTSETS

use strict;
use warnings;
use Carp;
use Clone 'clone';
use Getopt::Long;
use DistroDB::Load;
use DistroDB::CLI::Stems qw/@loaded_stems/;
use DistroDB::CLI::DistroMap qw/$ORIGIN $DEST $opt_use_projectdb/;
use DistroDB::CLI::SaveState qw//;
use DistroDB::CLI::DebugName qw/$debug_obname $debug_oname/;
use DistroDB::Arrow qw/
&static_create_raw_arrow
&__arrow_head_pkgs
&__aggregate_arrowset_by_pkgset_by_adding_artifacts
&__sort_arrowset_by_set_power
&__is_arrow_pkgset_equal
&__is_arrow_pkgset_included
&__arrow_pkgset_merge
&__get_arrow_keystring
&__arrowset_debug
&__arrow_get_head_pkgset_hashstring
&__arrow_set_head_pkgset
/;
use DistroDBTools;
use ProjectDBTools;

# TODO: add a criterion based on unique file mappings from contents_index
# (like mod %bin%, mod usr/share (not doc)) if the unique mapping is dense enough (> 50% in the area)?
# This is meant to fix broken mappings such as
# wesnoth-data    python-module-wesnoth

# Command-line state.  $verbose is also read by the subs further down.
my $verbose = 0;
my $help;
my $opt_dry_run;
my $skip_id_map;
my $binary_output_file = 'binary/40-distrodb-new.txt';

# Option specs contributed by the shared CLI helper modules are listed first,
# followed by the options that are specific to this script.
GetOptions (
    @ProjectDBTools::LONGOPT,
    @DistroDB::CLI::Stems::LONGOPT,
    @DistroDB::CLI::DistroMap::LONGOPT,
    @DistroDB::CLI::SaveState::LONGOPT,
    @DistroDB::CLI::DebugName::LONGOPT,
    'dry-run'         => \$opt_dry_run,
    'help'            => \$help,
    'id!'             => \$skip_id_map,          # negatable: --id / --no-id
    'O|output-file=s' => \$binary_output_file,
    'verbose+'        => \$verbose,              # each occurrence adds one
);

# NOTE(review): _usage() is not defined in this file; presumably it is
# provided by one of the imported modules -- confirm.
if ($help) {
    _usage();
    exit(0);
}

# Let the helper modules post-process the options they own, then hand the
# verbosity level over to DistroDBTools.
DistroDB::CLI::Stems->process_options();
DistroDB::CLI::DistroMap->process_options();
$DistroDBTools::verbose = $verbose;

# Load the origin and the destination distro databases for the selected stems.
my $origin = {};
my $dest   = {};
&DistroDB::Load::load_distrodb($origin, $ORIGIN, \@loaded_stems);
&DistroDB::Load::load_distrodb($dest,   $DEST,   \@loaded_stems);

my $originurl = $origin->{'sourceurl.raw'};
my $desturl   = $dest->{'sourceurl.raw'};
DistroDB::OriginDestTags->initialize($origin, $dest);

# Result containers, filled in place by __pkgs2set_generic_aggregate().
my $aggregated_premap_origin2set = {};
my $aggregated_premap_dest2set   = {};
my $premap_origin2set            = {};
my $premap_dest2set              = {};

# Per-side parameter tuples: (log prefix, sourcename db, url db, comparator).
# They are flattened into the argument list of _set_is_same_url().
my @is_same_origin_param = (
    'origin2origin',
    $origin->{'sourcename'},
    $originurl,
    \&ProjectDBTools::origin2origin_same_url,
);
my @is_same_dest_param = (
    'dest2dest',
    $dest->{'sourcename'},
    $desturl,
    \&ProjectDBTools::dest2dest_same_url,
);

# Origin side first, then destination side: build the raw package -> arrows
# index and reduce it to the "same project" package sets.
my $raw_premap_origin2set = &__pkgs2set_generic_pretranslate($origin);
&__pkgs2set_generic_aggregate(
    $origin,
    $raw_premap_origin2set,
    $aggregated_premap_origin2set,
    $premap_origin2set,
    \@is_same_origin_param,
);

my $raw_premap_dest2set = &__pkgs2set_generic_pretranslate($dest);
&__pkgs2set_generic_aggregate(
    $dest,
    $raw_premap_dest2set,
    $aggregated_premap_dest2set,
    $premap_dest2set,
    \@is_same_dest_param,
);

if ($opt_dry_run) {
    exit;
}

# __pkgs2set_generic_pretranslate($db)
#
# Builds the raw "package => list of arrows" index for one loaded database.
# For every loaded stem, each key of $db->{$stem} whose value is a list of at
# least two packages contributes one raw arrow to every package of that list.
# The arrow is created by static_create_raw_arrow() from { $stem => [$key] }
# and the package list itself.
#
# Returns a hashref: package => arrayref of raw arrows.
sub __pkgs2set_generic_pretranslate {
    my ($db) = @_;
    my %arrows_of;
    for my $stem (@loaded_stems) {
        my $groups = $db->{$stem};
        for my $key (keys(%$groups)) {
            my $members = $groups->{$key};
            # Missing groups and groups with fewer than two packages are skipped.
            next unless $members and @$members >= 2;
            for my $member (@$members) {
                # A separate arrow object is created for each member.
                my $arrow = &static_create_raw_arrow({$stem => [$key]}, $members);
                push @{ $arrows_of{$member} }, $arrow;
            }
        }
    }
    return \%arrows_of;
}

# __pkgs2set_generic_aggregate($db, $__raw_pkgs2set, $__aggr_pkgs2set,
#                              $__pkgs2set, $_param_same)
#
# Reduces the raw package => arrows index to per-package "same project" sets.
#
# Parameters:
#   $db              - the loaded database (not used by the body).
#   $__raw_pkgs2set  - hashref, package => arrayref of raw arrows
#                      (as returned by __pkgs2set_generic_pretranslate).
#   $__aggr_pkgs2set - hashref, filled in place by pass 1.
#   $__pkgs2set      - hashref, filled in place by pass 2 (the final result).
#   $_param_same     - arrayref (setname, sourcename db, url db, comparator);
#                      element 0 prefixes all messages, the whole list is
#                      flattened into the arguments of _set_is_same_url().
#
# No meaningful return value: results are written into the two hashes.
#
# Passes:
#   1. Per package: aggregate its raw arrows, keep in each arrow's head
#      pkgset only the packages that _set_is_same_url() reports as SAME, drop
#      arrows left with fewer than two packages, and store the re-aggregated,
#      sorted list in $__aggr_pkgs2set.
#   (disabled) Consistency check between the sets of related packages.
#   2. Per package: on a clone of its arrow list, merge arrows into other
#      arrows that include them; store the result in $__pkgs2set and, when a
#      single arrow remains, propagate it to the other packages of that arrow.
#   3. Report packages that still have more than one set; in verbose mode
#      print the list of known sets to STDOUT.
sub __pkgs2set_generic_aggregate {
    my ($db,$__raw_pkgs2set,$__aggr_pkgs2set, $__pkgs2set, $_param_same)=@_;
    my $setname=$_param_same->[0];

    # ---- pass 1: filter every aggregated arrow down to "same" packages ----
    foreach my $pkg (keys(%$__raw_pkgs2set)) {
        # Extra diagnostics only for the package selected via $debug_obname.
        my $locdebug=0;
        $locdebug=2 if $pkg eq $debug_obname;
        #print STDERR "DEBUG:$pkg","\n";
        my $aggregated_arrowlist=&__aggregate_arrowset_by_pkgset_by_adding_artifacts($__raw_pkgs2set->{$pkg});
        &__arrowset_debug($aggregated_arrowlist, "${setname}:${pkg}: aggregated_arrowlist") if $locdebug;
        my @same_arrowlist;
        foreach my $aggregated_arrow (@$aggregated_arrowlist) {
            my @same_pkgset;
            foreach my $pkg2 (&__arrow_head_pkgs($aggregated_arrow)) {
                # TODO: SUBPROJECT?
                if (ProjectDBTools::SAME==&_set_is_same_url($pkg,$pkg2,
                        &__get_arrow_keystring($aggregated_arrow),
                        $locdebug,@$_param_same)) {
                    push @same_pkgset, $pkg2;
                    print STDERR "DEBUG:${setname}:${pkg}, ${pkg2} are the same\n" if $locdebug>1;
                }
            }
            if ($locdebug>1) {
                print STDERR "DEBUG:${setname}:${pkg}: aggregated pkgset=",join(',',&__arrow_head_pkgs($aggregated_arrow)),"\n";
                print STDERR "DEBUG:${setname}:${pkg}: same_pkgset=",join(',',@same_pkgset),"\n";
            }
            # The arrow is narrowed in place; it is kept only if it still
            # relates at least two packages.
            &__arrow_set_head_pkgset($aggregated_arrow,\@same_pkgset);
            push @same_arrowlist, $aggregated_arrow if @same_pkgset>1;
        }
        if (@same_arrowlist) {
            # NOTE(review): __aggregate_arrowset_by_pkgset is not in this
            # file's explicit DistroDB::Arrow import list; presumably it is
            # exported by another imported module -- confirm.
            $__aggr_pkgs2set->{$pkg}=&__sort_arrowset_by_set_power(
                &__aggregate_arrowset_by_pkgset(\@same_arrowlist));
        }
    }

    if (0) {
        # Disabled consistency check; the property is expected to be
        # guaranteed by the algorithm.  Enable it to verify algorithm changes.
        # For every pkg2 found in one of the sets of pkg1, pkg1 must appear in
        # at least one of the sets of pkg2.
        foreach my $pkg1 (keys(%$__aggr_pkgs2set)) {
            my $arrowset1=$__aggr_pkgs2set->{$pkg1};
            foreach my $arrow (@{$arrowset1}) {
                foreach my $pkg2 (&__arrow_head_pkgs($arrow)) {
                    # Look at the sets of pkg2 (the original looked up pkg1
                    # again, which made the check vacuous).
                    my $arrowset2=$__aggr_pkgs2set->{$pkg2} || [];
                    my $found=0;
                    ARROW2: foreach my $arrow2 (@{$arrowset2}) {
                        foreach my $pkg1candidate (&__arrow_head_pkgs($arrow2)) {
                            if ($pkg1candidate eq $pkg1) {
                                $found=1;
                                last ARROW2;
                            }
                        }
                    }
                    next if $found;
                    # Fail only after all sets of pkg2 have been inspected.
                    &__arrowset_debug($arrowset1, "${setname}: pkg1=${pkg1}");
                    &__arrowset_debug($arrowset2, "${setname}: pkg2=${pkg2}");
                    die "check reflexivity: $pkg1 not found for $pkg2";
                }
            }
        } # end check
    } # end if 0

    # ---- pass 2: merge included arrows and publish the result ----
    foreach my $pkg (keys(%$__aggr_pkgs2set)) {
        # Work on a deep copy so that $__aggr_pkgs2set stays untouched.
        my $arrowset=clone($__aggr_pkgs2set->{$pkg});
        if (@$arrowset>1) {
            #&__arrowset_debug($arrowset, "$_param_same->[0]: pkg=$pkg");
            my @tail;
            while (@$arrowset>1) {
                my $merged=0;
                my $arrow=pop @$arrowset;
                # Scan ALL remaining arrows, last to first.  This must be a
                # C-style loop with semicolons: with commas Perl iterates over
                # a three-element list and $i never moves, so only a single
                # candidate was ever tested.
                for (my $i=$#{$arrowset}; $i>=0; $i--) {
                    if (&__is_arrow_pkgset_included($arrow,$arrowset->[$i])) {
                        &__arrow_pkgset_merge($arrow,$arrowset->[$i]);
                        $merged=1;
                        last;
                    }
                }
                # Arrows that could not be merged keep their relative order.
                unshift @tail, $arrow unless $merged;
            }
            push @$arrowset, @tail;
        }
        if (@$arrowset>1) { # TODO ifdebug
            &__arrowset_debug($arrowset, "${setname}: pkg=${pkg} has multiset");
        }
        $__pkgs2set->{$pkg}=$arrowset;
        if (@$arrowset==1) {
            # Propagate the single set to every package it contains.  The
            # lookup and the stores are keyed by $pkg1 (the member), not by
            # $pkg: keyed by $pkg the lookup always returned $arrowset itself,
            # so the "different multiset" branch could never be reached.
            foreach my $pkg1 (&__arrow_head_pkgs($arrowset->[0])) {
                my $arrowset1=$__pkgs2set->{$pkg1};
                if (!$arrowset1) {
                    $__pkgs2set->{$pkg1}=$arrowset;
                } elsif (scalar @$arrowset1==1 and
                         &__is_arrow_pkgset_equal($arrowset->[0],$arrowset1->[0])) {
                    $__pkgs2set->{$pkg1}=$arrowset;
                } else {
                    &__arrowset_debug($arrowset1, "${setname}: pkg=${pkg1} has different multiset from ${pkg}");
                }
            }
        }
    }

    # ---- pass 3: diagnostics ----
    foreach my $pkg (keys(%$__pkgs2set)) {
        my $arrowset=$__pkgs2set->{$pkg};
        if (@$arrowset>1) {
            &__arrowset_debug($arrowset, "W: ${setname}: pkg=${pkg} has multiset");
        }
    }

    # debug: TODO: save as state ?
    if ($verbose) {
        # Collect the distinct head-pkgset strings of each package's first
        # arrow and print them sorted.
        my %sets;
        foreach my $pkg (keys(%$__pkgs2set)) {
            $sets{&__arrow_get_head_pkgset_hashstring($__pkgs2set->{$pkg}->[0])}=1;
        }
        print "${setname}: known sets:\n";
        foreach my $key (sort {$a cmp $b} keys(%sets)) {
            print '= ',$key,"\n";
        }
    }
}

# _set_is_same_url($pkg1, $pkg2, $message, $locdebug,
#                  $setname, $sourcenamedb, $urldb, $same_url_sub)
#
# Decides whether two packages count as belonging to the same project.
#
#   $pkg1, $pkg2   - package names to compare.
#   $message       - context string for the comparator's message; '[?]' is
#                    substituted when it is undefined.
#   $locdebug      - debug level handed through to the comparator.
#   $setname       - prefix used in warnings and in the comparator message.
#   $sourcenamedb  - hashref, package name => source name.
#   $urldb         - hashref, source name => url.
#   $same_url_sub  - code ref called with
#                    (src1, src2, url1, url2, message, locdebug).
#
# Returns ProjectDBTools::SAME when the project db is not in use
# ($opt_use_projectdb is false) or when a source name is missing for either
# package (a warning is emitted per missing one); otherwise returns whatever
# the comparator returns.
sub _set_is_same_url {
    my ($pkg1, $pkg2, $message, $locdebug,
        $setname, $sourcenamedb, $urldb, $same_url_sub) = @_;

    my $verdict = ProjectDBTools::SAME();
    # NO NEED to short-cut $pkg1 eq $pkg2 here; that is checked in ProjectDB.
    return $verdict unless $opt_use_projectdb;

    my $src1 = $sourcenamedb->{$pkg1};
    my $src2 = $sourcenamedb->{$pkg2};

    if (defined($src1) and defined($src2)) {
        $message //= '[?]';
        $verdict = $same_url_sub->(
            $src1, $src2, $urldb->{$src1}, $urldb->{$src2},
            "$setname $pkg1 vs $pkg2 for $message", $locdebug);
        return $verdict;
    }

    # At least one source name is unknown: warn and keep the default verdict.
    warn "W: $setname: can't find src for pkg1 $pkg1" unless defined $src1;
    warn "W: $setname: can't find src for pkg2 $pkg2" unless defined $src2;
    return $verdict;
}

1;

__END__
