#!/usr/bin/perl

use strict;
use warnings;
use Carp;
use Getopt::Long;
use DistroMap 0.25;
use Data::Dumper;
use DistroDBTools;
use ProjectDBTools;
use DistroDB::Load;
use DistroDB::CLI::DistroMap qw/$ORIGIN $DEST $opt_use_projectdb/;
use DistroDB::CLI::DebugName qw/$debug_obname $debug_oname/;

# ---- Command-line options ------------------------------------------------
# Defaults; each one may be overridden through the GetOptions() table below.
my $skip_id_map=1;   # second argument of sort_print_hasharray(); set by --id / --noid
                     # NOTE(review): "--id" sets the *skip* flag to 1, which reads
                     # inverted -- confirm the intended meaning of the option.
my $verbose=1;       # every --verbose increments it by one
my $help;
my $opt_dry_run;     # --dry-run: run the analysis but write no files
my $source_output_file='source/40-distrodb-new.txt';

# GetOptions() returns false when it met an unknown or malformed option (it
# has already printed its own warning by then).  Stop right there instead of
# running the whole mapping with a half-parsed configuration.
GetOptions (
    @ProjectDBTools::LONGOPT,
    @DistroDB::CLI::DistroMap::LONGOPT,
    @DistroDB::CLI::DebugName::LONGOPT,
    'dry-run' => \$opt_dry_run,
    'help'  => \$help,
    'id!'  => \$skip_id_map,
    'O|output-file=s' => \$source_output_file,
    'verbose+'  => \$verbose,
) or die "Error in command line arguments\n";

if ($help) {
    # _usage() is not defined in this file; presumably it is provided by one
    # of the imported modules -- TODO confirm.
    _usage();
    exit(0);
}
DistroDB::CLI::DistroMap->process_options();

# $ORIGIN and $DEST are either "repo" or "repo/branch".  Split them and hand
# the parts to DistroMap as its defaults; without a "/" only the repo default
# is set and the branch default is left untouched.
if ($ORIGIN=~m!^(.+)/(.+)$!) {
    $DistroMap::default_originrepo=$1;
    $DistroMap::default_originbranch=$2;
} else {
    $DistroMap::default_originrepo=$ORIGIN;
}
if ($DEST=~m!^(.+)/(.+)$!) {
    $DistroMap::default_destrepo=$1;
    $DistroMap::default_destbranch=$2;
} else {
    $DistroMap::default_destrepo=$DEST;
}

# ---- Shared state for the whole script -----------------------------------
# DistroMap object; used further down to map origin binary names to
# destination binary names.
my $distromap=DistroMap->new();
# Hand-maintained maps, filled by the reader calls below.  Later code treats
# every value as an array ref of mapped names and uses the key as an origin
# source name ($raw_source_map) or an origin binary name ($raw_binary_map).
my $raw_source_map={};
my $raw_binary_map={};
&DistroDBTools::read_except_dir_distromap_map($raw_source_map,'source',\%DistroDBTools::SOURCE_DISTRODB_GENERATED);
&DistroDBTools::read_except_dir_distromap_map($raw_binary_map,'binary',{});


# Per-repository data, filled in place by the loader.
my $origin={};
my $dest={};
&DistroDB::Load::load_distrodb_sourceonly($origin,$ORIGIN);
&DistroDB::Load::load_distrodb_sourceonly($dest,$DEST);

# Shortcuts into the loaded data.  The *url hashes are looked up by source
# package name; the *snmap hashes are looked up by binary package name and
# yield the source package name.
my $originurl=$origin->{'sourceurl.raw'};
my $desturl=$dest->{'sourceurl.raw'};
my $originsnmap=$origin->{'sourcename'};
my $destsnmap=$dest->{'sourcename'};

# Flag hashes whose keys are used below as source package names.
my (%originsources, %destsources);
&DistroDBTools::read_distrodb_2nd_column_as_flag(\%originsources, $ORIGIN, 'sourcename');
&DistroDBTools::read_distrodb_2nd_column_as_flag(\%destsources, $DEST, 'sourcename');
# Result buckets, all keyed by origin source name:
#   %origindestbin   - {dest source name => 1} reached through the binary map
#   %distromap       - array ref of destination source names (the final map)
#   %ambiguous_map   - array ref of several equally plausible destinations
#   %replaceable_map - flag (1) or array ref of candidates with a different URL
#   %unmapped        - flag: no destination found
#   %todo_import     - flag: unmapped and absent from $raw_source_map
my (%origindestbin, %distromap, %ambiguous_map, %replaceable_map, %unmapped, %todo_import);
# Origin names for which a same-named entry exists in $destsnmap but was
# judged different; the value (1, 2 or 3) records which branch set it.
my (%known_different);

# Walk every origin binary package and, through the hand-written binary map,
# collect the destination source packages its origin source package reaches:
# $origindestbin{origin source}{dest source}=1.
foreach my $binname (keys (%$originsnmap)) {
    my $originsrcname=$originsnmap->{$binname};
    my $raw=$raw_binary_map->{$binname};
    # Assign unconditionally: "my $x=1 if COND" has undefined behaviour
    # (perlsyn, "Statement Modifiers") and may leak a value between iterations.
    my $locdebug=(($debug_obname and $binname eq $debug_obname) or
                  ($debug_oname and $originsrcname eq $debug_oname)) ? 1 : 0;
    next if !$raw;    # no explicit binary mapping for this package
    warn "DEBUG: $originsrcname has raw binmap\n" if $locdebug;
    foreach my $mapped_name (@$raw) {
        my $destsrcname=$destsnmap->{$mapped_name};
        # Mapped binaries unknown to the destination are ignored.
        if ($destsrcname) {
            $origindestbin{$originsrcname}->{$destsrcname}=1;
            warn "DEBUG: from binmap $binname -> $mapped_name: $originsrcname -> $destsrcname\n" if $locdebug;
        }
    }
}

# Classify every origin source package into exactly one of %distromap,
# %ambiguous_map, %replaceable_map or %unmapped (plus the %todo_import and
# %known_different side flags), based on what the binary map found for it.
foreach my $originname (keys (%originsources)) {
    # Assign unconditionally: "my $x=1 if COND" has undefined behaviour
    # (perlsyn, "Statement Modifiers").
    my $local_debug=($debug_oname and $debug_oname eq $originname) ? 1 : 0;
    my $ref=$origindestbin{$originname};
    unless($ref) {
        # The binary map produced no destination candidate at all.
        warn "debug: $originname: no ref\n" if $local_debug;
        if ($originname=~/^(?:aspell|hunspell|hyphen|ispell|man-pages|mythes|stardict)-/) {
            # Prefix-named data packages: accepted only when the destination
            # has an entry of the same name whose source name is that very name.
            if ($destsnmap->{$originname}) {
                if ($destsnmap->{$originname} eq $originname) {
                    $distromap{$originname}=[$originname];
                } else {
                    $unmapped{$originname}=1;
                }
            } else {
                $todo_import{$originname}=1 if !$raw_source_map->{$originname};
                $unmapped{$originname}=1;
            }
        } elsif ($originname=~/-fonts$/) {
            $todo_import{$originname}=1 if !$raw_source_map->{$originname};
            $unmapped{$originname}=1;
        } else {
            if ($opt_use_projectdb and $destsources{$originname}) {
                # Same source name exists on both sides: let ProjectDB compare
                # the two upstream URLs.
                my $same_proj=&ProjectDBTools::origin2dest_same_url($originname,$originname,$originurl->{$originname},$desturl->{$originname});
                if ($same_proj == ProjectDBTools::SAME) {
                    # ?
                    # $distromap{$originname}=[$originname];
                } elsif ($same_proj == ProjectDBTools::REPLACEABLE or $same_proj == ProjectDBTools::SUBPROJECT) {
                    $replaceable_map{$originname}=1;
                    if ($destsnmap->{$originname}) {
                        $known_different{$originname}=1;
                    }
                } else {
                    $unmapped{$originname}=1;
                    if ($destsnmap->{$originname}) {
                        $known_different{$originname}=2;
                    }
                }
            } else {
                $unmapped{$originname}=1;
            }
        }
    } else {
        # Keep only the candidates that _is_src_same_url() accepts.
        my @destcandidates=keys (%$ref);
        my @destnames=grep {&_is_src_same_url($originname, $_)} @destcandidates;
        warn "debug: $originname: destcandidates=",join(' ',@destcandidates),"\n",
            "debug: $originname: destnames=",join(' ',@destnames),"\n" if $local_debug;
        if (1==@destnames) {
            $distromap{$originname}=\@destnames;
            warn "debug: $originname: added distromap @destnames\n" if $local_debug;
        } elsif (@destnames>1) {
            $ambiguous_map{$originname}=\@destnames;
            warn "debug: $originname: ambiguous destnames\n" if $local_debug;
        } else {
            # Candidates exist but none passed the URL check.
            warn "debug: $originname: replaceable destcandidates\n" if $local_debug;
            $replaceable_map{$originname}=\@destcandidates;
            if ($destsnmap->{$originname}) {
                $known_different{$originname}=3;
            }
        }
    }
}


######################### python ##########################
# Binary package names that is_allowed_dest_bin_python_name() accepts as
# python packages even though their name does not follow the
# python-module naming pattern.
my %KNOWN_PYNAME;
$KNOWN_PYNAME{$_}=1 foreach qw/
9pfs
fuse9p
eyeD3
eyeD3-py3
gnome-devel-docs
instant
kde4-python-devel
libpyside-qt4
libpyside-qt4-devel
libpyside-qt4-py3
libpyside-qt4-py3-devel
lorem-ipsum-generator
pudb
pykickstart
pytables
pytables-py3
pytz-zoneinfo
ryu
rpm-macros-sphinx
rpm-macros-sphinx3
subvertpy-fast-export
sympy
sympy-py3
translate-toolkit
veusz
veusz-py3
/;
# Origin source packages that the python mapping loop skips silently when
# their only candidate is a destination with non-python binaries.
my %PY_SKIP_ORIGIN=(
    'redhat-menus' => 1,
);

# Python egg data for both repositories.  The reader fills a hash from the
# 'pyegg.raw' table; pyegg_sourcemap_from_binarymap() treats its keys as
# binary package names and regroups the egg names by source package, giving
# {source name => {egg name => 1}}.
my (%originbinpkg2pyegg,%destbinpkg2pyegg);
&DistroDBTools::read_distrodb_reverse_known_one2one(\%originbinpkg2pyegg, $ORIGIN, 'pyegg.raw');
my $originsrc2pyegg=&pyegg_sourcemap_from_binarymap(\%originbinpkg2pyegg,$originsnmap);
&DistroDBTools::read_distrodb_reverse_known_one2one(\%destbinpkg2pyegg, $DEST, 'pyegg.raw');
my $destsrc2pyegg=&pyegg_sourcemap_from_binarymap(\%destbinpkg2pyegg,$destsnmap);

# Per origin source package:
#   %python_candidate   - at least one binary maps to a python-looking dest binary
#   %python_reject      - at least one binary maps to a non-python dest binary
#   %origin_has_python2 / %origin_has_python3 - which flavours were seen
#   %origindestpybin    - list of the python-looking mapped dest binaries
my (%python_candidate, %python_reject, %origin_has_python2, %origin_has_python3, %origindestpybin);

foreach my $originbin (keys (%$originsnmap)) {
    my $src=$originsnmap->{$originbin};
    # -devel and -macros subpackages may belong to a python package, so a
    # non-python mapping of theirs does not count against the source.
    my $neutral=($originbin=~/(?:-devel$|-macros$)/);
    foreach my $mapped ($distromap->multimap_binary_origin2dest($originbin)) {
        unless (&is_allowed_dest_bin_python_name($mapped)) {
            $python_reject{$src}=1 unless $neutral;
            next;
        }
        $python_candidate{$src}=1;
        my $flavour=&is_python3_name($mapped) ? \%origin_has_python3 : \%origin_has_python2;
        $flavour->{$src}=1;
        push @{$origindestpybin{$src}}, $mapped;
    }
}

if ($debug_oname) {
    warn "$debug_oname: python candidate=",_S($python_candidate{$debug_oname}),
        " python_reject=",_S($python_reject{$debug_oname}),"\n";
}

# Same bookkeeping for the destination side, keyed by destination source name:
# which sources own python-looking binaries (and of which flavour), which own
# at least one binary that is not python-looking, and the list of python
# binaries per source (%destbin).
my (%python_destcandidate, %python_destreject, %dest_has_python2, %dest_has_python3, %destbin);
foreach my $bin (keys (%$destsnmap)) {
    my $src=$destsnmap->{$bin};
    # alt specific: python(3?)-module
    unless (&is_allowed_dest_bin_python_name($bin)) {
        $python_destreject{$src}=1;
        next;
    }
    $python_destcandidate{$src}=1;
    my $flavour=&is_python3_name($bin) ? \%dest_has_python3 : \%dest_has_python2;
    $flavour->{$src}=1;
    push @{$destbin{$src}}, $bin;
}

# Result buckets of the python mapping loop, keyed by origin source name with
# an array ref of destination source names as value (filled through
# add_to_hash2array()).
# NOTE(review): the hashes %pyoriginno3 and %pyoriginno2 are declared but the
# code below only uses the scalars of the same name.
my (%pymap,%pyxor,%pydestno3,%pydestno2,%pyoriginno3,%pyoriginno2,%nonpy);
# Both scalars alias %pymap: the cases "origin lacks py3" and "origin lacks
# py2" are therefore stored straight into the regular python map.
my $pyoriginno3=\%pymap;
my $pyoriginno2=\%pymap;

# Origin source names in sorted order, so that the loop below (and its
# diagnostics) runs in a stable order.
# NOTE(review): this repeats the read already done into %originsources with
# the same arguments -- confirm whether the second read is needed.
my %originsrcnames_tmp;
&DistroDBTools::read_distrodb_2nd_column_as_flag(\%originsrcnames_tmp, $ORIGIN, 'sourcename');
my @originsrcnames=sort {$a cmp $b} keys (%originsrcnames_tmp);

# For every python-candidate origin source package, find the destination
# source package(s) owning its mapped python binaries and file the result
# into %pymap, %pydestno2, %pydestno3, %pyxor or %nonpy depending on which
# python flavours (2/3) exist on either side.
foreach my $originsrcname (@originsrcnames) {
    # A source with a non-python mapped binary is skipped unless its own
    # name starts with "python".
    next if $originsrcname!~/^python/ and $python_reject{$originsrcname};
    next unless $python_candidate{$originsrcname};
    # Assign unconditionally: "my $x=1 if COND" has undefined behaviour
    # (perlsyn, "Statement Modifiers").
    my $local_debug=($debug_oname and $debug_oname eq $originsrcname) ? 1 : 0;
    my $already_mapped_name=$distromap{$originsrcname};
    my $origin2=$origin_has_python2{$originsrcname};
    my $origin3=$origin_has_python3{$originsrcname};

    # Destination source packages that own the mapped python binaries.
    my %srcmap_tmp;
    foreach my $mappedbin (@{$origindestpybin{$originsrcname}}) {
        my $destsrcname = $destsnmap->{$mappedbin};
        $srcmap_tmp{$destsrcname}=1 if defined $destsrcname;
    }
    my @srcmap=sort {$a cmp $b} keys(%srcmap_tmp);
    warn "$originsrcname: python origin2=",_S($origin2)," origin3=",_S($origin3),
        " srcmap=",join(',',@srcmap),"\n" if $local_debug;
    @srcmap=grep {&_is_src_same_url($originsrcname, $_)} @srcmap;

    # pyegg name check
    my $origineggs=$originsrc2pyegg->{$originsrcname};
    # keys(%$origineggs) on an undefined value would autovivify $origineggs
    # into an empty hash and so switch the egg filter below on in debug runs
    # only; guard the dereference so debugging never changes the result.
    warn "$originsrcname: origineggs=",join(',',($origineggs ? keys(%$origineggs) : ())),"\n" if $local_debug;
    if ($origineggs) {
        # Drop candidates whose single egg name is not among the origin eggs;
        # candidates with no egg or with several eggs are kept.
        my (@newsrcmap,@eliminated);
        foreach my $destname (@srcmap) {
            my @desteggs=keys(%{$destsrc2pyegg->{$destname}});
            my $destegg;
            $destegg=$desteggs[0] if 1==@desteggs;
            if (not $destegg or $origineggs->{$destegg}) {
                push @newsrcmap, $destname;
            } else {
                push @eliminated, $destname.($destegg? '('.$destegg.')': '');
            }
        }
        @srcmap=@newsrcmap;
        print STDERR "$originsrcname: pyegg=",join(',',keys(%$origineggs))," eliminated ", join(',',@eliminated),"\n" if @eliminated;
    }

    if (@srcmap==1) {
        my $destname=$srcmap[0];
        if (!&_is_src_same_url($originsrcname,$destname)) {
            print STDERR "$originsrcname: maps to not same pure python module $destname\n" if $verbose;
            next;
        }
        if ($python_destreject{$destname}) {
            # Destination also builds non-python binaries: record it in
            # %nonpy unless a hand-written mapping or the skip list covers it.
            my $raw=$raw_source_map->{$originsrcname};
            next if $raw or $PY_SKIP_ORIGIN{$originsrcname};
            #if ($originsrcname ne $destname) { # oscillation :(
                print STDERR "$originsrcname: maps to not pure python module $destname\n" if not $already_mapped_name and $verbose;
                &add_to_hash2array(\%nonpy,$originsrcname,$destname);
                next;
            #}
        }
        # Compare the python flavours available on both sides.
        my $dest2=$dest_has_python2{$destname};
        my $dest3=$dest_has_python3{$destname};
        if ($origin2 and $origin3 and $dest2 and $dest3) {
            &add_to_hash2array(\%pymap,$originsrcname,$destname);
        } elsif ($origin2 and $origin3 and $dest2 and !$dest3) {
            &add_to_hash2array(\%pydestno3,$originsrcname,$destname);
        } elsif ($origin2 and $origin3 and !$dest2 and $dest3) {
            &add_to_hash2array(\%pydestno2,$originsrcname,$destname);
        } elsif ($origin2 and !$origin3 and $dest2 and $dest3) {
            &add_to_hash2array($pyoriginno3,$originsrcname,$destname);
        } elsif ($origin2 and !$origin3 and $dest2 and !$dest3) {
            &add_to_hash2array(\%pymap,$originsrcname,$destname);
        } elsif ($origin2 and !$origin3 and !$dest2 and $dest3) {
            &add_to_hash2array(\%pyxor,$originsrcname,$destname);
        } elsif (!$origin2 and $origin3 and $dest2 and $dest3) {
            &add_to_hash2array($pyoriginno2,$originsrcname,$destname);
        } elsif (!$origin2 and $origin3 and $dest2 and !$dest3) {
            &add_to_hash2array(\%pyxor,$originsrcname,$destname);
        } elsif (!$origin2 and $origin3 and !$dest2 and $dest3) {
            &add_to_hash2array(\%pymap,$originsrcname,$destname);
        } elsif (!$origin2 and !$dest2 or !$origin3 and !$dest3) {
            die "Oops1";
        } else {
            die "Oops2";
        }
    } elsif (@srcmap>1) {
        # Several candidates: accept the pair "py2 source + py3 source of the
        # same stem" (names are sorted, so the py2 one comes first).
        # NOTE(review): only the first two candidates are compared; with
        # three or more the rest is ignored -- confirm this is intended.
        my @stems=@srcmap;
        @stems=map {s/^python[23]?-modules?-//;s/-py3$//;$_} @stems;
        my ($stem1,$stem2)=@stems;
        if ($stem1 eq $stem2 and $dest_has_python2{$srcmap[0]} && $dest_has_python3{$srcmap[1]}) {
            if ($origin2 and $origin3) {
                &add_to_hash2array(\%pymap,$originsrcname,$srcmap[0]);
                &add_to_hash2array(\%pymap,$originsrcname,$srcmap[1]);
            } elsif ($origin2 and !$origin3) {
                &add_to_hash2array(\%pymap,$originsrcname,$srcmap[0]);
            } elsif (!$origin2 and $origin3) {
                &add_to_hash2array(\%pymap,$originsrcname,$srcmap[1]);
            } else {
                die "Oops";
            }
        } else {
            if ($stem1 eq $stem2 and $verbose>1) {
                foreach my $n (@srcmap) {
                    print STDERR $n," has py2\n" if $dest_has_python2{$n};
                    print STDERR $n," has py3\n" if $dest_has_python3{$n};
                }
            }
            my %rawmap;
            my $raw=$raw_source_map->{$originsrcname};
            # by hands
            if ($raw) {
                %rawmap=map {$_=>1} @$raw;
                print STDERR "$originsrcname: found explicit mapping to ",join(', ',@$raw),"\n" if $verbose>1;
            }
            # Only report here; nothing is added to any map in this branch.
            my @has_explicit_map=grep {$rawmap{$_}} @srcmap;
            if ($raw and !@has_explicit_map) {
                print STDERR "$originsrcname: explicit mapping to ",join(', ',@$raw),' not in ',join(', ',@srcmap),"\n";# if $verbose;
            } elsif (!$raw) {
                print STDERR "multiple candidates for $originsrcname: ",join(', ',@srcmap),"\n" if $verbose;
            }
        }
    }
}

############################################################

# merge nonpy
# Reconcile the "not pure python" findings with the regular map: identical
# entries are dropped, differing ones turn the package ambiguous, and an
# unmapped package whose only candidate carries its own name (or its name
# without the "-doc" suffix) is mapped to itself.
foreach my $srcname (keys (%nonpy)) {
    my $nonpy_dests=$nonpy{$srcname};
    my $mapped_dests=$distromap{$srcname};
    #print STDERR "merging nonpy for $srcname: ",' nonpy=',join(',',@$nonpy),' distromap=',($distromap ? join(',',@$distromap):''),"\n";
    unless ($mapped_dests) {
        next unless 1==@$nonpy_dests;
        my $only=$nonpy_dests->[0];
        # second alternative: SuSE style
        # NOTE(review): in the "-doc" case the package is still mapped to its
        # own name rather than to $only -- confirm this is intended.
        next unless $only eq $srcname or $only.'-doc' eq $srcname;
        # move to distromap
        $distromap{$srcname}=[$srcname];
        delete $nonpy{$srcname};
        next;
    }
    my $nonpy_str=_SS($nonpy_dests);
    my $mapped_str=_SS($mapped_dests);
    if ($nonpy_str ne $mapped_str) {
        print STDERR "nonpy: conflict for $srcname: distromap=",$mapped_str," nonpy=",$nonpy_str,"\n";
        # The ambiguous entry reuses the array that was in %distromap.
        push @$mapped_dests, @$nonpy_dests;
        $ambiguous_map{$srcname}=$mapped_dests;
        delete $distromap{$srcname};
    }
    delete $nonpy{$srcname};
}

# merge pymap
# Fold the python map into the regular map:
#   - not mapped yet               -> take the python mapping, clear "unmapped"
#   - same destination set         -> nothing to do
#   - regular map subset of python -> the python mapping replaces it
#   - anything else                -> conflict, the package becomes ambiguous
foreach my $srcname (keys (%pymap)) {
    my $mapped_dests=$distromap{$srcname};
    my $py_dests=$pymap{$srcname};

    unless ($mapped_dests) {
        # move to distromap
        $distromap{$srcname}=$py_dests;
        delete $unmapped{$srcname};
        next;
    }

    my $pymapss=_SS($py_dests);
    my $distromapss=_SS($mapped_dests);
    next if $pymapss eq $distromapss;

    # allow distromap to be subset of python: real set inclusion, which also
    # covers several entries that are not adjacent in the sorted list.
    my %in_pymap=map {$_ => 1} @$py_dests;
    my @only_in_distromap=grep {!$in_pymap{$_}} @$mapped_dests;
    if (!@only_in_distromap) {
        # move to distromap
        $distromap{$srcname}=$py_dests;
    } else {
        print STDERR "pymap: conflict for $srcname: distromap=",$distromapss," pymap=",$pymapss,"\n";
        # The ambiguous entry reuses the array that was in %distromap.
        $ambiguous_map{$srcname}=$mapped_dests;
        push @{$ambiguous_map{$srcname}}, @$py_dests;
        delete $distromap{$srcname};
    }
}

# TODO: Representing Sets in Perl
# http://world.std.com/~swmcd/steven/perl/pm/set.html

############################################################
#                         OUTPUT                           #
############################################################

# kill django from pydestno2; known can have no py2
delete @pydestno2{grep {/django/} keys(%pydestno2)};

if ($verbose>1) {
    # Size of every result bucket, one line each.
    foreach my $counter (
        ["distromap=",   \%distromap],
        ["ambiguous=",   \%ambiguous_map],
        ["replaceable=", \%replaceable_map],
        ["unmapped=",    \%unmapped],
        ["pymap=",       \%pymap],
        ["pydestno2=",   \%pydestno2],
        ["pydestno3=",   \%pydestno3],
        #["pyoriginno2=", \%pyoriginno2],
        #["pyoriginno3=", \%pyoriginno3],
        ["pyxor=",       \%pyxor],
        ["nonpy=",       \%nonpy],
    ) {
        my ($label,$bucket)=@$counter;
        print STDERR $label,scalar keys (%$bucket),"\n";
    }

    # Full dump of the problematic buckets, when they are not empty.
    foreach my $dump (
        ["pydestno2:\n", \%pydestno2],
        ["pyxor:\n",     \%pyxor],
        ["nonpy:\n",     \%nonpy],
    ) {
        my ($label,$bucket)=@$dump;
        print STDERR $label, Dumper($bucket),"\n" if %$bucket;
    }
}

if (%nonpy) {
    warn "still have nonpy: ", Dumper(\%nonpy),"\n";
}
if (%pydestno2) {
    warn "pydestno2 found. consider building py2 packages";
}
if (%pyxor) {
    die "pyxor: Not implemented yet";
}

# A dry run stops before anything is written or removed.
if ($opt_dry_run) {
    exit;
}

### TODO: print id map if source/10-distrodb-old.txt tells otherwise!!!
&DistroDBTools::sort_print_hasharray(\%distromap,$skip_id_map,$source_output_file);

# Remove the todo lists of a previous run; only the non-empty ones are
# written again below.
my @stale_todo_files=qw/todo-source-ambiguous.txt todo-source-unmapped.txt todo-source-import.txt todo-source-pydestno3.txt
todo-source-has-replaceable.txt todo-source-replaceable.txt todo-source-different.txt
/;
unlink @stale_todo_files;

if (%pydestno3) {
    &DistroDBTools::write_map(\%pydestno3,'todo-source-pydestno3.txt');
}
if (%ambiguous_map) {
    &DistroDBTools::write_map(\%ambiguous_map,'todo-source-ambiguous.txt');
}
if (%unmapped) {
    &DistroDBTools::write_flag(\%unmapped,'todo-source-unmapped.txt');
}
if (%todo_import) {
    &DistroDBTools::write_flag(\%todo_import,'todo-source-import.txt');
}
if (%replaceable_map) {
    &DistroDBTools::write_flag(\%replaceable_map,'todo-source-has-replaceable.txt');
}

# write always to overwrite old flags
&DistroDBTools::write_flag(\%known_different,'flags/source/noversion/40-source-different.txt');



############################################################

# Tell whether a destination binary package name counts as "python".
# True when the name is listed in %KNOWN_PYNAME, when its source package
# (looked up in $destsnmap) has a name starting with "python", or when the
# name itself matches the python-module / auxiliary-subpackage pattern.
sub is_allowed_dest_bin_python_name {
    my $bin=shift;
    # alt specific: KNOWN_PYNAME
    if ($KNOWN_PYNAME{$bin}) {
        return 1;
    }
    my $src=$destsnmap->{$bin};
    #confess "Oops: for $name: sourcename not found" unless $sourcename;
    if ($src and $src=~/^python/) {
        return 1;
    }
    # alt specific: python(3?)-module
    return $bin =~ /(^python(3?)-(?:module|tool)|-debuginfo$|-doc$|-docs$|-examples$|-pickles$|-tests$|-common$|-bench$)/;
}

# True when the package name marks a python 3 package: it either starts
# with "python3-" or ends with "-py3".
sub is_python3_name {
    my ($pkgname)=@_;
    return $pkgname =~ /(^python3-|-py3$)/;
}

# Append $to to the array ref stored under $map->{$from}, creating the array
# on first use and leaving it untouched when $to is already in it.
sub add_to_hash2array {
    my ($map,$from,$to)=@_;
    #return if $to =~ /^python3?-module-.+-tests$/;
    my $entries=($map->{$from} ||= []);
    my $already_there=grep {$_ eq $to} @$entries;
    push @$entries, $to unless $already_there;
}

# Printable form of a scalar for diagnostics: the value itself, or the
# string 'undef' when it is undefined.
sub _S {
    my ($value)=@_;
    return 'undef' unless defined $value;
    return $value;
}

# Canonical string form of a list: the elements of the array ref sorted as
# strings and joined with commas; '' when no array ref is given.
sub _SS {
    my ($list)=@_;
    return '' unless $list;
    my @ordered=sort @$list;
    return join(',',@ordered);
}

# Regroup python egg names by source package.
#   $binmap  - hash ref: binary package name => "pythonegg(...)(EGGNAME)" string
#   $bin2src - hash ref: binary package name => source package name
# Returns a hash ref {source name => {EGGNAME => 1}}.
# Binaries without a known source package, and values that do not have the
# expected pythonegg form, are skipped.
sub pyegg_sourcemap_from_binarymap {
    my ($binmap,$bin2src)=@_;
    my (%srcmap);
    foreach my $key (keys(%$binmap)) {
        my $srcname=$bin2src->{$key};
        #warn "$key: no srcname" if ! defined ($srcname); # happens in bad rsync state
        next if ! defined ($srcname);
        my $provide=$binmap->{$key};
        # Use $1 only after a successful match: after a failed one it still
        # holds the capture of an earlier iteration (or is undefined), which
        # would record a wrong egg name for this source package.
        next unless defined $provide and $provide=~/^pythonegg\(.*\)\((.*)\)$/;
        $srcmap{$srcname}->{$1}=1;
    }
    return \%srcmap;
}

# True when the origin and destination source packages are considered the
# same project.  Without --use-projectdb ($opt_use_projectdb false) every
# pair is accepted; otherwise ProjectDBTools compares the two names and
# their upstream URLs and only the verdict SAME counts.
# Dies when either name is undefined.
sub _is_src_same_url {
    my ($originsrc, $destsrc)=@_;
    defined $originsrc or die "Oops!: undefined origin src";
    defined $destsrc or die "Oops!: undefined dest src";
    my $verdict=$opt_use_projectdb
        ? &ProjectDBTools::origin2dest_same_url($originsrc,$destsrc,$originurl->{$originsrc},$desturl->{$destsrc})
        : ProjectDBTools::SAME;
    return $verdict==ProjectDBTools::SAME;
}
