#!/usr/bin/perl -w

use strict;
use warnings;
#use autodie qw(open close);
use File::Path qw(make_path remove_tree);
use Test::Repocop::Workdir qw/$repocop_workdir $repocop_test_dbdir $repocop_testcachedir/;
use Test::Repocop::CLI::Base;
# Make this script's package a subclass of Test::Repocop::CLI so that the
# class-method call on __PACKAGE__ below resolves through it.
our @ISA=qw/Test::Repocop::CLI/;

# Defaults for the three options declared in @LONGOPT; each variable is
# passed by reference so the option parser can overwrite it.
my $distro='altlinux';
my $branch='sisyphus';
my $component='classic';
# Option spec => target variable pairs.
# NOTE(review): presumably picked up by get_and_process_cli_options() as a
# package variable, with "=s" meaning "takes a string value" in Getopt::Long
# style -- confirm in Test::Repocop::CLI.
our @LONGOPT=(
    "distribution=s"  => \$distro,
    "branch=s"  => \$branch,
    "component=s"  => \$component,
);
__PACKAGE__->get_and_process_cli_options();

# NOTE(review): judging by its name, this aborts the script when there is
# nothing to build a report from -- confirm in Test::Repocop::Workdir.
&Test::Repocop::Workdir::die_if_nothing_to_report();

# The branch name becomes a directory name below; it is lower-cased first.
$branch=lc($branch);

# Refuse to continue without HOME: the path below would otherwise collapse to
# "/.cache/distrodb/..." and remove_tree() would be run on the wrong location.
(defined($ENV{'HOME'}) && length($ENV{'HOME'}))
    or die "HOME is not set; can't locate the distrodb cache directory";
my $distrodbdir="$ENV{'HOME'}/.cache/distrodb/$distro/$branch";

# Version numbers written into the marker files in $distrodbdir at the end of
# the script (.version, .compat, version.raw, compat.raw).
my $db_version=12;
my $db_compat_version=5;
my $rawdb_version=5;
my $rawdb_compat_version=0;

# Always rebuild the database directory from scratch.
remove_tree($distrodbdir);
make_path("$distrodbdir/groups-allowed");

# Seed the allowed-groups list from the test cache when it is available.
# NOTE(review): the target name is hard-coded to "classic.tsv" and does not
# follow --component -- confirm this is intended.
my $groups_source="$repocop_testcachedir/distromap/groups_allowed/GROUPS";
if (-f $groups_source) {
    # List-form system() bypasses the shell, so unusual characters in either
    # path cannot break the command. A failed copy stays non-fatal, as before.
    system('cp', '-a', $groups_source, "$distrodbdir/groups-allowed/classic.tsv") == 0
        or warn "can't copy $groups_source to $distrodbdir/groups-allowed/classic.tsv\n";
}

# Database subdirectories whose *.txt files are later merged by compactify()
# into one line per key.
my @compactify=qw!aspell.raw bin cmake devel-libs erlang.raw fonts.raw gir golang.raw
 headers headers-rebased java.raw mono.raw mozplugin.raw nagios.raw nodejs.raw
 ocaml.raw path perl php.raw pkg-config plugins.raw provides
 python2 python3 python2.raw python3.raw pyegg.raw requires.raw
 shared-data.raw shared-lib.raw desktop.raw
 srcname2binnames.raw srcversion.raw static-libs texmf.raw typelib vapi vdr.raw!;
# no need to compactify sourcename,..etc
# Create every output subdirectory up front, using File::Path instead of
# spawning a "mkdir -p" shell per directory.
foreach my $type (@compactify, qw!buildreqs-subst sourcename sourceurl.raw unique_data.raw!) {
    make_path("$distrodbdir/$type");
}

# stat(1) filemode reminder
#61440      S_IFMT     0170000   bit mask for the file type bit field

#49152      S_IFSOCK   0140000   socket
#40960      S_IFLNK    0120000   symbolic link
#32768      S_IFREG    0100000   regular file
#24576      S_IFBLK    0060000   block device
#16384      S_IFDIR    0040000   directory
#8192       S_IFCHR    0020000   character device
#4096       S_IFIFO    0010000   FIFO

# Feed one batch script to the sqlite3 command-line shell. Each ".output"
# line redirects the result of the queries that follow it into a file of the
# database directory; ".mode tabs" makes the columns tab-separated.
#
# Fixes relative to the previous version:
#  * the open is really checked now ("open ..., STRING || die" bound the ||
#    to the always-true string, so a failure went unnoticed);
#  * sqlite3 is started without a shell (list-form pipe open), so the database
#    path needs no quoting;
#  * the cmake query sits directly after its own ".output" line again; it used
#    to run after ".output .../sourceurl.raw/...", which left the cmake file
#    empty and appended the cmake rows to the source URL list.
open(my $sqlite, '|-', 'sqlite3', "$repocop_test_dbdir/rpm.db")
    or die "can't run sqlite3: $!";
print {$sqlite} <<"END_SQL";
.mode tabs
.output $distrodbdir/sourcename/${component}.txt
SELECT rpm.name, srcrpm.name FROM rpm JOIN srcrpm ON rpm.sourceid = srcrpm.pkgid;
.output $distrodbdir/srcname2binnames.raw/${component}.txt
SELECT srcrpm.name, rpm.name FROM rpm JOIN srcrpm ON rpm.sourceid = srcrpm.pkgid;
.output $distrodbdir/srcversion.raw/${component}.txt
SELECT srcrpm.name, srcrpm.version FROM srcrpm;
.output $distrodbdir/provides/${component}.txt
select DISTINCT providename, rpm.name FROM rpm_provides JOIN rpm ON rpm_provides.pkgid = rpm.pkgid ORDER BY PROVIDENAME;
.output $distrodbdir/requires.raw/${component}.txt
select DISTINCT requirename, rpm.name FROM rpm_requires JOIN rpm ON rpm_requires.pkgid = rpm.pkgid ORDER BY REQUIRENAME;
.output $distrodbdir/devel-libs/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/lib*.so' or filename glob '/lib64/lib*.so';
.output $distrodbdir/plugins.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and (
 (filename glob '/usr/lib64/*.so' and not filename glob '/usr/lib64/lib*.so')
 or filename glob '/usr/lib64/gimp/2.0/plug-ins/*'
 or filename glob '/usr/lib64/xfce4/panel-plugins/*'
);
.output $distrodbdir/shared-lib.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and (filename glob '/usr/lib/cups/backend/*' or filename glob '/usr/lib/cups/driver/*' or filename glob '/usr/lib/cups/filter/*');
.output $distrodbdir/static-libs/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/lib*.a' or filename glob '/lib64/lib*.a';
.output $distrodbdir/headers/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/include/*.*' and filemode & 16384 = 0;
.output $distrodbdir/pkg-config/${component}.lib.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/pkgconfig/*.pc';
.output $distrodbdir/pkg-config/${component}.share.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/share/pkgconfig/*.pc';
.output $distrodbdir/bin/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/bin/*' or filename glob '/usr/sbin/*' or filename glob '/bin/*' or filename glob '/sbin/*' or filename glob '/usr/games/*' or filename glob '/usr/lib/kde3/bin/*' or filename glob '/usr/lib/kde4/bin/*' or filename glob '/usr/lib/kf5/bin/*' or filename glob '/usr/lib64/qt3/bin/*' or filename glob '/usr/lib64/qt4/bin/*' or filename glob '/usr/share/qt5/bin/*';
.output $distrodbdir/gir/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/share/gir-1.0/*.gir';
.output $distrodbdir/vapi/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/share/*/vapi/*.vapi';
.output $distrodbdir/typelib/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/girepository-1.0/*.typelib';
.output $distrodbdir/perl/${component}.txt
select providename, name from rpm_provides join rpm on rpm_provides.pkgid=rpm.pkgid where providename glob 'perl(*)';
-- .output $distrodbdir/python2/${component}-rp.txt
-- select providename, name from rpm_provides join rpm on rpm_provides.pkgid=rpm.pkgid where providename glob 'python2.*(*)';
-- .output $distrodbdir/python3/${component}-rp.txt
-- select providename, name from rpm_provides join rpm on rpm_provides.pkgid=rpm.pkgid where providename glob 'python3*(*)';
.output $distrodbdir/python2.raw/${component}-py.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python2.*/site-packages/*.py' or filename glob '/usr/lib/python2.*/site-packages/*.py';
.output $distrodbdir/python3.raw/${component}-py.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python3*/site-packages/*.py' or filename glob '/usr/lib/python3*/site-packages/*.py';
.output $distrodbdir/python2.raw/${component}-so.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python2.*/site-packages/*.so';
.output $distrodbdir/python3.raw/${component}-so.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python3*/site-packages/*.so';
.output $distrodbdir/python2/${component}-sc.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python2*/*.so' or filename glob '/usr/lib/python2*/*.so';
.output $distrodbdir/python2/${component}-pc.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python2*/*.py';
.output $distrodbdir/python3/${component}-sc.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python3*/*.so';
.output $distrodbdir/python3/${component}-pc.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/python3*/*.py' or filename glob '/usr/lib/python3*/*.py';
.output $distrodbdir/pyegg.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '*.egg-info' and not filename glob '/usr/share/doc/*' and not rpm.sourceid glob 'python-2.*';
.output $distrodbdir/php.raw/${component}-so.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib64/php/*/extensions/*.so';
.output $distrodbdir/php.raw/${component}-mo.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/share/php*/*';
.output $distrodbdir/mozplugin.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and (filename glob '/usr/lib64/browser-plugins/*.so' or filename glob '/usr/lib64/mozilla/plugins/*.so');

-- todo drop /usr/share/doc/xorg-docs when unique_data will be enabled
.output $distrodbdir/shared-data.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid  where filemode & 16384 = 0 and filename glob '/usr/share/*' and (
    filename glob '/usr/share/doc/xorg-docs/*'
 or filename glob '/usr/share/doc/qt4/*'
 or filename glob '/usr/share/doc/qt5/*'
 or filename glob '/usr/share/qt4/translations/*'
 or filename glob '/usr/share/qt5/translations/*'
 or filename glob '/usr/share/gtk-doc/html/*'
 or filename glob '/usr/share/help/*'
 or filename glob '/usr/share/gnome-shell/*'
 or filename glob '/usr/share/myspell/*'
 or filename glob '/usr/share/hyphen/*'
 or filename glob '/usr/share/mythes/*'
 or filename glob '/usr/share/mediawiki/extensions/*'
 or filename glob '/usr/share/timidity/*'
 or filename glob '/usr/share/awesome/lib/*'
 or filename glob '/usr/share/thumbnailers/*'
 or filename glob '/usr/share/mate-panel/applets/*'
 or filename glob '/usr/share/glib-2.0/schemas/*'
 or filename glob '/usr/share/festival/dicts/*'
 or filename glob '/usr/share/festival/voices/*'
 or filename glob '/usr/share/gimp/2.0/scripts/*'
 or filename glob '/usr/share/dokuwiki/lib/plugins/*'
 or filename glob '/usr/share/themes/*'
 or filename glob '/usr/share/icons/*/*'
    and not filename glob '/usr/share/icons/hicolor/*'
    and not filename glob '/usr/share/icons/locolor/*'
    and not filename glob '/usr/share/icons/HighContrastInverse/*'
    and not filename glob '/usr/share/icons/LowContrast/*'
);
.output $distrodbdir/unique_data.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid  where filemode & 16384 = 0 and filename glob '/usr/share/*' GROUP BY filename HAVING COUNT(filename)=1;
.output $distrodbdir/texmf.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where rpm.sourceid not glob 'tetex-2.0-alt*' and filemode & 16384 = 0 and (filename glob '/usr/share/texmf/*' or filename glob '/usr/share/texmf-texlive/*' or filename glob '/usr/share/texlive/texmf-dist/*' or filename glob '/usr/share/texmf-dist/*');
.output $distrodbdir/cmake/${component}.txt
-- TODO
-- warn !!! filemode & 61440 = 16384 -- looking for dirs only
select LOWER(filename), name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 61440 = 16384 and (filename glob '/usr/lib64/cmake/*' or filename glob '/usr/lib/cmake/*' or filename glob '/usr/share/cmake/*' or filename glob '/usr/share/CMake/*');
.output $distrodbdir/fonts.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/share/fonts/*' and (filename glob '*.ttf' or filename glob '*.ttc' or filename glob '*.ttx' or filename glob '*.otf' or filename glob '*.afm' or filename glob '*.pfb' or filename glob '*.pcf' or filename glob '*.pcf.bz2' or filename glob '*.pcf.gz' or filename glob '*.pcf.xz');
.output $distrodbdir/java.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where (filename glob '/usr/share/java/*' or filename glob '/usr/lib64/java/*' or filename glob '/usr/lib/java/*') and filename glob '*.jar';
.output $distrodbdir/mono.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/lib/mono/*.dll' and not filename glob '/usr/lib/mono/gac/*';
.output $distrodbdir/aspell.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/lib64/aspell/*';
.output $distrodbdir/golang.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/share/gocode/src/*';
.output $distrodbdir/nagios.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and (filename glob '/usr/share/nagios/plugins/*' or filename glob '/usr/lib/nagios/plugins/*' or filename glob '/usr/lib64/nagios/plugins/*');
.output $distrodbdir/vdr.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/lib64/vdr/*.so*';
.output $distrodbdir/erlang.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and (filename glob '/usr/share/erlang/lib/*' or filename glob '/usr/lib/erlang/lib/*' or filename glob '/usr/lib64/erlang/lib/*');
.output $distrodbdir/ocaml.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filemode & 16384 = 0 and filename glob '/usr/lib64/ocaml/*';
.output $distrodbdir/nodejs.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/lib/node_modules/*';
.output $distrodbdir/desktop.raw/${component}.txt
select filename, name from rpm_files join rpm on rpm_files.pkgid=rpm.pkgid where filename glob '/usr/share/applications/*.desktop';
.output /dev/null
attach database '$repocop_test_dbdir/buildreqs-subst.db' as brs;
-- create temporary table tmp001 (realname TEXT, substname TEXT);
-- INSERT INTO tmp001 SELECT realname, substname FROM buildreq_subst WHERE realname <> substname;
.output $distrodbdir/buildreqs-subst/${component}.txt
SELECT realname, substname FROM buildreq_subst WHERE realname <> substname;
.output $distrodbdir/sourceurl.raw/${component}.txt
SELECT name, url FROM srcrpm WHERE url IS NOT NULL;
END_SQL
# close() on a pipe waits for the child; report either the system error or
# the child's exit status when it fails.
close($sqlite)
    or die "sqlite pipe failed: " . ($! ? $! : "exit status " . ($? >> 8));

## extcall ${distrodbdir} ${component}
# Run the external preprocessing helper on the freshly exported files; the
# script cannot continue without it. The message now names the command that
# is actually executed.
system('repocop-report-helper-distrodb-preprocess', $distrodbdir, $component) == 0
    or die "WARNING: repocop-report-helper-distrodb-preprocess ${distrodbdir} ${component} failed";

# Derive the "rebased" header list from the plain header list.
rebase_1dep("$distrodbdir/headers/$component.txt", "$distrodbdir/headers-rebased/$component.txt");

# Merge duplicate keys in every existing *.txt file of the listed
# subdirectories. The loop variable gets its own name so that it no longer
# shadows the --component value.
foreach my $listfile (grep {-e $_} map {glob "$distrodbdir/$_/*.txt"} @compactify) {
    compactify($listfile);
}

# Write the version marker files directly from Perl, not through
# "echo" in a shell, and fail loudly when one of them cannot be written.
my @version_markers = (
    [ '.version'    => $db_version ],
    [ '.compat'     => $db_compat_version ],
    [ 'version.raw' => $rawdb_version ],
    [ 'compat.raw'  => $rawdb_compat_version ],
);
foreach my $marker (@version_markers) {
    my ($marker_name, $marker_value) = @$marker;
    my $marker_path = "$distrodbdir/$marker_name";
    open(my $marker_fh, '>', $marker_path) or die "can't write $marker_path: $!";
    print {$marker_fh} "$marker_value\n"   or die "can't write $marker_path: $!";
    close($marker_fh)                      or die "can't close $marker_path: $!";
}

# compactify($file)
#
# Rewrites the tab-separated file $file in place so that every key (first
# column) appears on exactly one line, followed by the sorted, de-duplicated
# values (second column) seen for it:
#
#     key<TAB>value1<TAB>value2...
#
# Only the first two columns of each input line are used. Lines are split on
# tabs only, because keys such as font names may contain spaces. Blank lines
# are skipped; a line without a value still produces a "key<TAB>" record.
# Dies with the file name when the file cannot be read or written.
sub compactify {
    my ($file) = @_;
    my %values_of;    # key => { value => 1, ... }

    open(my $in, '<', $file) or die "can't read $file: $!";
    while (my $line = <$in>) {
        chomp $line;
        my ($key, $value) = split(/\t/, $line);
        next unless defined $key;    # blank line: nothing to record
        my $seen = ($values_of{$key} ||= {});
        # A hash gives de-duplication in constant time per value.
        $seen->{$value} = 1 if defined $value;
    }
    close($in) or die "can't close $file: $!";

    open(my $out, '>', $file) or die "can't write $file: $!";
    foreach my $key (sort keys %values_of) {
        print {$out} "$key\t", join("\t", sort keys %{ $values_of{$key} }), "\n";
    }
    # Buffered write errors only show up at close time.
    close($out) or die "can't close $file: $!";
    return;
}

# rebase_1dep($infile, $outfile)
#
# Reads the tab-separated file $infile and writes to $outfile one line per
# input line of the form
#
#     rebased<TAB>original
#
# where "original" is the first column of the input line and "rebased" is the
# same path with its first component (everything up to and including the
# first "/") removed and, after that, a leading component made only of digits
# and dots removed as well, e.g. "boost/1.2/foo.h" becomes "foo.h".
# Entries starting with "." and blank lines are skipped. Lines are chomped
# first, so a line without a tab no longer leaks its newline into the output.
# Dies with the file name when a file cannot be opened or closed.
sub rebase_1dep {
    my ($infile, $outfile) = @_;
    open(my $in,  '<', $infile)  or die "can't read $infile: $!";
    open(my $out, '>', $outfile) or die "can't write $outfile: $!";
    while (my $line = <$in>) {
        chomp $line;
        my ($header) = split(/\t/, $line, -1);
        next if !defined $header || $header eq '';
        next if $header =~ /^\./;
        my $rebased_header = $header;
        $rebased_header =~ s!^[^/]*/!!;      # drop the first path component
        $rebased_header =~ s!^[\d\.]*/!!;    # drop a leading version-like component
        print {$out} "$rebased_header\t$header\n";
    }
    close($in)  or die "can't close $infile: $!";
    close($out) or die "can't close $outfile: $!";
    return;
}

#print STDERR "done.\n" if $verbose;

=head1	NAME

repocop-report-distrodb - a tool that creates distrodb db as a repocop report.

=head1	SYNOPSIS

    repocop-report-distrodb [--distribution <name>] [--branch <name>] [--component <name>]

=head1	DESCRIPTION

B<repocop-report-distrodb> creates distrodb db as a repocop report.

=head1	OPTIONS

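=over

=item B<--distribution> I<distribution name>

Distribution whose database is built. Default: C<altlinux>.

=item B<--branch> I<branch name>

Branch whose database is built; the name is converted to lower case.
Default: C<sisyphus>.

=item B<--component> I<component name>

Component name used for the generated file names. Default: C<classic>.

=back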

=head1	AUTHOR

Written by Igor Vlasenko <viy@altlinux.org>.

=head1	ACKNOWLEDGEMENTS

To Alexey Torbin <at@altlinux.org>, whose qa-robot package
had a strong influence on repocop.

=head1	COPYING

Copyright (c) 2008-2023 Igor Vlasenko, ALT Linux Team.

This is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any later version.

=cut

