#!/usr/bin/python
# Arepo is the Automatic Repackager, working in the fields of Sisyphus.
#
# Arepo operates on two repositories ('origin' and 'shadow')
# and two architectures ('source' and 'target').
# Packages from the 'origin' repo with architecture 'source'
# are compared to packages with architecture 'target',
# repackaged semi-automatically and placed in the 'shadow' repo.
#
# For example, given the following configuration:
#   name: Sisyphus
#   origin: /Sisyphus
#   flavor: classic
#   source: i586
#   target: x86_64
#   shadow: ~/i586-biarch-repo
#   packages: >
#       glibc
# Arepo will compare two packages glibc-*.i586.rpm and
# glibc-*.x86_64.rpm, extract the source-specific
# files from the former, and create the i586-glibc-*.i586.rpm
# package in the biarch repository.

import os, sys, stat, time
import rpm
import yaml
from itertools import *

# Global log verbosity: Log() prints a message only when its level
# is <= loglevel; -v/--verbose increments it (see main()).
loglevel = 1

def Log(level, format, *args):
    """Emit 'format % args' on stdout when level does not exceed the
    global loglevel; higher levels are more verbose."""
    if level > loglevel:
        return
    sys.stdout.write((format % args) + '\n')


def usage():
    """Print the command synopsis and terminate with exit status 1."""
    print("Usage: arepo <arepo.conf>")
    sys.exit(1)

# Module-level state populated by read_config():
vars = {}  # scalar configuration values (name, origin, flavor, source, ...)
cases = [] # array of (RepackagingCase constructor, "package name")
hacks = {} # per-package spec fragment overrides, keyed by package name

class RepackagingCase:
    """A package name plus the source/target arches it is repackaged
    between; base class for the concrete repackaging strategies."""

    def __init__(self, name, src_arch, dst_arch):
        self.name = name
        self.src_arch = src_arch
        self.dst_arch = dst_arch

    def pkg_file_list_in(self, arch):
        """Set of (filename, mode) pairs this package ships in 'arch'."""
        header = arch.header_for(self.name)
        return header.get_file_list()

class LibraryCase(RepackagingCase):
    """Repackage only the files the target-arch package does not ship,
    so both packages can be installed side by side."""
    strategy = "library"

    def get_parent_relation(self):
        """'Requires' when the package also exists on the target arch,
        otherwise a spec comment placeholder."""
        try:
            self.dst_arch.header_for(self.name)
        except PackageNotFound:
            Log(2, "+ package %s is single-arch, not requiring it", self.name)
            return "# not requiring non-existent package"
        Log(3, "+ package %s exists on both arches", self.name)
        return "Requires"

    def file_list(self):
        """Source-arch files minus those shipped by the target arch,
        i.e. only files that will not cause a conflict."""
        src_files = self.pkg_file_list_in(self.src_arch)
        try:
            dst_files = self.pkg_file_list_in(self.dst_arch)
        except PackageNotFound:
            Log(2, "+ package %s is single-arch, copying all files", self.name)
            dst_files = set()
        return src_files - dst_files

class ProgramCase(RepackagingCase):
    """Repackage the whole file set; the result replaces the target-arch
    package instead of coexisting with it."""
    strategy = "program"

    def get_parent_relation(self):
        """Programs clash file-for-file, so declare Conflicts."""
        return "Conflicts"

    def file_list(self):
        """Every file of the source-arch package."""
        return self.pkg_file_list_in(self.src_arch)

# release-file properties copied verbatim from the origin repo into the
# shadow repo's regenerated base dir (see main() -> regenbasedir)
copy_props = ["archive", "version", "origin", "label", "architecture"]

def concat(lists):
    """Flatten one level of nesting: concat([[1, 2], [3]]) -> [1, 2, 3]."""
    flattened = []
    for sub in lists:
        flattened.extend(sub)
    return flattened

def read_config(name):
    """Load the YAML configuration file and fill the module-level
    vars / cases / hacks structures.

    Recognized keys: 'packages'/'libraries' (whitespace-separated
    LibraryCase names), 'programs' (ProgramCase names), 'hacks'
    (per-package spec overrides); everything else lands in vars
    verbatim, except 'name' which is coerced to str.
    """
    global vars, cases, hacks
    # open() instead of the py2-only file() builtin, and close the
    # handle explicitly instead of leaking it until GC
    f = open(name)
    try:
        # NOTE(security): yaml.load runs arbitrary YAML constructors.
        # The config is operator-supplied here, but yaml.safe_load
        # would be the safer choice.
        config = yaml.load(f)
    finally:
        f.close()
    for key in config:
        if key in ("packages", "libraries"):
            cases.extend((LibraryCase, x) for x in config[key].split())
        elif key == "programs":
            cases.extend((ProgramCase, x) for x in config[key].split())
        elif key == "hacks":
            hacks.update(config[key])
        elif key == "name":
            # YAML may parse the repository name as a non-string scalar
            vars["name"] = str(config["name"])
        else:
            vars[key] = config[key]

    # warn about hack entries that no configured package will consume
    orphan_hacks = set(hacks.keys()) - set(name for (cons, name) in cases)
    if orphan_hacks:
        Log(1, "! orphaned hacks exist for: %s", ' '.join(orphan_hacks))


class Repo:
    """An RPM repository: a pair of topdir and flavor.

    'root' is tilde-expanded so config values like '~/i586-biarch-repo'
    work; 'parent' optionally ties a scratch repository to the object
    that owns it (see Hasher.get_repo).
    """

    def __init__(self, name, root_var, flavor_var, parent=None):
        # os.path is already available through the module-level
        # 'import os'; the original re-imported it locally for no gain
        self.name = name
        self.root = os.path.expanduser(root_var)
        self.flavor = flavor_var
        self.parent = parent

    def __str__(self):
        return self.name

def cached(fun):
    """Decorator for obj.get_foo() methods: stores the first returned
    value as obj.cached_foo and returns it ever after.

    The decorated name must start with 'get_'.  flush_caches() can be
    used later to drop the memoized cached_* attributes.
    """
    from functools import wraps
    name = fun.__name__
    assert name.startswith('get_')
    name = 'cached_' + name[4:]

    @wraps(fun)  # preserve __name__/__doc__ of the wrapped getter
    def method(self, *args, **kw):
        if not hasattr(self, name):
            setattr(self, name, fun(self, *args, **kw))
        return getattr(self, name)
    return method

def flush_caches(obj):
    """Remove every cached_* attribute created by @cached getters."""
    # snapshot the keys before deleting: iterating obj.__dict__.keys()
    # while mutating it only worked because py2 keys() returns a copy;
    # list() makes the snapshot explicit (and py3-safe)
    for attr in list(obj.__dict__):
        if attr.startswith('cached_'):
            delattr(obj, attr)


class ExternalProgramError(RuntimeError):
    """An external command exited with a nonzero status."""

class PackageNotInstallable(ExternalProgramError):
    """apt-get could not resolve the package's dependencies."""

class PackageNotFound(PackageNotInstallable):
    """apt-get does not know the package at all."""

class Arch:
    """Subset of a Repo. Every line in a sources.list file is an Arch."""

    def __init__(self, repo, arch_name, deps):
        self.repo = repo
        self.name = arch_name
        self.deps = deps   # other Arch objects added to our sources.list

    def evr_of(self, pkg_name):
        """[epoch, version, release] of pkg_name; raises on failure."""
        return self.get_aptbox().evr_of(pkg_name)

    def evr_or_none(self, pkg):
        """Like evr_of, but None when the package cannot be resolved."""
        try:
            return self.evr_of(pkg)
        except ExternalProgramError:
            return None

    def header_for(self, pkg):
        """Header of 'pkg' as resolved through this arch's aptbox."""
        return self.get_aptbox().header_for(pkg)

    @cached
    def get_aptbox(self):
        return Aptbox(self)

    @cached
    def get_hasher(self):
        return Hasher(self)

    @cached
    def get_releaseprops(self):
        """Parse base/release.<flavor> into {lowercased name: value}."""
        releasefile = os.path.join(self.repo.root,
                self.name, 'base/release.' + self.repo.flavor)
        props = {}
        for line in open(releasefile):
            line = line.strip()
            # split on the first colon only, so values containing ':'
            # (the original split(':') raised ValueError on those)
            name, value = [s.strip() for s in line.split(':', 1)]
            props[name.lower()] = value
        return props

    def get_release_entry(self, name):
        return self.get_releaseprops()[name]

    def mangle(self, pkgname):
        """Prefix pkgname with this arch: 'glibc' -> 'i586-glibc'."""
        return self.name + '-' + pkgname

    def __str__(self):
        return "%s.%s" % (self.repo, self.name)

    def get_pkg_dir(self):
        return os.path.join(self.repo.root, self.name,
                'RPMS.' + self.repo.flavor)

    def get_base_dir(self):
        return os.path.join(self.repo.root, self.name, 'base')

    def regenbasedir(self, props=()):
        """Run genbasedir for this arch.

        'props' is a sequence of (option, value) pairs passed through
        as --option value.  (Immutable () default instead of the
        original mutable [].)
        """
        try:
            os.makedirs(self.get_base_dir())
        except OSError:
            # the base dir usually exists already; the original bare
            # 'except:' also hid KeyboardInterrupt etc.
            pass
        args = ['--bloat'] + \
                ['--topdir', self.repo.root] + \
                concat(('--' + o, v) for (o, v) in props) + \
                ['--', self.name, self.repo.flavor]
        run('/usr/bin/genbasedir', *args)
        flush_caches(self)   # repository indices just changed

    def rdups(self):
        """Delete superseded package files as reported by rpmrdups."""
        pkg_dir = self.get_pkg_dir()
        allfiles = os.listdir(pkg_dir)
        cmd = ['/usr/bin/rpmrdups'] + \
                [os.path.join(pkg_dir, p) for p in allfiles]
        from subprocess import Popen, PIPE
        process = Popen(cmd, env={}, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
        if err:
            Log(1, '! rpmrdups says:\n' + err)
        for line in out.split('\n'):
            # each output line: <kept package> <obsolete package>...
            for pkg in line.split()[1:]:
                Log(2, "+ removing old package '%s'", pkg)
                # assumes rpmrdups echoes the absolute paths it was
                # given on the command line -- TODO confirm
                os.unlink(pkg)

    def try_install(self, pkg):
        return self.get_aptbox().try_install(pkg)

def to_file(*args):
    """Join the path components and open the result for writing.

    The caller owns the returned file object and should close it.
    """
    fname = os.path.join(*args)
    # open() instead of the py2-only file() builtin
    return open(fname, 'w')

def run(*args, **kwargs):
    """Execute an external command, raising ExternalProgramError on a
    nonzero exit status; kwargs are forwarded to subprocess.call."""
    Log(3, '+ %s', ' '.join(args))
    import subprocess
    status = subprocess.call(args, **kwargs)
    if status != 0:
        raise ExternalProgramError(args)

def grep(regex, strings):
    """Return the members of 'strings' that match 'regex'.

    'strings' may be a newline-joined blob (split first) or a list of
    strings; a list is always returned.
    """
    # isinstance instead of the fragile type(x) == type('') comparison
    if isinstance(strings, str):
        strings = strings.split('\n')
    import re
    # a distinct name: the original rebound 're' to the compiled
    # pattern, shadowing the module
    pattern = re.compile(regex)
    return [s for s in strings if pattern.search(s)]

class Dependency:
    """One Requires/PreReq entry of an RPM header, rendered spec-style."""

    def __init__(self, name, flags, version):
        self.name = name
        self.flags = flags      # rpm RPMSENSE_* bitmask
        self.version = version

    def flag(self, name):
        """Nonzero when the RPMSENSE_<name> bit is set in our flags."""
        return self.flags & getattr(rpm, 'RPMSENSE_' + name)

    def type(self):
        """'PreReq' for pre-requisites, plain 'Requires' otherwise."""
        if self.flag('PREREQ'):
            return 'PreReq'
        return 'Requires'

    def operation(self):
        """Version comparison operator string such as '<=' (may be '')."""
        pairs = (('LESS', '<'), ('GREATER', '>'), ('EQUAL', '='))
        return ''.join(char for flag, char in pairs if self.flag(flag))

    def limitations(self):
        """Scriptlet qualifiers like '(post,preun)'; '' when unrestricted."""
        scripts = ('BUILD', 'CLEAN', 'INSTALL', 'POST',
                'POSTUN', 'PRE', 'PREUN', 'PREP', 'VERIFY')
        lims = ['SCRIPT_' + s for s in scripts if self.flag('SCRIPT_' + s)]
        if not lims:
            return ''
        # strip the 7-char 'SCRIPT_' prefix and lowercase the rest
        return '(%s)' % ','.join(lim[7:].lower() for lim in lims)

    def __str__(self):
        return '%s%s: %s %s %s' % (
                self.type(), self.limitations(), self.name,
                self.operation(), self.version)


class Header:
    """Thin wrapper around an rpm header with convenience accessors."""

    def __init__(self, rpm_header):
        # anything supporting __getitem__ by rpm tag name will do
        self.h = rpm_header

    @staticmethod
    def from_path(path):
        """Read the header of the .rpm file at 'path'."""
        # open() + explicit close: the original used file() and leaked
        # the descriptor until garbage collection
        f = open(path)
        try:
            Log(3, '= reading package %s', os.path.basename(path))
            # [0] drops the extra flag headerFromPackage returns
            header = rpm.headerFromPackage(f.fileno())[0]
        finally:
            f.close()
        return Header(header)

    def get_evr(self):
        """[epoch, version, release] of the package."""
        return [self.h[tag] for tag in ('epoch', 'version', 'release')]

    def __getitem__(self, key):
        val = self.h[key]
        # rpm collapses single-element tag arrays to scalars; undo that
        # for requireflags so callers can always zip over it
        if key == 'requireflags' and not isinstance(val, list):
            val = [val]
        return val

    def get_file_list(self):
        """Set of (filename, filemode) pairs."""
        return set(zip(self['filenames'], self['filemodes']))

    def deps_in_for(self, list, arch):
        """Dependencies of this package limited to names in 'list',
        with each dependency name mangled for 'arch'."""
        return [Dependency(arch.mangle(name), flags, ver)
                for (name, flags, ver) in zip(
                    self['requirename'],
                    self['requireflags'],
                    self['requireversion'])
                if name in list]

class BasicAptbox:
    """Temporary directory holding apt.conf + sources.list for one Arch.

    Subclasses set 'goal' (used in the directory prefix) and build on
    the generated configuration; the directory is removed on
    destruction.
    """
    goal = 'unknown'

    def __init__(self, arch):
        from tempfile import mkdtemp
        self.dir = mkdtemp(prefix='arepo-%s-%s.' % (self.goal, arch.name))
        self._write_apt_conf()
        self._write_sources_list(arch)

    def apt_conf_fname(self):
        from os.path import join
        return join(self.dir, 'apt.conf')

    def _write_apt_conf(self):
        conf = """
        Dir::Etc::SourceList "%(basedir)s/sources.list";
        Dir::Etc::SourceParts "/var/empty";
        Dir::Etc::Parts "/var/empty";
        """ % { 'basedir': self.dir }
        f = to_file(self.apt_conf_fname())
        try:
            # the trailing '\n' matches what 'print >>' appended; the
            # original also never closed this file
            f.write(conf + '\n')
        finally:
            f.close()

    def _write_sources_list(self, arch):
        # one "rpm file:..." line per arch: ourselves plus dependencies
        contents = "\n".join(
                "rpm file:%s %s %s" % (a.repo.root, a.name, a.repo.flavor)
                for a in [arch] + arch.deps)
        Log(1, '- writing sources.list for %s', arch)
        Log(2, contents)
        f = to_file(self.dir, 'sources.list')
        try:
            f.write(contents + '\n')
        finally:
            f.close()

    def __del__(self):
        # best-effort removal of the temporary tree
        run('rm', '-rf', '--', self.dir)


class Aptbox(BasicAptbox):
    """Read-only apt sandbox used to resolve packages and read headers."""
    goal = 'inspector'

    def __init__(self, arch):
        BasicAptbox.__init__(self, arch)
        self.name = str(arch)
        Log(2, '* %s: mkaptbox', self.name)
        run('/usr/bin/mkaptbox', '--apt-conf', self.apt_conf_fname(),
                '--target', arch.name,
                '--', self.dir)
        self.known_uris = {}       # uri -> package name
        self.known_packages = {}   # package name -> Header

    def header_for(self, pkg):
        """Header of 'pkg', resolving it through apt on first use."""
        if pkg not in self.known_packages:
            self.try_install(pkg)
        return self.known_packages[pkg]

    def evr_of(self, pkg):
        return self.header_for(pkg).get_evr()

    def try_install(self, pkg):
        """
        Run "apt-get install --print-uris pkg", capture output
        and read headers from the given files. Updates known_packages. """

        from os.path import join
        apt_get = join(self.dir, 'aptbox', 'apt-get')
        print_uris = [apt_get, 'install', '--yes', '--print-uris', pkg]

        Log(2, '* %s: print-uris for %s', self.name, pkg)
        from subprocess import Popen, PIPE
        process = Popen(print_uris, env={}, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
        if process.returncode != 0:
            # map apt's diagnostics onto our exception hierarchy
            if "Couldn't find package" in err:
                Log(2, '%% %s not found in %s', pkg, self.name)
                raise PackageNotFound(err)
            if "unmet dependencies" in err:
                Log(2, '%% %s not installable', pkg)
                raise PackageNotInstallable(err)
            raise ExternalProgramError(err)

        # URI lines look like: 'file:/path/pkg.rpm' pkg size md5
        out = grep("^'", out)
        uris = [line.split("'")[1] for line in out]

        # explicit loop: map() used only for side effects hid this
        for uri in uris:
            self.inspect_uri(uri)

    def inspect_uri(self, uri):
        """Read the header behind a file: URI, memoized by URI."""
        if uri in self.known_uris:
            return Log(3, ": %s seen before", uri)

        assert uri.startswith('file:')
        path = uri[5:]

        header = Header.from_path(path)
        name = header['name']

        self.known_uris[uri] = name
        # from_path already returns a Header instance; the original
        # wrapped it in a second, redundant Header(...) layer here
        self.known_packages[name] = header

class Hasher(BasicAptbox):
    """hsh build environment that builds the generated spec files."""
    goal = 'builder'

    def __init__(self, arch):
        BasicAptbox.__init__(self, arch)
        Log(2, '* creating a hasher environment')
        self.target = arch.name
        run('hsh', '--init', self.dir, '--apt-conf', self.apt_conf_fname(),
                '--target', self.target)

    def __del__(self):
        try:
            run('/usr/bin/hsh', '--cleanup-only', '--', self.dir)
        finally:
            BasicAptbox.__del__(self)

    def build(self, pkg, spec_string):
        """Wrap 'spec_string' into a labeled tarball and feed it to hsh."""
        from tempfile import NamedTemporaryFile as TmpFile
        spec = TmpFile(prefix=pkg, suffix='.spec')
        # write + '\n' matches what 'print >>spec' emitted
        spec.write(spec_string + '\n')
        spec.flush()
        Log(4, 'SPEC:\n%s\nEND OF SPEC', spec_string)

        # hsh expects a tar archive whose label is the spec file name
        # (the original re-imported TmpFile a second time here)
        tarfile = TmpFile(prefix=pkg, suffix='.tar')
        basename = os.path.basename(spec.name)
        dirname = os.path.dirname(spec.name)
        run('/bin/tar', '-cf', '-', '--label', basename, '-C', dirname,
                basename, stdout=tarfile)

        run('/usr/bin/setarch', self.target, 'hsh', self.dir,
                '--without-stuff',
                '--apt-conf', self.apt_conf_fname(),
                '--target', self.target, tarfile.name)

    def repo_dir(self):
        return os.path.join(self.dir, 'repo')

    @cached
    def get_repo(self):
        return Repo('hasher', self.repo_dir(), 'hasher', parent=self)

    @cached
    def get_arch(self):
        # the original omitted the mandatory 'deps' argument of
        # Arch.__init__, so calling this raised TypeError
        return Arch(self.get_repo(), self.target, [])

    def move_packages_to(self, arch):
        """Move the freshly built RPMS.hasher packages into 'arch'."""
        Log(2, '# moving newly built packages to %s', arch)
        from os.path import join
        destdir = arch.get_pkg_dir()
        try:
            os.makedirs(destdir)
        except OSError:
            pass  # destination exists already; bare 'except:' narrowed
        srcdir = join(self.repo_dir(), self.target, 'RPMS.hasher')
        for p in os.listdir(srcdir):
            run('mv', '--', join(srcdir, p), destdir)

def files_entry(entry):
    """Render one (filename, mode) pair as a %files line, adding the
    %dir marker for directories.

    Takes the pair as a single tuple argument; the original used
    py2-only tuple parameter unpacking (removed by PEP 3113) and the
    fragile 'and/or' ternary trick.
    """
    filename, mode = entry
    if stat.S_ISDIR(mode):
        return '%dir ' + filename
    return filename

def auto_file_list(case):
    """%files lines for every file the case selects for repackaging."""
    return [files_entry(pair) for pair in case.file_list()]

def auto_dep_list(header, source):
    """Spec-style dependency lines for 'header', mangled for 'source'."""
    # we only take into account packages that are present in our config
    known = [name for (cons, name) in cases]
    deps = header.deps_in_for(known, source)
    return '\n'.join(str(dep) for dep in deps)

class DictSearch:
    """Chain several mappings; the first one holding a key wins."""

    def __init__(self, *refs):
        self.refs = refs

    def __getitem__(self, key):
        for mapping in self.refs:
            try:
                return mapping[key]
            except KeyError:
                continue
        raise KeyError(key)

class HacksDict:
    """Mapping of per-package spec hack fragments.

    A key that is absent but well-formed (a '<when>-<what>' pair such
    as 'before-install') yields "" so the spec template can always
    interpolate it; anything else re-raises KeyError.
    """

    def __init__(self, d):
        self.backend = d

    def __getitem__(self, key):
        try:
            return self.backend[key]
        except KeyError:
            parts = key.split('-')
            if len(parts) == 2 and \
                    parts[0] in ('before', 'after', 'replace') and \
                    parts[1] in ('install', 'files', 'deps', 'tags'):
                # this hack is simply not defined -- substitute nothing
                return ""
            # bare 'raise' keeps the original traceback; the original
            # 'raise e' (py2-only except binding) discarded it
            raise

def gen_spec(case, source, fl):
    """Render the autogenerated spec file for 'case' as one string.

    The template uses '~(key)s' placeholders: every literal '%' is
    doubled and every '~' becomes '%' just before the final
    %-interpolation, so rpm macros and shell '%' usage survive
    verbatim.  Placeholder values are resolved through DictSearch:
    first the explicit dict below, then the package header, then the
    per-package hacks (HacksDict supplies "" for absent before-/after-
    fragments).
    """
    pkg = case.name
    header = source.header_for(pkg)

    # NOTE: the template below is runtime data -- do not reformat it.
    return """# Autogenerated. You should not have seen it at all.
~(before-tags)s
Name: ~(mangled-name)s

~(define-ext_epoch)s
%ifdef ext_epoch
Epoch: %ext_epoch
%define chg_epoch %ext_epoch:
%else
%define chg_epoch %nil
%endif

Version: ~(version)s
Release: ~(release)s

Summary: ~(summary)s
License: ~(license)s
Group: ~(group)s
Packager: Sator Arepo <nobody@altlinux.ru>
BuildArch: ~(arch)s

~(before-deps)s

BuildRequires:       ~(name)s = %chg_epoch%version-%release
~(parent-relation)s: ~(name)s = %chg_epoch%version-%release

~(after-tags)s
~(dep-list)s
~(after-deps)s

AutoReq: no, lib, pkgconfig
%ifdef brp_strip_none
%brp_strip_none /*
%else
%set_strip_method none
%endif

%set_verify_elf_method relaxed

%description
[This package was automatically repackaged from ~(arch)s.]

~(description)s

%build
cat >install.sh <<'EOF'
#!/bin/sh -e
pkg=~(pkgname)s
>${TMP:-/tmp}/${pkg}-requires
list=
for file in "$@"; do
        if [ -d "$file" ]; then
            install -d -- "%buildroot$file"
        else
	    echo === $file
            case `file -bL -- "$file"` in
            'ELF '*' shared object'*|'ELF '*' executable'*)
                list="${list} `ldd "$file"|cut -d '>' -f 2|grep \.so|cut -f 1 -d '('`"
                ;;
            esac
            mkdir -p -- "`dirname %buildroot$file`"
            cp -a -- "$file" "%buildroot$file"
        fi
done
if [ -n "$list" ]; then
    list=`for l in $list; do echo $l; done|sort|uniq`
    list=`for l in $list; do rpm -qf --queryformat '%{NAME}\n' $l; done|sort|uniq`
    echo search for $pkg in $list
    for l in $list; do [ $l = $pkg ] && echo skip $l || echo i586-$l >> ${TMP:-/tmp}/${pkg}-requires; done
fi
EOF
chmod +x install.sh

cat >find-deps.sh <<'EOF'
#!/bin/sh -e
file=
handler()
{
        local rc=$?
        trap - EXIT
        [ -z "$file" ] || rm -f -- "$file"
        exit $rc
}
trap handler EXIT HUP INT QUIT PIPE TERM
file=`mktemp -t ${0##*/}.XXXXXXXXXX`

echo 1 >"$file"
(/usr/lib/rpm/${0##*/}; echo $? >"$file") |fgrep -v GLIBC_PRIVATE ||:
exit "$(cat "$file")"

EOF
chmod +x find-deps.sh

cat >find-requires <<'EOF'
#!/bin/sh -e
cat >/dev/null
pkg=~(pkgname)s
file=${TMP:-/tmp}/${pkg}-requires
cat $file
exit 0

EOF
chmod +x find-requires

%define __find_requires %_builddir/find-requires
ln -s find-deps.sh find-provides
%define __find_provides %_builddir/find-provides

%install
xargs ./install.sh <<EOF
~(instlist)s
EOF
~(after-install)s

%files
~(filelist)s
~(after-files)s

%changelog
* ~(date)s Sator Arepo <nobody@altlinux.ru> %chg_epoch%version-%release
- Autorepackaged the version from Sisyphus.
    """.replace('%', '%%').replace('~', '%') % DictSearch({
        'pkgname': pkg,
        'mangled-name': source.mangle(pkg),
        'filelist': '\n'.join(fl),
        # the install list must not carry the '%dir ' markers
        'instlist': '\n'.join(x.startswith('%dir ') and x[5:] or x for x in fl),
        'date': time.strftime('%a %b %d %Y'),
        'buildarch': source.name,
        # emit the Epoch block only when the origin package has one
        'define-ext_epoch': header['epoch'] and
                ('%%define ext_epoch %s' % header['epoch']) or '',
        'dep-list': auto_dep_list(header, source),
        'parent-relation': case.get_parent_relation(),
    },
    header,
    HacksDict(hacks.get(pkg, {})))


def repackage(case, source, target, shadow):
    """Generate a spec for 'case' and build it in the source hasher.

    Returns True on success, None on failure: one failed package must
    not abort the whole run.
    """
    pkg = case.name
    Log(1, '! repackaging %s', pkg)

    fl = auto_file_list(case)
    Log(3, 'file list: %s', ' '.join(fl))

    spec_string = gen_spec(case, source, fl)

    h = source.get_hasher()
    try:
        h.build(pkg, spec_string)
        Log(1, '! repackaged %s', pkg)
        return True
    except Exception:
        # narrowed from a bare 'except:' so KeyboardInterrupt and
        # SystemExit still abort the run
        Log(1, '! repackaging %s failed', pkg)


def main():
    import getopt
    opts, args = getopt.gnu_getopt(sys.argv[1:], 'vf',
            ['verbose', 'force', 'force-list='])

    global loglevel
    force_all = False
    force_list = []

    for opt, val in opts:
        if   opt in ('-v', '--verbose'):
            loglevel += 1
        elif opt in ('-f', '--force'):
            force_all = True
        elif opt in ('--force-list'):
            force_list = val.split(',')

    if len(args) != 1:
        usage()
    read_config(args[0])

    repo = Repo(vars['name'], vars['origin'], vars['flavor'])
    noarch = Arch(repo, 'noarch', [])
    source = Arch(repo, vars['source'], [noarch])
    target = Arch(repo, vars['target'], [noarch])
    shadow_repo = Repo('biarch', vars['shadow'], vars['flavor'])
    shadow = Arch(shadow_repo, vars['source'], [source, noarch])

    new_packages = False

    for (case_constructor, pkg) in cases:
        case = case_constructor(pkg, source, target)
        pkg = case.name
        mangled = shadow.mangle(pkg)
        evr_s = source.evr_of(pkg)
        evr_d = shadow.evr_or_none(mangled)
        is_obsolete = (evr_s != evr_d)
        if is_obsolete:
            Log(3, '* %s %s -> %s', pkg, evr_d, evr_s)
        elif force_all or (pkg in force_list):
            Log(2, '* forced rebuild of %s', mangled)
        else:
            continue
        res = repackage(case, source, target, shadow)
        if res: new_packages = True

    if new_packages:
        source.get_hasher().move_packages_to(shadow)
        shadow.rdups()
        shadow.regenbasedir([(p, source.get_release_entry(p))
		for p in copy_props])

    # Check that packages can be installed on a target machine.
    Log(1, "! checking that re-packages can be installed")

    model_mixin = Arch(shadow_repo, vars['source'], [])
    model = Arch(repo, vars['target'], [model_mixin, noarch])

    for pkg in (shadow.mangle(name) for (cons, name) in cases):
        try:
            model.try_install(pkg)
        except PackageNotInstallable, e:
            Log(1, "\n* %s is not installable:\n%s\n", pkg, e.args)

# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

# vim:set et sw=4 ts=4:
