#!/usr/bin/perl
#
# perlztxt - convert a plain-text file into a ZText Palm database (PDB).
#
# Flow: parse options, read the input (reformatting it into one line per
# paragraph unless --raw is given), collect bookmarks from every --regex
# pattern, optionally list them, then write the database with Palm::ZText.
# Full usage documentation is in the POD after __END__.

use strict;
use warnings;

use Palm::ZText;
use File::Spec;
use Pod::Usage;
use Getopt::Long;

my $VERSION = "0.1";
my $ID = '$Id: perlztxt,v 1.7 2001/06/15 04:25:23 vader21 Exp $';

# Return the $VERSION of a package, or 'unknown' when it defines none.
# The lookup is symbolic so that a missing variable cannot produce an
# "uninitialized value" or "used only once" warning.
sub module_version {
    my ($package) = @_;
    no strict 'refs';
    my $version = ${"${package}::VERSION"};
    return defined $version ? $version : 'unknown';
}

# Short options must be case-sensitive: -R (raw) and -r (regex) differ only
# in case, and Getopt::Long folds case by default.
Getopt::Long::Configure('no_ignore_case');

my %options;
GetOptions(\%options,
    'adjust|a=i', 'length|b=i', 'creator|c=s', 'help|h',
    'list|l', 'output|o=s', 'regex|r=s@', 'title|t=s', 'type|u=s',
    # -v is kept as an alias: it used to work through case folding.
    'version|V|v', 'wbits|w=i', 'compression|z=i',
    # Documented as -R; the previous spec reused -a and shadowed --adjust.
    'raw|R', 'man',
) or pod2usage(-verbose => 0, -exitval => 2);

pod2usage(-verbose => 0, -exitval => 1) if $options{help};
pod2usage(-verbose => 2) if $options{man};

if ($options{version}) {
    # Version information is regular output, not a usage error.
    print "This is perlztxt $VERSION ($ID)\n",
          "using Palm::ZText ", module_version('Palm::ZText'),
          " and Palm::PDB ", module_version('Palm::PDB'), "\n";
    exit 0;
}

# Derive default output name and title from the first input file, if any.
my $fname = @ARGV ? (File::Spec->splitpath($ARGV[0]))[2] : '';
(my $basename = $fname) =~ s/\.[^.]*\z//;

unless (defined $options{output} && length $options{output}) {
    # Without an input file there is nothing to derive a name from.
    pod2usage(-message => "perlztxt: no input file given "
                        . "(use --output when reading standard input)",
              -verbose => 0, -exitval => 2) unless @ARGV;
    $options{output} = "$basename.pdb";
}

$options{title} = $fname
    unless defined $options{title} && length $options{title};

my $text;
if ($options{raw}) {
    # --raw: take the input exactly as it is.  List context is required so
    # that every file named on the command line is read, not just the first.
    local $/;
    $text = join('', <>);
}
else {
    # Paragraph mode: each record is one blank-line-separated paragraph.
    my @paras = do { local $/ = ""; <> };

    for my $para (@paras) {
        # Re-join words hyphenated across a line break.  A word character is
        # required on both sides so dashes used as punctuation survive.
        $para =~ s/(?<=\w)-[ \t]*\n[ \t]*(?=\w)//g;
        # Put the whole paragraph on one line so the reader can word-wrap.
        $para =~ s/\s*\n\s*/ /g;
        # The paragraph terminator would otherwise leave a trailing space.
        $para =~ s/\s+\z//;
    }

    $text = join("\n\n", @paras);
}

# Collect a bookmark for every match of every --regex pattern.
my @bookmarks;
for my $pattern (@{ $options{regex} || [] }) {
    # No /o here: it would freeze the first pattern for all iterations.
    my $re = eval { qr/$pattern/ };
    die "perlztxt: invalid --regex pattern '$pattern': $@"
        unless defined $re;

    pos($text) = 0;
    while ($text =~ /\G.*?($re)/gs) {
        # $#- is the last group that actually matched: the user's last
        # parenthesised expression, or group 1 (the whole match) if none.
        my $group = $#-;
        push @bookmarks, {
            offset => $-[$group],
            name   => substr($text, $-[$group], $+[$group] - $-[$group]),
        };
    }
}

if ($options{list}) {
    local $\ = "\n";
    print "offset		name";
    print "--------------------";
    for my $bookmark (@bookmarks) {
        print "$bookmark->{offset}	$bookmark->{name}";
    }
}

my $ztext = Palm::ZText->new;

$ztext->text($text);
# NOTE(review): the bookmarks collected above are never stored in the
# database; this call was already disabled.  Confirm that Palm::ZText
# provides it before enabling.
#$ztext->bookmarks(@bookmarks);
$ztext->name($options{title});

$ztext->Write($options{output});

__END__


=head1 NAME

perlztxt - Convert text files into ZText databases for use with Gutenpalm

=head1 SYNOPSIS

perlztxt [options] file

  Options:
    --help -h                 Brief usage information
    --man                     Display entire manual
    --output -o filename      Write PDB to filename
    --regex -r regex          Put bookmarks where regex matches
    --list -l                 List bookmarks created by --regex option
    --title -t title          Set title of document to title
    --raw -R                  Do not reformat text file for better viewing
    --version -V              Print version information

  The following options are ignored but are accepted for compatibility 
  with makeztxt:
    --adjust -a
    --length -b
    --creator -c
    --type -u
    --wbits -w
    --compression -z

=head1 OPTIONS

=over 8

=item B<--help>

Prints a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=item B<--output>

Writes the PDB to the specified filename.  By default the output name is
the name of the input file with its extension (if any) replaced by .pdb.

=item B<--regex>

Automatically creates bookmarks at locations in the text matching the
given perl-style regular expression.  This option may be specified more
than once.  The title for the bookmark is taken from the last parenthesised
expression, or all of the text that matched if there is none.

=item B<--list>

List the bookmarks that were created by the --regex option.  Shows both
the title used, and the character offset into the resulting document.

=item B<--title>

Sets the title of the document.  This is the name that you see when you are
choosing a document to read in Gutenpalm.

=item B<--raw>

Skips reformatting of the text.  Normally perlztxt splits the input text into
paragraphs and joins each paragraph onto one line so that Gutenpalm can
properly word-wrap the document.  In addition it tries to join words that are
hyphenated across lines into one word.

=item B<--version>

Shows the version of perlztxt as well as the version of the backend modules.

=back

=head1 DESCRIPTION

B<perlztxt> reads the given input file and generates a Palm database file
(PDB file) for use with the Gutenpalm document reader.

=head1 AUTHOR

Geoff Reedy E<lt>vader21@imsa.eduE<gt>

=head1 SEE ALSO

http://gutenpalm.sourceforge.net/ - home page for the Gutenpalm document
reader for the Palm platform where you can also find document converters
written in C and in Java.

=cut

