Hello,
I try to index a directory with only pdf, doc, xls and ppt.
I've seen in version 2.5.4 some perl script to filter .ppt, .xls and .doc.
I try to use them with the prog method but when I run swish-e (
"swish-e -c /etc/swish-e/swish.conf -S prog") I have thoses erros :
Undefined subroutine &main::Doc2html called at /etc/swish-e/swish.pl
line 55.
Or
Undefined subroutine &main::pp2hml called at /etc/swish-e/swish.pl
The error depends of the order of the functions.
So I don't undestand why it's work fine for pdf but not for others
format...
I'm looking around ml archive but dont find my St Graal;)
Any idea please ?
Regards,
My configurations files :
/etc/swish-e/swish.conf
----------------------------------------------------------------------------------------------------------------------------------
WordCharacters abcdefghijklmnopqrstuvwxyz0123456789.-
IgnoreFirstChar .-
IgnoreLastChar .-
BeginCharacters abcdefghijklmnopqrstuvwxyz0123456789
EndCharacters abcdefghijklmnopqrstuvwxyz0123456789
#FollowSymLinks yes
IndexReport 3
IndexDir /etc/swish-e/swish.pl
IndexFile /var/lib/swish/index.swish-e
SwishProgParameters /format_ms/
IndexContents TXT .config
IndexContents HTML .doc .xls .ppt .pdf
UndefinedMetaTags auto
-------------------------------------------------------------------------------------------------------------------------------------------
/etc/swish-e/swish.pl
------------------------------------------------------------------------------------------------------------------------------------------
#!/usr/bin/perl -w
use strict;
use lib '../prog-bin';
use lib '/usr/local/lib/swish-e/perl/';
use lib '/usr/local/lib/swish-e/';
use File::Find;
#use SWISH::Filter;
#use SWISH::Filters::Pdf2HTML;
#use SWISH::Filters::pp2html;
#use SWISH::Filters::Doc2html;
#use SWISH::Filters::XLtoHTML;
use pdf2html;
use pp2html;
use XLtoHTML;
use Doc2html;
use constant DEBUG => 1;
my $dir = shift || '.';
find(
{
wanted => \&wanted,
no_chdir => 1,
},
$dir,
);
sub wanted {
return if -d;
if ( /\.pdf$/ ) {
print STDERR "Indexing pdf $File::Find::name\n" if DEBUG;
print ${ pdf2html ( $File::Find::name ) };
} elsif ( /\.doc$/ ) {
print STDERR "Indexing doc $File::Find::name\n" if DEBUG;
print ${ Doc2html ($File::Find::name ) };
} elsif ( /\.ppt$/ ) {
print STDERR "Indexing ppt $File::Find::name\n" if DEBUG;
print ${ pp2html ($File::Find::name ) };
} elsif ( /\.xls$/ ) {
print STDERR "Indexing xls $File::Find::name\n" if DEBUG;
print ${ XLtoHTML ($File::Find::name ) };
} elsif ( /\.config$/ ) {
print STDERR "Indexing $File::Find::name\n" if DEBUG;
print ${ get_content( $File::Find::name ) };
} else {
print STDERR "Skipping $File::Find::name\n" if DEBUG;
}
}
sub get_content {
my $path = shift;
my ( $size, $mtime ) = (stat $path )[7,9];
open FH, $path or die "$path: $!";
my $content = <<EOF;
Content-Length: $size
Last-Mtime: $mtime
Path-Name: $path
EOF
local $/ = undef;
$content .= <FH>;
return \$content;
}
---------------------------------------------------------------------------------------------------------------------------------------------
--
Guguin Benoit
Société Alixen 2 rue Jean Rostand 91 893 Orsay Cedex France
Tel : 01 69 85 24 13, Fax : 01 69 85 24 10
Received on Fri Aug 19 02:46:56 2005