My nightly indexing run is generating some nastygrams:
Error: Bad annotation destination
Error: Bad annotation destination
Error: Bad annotation destination
Error: Bad annotation destination
I assume that something is wrong in my setup, but no
information is given that would allow me to determine
just what the problem is. Help?
-r
P.S. In case it helps, here are my configuration files:
do_spider
---------
:
# do_spider - run swish-e in "prog" mode using swish2.conf.
#
# swish2.conf (and the spider.pl / spider.config it names) are given as
# relative paths, so stop here if the working directory cannot be entered
# rather than running swish-e somewhere else.
cd /afs/slac.stanford.edu/u/gl/rdm/fsw_cgi/fsw_docs.d || exit 1
# Put SPIDER_DEBUG=failed into the environment of the swish-e run.
SPIDER_DEBUG=failed
export SPIDER_DEBUG
swish-e -S prog -c swish2.conf
---------
swish2.conf
-----------
# swish2.conf - spider the FSW web pages
# Program that supplies the documents (used with "swish-e -S prog")
IndexDir spider.pl
# Define what site to index
SwishProgParameters spider.config
IndexFile a_etc/index.fsw_docs
# N.B. Do more specific transformations first!
# That is, do "web/cache" before "/web/".
# Each ExtractPath directive is kept on ONE line: the !pattern!replacement!
# is an argument of the directive, not a directive of its own.
MetaNames docset
ExtractPath docset regex !^.+/exp/glast/flight/web/cache/.+$!trv!
ExtractPath docset regex !^.+/exp/glast/flight/web/.+$!web!
ExtractPath docset regex !^.+/exp/glast/flight/doxygen/.+$!dox!
# Anything not matched above gets docset "etc"
ExtractPathDefault docset etc
-----------
spider.config
-------------
# spider.config
# Build %fsw_prod: a set whose keys are "Project/binary/Package/Version",
# one for every ".../<Project>/source/<Package>/prod" link found under the
# flight area.  The test_url callback in @servers consults this set to
# decide which doxygen pages to index.
my (@fsw_ver, %fsw_prod,
    $fsw_t1, $fsw_t2);
$fsw_t1  = '/afs/slac.stanford.edu/g/glast/flight';
@fsw_ver = glob("$fsw_t1/*/source/*/prod");
foreach my $fsw_link (@fsw_ver) {
    # readlink() yields undef for anything that is not a symbolic link.
    $fsw_t2 = readlink($fsw_link);
    next unless defined $fsw_t2;

    # Only record targets that really have the expected shape; using
    # $1..$3 after a failed match would reuse captures from an earlier
    # iteration (or undef) and register a bogus key.
    #
    #                      Project         Package Version
    #                      -------         ------- -------
    unless ($fsw_t2 =~ m|^.*/([^/]+)/source/([^/]+)/([^/]+)$|) {
        warn "spider.config: unexpected link target '$fsw_t2' for '$fsw_link'\n";
        next;
    }
    $fsw_prod{"$1/binary/$2/$3"}++;
}
# Server list for spider.pl (this file is handed to it through
# SwishProgParameters).  @servers is deliberately NOT declared with "my":
# it has to stay a package variable.
@servers = (
    {
        base_url =>
            'http://www.slac.stanford.edu/exp/glast/flight/web/FSW_home.shtml',
        delay_sec => 0,
        email     => 'rdm@slac.stanford.edu',

        # test_url( $uri ) - decide whether a URL should be processed.
        # $uri must provide ->path and stringify to the full URL.
        # Returns 1 to accept the URL, 0 to skip it.
        test_url => sub {
            my $uri  = shift;
            my $base = 'http://www.slac.stanford.edu/exp/glast/flight';

            # The doxygen index page ends in ".htm", which the extension
            # filter below rejects, so it has to be accepted first.
            return 1 if $uri =~ m{^\Q$base\E/doxygen/Doxyidx\.htm$};

            # Everything else must look like a PDF or (S)HTML document.
            return 0 unless $uri->path =~ m{\.(?:pdf|html|shtml)$};

            # \Q...\E: the dots in $base are literal, not "any character".
            if ($uri =~ m{^\Q$base\E/doxygen/}) {
                # Doxygen pages are accepted only when their
                # Project/binary/Package/Version prefix is a key of
                # %fsw_prod (filled in earlier in this file).
                if ($uri =~ m|^.+/doxygen/([^/]+/binary/[^/]+/[^/]+)|) {
                    return defined($fsw_prod{$1}) ? 1 : 0;
                }
                print STDERR "? uri='$uri'\n";
                return 0;
            }

            # Outside doxygen/, only the web/ tree is indexed.
            return $uri =~ m{^\Q$base\E/web/} ? 1 : 0;
        },

        filter_content => \&filter_content,
    },
);
# This is adapted from an example of how to use the SWISH::Filter module,
# included with the swish-e distribution.
#
# filter_content( $uri, $server, $response, $content_ref )
#
#   $server      - hash ref; {abort} and {parser_type} may be set here
#   $response    - object providing ->content_type and ->base
#   $content_ref - reference to the document text; replaced in place with
#                  the filtered text when filtering succeeds
#
# Returns:
#   true  if content-type is empty or text/*, or if the document was
#         converted to non-binary content
#   false if the document was not converted or is still binary, and also
#         when SWISH::Filter cannot be loaded or the filter object cannot
#         be created (in which case $server->{abort} is set as well)
#
my $filter;    # cache the object.

sub filter_content {
    my ( $uri, $server, $response, $content_ref ) = @_;

    # Uncomment this to enable debugging of SWISH::Filter
    # $ENV{FILTER_DEBUG} = 1;

    my $content_type = $response->content_type;

    # Ignore text/* content type -- no need to filter
    return 1 if !$content_type || $content_type =~ m!^text/!;

    # Load the module and create the filter object on first use only.
    unless ( $filter ) {
        unless ( eval { require SWISH::Filter; 1 } ) {
            $server->{abort} = $@ || 'Failed to load SWISH::Filter';
            return;
        }

        $filter = SWISH::Filter->new;
        unless ( $filter ) {
            $server->{abort} = "Failed to create filter object";
            return;
        }
    }

    # If not filtered, return false and doc will be ignored (not indexed)
    my $doc = $filter->convert(
        document     => $content_ref,
        name         => $response->base,
        content_type => $content_type,
    );
    return unless $doc;

    # return unless $doc->was_filtered  # could do this since checking
    #                                   # for text/*, above
    return if $doc->is_binary;

    # Hand the converted text back through the caller's reference.
    $$content_ref = ${ $doc->fetch_doc };

    # let's see if we can set the parser.
    $server->{parser_type} = $doc->swish_parser_type || '';

    return 1;
}
1; # Keeps Perl happy...
-------------
Received on Wed Dec 15 16:24:35 2004