Skip to main content.
home | support | download

Back to List Archive

Peculiar nastygrams

From: Richard Morin <rdm(at)not-real.slac.stanford.edu>
Date: Thu Dec 16 2004 - 00:24:31 GMT
My nightly indexing run is generating some nastygrams:

   Error: Bad annotation destination
   Error: Bad annotation destination
   Error: Bad annotation destination
   Error: Bad annotation destination

I assume that something is wrong in my setup, but no
information is given that would allow me to determine
just what the problem is.  Help?

-r

P.S.  In case it helps, here are my configuration files:


do_spider
---------
:
# do_spider - run swish-e in "prog" mode using swish2.conf

# swish2.conf is named by a relative path below, so the working directory
# matters: stop here if the cd fails instead of running swish-e elsewhere.
cd /afs/slac.stanford.edu/u/gl/rdm/fsw_cgi/fsw_docs.d || exit 1

# Exported so that processes started by swish-e inherit the setting.
export SPIDER_DEBUG; SPIDER_DEBUG=failed

swish-e -S prog -c swish2.conf
---------


swish2.conf
-----------
# swish2.conf - spider the FSW web pages

IndexDir spider.pl

# Define what site to index

SwishProgParameters spider.config

IndexFile a_etc/index.fsw_docs

# N.B. Do more specific transformations first!
#      That is, do "web/cache" before "/web/".

MetaNames           docset
ExtractPath         docset  regex  !^.+/exp/glast/flight/web/cache/.+$!trv!
ExtractPath         docset  regex  !^.+/exp/glast/flight/web/.+$!web!
ExtractPath         docset  regex  !^.+/exp/glast/flight/doxygen/.+$!dox!
ExtractPathDefault  docset  etc
-----------


spider.config
-------------
# spider.config

     # Build %fsw_prod, the set of doxygen trees that test_url (below) accepts.
     #
     # Every ".../<project>/source/<package>/prod" entry under the flight
     # directory is read as a symbolic link.  The link target is expected to
     # end in "<project>/source/<package>/<version>"; it is recorded under
     # the key "<project>/binary/<package>/<version>".

     my (@fsw_ver, %fsw_prod,
         $fsw_t1, $fsw_t2);

     $fsw_t1  =  '/afs/slac.stanford.edu/g/glast/flight';
     @fsw_ver =  glob("$fsw_t1/*/source/*/prod");

     foreach my $fsw_link (@fsw_ver) {

         # readlink() returns undef when the entry is not a symbolic link.
         $fsw_t2 = readlink($fsw_link);

         next unless (defined($fsw_t2));

         # Skip targets that do not have the expected layout.  Without this
         # check a failed match would leave $1..$3 unset (or stale) and a
         # meaningless key would be stored.
         #
         #                       Project        Package Version
         #                       -------        ------- -------
         next unless $fsw_t2 =~ m{^.*/([^/]+)/source/([^/]+)/([^/]+)$};

         $fsw_prod{ "$1/binary/$2/$3" }++;
     }

     # Server list for the spider.  There is a single entry, starting at the
     # FSW home page.
     @servers = (
         {
             base_url   =>
                 'http://www.slac.stanford.edu/exp/glast/flight/web/FSW_home.shtml',
             delay_sec  => 0,
             email      => 'rdm@slac.stanford.edu',

             # test_url callback: called with a URI object, returns 1 to
             # accept the URL and 0 to reject it.
             #
             # Accepted:
             #   - the doxygen index page (Doxyidx.htm)
             #   - .pdf/.html/.shtml pages under <base>/doxygen/ whose
             #     "<project>/binary/<package>/<version>" prefix is a key
             #     of %fsw_prod
             #   - .pdf/.html/.shtml pages under <base>/web/
             # Everything else is rejected.
             test_url => sub {

                 my $uri  =  shift;
                 my $base =  'http://www.slac.stanford.edu/exp/glast/flight';
                 my $path =  $uri->path;

                 # The index page ends in ".htm", which the extension filter
                 # below rejects, so it has to be tested first.  \Q..\E keeps
                 # the dots in $base from acting as regex wildcards.
                 return 1 if ($uri =~ m{^\Q$base\E/doxygen/Doxyidx\.htm$});

                 return 0 unless ($path =~ m{\.(pdf|html|shtml)$});

                 if ($uri =~ m{^\Q$base\E/doxygen/}) {

                     if ($uri =~ m{^.+/doxygen/([^/]+/binary/[^/]+/[^/]+)}) {

                         return (defined($fsw_prod{$1})) ? 1 : 0;
                     }

                     # A doxygen URL without the expected layout: report it
                     # on STDERR and reject it.
                     print STDERR "? uri='$uri'\n";
                     return 0;
                 }

                 return ($uri =~ m{^\Q$base\E/web/}) ? 1 : 0;
             },

             filter_content  => \&filter_content,
         },
     );


# This is adapted from an example of how to use the SWISH::Filter module,
# included with the swish-e distribution.
#
# filter_content( $uri, $server, $response, $content_ref )
#
# Arguments:
#      $uri          not used by this routine
#      $server       hash ref; {abort} and {parser_type} are set here
#      $response     object providing content_type() and base()
#      $content_ref  reference to the document text; overwritten with the
#                    filtered text when filtering succeeds
#
# Returns:
#      true if content-type is empty or text/*, or if the document was
#           filtered
#      false if document was not filtered
#      false, with $server->{abort} set, if the module cannot be loaded or
#           the filter object cannot be created.
#

my $filter;  # cache the object.

sub filter_content {
     my ( $uri, $server, $response, $content_ref ) = @_;

     # Uncomment this to enable debugging of SWISH::Filter
#     $ENV{FILTER_DEBUG} = 1;

     my $content_type = $response->content_type;

     # Ignore text/* content type -- no need to filter

     return 1 if !$content_type || $content_type =~ m!^text/!;

     # Load the module and create the filter object on first use only.

     unless ( $filter ) {

         # Test eval's own result rather than $@ alone.
         unless ( eval { require SWISH::Filter; 1 } ) {
             $server->{abort} = $@ || "Failed to load SWISH::Filter";
             return;
         }

         $filter = SWISH::Filter->new;

         unless ( $filter ) {
             $server->{abort} = "Failed to create filter object";
             return;
         }
     }

     # If not filtered, return false and doc will be ignored (not indexed)

     my $doc = $filter->convert(
         document     => $content_ref,
         name         => $response->base,
         content_type => $content_type,
     );

     return unless $doc;

     # return unless $doc->was_filtered  # could do this since checking
                                         # for text/*, above
     return if $doc->is_binary;

     $$content_ref = ${$doc->fetch_doc};

     # let's see if we can set the parser.

     $server->{parser_type} = $doc->swish_parser_type || '';

     return 1;
}

1;              # Keeps Perl happy...
-------------
Received on Wed Dec 15 16:24:35 2004