Skip to main content.
home | support | download

Back to List Archive

Re: Fw: SWISH::API and highlighting

From: Jonas Wolf <JOWOLF(at)not-real.uk.ibm.com>
Date: Mon Jul 19 2004 - 10:02:43 GMT
Hmm, so attachments are not allowed, then. Well, I will inline the code instead.

Jonas

--- search.old.cgi      2003-12-18 06:00:18.000000000 +0000
+++ search.cgi  2004-07-19 09:31:29.169054400 +0100
@@ -123,13 +123,6 @@
         $swish = SWISH::API->new( $index );
         die "Failed to create SWISH::API object" unless $swish;
         $swish->AbortLastError if $swish->Error;
-
-
-        # Now cache header data (used for highlighting)
-        %headers = map { lc($_) => ($swish->HeaderValue( $index, $_ )||'') } $swish->HeaderNames;
-
-        # and cache the highlighting object
-        $highlight_object = SWISH::PhraseHighlight->new( \%highlight_settings, \%headers );
     }
 
 
@@ -159,6 +152,27 @@
     # Store the result objects in an array
     push @records, $result while $cnt-- && ($result = $results->NextResult);
 
+    if (!$swish)
+    {
+        my $stemmer = (@records) ? sub
+        {
+            my $word = shift;
+            my $fw = $records[0]->FuzzyWord($word);
+            my ($newword) = $fw->WordList;
+            return $newword;
+        } : sub
+        {
+            my $word = shift;
+            return $word;
+        };
+
+        # Now cache header data (used for highlighting)
+        %headers = map { lc($_) => ($swish->HeaderValue( $index, $_ )||'') } $swish->HeaderNames;
+
+        # and cache the highlighting object
+        $highlight_object = SWISH::PhraseHighlight->new( \%highlight_settings, \%headers, $stemmer );
+    }
+
     # Now create a filter 'highlight' for use in the template to highlight terms
     # Usage requires passing in the *metaname* associated with the property
     # that's being highlighted -- this allows the program to know what


--- PhraseHighlight.old.pm      2004-07-19 09:02:32.046875000 +0100
+++ PhraseHighlight.pm  2004-07-19 09:10:48.265625000 +0100
@@ -9,9 +9,7 @@
 use constant DEBUG_HIGHLIGHT => 0;
 
 sub new {
-    my ( $class, $settings, $headers ) = @_;
-
-
+    my ( $class, $settings, $headers, $stemmer ) = @_;
 
     my $self = bless {
         settings => $settings,
@@ -21,12 +19,7 @@
 
 
     if ( $self->header('stemming applied') =~ /^(?:1|yes)$/i ) {
-        eval { require SWISH::Stemmer };
-        if ( $@ ) {
-            warn('Stemmed index needs Stemmer.pm to highlight: ' . $@);
-        } else {
-            $self->{stemmer_function} = \&SWISH::Stemmer::SwishStem;
-        }
+       $self->{stemmer_function} = \&$stemmer;
     }
 
 
@@ -98,6 +91,7 @@
     my $word_pos = $words[0] eq '' ? 2 : 0;  # Start depends on if first word was wordcharacters or not
 
 
+    my $show_all_words = 0;
 
     # Remember, that the swish words are every other in @words.
 
@@ -159,6 +153,7 @@
 
                 if ( $stemmer_function ) {
                     my $w = $stemmer_function->($check_word);
+                    print STDERR "<br>Used stemmer on $check_word to yield $w<br>" if $w && DEBUG_HIGHLIGHT;
                     $check_word = $w if $w;
                 }
 
@@ -180,7 +175,7 @@
 
             print STDERR "      *** PHRASE MATCHED (word:$word_pos offset:$end_pos) *** \n" if DEBUG_HIGHLIGHT;
 
-           $found_phrase++;
+                    $found_phrase++;
 
 
             # We are currently at the end word, so it's easy to set that highlight
@@ -201,16 +196,20 @@
 
 
             # Now, flag the words around to be shown
-            my $start = ($word_pos - $Show_Words + 1) * 2;
-            my $stop   = ($word_pos + $end_pos + $Show_Words - 2) * 2;
-            if ( $start < 0 ) {
-                $stop = $stop - $start;
-                $start = 0;
-            }
-
-            $stop = $#words if $stop > $#words;
-
-            $flags[$_]++ for $start .. $stop;
+                    my ($start, $stop);
+                    if (!$show_all_words) {
+                                $start = ($word_pos - $Show_Words + 1) * 2;
+                                $stop   = ($word_pos + $end_pos + $Show_Words - 2) * 2;
+                                if ( $start < 0 ) {
+                                    $stop = $stop - $start;
+                                    $start = 0;
+                                }
+
+                                $stop = $#words if $stop > $#words;
+
+                                $show_all_words = 1 if ($start == 0 && $stop == $#words);
+                                if (!$show_all_words) { $flags[$_]++ for $start .. $stop; }
+                    }
 
 
             # All done, and mark where to stop looking
@@ -249,7 +248,7 @@
                 last;
             }
 
-            if ( $flags[$i] ) {
+            if ( $show_all_words || $flags[$i] ) {
 
                 push @output, $dotdotdot if !$printing++ && !$first;
                 push @output, $words[$i];
@@ -259,7 +258,7 @@
                 $printing = 0;
             }
 
-           $first = 0;
+                    $first = 0;
 
 
         }
@@ -352,6 +351,3 @@
 }
 
 1;
-
-
-
Received on Mon Jul 19 03:02:53 2004