I've been a fairly long-time user of SWISH, but was kinda disappointed
that it wasn't being maintained. I ended up making quite a few bug
fixes which I see have also been made in SWISH-E. It's nice to see that
the SWISH-E people have taken it back up!!!
Anyway, back to the point. There's a small bug in how swish-e
handles IGNORELASTCHAR's. Specifically, if it removes the last
character because it's a member of IGNORELASTCHAR, it doesn't
check if the new last character is also a member of IGNORELASTCHAR.
The following diff contains the changes to index.c needed to
iteratively remove all IGNORELASTCHAR's.
For example, the word:
Briggs'.
should be stripped down to:
Briggs
--
Peter
*** src/index.c Mon Sep 29 16:50:07 1997
--- index.c Wed Nov 5 13:17:49 1997
***************
*** 16,21 ****
--- 16,25 ----
**
** Changed removestops to support printing of stop words
** G. Hill 4/7/97
+ **
+ ** Added stripIgnoreChars and isIgnoreChar routines which iteratively
+ ** remove all ignore characters from the end of each word.
+ ** P. Bergner 10/5/97 bergner@lcse.umn.edu
*/
#include "swish.h"
***************
*** 22,27 ****
--- 26,36 ----
#include "index.h"
#include "hash.h"
+ #ifdef IGNORELAST
+ void stripIgnoreChars(char *word);
+ int isIgnoreChar(char c);
+ #endif
+
/* Have we already indexed a file or directory?
** This function is used to avoid multiple index entries
** or endless looping due to symbolic links.
***************
*** 556,573 ****
strcpy(word, (char *)
convertentities(word));
/* Get rid of specified last char's */
! if (IGNORELAST) {
! for (k=0; word[k] != '\0'; k++)
! ;
! /* Move back one to find the last char */
! k--;
! for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
! if (word[k] == IGNORELASTCHAR[q]){
! word[k] = '\0';
! break;
! }
! }
/* Sorry, have to do isokword() twice to filter out converted strings! */
if (hasokchars(word) && isokword(word)) {
--- 565,575 ----
strcpy(word, (char *)
convertentities(word));
+ #ifdef IGNORELAST
/* Get rid of specified last char's */
! stripIgnoreChars( word );
! #endif
!
/* Sorry, have to do isokword() twice to filter out converted strings! */
if (hasokchars(word) && isokword(word)) {
***************
*** 681,699 ****
strcpy(word, (char *)
convertentities(word));
! /* Get rid of specified last char's */
! if (IGNORELAST) {
! for (k=0; word[k] != '\0'; k++)
! ;
! /* Move back one to find the last char */
! k--;
! for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
! if (word[k] == IGNORELASTCHAR[q]){
! word[k] = '\0';
! break;
! }
! }
!
if (hasokchars(word) && isokword(word))
entrylist = (struct entry *)
addentry(entrylist, word,
--- 683,692 ----
strcpy(word, (char *)
convertentities(word));
! #ifdef IGNORELAST
! /* Get rid of specified last char's */
! stripIgnoreChars( word );
! #endif
if (hasokchars(word) && isokword(word))
entrylist = (struct entry *)
addentry(entrylist, word,
***************
*** 1274,1291 ****
if (isokword(word))
strcpy(word, (char *)
convertentities(word));
/* Get rid of specified last char's */
! if (IGNORELAST) {
! for (k=0; word[k] != '\0'; k++)
! ;
! /* Move back one to find the last char */
! k--;
! for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
! if (word[k] == IGNORELASTCHAR[q]){
! word[k] = '\0';
! break;
! }
! }
if (hasokchars(word) && isokword(word))
entrylist = (struct entry *)
addentry(entrylist, word,
--- 1267,1276 ----
if (isokword(word))
strcpy(word, (char *)
convertentities(word));
+ #ifdef IGNORELAST
/* Get rid of specified last char's */
! stripIgnoreChars( word );
! #endif
if (hasokchars(word) && isokword(word))
entrylist = (struct entry *)
addentry(entrylist, word,
***************
*** 1305,1307 ****
--- 1290,1330 ----
}
return wordcount;
}
+
+ #ifdef IGNORELAST
+
+ /* These 2 routines fix the problem when a word ends with multiple
+ ** IGNORELASTCHAR's (eg, qwerty'. ). The old code correctly deleted
+ ** the ".", but didn't check if the new last character ("'") is also
+ ** an ignore character.
+ */
+
+ void stripIgnoreChars(char *word)
+ {
+     int i;
+
+     /* Find the terminating NUL; i ends up one past the last character */
+     for (i=0; word[i] != '\0'; i++)
+         ;
+     /* Strip trailing ignore chars; i > 0 keeps us from reading word[-1] */
+     while ( i > 0 && isIgnoreChar(word[i-1]) )
+         word[--i] = '\0';
+ }
+
+ int isIgnoreChar(char c)
+ {
+     const char *p = IGNORELASTCHAR;
+
+     /* Walk IGNORELASTCHAR up to (not including) its terminating NUL;
+     ** answer TRUE on the first character equal to c, FALSE if none is.
+     */
+     while (*p != '\0')
+         if (c == *p++)
+             return TRUE;
+
+     return FALSE;
+ }
+ #endif
+
+
+
Received on Wed Nov 5 11:46:55 1997