Skip to main content.
home | support | download

Back to List Archive

using regexes with IndexOnly

From: Earl Fogel <fogel(at)not-real.duke.usask.ca>
Date: Mon Oct 26 1998 - 17:39:01 GMT
Hello,

I needed a way to tell swish-e to only index files named index.html,
something that is not possible with the current version.

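Since the Swish-E folks added regex support to the FileRules directive,
it seemed that the best way to solve my problem was to add regex support
to the IndexOnly directive as well.  Each IndexOnly line is a separate
pattern, and a file is indexed if its name matches any one of them.
For example (two alternative patterns):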

	IndexOnly ^index\.html$
	IndexOnly \.html$

I'll append the diffs below.  I hope they can be incorporated into a future
release.  This is based on swish-e 1.2.4.

Earl Fogel
Computing Services              phone: (306) 966-4861
University of Saskatchewan      email: earl.fogel@usask.ca
--

*** fs.c.orig	Wed Oct 21 12:12:36 1998
--- fs.c	Mon Oct 26 11:29:09 1998
***************
*** 29,35 ****
  static void indexafile(char *path);
  static void printfiles(struct sortentry *e);
  static void printdirs(struct sortentry *e);
! static int isoksuffix(char *filename, struct swline *rulelist);
  static int ishtml(char *filename);
  static int isoktitle(char *title);
  
--- 29,35 ----
  static void indexafile(char *path);
  static void printfiles(struct sortentry *e);
  static void printdirs(struct sortentry *e);
! static int isokfname(char *filename, struct swline *rulelist);
  static int ishtml(char *filename);
  static int isoktitle(char *title);
  
***************
*** 41,47 ****
  static struct swline *fileconlist = 0;
  static struct swline *titconlist = 0;
  static struct swline *fileislist = 0;
! static struct swline *suffixlist = 0;
  static struct swline *nocontentslist = 0;
  
  
--- 41,47 ----
  static struct swline *fileconlist = 0;
  static struct swline *titconlist = 0;
  static struct swline *fileislist = 0;
! static struct swline *fnamelist = 0;
  static struct swline *nocontentslist = 0;
  
  
***************
*** 210,216 ****
  			if ( already_indexed(s) )
  				continue;
  			
! 			if (!isoksuffix(dp->d_name, suffixlist))
  				continue;
  			
  			if (ishtml(s)) {
--- 210,216 ----
  			if ( already_indexed(s) )
  				continue;
  			
! 			if (!isokfname(dp->d_name, fnamelist))
  				continue;
  			
  			if (ishtml(s)) {
***************
*** 294,300 ****
  	if (badfile)
  		return;
  	
! 	if (!isoksuffix(path, suffixlist))
  		return;
  	
  	if (ishtml(path)) {
--- 294,300 ----
  	if (badfile)
  		return;
  	
! 	if (!isokfname(path, fnamelist))
  		return;
  	
  	if (ishtml(path)) {
***************
*** 336,342 ****
  		}
  		if ((fp = fopen(e->filename, "r" )) != NULL ) {
  			wordcount = countwords(fp, e->filename, e->title,
! 				isoksuffix(e->filename, nocontentslist) && nocontentslist != NULL);
  			fclose(fp);
  		}
  		if (verbose == 3) {
--- 336,342 ----
  		}
  		if ((fp = fopen(e->filename, "r" )) != NULL ) {
  			wordcount = countwords(fp, e->filename, e->title,
! 				isokfname(e->filename, nocontentslist) && nocontentslist != NULL);
  			fclose(fp);
  		}
  		if (verbose == 3) {
***************
*** 418,424 ****
  				break;
  			else {
  				c += skiplen;
! 				suffixlist = (struct swline *)addswline(suffixlist, value);
  			}
  		}
      } else if ((c = (char *) lstrstr(line, "directory contains")) &&
--- 418,424 ----
  				break;
  			else {
  				c += skiplen;
! 				fnamelist = (struct swline *)addswline(fnamelist, value);
  			}
  		}
      } else if ((c = (char *) lstrstr(line, "directory contains")) &&
***************
*** 499,536 ****
  
  
  
! /* Check if a file with a particular suffix should be indexed
  ** according to the settings in the configuration file.
  */
  
! int isoksuffix(filename, rulelist)
  char *filename;
  struct swline *rulelist;
  {
! 	int badfile;
! 	char *c, suffix[MAXSUFFIXLEN], checksuffix[MAXSUFFIXLEN];
! 	struct swline *tmplist;
! 	
! 	tmplist = rulelist;
! 	if (tmplist == NULL)
  		return 1;
! 	if ((c = (char *) strrchr(filename, '.')) == NULL)
! 		return 0;
! 	
! 	badfile = 1;
! 	strcpy(checksuffix, c + 1);
! 	while (tmplist != NULL) {
! 		if ((c = (char *) strrchr(tmplist->line, '.')) == NULL)
! 			strcpy(suffix, tmplist->line);
! 		else
! 			strcpy(suffix, c + 1);
! 		if (lstrstr(suffix, checksuffix) && strlen(suffix) ==
! 			strlen(checksuffix))
! 			badfile = 0;
! 		tmplist = tmplist->next;
! 	}
! 	return !(badfile);
  }
  
  /* This checks is a filename has one of the following suffixes:
  ** "htm", "HTM", "html", "HTML", "shtml", "SHTML".
--- 499,524 ----
  
  
  
! /* Check if a file with a particular name should be indexed
  ** according to the settings in the configuration file.
+ ** Uses regular expressions: returns 1 if rulelist is empty or matchARegex accepts filename for any rule, else 0.
  */
  
! int isokfname(filename, rulelist)
  char *filename;
  struct swline *rulelist;
  {
! 	if (rulelist == NULL)
  		return 1;
! 
! 	while (rulelist != NULL) {
! 		if (matchARegex(filename, rulelist->line))
! 			return 1;
! 		rulelist = rulelist->next;
! 	}
! 	return 0;	/* list exhausted, no rule matched: reject explicitly */
  }
+ 
  
  /* This checks is a filename has one of the following suffixes:
  ** "htm", "HTM", "html", "HTML", "shtml", "SHTML".
Received on Mon Oct 26 09:49:18 1998