The first thing I wanted to do after getting swish running was to
index a list of files built by a "sitecrawler". This ensures that the
files indexed by swish are only those "in use" and reachable by links
in the HTML documents on the site.
The workaround suggested in the FAQ, creating a custom index file each
time, seemed rather bogus.
So, to that end, here is a patch to swish-e that adds a command-line
option (-I) naming one or more list files: each list file should contain
the names of the files (or directories) to be indexed, one per line.
Creating such a file is left as an exercise for the reader.
---
Tom Malaher - Internet Consultant NetStart Consulting Ltd.
Internet: tmalaher@netstart.com 5016 Norquay Dr NW
Physical: 51 03 N / 114 05 W TZ: Mountain Calgary, AB T2K 2L3
Phone : 403-651-6340 Fax: 403-289-0707 Canada
------------------------------------------------------------------------
*** swish.h.orig Thu Feb 19 09:20:54 1998
--- swish.h Thu Feb 19 09:21:10 1998
***************
*** 296,299 ****
--- 296,300 ----
void printversion();
void printrunning();
long getthetime();
+ void indexfilelist();
#endif
*** swish.c.orig Thu Feb 19 09:20:54 1998
--- swish.c Thu Feb 19 09:26:23 1998
***************
*** 73,78 ****
--- 73,85 ----
argc--;
}
}
+ else if (c == 'I') {
+ index = 1;
+ while ((argv + 1)[0] != '\0' && *(argv + 1)[0] != '-') {
+ indexfilelist((++argv)[0]);
+ argc--;
+ }
+ }
else if (c == 'w') {
while ((argv + 1)[0] != '\0' && *(argv + 1)[0] != '-') { strcpy(word, (++argv)[0]);
argc--;
***************
*** 422,427 ****
--- 429,460 ----
exit(0);
}
+ /* Reads a file full of names of files/dirs to be indexed
+ */
+
+ void indexfilelist(file)
+ char *file;
+ {
+ char line[MAXSTRLEN];
+ FILE *fp;
+
+ if (verbose > 1)
+ printf("Reading list of files from '%s'\n",file);
+ if ((fp = fopen(file, "r")) == NULL) {
+ sprintf(errorstr,
+ "Couldn't open the filelist file \"%s\".", file);
+ progerr(errorstr);
+ }
+ while (fgets(line, MAXSTRLEN, fp) != NULL) {
+ if (line[strlen(line)-1] == '\n')
+ line[strlen(line)-1]=0;
+ if (verbose>2)
+ printf(" Adding file '%s'\n",line);
+ dirlist = (struct swline *) addswline(dirlist, line);
+ }
+ fclose(fp);
+ }
+
/* Gets the current time in seconds since the epoch.
*/
***************
*** 462,468 ****
void usage()
{
! printf(" usage: swish [-i dir file ... ] [-c file] [-f file] [-l] [-v (num)]\n");
printf(" swish -w word1 word2 ... [-f file1 file2 ...] [-C file] [-m num] [-t str]\n");
printf(" swish -M index1 index2 ... outputfile\n");
printf(" swish -D file\n");
--- 495,501 ----
void usage()
{
! printf(" usage: swish [-i dirs files] [-I files] [-c file] [-f file] [-l] [-v (num)]\n");
printf(" swish -w word1 word2 ... [-f file1 file2 ...] [-C file] [-m num] [-t str]\n");
printf(" swish -M index1 index2 ... outputfile\n");
printf(" swish -D file\n");
***************
*** 470,475 ****
--- 503,509 ----
putchar('\n');
printf("options: defaults are in brackets\n");
printf(" -i : create an index from the specified files\n");
+ printf(" -I : files containing lists of files to be indexed\n");
printf(" -w : search for words \"word1 word2 ...\"\n");
printf(" -t : tags to search in - specify as a string\n");
printf(" \"HBthec\" - in head, body, title, header,\n");
Received on Thu Feb 19 09:18:01 1998