Skip to main content.
home | support | download

Back to List Archive

Perl Scripts

From: <Jerry.Porter(at)not-real.targetbase.com>
Date: Wed Aug 04 1999 - 18:45:37 GMT
Hello All,

Thanks to David Norris I have a windows executable thst allows me to index files
with colons.
It seems now I can get my results back with colons imbedded in words.
But, I only get the results back If I execute the search at the command line. If
I execute the search VIA my perl script I get nothing back for the words that
have colons. Searches for words that do not have colons work fine. I do not know
much about perl. Do you have any pointers?

Jerry

Perl sript is below:

#!D:/perl5/bin/perl 
# Change the above line to reflect the location of your installation of PERL
#
# -------------------------------------------------------------------
# Program: ARManswers-cgi.pl
# Author : Jerry E. Porter
#
# Purpose: A gateway interface (CGI) to the SWISH Searcher/indexer
#
# Instructions: 
#
# 1. Install and configure SWISH -- 
#    Available from Enterprise Integration Technologies at 
#    http://www.eit.com/goodies/software/swish/swish.html
#
# 2. Index your site so that SWISH returns the url for each file
#    ie.  Swish should return http://www.yoursite.edu/file_name.html
#    See the SWISH documentation about REPLACE_RULES to see how.
#
# 3. Customize the User-Defined variables below to reflect your site.
#
# 4. Install this file in your cgi directory.  This may vary 
#    from site to site, but is usually in a directory like cgi-bin
#   
# 5. Create a link from your pages to the cgi
#    ex.  http://www.yoursite.edu/cgi-bin/swish-cgi
#    Running the cgi as a URL will generate a blank query form on the fly.
#    
#
# Note: if you don't like the the initial form that comes up, you can modify
#       the print_form subroutine
# 
#       To change the format of the returned results, you may modify the 
#       print_results subroutine


# -------- User defined configuration variables -----------

# Absolute path and command to execute the Swish searcher
   $swish = "D:/swish/swishe.exe";     

# URL of where you put this cgi
   $swishcgi = "http://armanswers.marcgroup.com/cgi-bin/ARManswers-cgi.pl";

# Optional parameters to pass to the Swish searcher
   $params = " ";                            

# Absolute path and filename of your created Swish index file 
   $index1 = "P:/htdocs/ARManswers.WOF/ARManswers.index";
   $index2 = "P:/htdocs/Source/Source.index";

# The Full name of your organization -- Printed with Search Results
   $organization = "Targetbase Marketing";

# The full name of your department -- Printed with search Results
   $department = "ARManswers Search Engine";

# ------ End of Configuration Variables ------------


#sub read_form
# Reads in form data if it exists

read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});

# Split the Name value pairs
@pairs = split(/&/, $buffer);
foreach $pair (@pairs)
{
   ($name, $value) = split(/=/, $pair);
    
   # Un-Webify plus signs and %-encoding
   $value =~ tr/+/ /;
   $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
   push(@search_tags, $value), next if ($name eq 'search_tags');

   $FORM{$name} = $value
}

$query = $FORM{'query'};   
$results = $FORM{'results'};

if (@search_tags) 
{
   $tags = join("",@search_tags);
   $search_tags = "\-t $tags"; 
}
else
{
   $search_tags = "";
}

if ($query) 
{
   &search_parse;
}
else
{
   &print_form;
}

sub print_form
{
 &html_header("ARManswers Search Engine");
 # To change the form that get's generated on the fly, edit the HTML below.
 print <<EOF;

<CENTER>
<FORM ACTION="$swishcgi" method=post>
<table BORDER=1 CELLPADDING=1 CELLSPACING=2>  
<CAPTION>
Enter word(s). You can connect terms with <b>and</b> or <b>or</b><p>
    <b>and</b> will find items that contain both terms<br>
    <b>or</b> will find items that contain either word, but not necessarily 
    both<p>
   Example: NSDictionary <B>and</B> NSString 
</CAPTION>
<TR><TD ALIGN=CENTER COLSPAN=4>
   <INPUT NAME="query" SIZE=60>
</TD></TR>
<TR><TD ALIGN=CENTER COLSPAN=4>
<B>Maximum # of Items</B>
<SELECT name=results>
   <OPTION value=0> No Limit
   <OPTION value=10> 10
   <OPTION value=20> 20
   <OPTION value=30> 30
   <OPTION value=40> 40
   <OPTION value=50> 50
   <OPTION value=100> 100
</SELECT>
</TD></TR>
<!-- <TR><TH COLSPAN=4>
Search In the following Tags:<BR>
Leave Blank to search everything
</TH></TR>
<tr>
   <td><input type=checkbox name=search_tags value="t">Title Tags</td>
   <td><input type=checkbox name=search_tags value="h">Heading Tags</td>
   <td><input type=checkbox name=search_tags value="c">Comment Tags</td>
   <td><input type=checkbox name=search_tags value="e">Emphasized Text</td>
</tr> -->
</table>
<P>

<INPUT TYPE="submit" VALUE="Start Search">
<INPUT TYPE="reset" VALUE="Clear Form">
</FORM>    
</CENTER>

</BODY>
</HTML>
EOF
&html_trailer;
}

sub search_parse
# Run SWISH and parse output
{
#Initialize counter variable for number of results
$count = 0; 
open(SWISH, "$swish -w $query -m $results $search_tags -f $index1 $index2|");
while (<SWISH>)
{ 
  # First, check to see if search produced an error
   chop;
   if ($_ eq "err: no results") 
   {&search_error("<CENTER><FONT COLOR=RED SIZE=5><B>There were no items that
matched your search request.</B></FONT></CENTER>");}

   if ($_ eq "err: could not open index file.</B></FONT>") 
   {&search_error("<CENTER><FONT COLOR=RED SIZE=5><B>Could not open Index File
$index1.</B></FONT></CENTER>");}
   
   if ($_ eq "err: no search words specified") 
   {&search_error("<CENTER><FONT COLOR=RED SIZE=5><B>Please Enter at least one
Search Word.</B></FONT></CENTER>");}

   if ($_ eq "err: a word is too common") 
 {&search_error("<CENTER><FONT COLOR=RED SIZE=5><B>One of your search terms is
too common, please try again.</B></FONT></CENTER>");}
  
# Next Line ignores lines that begin with a non-digit
  next if /^\D/;
  $count++;
  push(@results, $_);

}
   &html_header("ARManswers Search Results");
   print "<FONT SIZE=3><B>The ARManswers search engine found the following
items\n";
   print " that might be relevant to your search topic.<BR>\n";
   print "A higher relevancy score means the item is more\n";
   print " likely to be what you are looking for.</B></FONT><BR><BR>\n";
   print "<CENTER>\n";
   print "Your Search for <B>$query</B>, returned \n";
   print "<BLINK><FONT SIZE=5 COLOR=BLUE><B>$count</B> </BLINK></FONT>Items.\n";
   print "</CENTER>\n";
   print "<hr>\n";
   print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17
src=..\/ARManswers\/Images\/search_button.gif width=73></A>\n";
   print "<TABLE WIDTH=100% BORDER=1 CELLPADDING=1 CELLSPACING=2>\n";
   print "<TR><TH WIDTH=*>Question</TH><TH WIDTH=1%>Relevancy<BR>Score</TH><TH
WIDTH=1%>Size of Document<BR>in bytes</TH></TR>\n";

   foreach (@results)
   {
     select(STDOUT);
     ($stringone, $title, $filesize) = split(/\"/, $_);
     ($rank, $url) = split(/ /, $stringone);
     print "<TR><TD BGCOLOR=\"#CCCCCC\"><B><a
href=\"$url\">$title</B></a></TD>\n";
     print "<TD ALIGN=RIGHT BGCOLOR=\"#CCCCCC\">$rank</TD><TD ALIGN=RIGHT
BGCOLOR=\"#CCCCCC\">$filesize Bytes</TD></TR>\n";
   }

   print "</TABLE>\n";

   &html_trailer;
}

sub search_error
{
   &html_header("ARManswers Search Results: ERROR");
   $error_message = $_[0];
   print "$error_message\n";
   print "<HR>\n";
   print "</CENTER>\n";
   print "<TABLE BORDER=0 CELLPADDING=2 CELLSPACING=2>\n";
   print "<TR><TD>\n";
   print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17
src=..\/ARManswers\/Images\/search_button.gif width=73></A>\n";
   print "</TD></TR>\n";
   print "</TABLE>\n";
   &html_trailer;
}

sub html_header
# This subroutine takes the document title as a command
# line parameter and adds header information to the top
# of the HTML document to be returned.

{
   $document_title = $_[0];
   print "Content-type: text/html\n\n";
   print "<HTML>\n";
   print "<HEAD>\n";
   print "<TITLE>$document_title</TITLE>\n";
   print "</HEAD>\n";
   print "<BODY BGCOLOR=#EEEEEE
background=..\/ARManswers\/Images\/background_main.gif>\n";
   print "<CENTER><H2>$document_title</H2></CENTER>\n";
   print "<HR>\n";
}

sub html_trailer
# This subroutine prints a suitable HTML trailer
{
   print "<HR>\n";
   print "$organization<br>\n";
   print "$department<P></body>\n";
   print "</body>\n</html>\n";
   exit;
}
Received on Wed Aug 4 11:43:52 1999