#!/usr/local/bin/perl
#########################################################
# part of the HTML Dictionary
# Distributed under the GNU copyleft (any version of your choice)
# No part of these documents may be printed in any for-profit publication
# copyleft sunil@magnetic.demon.co.uk
#########################################################
#########################################################
# this script makes use of Glimpse, developed by
# Udi Manber, Burra Gopal: University of Arizona
# Sun Wu : National Chung-Cheng University, Taiwan
#########################################################
#########################################################
# CONFIGURE THESE
#########################################################
#########################################################
# this little script assumes that the glimpse binaries and
# databases are stored under $glimpse_dir
#
#                        .....
#                          |
#                     glimpse_dir
#                          |
#               +----------+----------+
#              bin                databases
#                                     |
#                    +------+-------+-------+
#                    |      |       |       |
#                  this   that    other    misc
#                    |
#              +-----+----+
#              |          |
#             fat        thin
#              |          |
#      .glimpse_index  .glimpse_index
#      .glimpse_stat.. .glimpse...
#           ....
#
# the fat index should have been indexed using
# glimpseindex -o ....
# glimpseindex -B -f -s ....
#########################################################
#########################################################
# *** EXAMPLE ***
#$glimpse_dir="/usr/local/lib/glimpse";
#$db_name="all";
#$title="search my server";
#$doc_root="/usr/share/htdocs/";
#$doc_server="http://my_server:my_port/";
#$default_search_type = "fat";
#########################################################
# Site-specific settings: replace each placeholder value below.

# Directory that contains the glimpse "bin" and "databases" subdirectories.
$glimpse_dir            = '/where/everything/is_kept';

# Name of the index directory under "databases" that this script searches.
$db_name                = 'index_to_search';

# Text inserted into the page heading ("Search the <title>").
$title                  = 'title of search screen';

# NOTE(review): presumably the filesystem root of the indexed documents and
# the URL prefix they are served under; their use lies outside this section.
$doc_root               = '/physical_location_of_docs/';
$doc_server             = 'http://my_server:my_port/';

# Which index to use by default: "fat" or "thin".
$default_search_type    = 'fat';

# Default case sensitivity: 0 or 1.
$default_case_sensitive = 0;
#########################################################
#
# Nothing to configure below here
#
#########################################################
# Default maximum number of hits.
$default_max_hits = 20;

# Locations derived from $glimpse_dir.
$glimpse_bin = $glimpse_dir . "/bin";
$glimpse_dbs = $glimpse_dir . "/databases";

# NOTE(review): presumably the separator glimpse prints between a file name
# and the matched text; its use lies outside this section.
$glimpse_delim = ": ";

# Helper library; presumably provides the &GET_FIELDS / &get_this_URL
# routines called immediately below.
require "www_lib.pl";

%FIELDS = &GET_FIELDS();
$my_url = &get_this_URL();    # this may fail on cern httpd

# Per-database index directories: <databases>/<db_name>/fat and .../thin.
$fat_db_dir  = join("/", $glimpse_dbs, $db_name, "fat");
$thin_db_dir = join("/", $glimpse_dbs, $db_name, "thin");

# Names of the form fields.
($input_field, $result_field, $match_field,
 $word_field,  $error_field,  $case_field) =
    ("input_field", "result_field", "match_field",
     "word_field",  "error_field",  "case_field");

# Bare mention only: this statement has no effect and does not assign.
@result_set;
$this_is_a_subset = 0;

# Option tables for the form's select lists. The hashes map the label shown
# to the user onto a short internal value.
%word_options = (
    "Match whole words"     => "whole",
    "Allow fuzzy searching" => "partial",
);

@case_options = qw(Yes No);

@match_options = (
    "a ridiculously tiny 10",
    "a modest 20",
    "an exceptionally normal 30",
    "a larger than life 40",
    "I dont care give me the lot",
);

%result_options = (
    "Just the number of matches" => "thin",
    "with contextual text"       => "fat",
);

@error_options = (0, 1, 3, 5, 8);

# Bare mention only: this statement has no effect and does not assign.
%TITLE_LIST;

# Autoflush the currently selected output handle (no buffering).
$| = 1;
#########################################################
#
#########################################################
sub show_query_form
{
local (@keys);
&PRINT_HEADER ("Search the $title");
@keys = keys %FIELDS;
if (@keys)
{
&h2 ("Search Expression - You didnt enter an expression");
}
else
{
&h2 ("Search Expression");
}
#-------------------------------------------------------------
&form ($my_url);
print "Enter the search expression ";
&h3("options");
&ul;
&li("");
&gen_labelled_select (
"Result type",
$result_field,
keys (%result_options));
&li("");
&gen_labelled_select (
"number of matches",
$match_field,
@match_options);
&li("");
&gen_labelled_select (
"Search Type",
$word_field,
keys (%word_options));
&li("");
&gen_labelled_select (
"match case?",
$case_field,
@case_options);
&li(""); #too slow!!!
&gen_labelled_select (
"errors allowed",
$error_field,
@error_options);
&_ul;
$_form;
print "
";
#-------------------------------------------------------------
&h2 ("Notes");
print "The search engine supports regular expressions";
&dl;
&dt;
&bold;
print "Special characters";
&_bold;
ⅆ
print "
The following characters are reserved to the search engine. They should
be escaped by preceeding with a back-slash if you wish to
search for them.
^ \$ * [ ] | ( ) ! \ ; , # < > - .
^ matches the beginning of a line \$ matches the end of a line . matches any single character \# matches any number of characters * matches any number of the previous character";
&p;
&dt;
&bold;
print "Sets";
&_bold;
ⅆ
print "
a set of characters inside [] matches any of the characters
in that set.
[a-ho-z]
is any character between a and h or between o and z.
[^i-n]
matches any character in the character set except characters
'i' to 'n'.
";
&p;
&dt;
&bold;
print "Complex operations";
&_bold;
ⅆ
print "
You can contruct boolean expressions using \"AND\" and
\"OR\". Complex expressions can be built by surrounding
patterns with curly brackets {}.
'{political OR computer} AND science
will match 'political science' or 'computer science'.";
&p;
&dt;
&bold;
print "exact matches";
&_bold;
ⅆ
print "
the default behaviour is to allow mistakes in the words being
searched for. Surrounding an expression in angle brackets < >
forces an exact match on that part of the expression.
<mathemat>ics
matches mathematical with one or more errors allowed
mathe<matics>
does not match mathematical no matter how many errors are allowed.