summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htsearch
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htsearch')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/.cvsignore8
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.cc150
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.h42
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/AndQueryParser.h33
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.cc76
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.h50
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.cc238
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.h43
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Collection.cc105
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Collection.h73
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Display.cc1956
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Display.h238
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.cc222
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.h109
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.cc53
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.h71
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/FuzzyExpander.h46
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/GParser.cc134
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/GParser.h47
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.cc215
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.h55
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Makefile.am35
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Makefile.in519
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Makefile.win3230
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.cc143
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.h50
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.cc110
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.h42
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.cc49
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.h68
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.cc94
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.h49
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.cc126
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.h39
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/OrQueryParser.h33
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.cc175
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.h45
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Query.cc89
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Query.h77
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/QueryCache.h45
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.cc84
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.h71
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.cc134
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.h75
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ResultList.cc151
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ResultList.h50
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.cc296
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.h89
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/SimpleLexer.h29
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.cc96
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.h52
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc184
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.h53
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Template.cc81
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/Template.h54
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.cc106
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.h40
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.cc77
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.h44
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.cc146
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.h50
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.cc109
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.h49
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc957
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/htsearch.h71
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/parser.cc918
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/parser.h78
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/parsetest.cc175
-rw-r--r--debian/htdig/htdig-3.2.0b6/htsearch/qtest.cc252
69 files changed, 10353 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/.cvsignore b/debian/htdig/htdig-3.2.0b6/htsearch/.cvsignore
new file mode 100644
index 00000000..f4f41320
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/.cvsignore
@@ -0,0 +1,8 @@
+Makefile
+*.lo
+*.la
+.purify
+.pure
+.deps
+.libs
+htsearch
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.cc
new file mode 100644
index 00000000..a1a608e5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.cc
@@ -0,0 +1,150 @@
+//
+// AndQuery.cc
+//
+// AndQuery: an operator query that does 'and' combination
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: AndQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+
+#include "AndQuery.h"
+//
+// l r and
+// ----------------------
+// 0 0 0
+// 0 b 0
+// 0 x 0
+// a 0 0
+// a b intersect(a,b)
+// a x a
+// x 0 0
+// x b b
+// x x x
+//
+// i.e. some 0 => 0
+// ignores can be left out of intersection
+// the shorter of the result lists is put apart for intersection
+// this optimises the intersection process
+//
+
+//
+// Evaluate all operands and combine their result lists with 'and'
+// semantics (see the truth table above).  Pass 1 scans until it finds
+// a non-"ignore" result list; if every operand is ignorable the last
+// (ignore) list is returned unchanged.
+//
+ResultList *
+AndQuery::Evaluate()
+{
+    ResultList *result = 0;
+    ResultList *shorter = 0;
+
+    operands.Start_Get();
+    Query *operand = (Query *) operands.Get_Next();
+    // pass 1: find the first operand whose results are not "ignore";
+    // a null result aborts the whole conjunction (some 0 => 0)
+    while(operand && !shorter)
+    {
+        result = operand->GetResults();
+        if(!result)
+        {
+            break;
+        }
+        if(!result->IsIgnore())
+        {
+            shorter = result;
+        }
+        operand = (Query *) operands.Get_Next();
+    }
+    if(shorter)
+    {
+        List longer;
+        // pass 2: gather the remaining non-ignore lists, keeping the
+        // shortest one in 'shorter' to drive the intersection below
+        while(operand && result)
+        {
+            result = operand->GetResults();
+            // NOTE(review): if an operand here returns a null list, the
+            // loop just stops and the lists gathered so far are still
+            // intersected (or 'shorter' is copied), which looks
+            // inconsistent with the "some 0 => 0" rule above -- confirm.
+            if(result && !result->IsIgnore())
+            {
+                if(result->Count() < shorter->Count())
+                {
+                    longer.Add(shorter);
+                    shorter = result;
+                }
+                else
+                {
+                    longer.Add(result);
+                }
+            }
+            operand = (Query *) operands.Get_Next();
+        }
+        if(longer.Count())
+        {
+            result = Intersection(*shorter, longer);
+            // Release, not Destroy: the lists belong to the operands
+            longer.Release();
+        }
+        else
+        {
+            // single effective operand: hand back a copy of its list
+            result = new ResultList(*shorter);
+        }
+    }
+    return result;
+}
+
+//
+// return a result list containing only the matches common to
+// all input parameters.
+//
+// l is iterated, matches from l are searched in all elements of rs
+//
+//
+// foreach match in shorter
+// confirm the match in each lists
+// if confirmed
+// copy all matches in result
+//
+// the shorter of the input lists is assumed to be in the first parameter
+// this is a modest optimisation in order to minimise iteration
+//
+
+//
+// Build a new ResultList holding only the documents present in
+// 'shorter' and in every list in 'lists'.  Matching DocMatch objects
+// are copied and merged, so the input lists are left untouched.
+// Returns null when the intersection is empty.
+//
+ResultList *
+AndQuery::Intersection(const ResultList &shorter, const List &lists)
+{
+    ResultList *result = 0;
+    DictionaryCursor c;
+    shorter.Start_Get(c);
+    DocMatch *match = (DocMatch *)shorter.Get_NextElement(c);
+    while(match)
+    {
+        // matches with the same document id found in the other lists
+        List confirms;
+
+        ListCursor lc;
+        lists.Start_Get(lc);
+        ResultList *list = (ResultList *)lists.Get_Next(lc);
+        while(list)
+        {
+            DocMatch *confirm = list->find(match->GetId());
+            if(confirm)
+            {
+                confirms.Add(confirm);
+            }
+            list = (ResultList *)lists.Get_Next(lc);
+        }
+        // keep the document only if every list confirmed it
+        if(confirms.Count() == lists.Count())
+        {
+            if(!result)
+            {
+                result = new ResultList;
+            }
+            // merge all confirming matches into a fresh copy
+            DocMatch *copy = new DocMatch(*match);
+            confirms.Start_Get();
+            DocMatch *confirm = (DocMatch *)confirms.Get_Next();
+            while(confirm)
+            {
+                copy->Merge(*confirm);
+                confirm = (DocMatch *)confirms.Get_Next();
+            }
+            result->add(copy);
+        }
+        // Release: 'confirms' does not own the DocMatch objects
+        confirms.Release();
+        match = (DocMatch *)shorter.Get_NextElement(c);
+    }
+    return result;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.h
new file mode 100644
index 00000000..f93ccca7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/AndQuery.h
@@ -0,0 +1,42 @@
+#ifndef _AndQuery_h_
+#define _AndQuery_h_
+
+//
+// AndQuery.h
+//
+// AndQuery: an operator query that does 'and' combination
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: AndQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "OperatorQuery.h"
+
+//
+// and query
+//
+class AndQuery : public OperatorQuery
+{
+public:
+
+private:
+    // evaluate operands and intersect results
+    // (see the truth table in AndQuery.cc for the exact semantics)
+    ResultList *Evaluate();
+
+    // create an intersection of the operand results
+    // 'shorter' drives the iteration; 'longer' holds the other lists
+    ResultList *Intersection(const ResultList &shorter, const List &longer);
+
+    // used by GetLogicalWords
+    String OperatorString() const { return String("and"); }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/AndQueryParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/AndQueryParser.h
new file mode 100644
index 00000000..a17c80fb
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/AndQueryParser.h
@@ -0,0 +1,33 @@
+#ifndef _AndQueryParser_h_
+#define _AndQueryParser_h_
+
+//
+// AndQueryParser.h
+//
+// AndQueryParser: a simple query parser for 'all words' queries
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: AndQueryParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "SimpleQueryParser.h"
+#include "AndQuery.h"
+
+class AndQueryParser : public SimpleQueryParser
+{
+public:
+    AndQueryParser() {}
+
+private:
+    // factory hook for SimpleQueryParser: combine all the words of an
+    // 'all words' query under a single AndQuery node
+    OperatorQuery *MakeQuery()
+    {
+        return new AndQuery;
+    }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.cc b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.cc
new file mode 100644
index 00000000..3c965d72
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.cc
@@ -0,0 +1,76 @@
+//
+// BooleanLexer.cc
+//
+// BooleanLexer: lexical analyzer for boolean query expressions.
+// defines terminal symbols
+// "word", and, or, not, near, (, ), /
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: BooleanLexer.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "BooleanLexer.h"
+// True when the current token equals the keyword "or".
+bool
+BooleanLexer::IsOr() const
+{
+    return current == String("or");
+}
+
+// True when the current token equals the keyword "and".
+bool
+BooleanLexer::IsAnd() const
+{
+    return current == String("and");
+}
+
+// True when the current token equals the keyword "not".
+bool
+BooleanLexer::IsNot() const
+{
+    return current == String("not");
+}
+
+// True when the current token equals the keyword "near".
+bool
+BooleanLexer::IsNear() const
+{
+    return current == String("near");
+}
+
+// True when the current token is the "/" sign (near/N distance syntax).
+bool
+BooleanLexer::IsSlash() const
+{
+    return current == String("/");
+}
+
+// True when the current token is an opening parenthesis.
+bool
+BooleanLexer::IsLeftParen() const
+{
+    return current == String("(");
+}
+
+
+// True when the current token is a closing parenthesis.
+bool
+BooleanLexer::IsRightParen() const
+{
+    return current == String(")");
+}
+
+//
+// A token is a plain search word when it is none of the other
+// terminal symbols this lexer recognizes.
+// (The original expression tested !IsAnd() twice; the duplicate has
+// been removed -- behavior is unchanged.)
+//
+bool
+BooleanLexer::IsWord() const
+{
+    return !IsEnd()
+        && !IsQuote()
+        && !IsRightParen()
+        && !IsLeftParen()
+        && !IsSlash()
+        && !IsAnd()
+        && !IsOr()
+        && !IsNot()
+        && !IsNear();
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.h b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.h
new file mode 100644
index 00000000..677f9ed3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanLexer.h
@@ -0,0 +1,50 @@
+#ifndef _BooleanLexer_h_
+#define _BooleanLexer_h_
+
+//
+// BooleanLexer.h
+//
+// BooleanLexer: lexical analyzer for boolean query expressions.
+// defines terminal symbols
+// "word", and, or, not, near, (, ), /
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: BooleanLexer.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryLexer.h"
+
+class BooleanLexer : public QueryLexer
+{
+public:
+    // is the current token a word?
+    // (defined negatively in BooleanLexer.cc: any token that is none
+    // of the keywords/signs below, end-of-input, or a quote)
+    bool IsWord() const;
+
+    // is the current token the 'and' keyword?
+    bool IsAnd() const;
+
+    // is the current token the 'or' keyword?
+    bool IsOr() const;
+
+    // is the current token the 'near' keyword?
+    bool IsNear() const;
+
+    // is the current token the 'not' keyword?
+    bool IsNot() const;
+
+    // is the current token the '(' sign?
+    bool IsLeftParen() const;
+
+    // is the current token the ')' sign?
+    bool IsRightParen() const;
+
+    // is the current token the '/' sign?
+    bool IsSlash() const;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.cc b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.cc
new file mode 100644
index 00000000..dc5451a7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.cc
@@ -0,0 +1,238 @@
+//
+// BooleanQueryParser.cc
+//
+// BooleanQueryParser: Query parser for full-blown boolean expressions
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: BooleanQueryParser.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "BooleanQueryParser.h"
+
+#include "OrQuery.h"
+#include "NotQuery.h"
+#include "AndQuery.h"
+#include "NearQuery.h"
+#include "PhraseQuery.h"
+#include "FuzzyExpander.h"
+
+//
+// expr == andlist ( 'or' andlist )
+//
+Query *
+BooleanQueryParser::ParseExpression()
+{
+    Query *result = 0;
+    Query *term = ParseAnd();
+    if(term)
+    {
+        // build an OrQuery node only when at least one 'or' appears;
+        // a single operand is returned as-is
+        if(token.IsOr())
+        {
+            result = new OrQuery;
+            result->Add(term);
+            while(term && token.IsOr())
+            {
+                token.Next();
+                term = ParseAnd();
+                if(term)
+                {
+                    result->Add(term);
+                }
+            }
+        }
+        else
+        {
+            result = term;
+        }
+    }
+    // a missing operand after 'or' invalidates the whole expression
+    if(!term && result)
+    {
+        delete result;
+        result = 0;
+    }
+    return result;
+}
+
+//
+// notlist = nearlist { 'not' nearlist }
+//
+Query *
+BooleanQueryParser::ParseNot()
+{
+    Query *result = 0;
+    Query *near = ParseNear();
+    if(near)
+    {
+        // build a NotQuery node only when at least one 'not' appears;
+        // the first operand is the positive term, the rest are negated
+        if(token.IsNot())
+        {
+            result = new NotQuery();
+            result->Add(near);
+            while(near && token.IsNot())
+            {
+                token.Next();
+                near = ParseNear();
+                if(near)
+                {
+                    result->Add(near);
+                }
+            }
+        }
+        else
+        {
+            result = near;
+        }
+    }
+    // a missing operand after 'not' invalidates the whole term
+    if(!near && result)
+    {
+        delete result;
+        result = 0;
+    }
+    return result;
+}
+
+//
+// andlist = notlist { 'and' notlist }
+//
+Query *
+BooleanQueryParser::ParseAnd()
+{
+    Query *result = 0;
+    Query *notList = ParseNot();
+
+    if(notList)
+    {
+        // build an AndQuery node only when at least one 'and' appears
+        if(token.IsAnd())
+        {
+            result = new AndQuery();
+            result->Add(notList);
+            while(notList && token.IsAnd())
+            {
+                token.Next();
+                notList = ParseNot();
+                if(notList)
+                {
+                    result->Add(notList);
+                }
+            }
+        }
+        else
+        {
+            result = notList;
+        }
+    }
+    // a missing operand after 'and' invalidates the whole term
+    if(!notList && result)
+    {
+        delete result;
+        result = 0;
+    }
+    return result;
+}
+
+//
+// near == factor { 'near' [ '/' number ] factor }
+// 'near' query is binary
+//
+Query *
+BooleanQueryParser::ParseNear()
+{
+    Query *result = ParseFactor();
+    // left-associative chain: a near b near c == (a near b) near c
+    while(result && token.IsNear())
+    {
+        token.Next();
+        // NOTE(review): default distance is hard-coded to 10; the
+        // commented-out code suggests it was meant to come from the
+        // configuration -- confirm intended source.
+        int distance = 10; // config["default_near_distance"];
+        if(token.IsSlash())
+        {
+            distance = 0;
+            token.Next();
+            if(token.IsWord())
+            {
+                // explicit distance: near/N
+                distance = token.Value().as_integer();
+                token.Next();
+            }
+        }
+        if(distance > 0)
+        {
+            Query *right = ParseFactor();
+            if(right)
+            {
+                Query *tmp = new NearQuery(distance);
+                tmp->Add(result);
+                tmp->Add(right);
+                result = tmp;
+            }
+            else
+            {
+                // missing right operand: fail the whole term
+                delete result;
+                result = 0;
+            }
+        }
+        else
+        {
+            // near/0 or a non-numeric distance is a syntax error
+            Expected("a distance > 0 for 'Near'");
+            delete result;
+            result = 0;
+        }
+    }
+    return result;
+}
+
+//
+// factor == word | '"' phrase '"' | '(' expression ')'
+//
+Query *
+BooleanQueryParser::ParseFactor()
+{
+    Query *result = 0;
+
+    if(token.IsWord())
+    {
+        // plain word (handled by the parent class)
+        result = ParseWord();
+    }
+    else if(token.IsQuote())
+    {
+        // quoted phrase: the closing quote is mandatory
+        token.Next();
+        result = ParsePhrase();
+        if(result)
+        {
+            if(token.IsQuote())
+            {
+                token.Next();
+            }
+            else
+            {
+                Expected("closing \"");
+                delete result;
+                result = 0;
+            }
+        }
+    }
+    else if(token.IsLeftParen())
+    {
+        // parenthesized sub-expression: ')' is mandatory
+        token.Next();
+        result = ParseExpression();
+        if(result)
+        {
+            if(token.IsRightParen())
+            {
+                token.Next();
+            }
+            else
+            {
+                Expected(")");
+                delete result;
+                result = 0;
+            }
+        }
+    }
+    else
+    {
+        Expected("'(', '\"', or a word");
+    }
+    return result;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.h
new file mode 100644
index 00000000..d65eaa8f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/BooleanQueryParser.h
@@ -0,0 +1,43 @@
+#ifndef _BooleanQueryParser_h_
+#define _BooleanQueryParser_h_
+
+//
+// BooleanQueryParser.h
+//
+// BooleanQueryParser: Query parser for full-blown boolean expressions
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: BooleanQueryParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryParser.h"
+#include "BooleanLexer.h"
+
+class BooleanQueryParser : public QueryParser
+{
+public:
+    BooleanQueryParser() {}
+    ~BooleanQueryParser() {}
+
+private:
+    // recursive parse levels, from lowest to highest precedence:
+    //   expression = and { 'or' and }
+    //   and        = not { 'and' not }
+    //   not        = near { 'not' near }
+    //   near       = factor { 'near' [ '/' number ] factor }
+    //   factor     = word | '"' phrase '"' | '(' expression ')'
+    // returning constructed query trees
+    Query *ParseExpression();
+    Query *ParseAnd();
+    Query *ParseNot();
+    Query *ParseNear();
+    Query *ParseFactor();
+
+    // lexer access needed by parent class
+    QueryLexer &Token() { return token; }
+
+    // the lexical analyzer
+    BooleanLexer token;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Collection.cc b/debian/htdig/htdig-3.2.0b6/htsearch/Collection.cc
new file mode 100644
index 00000000..9e40f230
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Collection.cc
@@ -0,0 +1,105 @@
+//
+// Collection.cc
+//
+// Collection: Specifies a list of databases to use in the search
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Collection.cc,v 1.7 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "htsearch.h"
+#include "Collection.h"
+#include "ResultMatch.h"
+#include "WeightWord.h"
+#include "StringMatch.h"
+#include "QuotedStringList.h"
+#include "URL.h"
+#include "HtURLCodec.h"
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#include <stdio.h>
+#include <ctype.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <syslog.h>
+#endif
+
+#include <locale.h>
+
+//*****************************************************************************
+//
+// Remember the database file names for this collection; the databases
+// themselves are opened lazily by Open().
+Collection::Collection(const char *name, const char *word_file,
+                       const char *index_file, const char *doc_file,
+                       const char *doc_excerpt)
+{
+    collectionName = name;
+    wordFile = word_file;
+    indexFile = index_file;
+    docFile = doc_file;
+    docExcerpt = doc_excerpt;
+    matches = NULL;
+    searchWords = NULL;
+    searchWordsPattern = NULL;
+    isopen = 0;
+}
+
+// Release the search state owned by this collection and close the
+// document database.  'delete' on a null pointer is a no-op, so the
+// original "if(p) delete p;" guards were unnecessary.
+Collection::~Collection()
+{
+    delete matches;
+    delete searchWords;
+    delete searchWordsPattern;
+    Close();
+}
+
+// Open the document database on first use; subsequent calls are no-ops
+// (isopen is set unconditionally after the check).
+void
+Collection::Open()
+{
+    if (!isopen)
+    {
+        docDB.Read(docFile, indexFile, docExcerpt);
+    }
+    isopen = 1;
+}
+
+// Close the document database if it was opened; safe to call twice.
+void
+Collection::Close()
+{
+    if (isopen)
+    {
+        docDB.Close();
+    }
+    isopen = 0;
+}
+
+// Fetch the document reference for 'id', opening the database on demand.
+// NOTE(review): judging by Display::display(), the caller owns and
+// deletes the returned DocumentRef -- confirm.
+DocumentRef *
+Collection::getDocumentRef(int id)
+{
+    Open();
+    return docDB[id];
+}
+
+// Load the stored excerpt for 'ref', opening the database on demand;
+// returns whatever DocumentDB::ReadExcerpt reports.
+int
+Collection::ReadExcerpt(DocumentRef &ref)
+{
+    Open();
+    return docDB.ReadExcerpt(ref);
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Collection.h b/debian/htdig/htdig-3.2.0b6/htsearch/Collection.h
new file mode 100644
index 00000000..5b61e6bb
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Collection.h
@@ -0,0 +1,73 @@
+//
+// Collection.h
+//
+// Collection: Specifies a list of databases to use in the search
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Collection.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+#ifndef _Collection_h_
+#define _Collection_h_
+
+#include "Object.h"
+#include "ResultList.h"
+#include "ResultMatch.h"
+#include "TemplateList.h"
+#include "cgi.h"
+#include "StringMatch.h"
+#include "List.h"
+#include "DocumentDB.h"
+#include "Database.h"
+#include "Dictionary.h"
+
+class Collection : public Object
+{
+public:
+    //
+    // Construction/Destruction
+    //
+    Collection(const char *name, const char *wordFile,
+        const char *indexFile, const char *docFile,
+        const char *docExcerpt);
+    ~Collection();
+
+    // NOTE: the original declarations were written as
+    // "void Collection::Open();" -- a member declaration must not be
+    // qualified with its own class name (ill-formed C++; rejected by
+    // conforming compilers), so the extra qualification is dropped.
+    void Open();
+
+    void Close();
+
+    char *getWordFile() { return wordFile.get(); }
+    DocumentRef *getDocumentRef(int id);
+    ResultList *getResultList() { return matches; }
+    void setResultList(ResultList *list) { matches = list; }
+
+    List *getSearchWords() { return searchWords; }
+    void setSearchWords(List *list) { searchWords = list; }
+
+    StringMatch *getSearchWordsPattern() { return searchWordsPattern;}
+    void setSearchWordsPattern(StringMatch *smatch)
+        { searchWordsPattern = smatch; }
+
+    int ReadExcerpt(DocumentRef &ref);
+
+protected:
+    String collectionName;
+    String wordFile;
+    String indexFile;
+    String docFile;
+    String docExcerpt;
+    ResultList *matches;
+    List *searchWords;
+    StringMatch *searchWordsPattern;
+
+    DocumentDB docDB;
+    // Database *docIndex;
+
+    int isopen;
+};
+
+#endif // _Collection_h_
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Display.cc b/debian/htdig/htdig-3.2.0b6/htsearch/Display.cc
new file mode 100644
index 00000000..f2300137
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Display.cc
@@ -0,0 +1,1956 @@
+//
+// Display.cc
+//
+// Display: Takes results of search and fills in the HTML templates
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Display.cc,v 1.122 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "htsearch.h"
+#include "Display.h"
+#include "ResultMatch.h"
+#include "WeightWord.h"
+#include "StringMatch.h"
+#include "QuotedStringList.h"
+#include "URL.h"
+#include "HtSGMLCodec.h"
+#include "HtURLCodec.h"
+#include "HtURLRewriter.h"
+#include "WordType.h"
+#include "Collection.h"
+#include "HtURLSeedScore.h"
+//#include "HtURLRewriter.h"
+#include "SplitMatches.h"
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#include <stdio.h>
+#include <stdlib.h> // for abs
+#include <ctype.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <syslog.h>
+#endif
+
+#include <locale.h>
+
+
+#include <math.h>
+#include <float.h>
+
+#if !defined(DBL_MAX)
+# if defined (MAXDOUBLE)
+# define DBL_MAX MAXDOUBLE
+# elif defined(HUGE_VAL)
+# define DBL_MAX HUGE_VAL
+# elif defined(MAXFLOAT)
+# define DBL_MAX MAXFLOAT
+# else
+# define DBL_MAX 1e37
+# endif
+#endif
+
+//*****************************************************************************
+//
+// Set up display state: remember the collections, read star/template
+// configuration, and select the current output template.  On template
+// failure, fall back to the builtin map, then flag templateError.
+Display::Display(Dictionary *collections)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    selected_collections = collections;
+    limitTo = 0;
+    excludeFrom = 0;
+    // needExcerpt = 0;
+    templateError = 0;
+
+    maxStars = config->Value("max_stars");
+    // score range starts inverted; narrowed later as matches are seen
+    maxScore = -DBL_MAX;
+    minScore = DBL_MAX;
+    setupImages();
+    setupTemplates();
+
+    if (!templates.createFromString(config->Find("template_map")))
+    {
+        // Error in createFromString.
+        // Let's try the default template_map
+
+        config->Add("template_map",
+            "Long builtin-long builtin-long Short builtin-short builtin-short");
+        if (!templates.createFromString(config->Find("template_map")))
+        {
+            // Unrecoverable Error
+            // (No idea why this would happen)
+            templateError = 1;
+        }
+    }
+
+    currentTemplate = templates.get(config->Find("template_name"));
+    if (!currentTemplate)
+    {
+        //
+        // Must have been some error. Resort to the builtin-long (slot 0)
+        //
+        currentTemplate = (Template *) templates.templates[0];
+    }
+    if (!currentTemplate)
+    {
+        //
+        // Another error!? Time to bail out...
+        //
+        templateError = 1;
+    }
+    // if (mystrcasestr(currentTemplate->getMatchTemplate(), "excerpt"))
+    // needExcerpt = 1;
+}
+
+//*****************************************************************************
+Display::~Display()
+{
+    // nothing to release: the document databases are managed by the
+    // Collection objects (see the remnant below from an older design)
+    // docDB.Close();
+}
+
+//*****************************************************************************
+//
+// Render one page of search results: validate the sort method, build
+// the match list, emit HTTP headers and the header/match/footer
+// templates (optionally wrapped by a search_results_wrapper file split
+// on the HTSEARCH_RESULTS marker).
+void
+Display::display(int pageNumber)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    int good_sort = 0;
+    good_sort = ResultMatch::setSortType(config->Find("sort"));
+    if (!good_sort)
+    {
+        // Must temporarily stash the message in a String, since
+        // displaySyntaxError will overwrite the static temp used in form.
+
+        String s(form("No such sort method: `%s'", (const char*)config->Find("sort")));
+
+        displaySyntaxError(s);
+        return;
+    }
+
+    List *matches = buildMatchList();
+    int currentMatch = 0;
+    int numberDisplayed = 0;
+    ResultMatch *match = 0;
+    int number = 0;
+    number = config->Value("matches_per_page");
+    if (number <= 0)
+        number = 10;
+    // first match index (0-based) of the requested page
+    int startAt = (pageNumber - 1) * number;
+
+    if (config->Boolean("logging"))
+    {
+        logSearch(pageNumber, matches);
+    }
+
+    displayHTTPheaders();
+    setVariables(pageNumber, matches);
+
+    //
+    // The first match is guaranteed to have the highest score of
+    // all the matches. We use this to compute the number of stars
+    // to display for all the other matches.
+    //
+    match = (ResultMatch *) (*matches)[0];
+    if (!match)
+    {
+        //
+        // No matches.
+        //
+        delete matches;
+// if( config->Boolean("nph") ) cout << "HTTP/1.0 200 OK\r\n";
+// cout << "Content-type: text/html\r\n\r\n";
+        displayNomatch();
+        return;
+    }
+    // maxScore = match->getScore(); // now done in buildMatchList()
+
+// if( config->Boolean("nph") ) cout << "HTTP/1.0 200 OK\r\n";
+// cout << "Content-type: text/html\r\n\r\n";
+    String wrap_file = config->Find("search_results_wrapper");
+    String *wrapper = 0;
+    char *header = 0, *footer = 0;
+    if (wrap_file.length())
+    {
+        wrapper = readFile(wrap_file.get());
+        if (wrapper && wrapper->length())
+        {
+            // split the wrapper in two at "$HTSEARCH_RESULTS" or
+            // "$(HTSEARCH_RESULTS)" / "${HTSEARCH_RESULTS}" by writing
+            // a NUL over the '$' -- header before, footer after
+            char wrap_sepr[] = "HTSEARCH_RESULTS";
+            char *h = wrapper->get();
+            char *p = strstr(h, wrap_sepr);
+            if (p)
+            {
+                if (p > h && p[-1] == '$')
+                {
+                    footer = p + strlen(wrap_sepr);
+                    header = h;
+                    p[-1] = '\0';
+                }
+                else if (p > h+1 && p[-2] == '$' &&
+                         (p[-1] == '(' || p[-1] == '{') &&
+                         (p[strlen(wrap_sepr)] == ')' ||
+                          p[strlen(wrap_sepr)] == '}'))
+                {
+                    footer = p + strlen(wrap_sepr) + 1;
+                    header = h;
+                    p[-2] = '\0';
+                }
+            }
+        }
+    }
+    if (header)
+        expandVariables(header);
+    else
+        displayHeader();
+
+    //
+    // Display the window of matches requested.
+    //
+    if (!currentTemplate->getStartTemplate().empty())
+    {
+        expandVariables(currentTemplate->getStartTemplate());
+    }
+
+    matches->Start_Get();
+    while ((match = (ResultMatch *)matches->Get_Next()) &&
+           numberDisplayed < number)
+    {
+        if (currentMatch >= startAt)
+        {
+            // DocumentRef *ref = docDB[match->getID()];
+            Collection *collection = match->getCollection();
+            DocumentRef *ref = collection->getDocumentRef(match->getID());
+            // NOTE(review): when ref is non-null but not Reference_normal
+            // it is never deleted (leak), and 'continue' also skips the
+            // currentMatch++ below -- confirm both are intended.
+            if (!ref || ref->DocState() != Reference_normal)
+                continue; // The document isn't present or shouldn't be displayed
+            ref->DocAnchor(match->getAnchor());
+            ref->DocScore(match->getScore());
+            displayMatch(match, ref, currentMatch+1);
+            numberDisplayed++;
+            delete ref;
+        }
+        currentMatch++;
+    }
+
+    if (!currentTemplate->getEndTemplate().empty())
+    {
+        expandVariables(currentTemplate->getEndTemplate());
+    }
+    if (footer)
+        expandVariables(footer);
+    else
+        displayFooter();
+
+    if (wrapper)
+        delete wrapper;
+    delete matches;
+}
+
+//*****************************************************************************
+// Return true if the specified URL should be counted towards the results.
+// Return true (1) if the URL passes the restrict/exclude filters and
+// should be counted towards the results.  Flattened from the original
+// redundant if/else nesting; behavior is identical.
+int
+Display::includeURL(const String& url)
+{
+    // must match the 'restrict' patterns, if any were given
+    if (limitTo && limitTo->match(url, 1, 0) == 0)
+        return 0;
+    // must not match the 'exclude' patterns
+    if (excludeFrom && excludeFrom->match(url, 0, 0) != 0)
+        return 0;
+    return 1;
+}
+
+//*****************************************************************************
+// Fill the template variables (URL, TITLE, SCORE, EXCERPT, MODIFIED,
+// etc.) for one result and expand the match template -- or a
+// URL-specific template when template_patterns matches the URL.
+void
+Display::displayMatch(ResultMatch *match, DocumentRef *ref, int current)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    String *str = 0;
+
+    char *coded_url = ref->DocURL();
+    String url = HtURLCodec::instance()->decode(coded_url);
+    HtURLRewriter::instance()->replace(url);
+    ref->DocURL(url.get()); // for star_patterns & template_patterns match
+    vars.Add("URL", new String(url.get()));
+
+    vars.Remove("ANCHOR"); // get rid of any previous setting
+    int iA = ref->DocAnchor();
+
+    String *anchor = 0;
+    int fanchor = 0;
+    if (iA > 0) // if an anchor was found
+    {
+        List *anchors = ref->DocAnchors();
+        if (anchors->Count() >= iA)
+        {
+            anchor = new String();
+            fanchor = 1;
+            *anchor << "#" << ((String*) (*anchors)[iA-1])->get();
+            vars.Add("ANCHOR", anchor);
+        }
+    }
+
+    //
+    // no condition for determining excerpt any more:
+    // we need it anyway to see if an anchor is relevant
+    //
+    int first = -1;
+    String urlanchor(url);
+    if (anchor)
+        urlanchor << anchor;
+    vars.Add("EXCERPT", excerpt(match, ref, urlanchor, fanchor, first));
+    //
+    // anchor only relevant if an excerpt was found, i.e.,
+    // the search expression matches the body of the document
+    // instead of only META keywords.
+    //
+    if (first < 0)
+    {
+        vars.Remove("ANCHOR");
+    }
+
+    vars.Add("METADESCRIPTION", new String(ref->DocMetaDsc()));
+    vars.Add("SCORE", new String(form("%f", ref->DocScore())));
+    vars.Add("CURRENT", new String(form("%d", current)));
+    char *title = ref->DocTitle();
+    if (!title || !*title)
+    {
+        if ( strcmp(config->Find("no_title_text"), "filename") == 0 )
+        {
+            // use actual file name
+            title = strrchr(url.get(), '/');
+            if (title)
+            {
+                title++; // Skip slash
+                str = new String(form("[%s]", title));
+                decodeURL(*str); // convert %20 to space, etc
+            }
+            else
+                // URL without '/' ??
+                str = new String("[No title]");
+        }
+        else
+            // use configure 'no title' text
+            str = new String(config->Find("no_title_text"));
+    }
+    else
+        str = new String(title);
+    vars.Add("TITLE", str);
+    vars.Add("STARSRIGHT", generateStars(ref, 1));
+    vars.Add("STARSLEFT", generateStars(ref, 0));
+    vars.Add("SIZE", new String(form("%d", ref->DocSize())));
+    vars.Add("SIZEK", new String(form("%d",
+        (ref->DocSize() + 1023) / 1024)));
+
+    // PERCENT: score scaled into 1..100 against the page's min/max
+    if (maxScore != 0 && maxScore != minScore)
+    {
+        int percent = (int)((ref->DocScore() - minScore) * 100 /
+            (maxScore - minScore));
+        if (percent <= 0)
+            percent = 1;
+        vars.Add("PERCENT", new String(form("%d", percent)));
+    }
+    else
+        vars.Add("PERCENT", new String("100"));
+
+    {
+        // MODIFIED: document time formatted per date_format/iso_8601
+        str = new String();
+        char buffer[100];
+        time_t t = ref->DocTime();
+        if (t)
+        {
+            struct tm *tm = localtime(&t);
+            String datefmt = config->Find("date_format");
+            const String locale = config->Find("locale");
+            if (datefmt.empty())
+            {
+                if (config->Boolean("iso_8601"))
+                    datefmt = "%Y-%m-%d %H:%M:%S %Z";
+                else
+                    datefmt = "%x";
+            }
+            if (!locale.empty())
+            {
+                // NOTE(review): LC_TIME is changed here and never
+                // restored -- presumably acceptable for a CGI process.
+                setlocale(LC_TIME,locale);
+            }
+            strftime(buffer, sizeof(buffer), (char*)datefmt, tm);
+            *str << buffer;
+        }
+        vars.Add("MODIFIED", str);
+    }
+
+    vars.Add("HOPCOUNT", new String(form("%d", ref->DocHopCount())));
+    vars.Add("DOCID", new String(form("%d", ref->DocID())));
+    vars.Add("BACKLINKS", new String(form("%d", ref->DocBackLinks())));
+
+    {
+        // DESCRIPTIONS: all link descriptions; DESCRIPTION: the first
+        str = new String();
+        List *list = ref->Descriptions();
+        int n = list->Count();
+        for (int i = 0; i < n; i++)
+        {
+            *str << ((String*) (*list)[i])->get() << "<br>";
+        }
+        vars.Add("DESCRIPTIONS", str);
+        String *description = new String();
+        if (list->Count())
+            *description << ((String*) (*list)[0]);
+        vars.Add("DESCRIPTION", description);
+    }
+
+    // pick a per-URL template when template_patterns matches, else the
+    // current template's match block
+    int index = 0;
+    int length = 0;
+    int status = -1;
+    if (URLtemplate.hasPattern())
+        status = URLtemplate.FindFirst(ref->DocURL(), index, length);
+    if (status >= 0 && index >= 0)
+        displayParsedFile( ((String*) URLtemplateList[index])->get() );
+    else
+        expandVariables(currentTemplate->getMatchTemplate());
+}
+
+//*****************************************************************************
+void
+Display::setVariables(int pageNumber, List *matches)
+{
+    //
+    // Populate the template-variable dictionary (vars) with everything a
+    // result page can reference: match counts, paging state, the CGI form
+    // widgets (format/method/sort selects, user-defined lists), and the
+    // page-navigation links.
+    //
+    // pageNumber is 1-based; matches may be NULL (e.g. syntax-error page).
+    //
+    HtConfiguration* config= HtConfiguration::config();
+    String tmp;
+    int i;
+    int nMatches = 0;
+
+    if (matches)
+        nMatches = matches->Count();
+
+    // Page arithmetic: matches_per_page defaults to 10, page count is
+    // rounded up and clamped to maximum_pages.
+    int matchesPerPage = config->Value("matches_per_page");
+    if (matchesPerPage <= 0)
+        matchesPerPage = 10;
+    int nPages = (nMatches + matchesPerPage - 1) / matchesPerPage;
+
+    if (nPages > config->Value("maximum_pages", 10))
+        nPages = config->Value("maximum_pages", 10);
+    if (nPages < 1)
+        nPages = 1;             // We always have at least one page...
+    vars.Add("MATCHES_PER_PAGE", new String(config->Find("matches_per_page")));
+    vars.Add("MAX_STARS", new String(config->Find("max_stars")));
+    vars.Add("CONFIG", new String(config->Find("config")));
+    vars.Add("VERSION", new String(config->Find("version")));
+    vars.Add("RESTRICT", new String(config->Find("restrict")));
+    vars.Add("EXCLUDE", new String(config->Find("exclude")));
+    vars.Add("KEYWORDS", new String(config->Find("keywords")));
+    vars.Add("MATCHES", new String(form("%d", nMatches)));
+    vars.Add("PLURAL_MATCHES", new String((nMatches == 1) ? (char *)"" : (const char *) config->Find("plural_suffix")));
+    vars.Add("PAGE", new String(form("%d", pageNumber)));
+    vars.Add("PAGES", new String(form("%d", nPages)));
+    vars.Add("FIRSTDISPLAYED",
+             new String(form("%d", (pageNumber - 1) *
+                             matchesPerPage + 1)));
+    if (nPages > 1)
+        vars.Add("PAGEHEADER", new String(config->Find("page_list_header")));
+    else
+        vars.Add("PAGEHEADER", new String(config->Find("no_page_list_header")));
+
+    // Index (1-based) of the last match shown on this page.
+    i = pageNumber * matchesPerPage;
+    if (i > nMatches)
+        i = nMatches;
+    vars.Add("LASTDISPLAYED", new String(form("%d", i)));
+
+    if (config->Find("script_name").length() != 0) {
+        vars.Add("CGI", new String(config->Find("script_name")));
+    } else {
+        // NOTE(review): getenv() can return NULL outside a CGI context;
+        // presumably String(NULL) yields an empty string -- verify.
+        vars.Add("CGI", new String(getenv("SCRIPT_NAME")));
+    }
+    vars.Add("STARTYEAR", new String(config->Find("startyear")));
+    vars.Add("STARTMONTH", new String(config->Find("startmonth")));
+    vars.Add("STARTDAY", new String(config->Find("startday")));
+    vars.Add("ENDYEAR", new String(config->Find("endyear")));
+    vars.Add("ENDMONTH", new String(config->Find("endmonth")));
+    vars.Add("ENDDAY", new String(config->Find("endday")));
+
+    String *str;
+    char *format = input->get("format");
+    String *in;
+
+    vars.Add("SELECTED_FORMAT", new String(format));
+
+    // Build the <select> of output templates; the previously chosen one
+    // (from the "format" CGI input) is marked selected.
+    str = new String();
+    *str << "<select name=\"format\">\n";
+    for (i = 0; i < templates.displayNames.Count(); i++)
+    {
+        in = (String *) templates.internalNames[i];
+        *str << "<option value=\"" << in->get() << '"';
+        if (format && mystrcasecmp(in->get(), format) == 0)
+        {
+            *str << " selected";
+        }
+        *str << '>' << ((String*)templates.displayNames[i])->get() << '\n';
+    }
+    *str << "</select>\n";
+    vars.Add("FORMAT", str);
+
+    // Build the <select> of match methods; method_names holds
+    // (value, label) pairs.
+    str = new String();
+    tmp = config->Find("match_method");
+    vars.Add("SELECTED_METHOD", new String(tmp));
+    QuotedStringList ml(config->Find("method_names"), " \t\r\n");
+    *str << "<select name=\"method\">\n";
+    for (i = 0; i < ml.Count(); i += 2)
+    {
+        *str << "<option value=\"" << ml[i] << '"';
+        if (mystrcasecmp(ml[i], tmp) == 0)
+        {
+            *str << " selected";
+            vars.Add("MATCH_MESSAGE", new String(ml[i+1]));
+        }
+        *str << '>' << ml[i + 1] << '\n';
+    }
+    *str << "</select>\n";
+    vars.Add("METHOD", str);
+
+    ////////////////// Multiple database support //////////////////////
+    // Emit collection table. Ensure that previously selected collections
+    // are "checked".
+    // Collections are specified in the config file with the
+    // "collection_names" attribute. An example of the corresponding snippet
+    // in the config file is as follows:
+    //
+    // collection_names: htdig_docs htdig_bugs
+    //
+    // htdig_bugs and htdig_docs are the two collections (databases) and
+    // their corresponding config files are: $CONFIG_DIR/htdig_bugs.conf and
+    // $CONFIG_DIR/htdig_docs.conf respectively.
+    //
+    QuotedStringList clist(config->Find("collection_names"), " \t\r\n");
+    for (i =0; i < clist.Count(); i++)
+    {
+        String config_name = clist[i];
+
+        for (int j=0; j < collectionList.Count(); j++)
+        {
+            if (strcmp(config_name.get(), collectionList[j]) == 0)
+            {
+                // COLLECTION_<name> expands to "checked" for selected ones.
+                str = new String();
+                *str << "checked";
+                String collection_id = "COLLECTION_";
+                collection_id << config_name;
+                vars.Add(collection_id, str);
+                break;
+            }
+        }
+    }
+
+    ////////////////// Multiple database support //////////////////////
+
+    // Build the <select> of sort orders. "date" and "time" are synonyms,
+    // as are their "rev"-prefixed reverse forms; the datetime matcher
+    // lets either spelling select the other.
+    str = new String();
+    QuotedStringList sl(config->Find("sort_names"), " \t\r\n");
+    const String st = config->Find("sort");
+    StringMatch datetime;
+    datetime.IgnoreCase();
+    datetime.Pattern("date|time");
+    *str << "<select name=\"sort\">\n";
+    for (i = 0; i < sl.Count(); i += 2)
+    {
+        *str << "<option value=\"" << sl[i] << '"';
+        // Parentheses added for clarity only; && already binds tighter
+        // than ||, so the grouping is unchanged.
+        if (mystrcasecmp(sl[i], st) == 0 ||
+            (datetime.Compare(sl[i]) && datetime.Compare(st)) ||
+            (mystrncasecmp(sl[i], st, 3) == 0 &&
+             datetime.Compare(sl[i]+3) && datetime.Compare(st.get()+3)))
+            *str << " selected";
+        *str << '>' << sl[i + 1] << '\n';
+    }
+    *str << "</select>\n";
+    vars.Add("SORT", str);
+    vars.Add("SELECTED_SORT", new String(st));
+
+    // Handle user-defined select lists.
+    // Uses octuples containing these values:
+    // <tempvar> <inparm> <namelistattr> <ntuple> <ivalue> <ilabel>
+    // <defattr> <deflabel>
+    // e.g.:
+    // METHOD_LIST method method_names 2 1 2 match_method ""
+    // FORMAT_LIST format template_map 3 2 1 template_name ""
+    // EXCLUDE_LIST exclude exclude_names 2 1 2 exclude ""
+    // MATCH_LIST matchesperpage matches_per_page_list 1 1 1
+    // matches_per_page "Previous Amount"
+    QuotedStringList builds(config->Find("build_select_lists"), " \t\r\n");
+    for (int b = 0; b <= builds.Count()-8; b += 8)
+    {
+        int ntuple = atoi(builds[b+3]);
+        int ivalue = atoi(builds[b+4]);
+        int ilabel = atoi(builds[b+5]);
+        int nsel = 0;                   // how many entries ended up selected
+        int mult = 0, asinput = 0;      // multiple-select / rendered-as-input flags
+        const char *cp;
+        char sepc = '\001';             // separator between multiple values
+        String currval;
+        String pre, post;
+        QuotedStringList nameopt(builds[b], ",", 1);
+        QuotedStringList namelist(config->Find(builds[b+2]), " \t\r\n");
+        // Sanity-check the octuple before emitting anything.
+        if (ntuple > 0 && ivalue > 0 && ivalue <= ntuple
+            && ilabel > 0 && ilabel <= ntuple && namelist.Count() % ntuple == 0
+            && nameopt.Count() > 0)
+        {
+            if (strcmp(builds[b+1], "restrict") == 0
+                || strcmp(builds[b+1], "exclude") == 0)
+                sepc = '|';
+            if (nameopt.Count() == 1)
+                ;                       // default is single select
+            else if (mystrcasecmp(nameopt[1], "multiple") == 0)
+                mult = 1;
+            else if (mystrcasecmp(nameopt[1], "radio") == 0)
+                asinput = 1;
+            else if (mystrcasecmp(nameopt[1], "checkbox") == 0)
+            {
+                mult = 1;
+                asinput = 1;
+            }
+            if (nameopt.Count() > 2)
+                pre = nameopt[2];
+            else
+                pre = "";
+            if (nameopt.Count() > 3)
+                post = nameopt[3];
+            else
+                post = "";
+
+            str = new String();
+            if (!asinput)
+            {
+                *str << "<select ";
+                if (mult)
+                    *str << "multiple ";
+                *str << "name=\"" << builds[b+1] << "\">\n";
+            }
+            for (i = 0; i < namelist.Count(); i += ntuple)
+            {
+                // Current value comes from the named config attribute if
+                // given, otherwise from the CGI input of the same name.
+                if (*builds[b+6])
+                    currval = config->Find(builds[b+6]);
+                else if (input->exists(builds[b+1]))
+                    currval = input->get(builds[b+1]);
+                else
+                    currval = 0;
+                if (!asinput)
+                    *str << pre << "<option value=\"" << namelist[i+ivalue-1] << '"';
+                else if (mult)
+                    *str << pre << "<input type=\"checkbox\" name=\"" << builds[b+1]
+                         << "\" value=\"" << namelist[i+ivalue-1] << '"';
+                else
+                    *str << pre << "<input type=\"radio\" name=\"" << builds[b+1]
+                         << "\" value=\"" << namelist[i+ivalue-1] << '"';
+                // Selected when the value equals currval (single select) or,
+                // for multi-selects, appears in currval delimited by sepc
+                // or '\001'. Parentheses added for clarity only; the
+                // grouping follows the &&-over-|| precedence of the
+                // original expression.
+                if ((!mult
+                     && mystrcasecmp(namelist[i+ivalue-1], currval.get()) == 0)
+                    || (mult
+                        && (cp = mystrcasestr(currval.get(), namelist[i+ivalue-1])) != NULL
+                        && (cp == currval.get() || cp[-1] == '\001' || cp[-1] == sepc)
+                        && (*(cp += strlen(namelist[i+ivalue-1])) == '\0'
+                            || *cp == '\001' || *cp == sepc)))
+                {
+                    if (!asinput)
+                        *str << " selected";
+                    else
+                        *str << " checked";
+                    ++nsel;
+                }
+                *str << '>' << namelist[i+ilabel-1] << post << '\n';
+            }
+            // If nothing matched but the user supplied a value, echo it
+            // back as an extra pre-selected entry labelled <deflabel>.
+            if (!nsel && builds[b+7][0] && input->exists(builds[b+1]))
+            {
+                if (!asinput)
+                    *str << pre << "<option value=\"" << input->get(builds[b+1])
+                         << "\" selected>" << builds[b+7] << post << '\n';
+                else if (mult)
+                    *str << pre << "<input type=\"checkbox\" name=\"" << builds[b+1]
+                         << "\" value=\"" << input->get(builds[b+1])
+                         << "\" checked>" << builds[b+7] << post << '\n';
+                else
+                    *str << pre << "<input type=\"radio\" name=\"" << builds[b+1]
+                         << "\" value=\"" << input->get(builds[b+1])
+                         << "\" checked>" << builds[b+7] << post << '\n';
+            }
+            if (!asinput)
+                *str << "</select>\n";
+            vars.Add(nameopt[0], str);
+        }
+    }
+
+    //
+    // If a paged output is required, set the appropriate variables
+    //
+    if (nPages > 1)
+    {
+        if (pageNumber > 1)
+        {
+            str = new String("<a href=\"");
+            tmp = 0;
+            createURL(tmp, pageNumber - 1);
+            *str << tmp << "\">" << config->Find("prev_page_text") << "</a>";
+        }
+        else
+        {
+            str = new String(config->Find("no_prev_page_text"));
+        }
+        vars.Add("PREVPAGE", str);
+
+        if (pageNumber < nPages)
+        {
+            str = new String("<a href=\"");
+            tmp = 0;
+            createURL(tmp, pageNumber + 1);
+            *str << tmp << "\">" << config->Find("next_page_text") << "</a>";
+        }
+        else
+        {
+            str = new String(config->Find("no_next_page_text"));
+        }
+        vars.Add("NEXTPAGE", str);
+
+        // Numbered page links; the current page uses the
+        // no_page_number_text form (no link).
+        str = new String();
+        char *p;
+        QuotedStringList pnt(config->Find("page_number_text"), " \t\r\n");
+        QuotedStringList npnt(config->Find("no_page_number_text"), " \t\r\n");
+        QuotedStringList sep(config->Find("page_number_separator"), " \t\r\n");
+        if (nPages > config->Value("maximum_page_buttons", 10))
+            nPages = config->Value("maximum_page_buttons", 10);
+        for (i = 1; i <= nPages; i++)
+        {
+            if (i == pageNumber)
+            {
+                p = npnt[i - 1];
+                if (!p)
+                    p = form("%d", i);
+                *str << p;
+            }
+            else
+            {
+                p = pnt[i - 1];
+                if (!p)
+                    p = form("%d", i);
+                *str << "<a href=\"";
+                tmp = 0;
+                createURL(tmp, i);
+                *str << tmp << "\">" << p << "</a>";
+            }
+            if (i != nPages && sep.Count() > 0)
+                *str << sep[(i-1)%sep.Count()];
+            else if (i != nPages && sep.Count() <= 0)
+                *str << " ";
+        }
+        vars.Add("PAGELIST", str);
+    }
+    // Any config attribute listed in allow_in_form becomes a template
+    // variable under its uppercased name.
+    StringList form_vars(config->Find("allow_in_form"), " \t\r\n");
+    String* key;
+    for (i= 0; i < form_vars.Count(); i++)
+    {
+        if(!config->Find(form_vars[i]).empty())
+        {
+            key= new String(form_vars[i]);
+            key->uppercase();
+            vars.Add(key->get(), new String(config->Find(form_vars[i])));
+        }
+    }
+}
+
+//*****************************************************************************
+void
+Display::createURL(String &url, int pageNumber)
+{
+    //
+    // Rebuild a self-referencing htsearch URL that reproduces the current
+    // query but points at the given (1-based) page number. Parameters are
+    // separated with ';' and appended in a fixed order.
+    //
+    HtConfiguration* config= HtConfiguration::config();
+    String s;
+    int i;
+    // Fetch a CGI input, URL-encode it, and yield the encoded C string.
+#define encodeInput(name) (s = input->get(name), encodeURL(s), s.get())
+
+    // Base script: the configured name wins over the server-provided one.
+    if (config->Find("script_name").empty())
+        url << getenv("SCRIPT_NAME");
+    else
+        url << config->Find("script_name");
+
+    url << '?';
+
+    if (input->exists("restrict"))
+        url << "restrict=" << encodeInput("restrict") << ';';
+    if (input->exists("exclude"))
+        url << "exclude=" << encodeInput("exclude") << ';';
+    // ("config" is deliberately not copied here; the collection loop
+    // below emits one config= parameter per selected collection.)
+
+    // Put out all specified collections. If none are selected, resort to
+    // default behaviour (no config= parameters at all).
+    char *config_name = collectionList[0];
+    if (config_name && config_name[0] == '\0')
+        config_name = NULL;
+
+    if (config_name)
+    {
+        String config_encoded;
+        for (i = 0; i < collectionList.Count(); i++)
+        {
+            config_encoded = collectionList[i];
+            encodeURL(config_encoded);
+            url << "config=" << config_encoded << ';';
+        }
+    }
+
+    // The simple inputs are copied through verbatim, in this order.
+    static const char *const passthrough[] = {
+        "method", "format", "sort", "matchesperpage", "keywords", "words",
+        "startyear", "startmonth", "startday",
+        "endyear", "endmonth", "endday"
+    };
+    for (i = 0; i < (int)(sizeof(passthrough) / sizeof(passthrough[0])); i++)
+    {
+        if (input->exists(passthrough[i]))
+            url << passthrough[i] << '=' << encodeInput(passthrough[i]) << ';';
+    }
+
+    // Extra parameters the administrator explicitly allowed in the form.
+    StringList form_vars(config->Find("allow_in_form"), " \t\r\n");
+    for (i = 0; i < form_vars.Count(); i++)
+    {
+        if (input->exists(form_vars[i]))
+        {
+            String pname = form_vars[i];
+            encodeURL(pname);    // shouldn't be needed, but just in case
+            url << pname << '=' << encodeInput(form_vars[i]) << ';';
+        }
+    }
+    url << "page=" << pageNumber;
+}
+
+//*****************************************************************************
+void
+Display::displayHTTPheaders()
+{
+    //
+    // Emit the HTTP response headers. In "nph" (no-parsed-headers) mode
+    // the status line must be written by us rather than the web server.
+    //
+    HtConfiguration* config= HtConfiguration::config();
+    if (config->Boolean("nph"))
+        cout << "HTTP/1.0 200 OK\r\n";
+    String content_type = config->Find("search_results_contenttype");
+    if (content_type.length())
+        cout << "Content-type: " << content_type << "\r\n\r\n";
+}
+
+//*****************************************************************************
+void
+Display::displayHeader()
+{
+    // Render the configured results-page header template through the
+    // variable expander.
+    displayParsedFile(HtConfiguration::config()->Find("search_results_header"));
+}
+
+//*****************************************************************************
+void
+Display::displayFooter()
+{
+    // Render the configured results-page footer template through the
+    // variable expander.
+    displayParsedFile(HtConfiguration::config()->Find("search_results_footer"));
+}
+
+//*****************************************************************************
+void
+Display::displayNomatch()
+{
+    // Render the "nothing found" template through the variable expander.
+    displayParsedFile(HtConfiguration::config()->Find("nothing_found_file"));
+}
+
+//*****************************************************************************
+void
+Display::displaySyntaxError(const String& message)
+{
+    //
+    // Report a query-syntax error: emit headers, populate the template
+    // variables (no matches), expose the message as SYNTAXERROR and
+    // render the configured error template.
+    //
+    displayHTTPheaders();
+    setVariables(0, 0);
+    vars.Add("SYNTAXERROR", new String(message));
+    displayParsedFile(HtConfiguration::config()->Find("syntax_error_file"));
+}
+
+//*****************************************************************************
+void
+Display::displayParsedFile(const String& filename)
+{
+    //
+    // Copy the named template file to stdout, expanding $variable
+    // references via expandVariables().
+    //
+    // The whole file is read before expansion: the previous code expanded
+    // fixed 1000-byte chunks, which could split a $(VARIABLE) reference
+    // across a chunk boundary and leave it unexpanded, since the expander
+    // state does not carry over between calls.
+    //
+    FILE *fl = fopen(filename, "r");
+    if (!fl)
+    {
+        if (debug)
+            cerr << "displayParsedFile: Can't open " << filename << endl;
+        return;
+    }
+    String contents;
+    char buffer[1000];
+    while (fgets(buffer, sizeof(buffer), fl))
+        contents << buffer;
+    fclose(fl);
+    expandVariables(contents);
+}
+
+//*****************************************************************************
+// If the result templates need to depend on the URL of the match, we need
+// an efficient way to determine which template file to use. To do this, we
+// will build a StringMatch object with all the URL patterns and also
+// a List parallel to that pattern that contains the actual template file
+// names to use for each URL.
+//
+void
+Display::setupTemplates()
+{
+    //
+    // Parse the template_patterns attribute into the URLtemplate matcher
+    // and the parallel URLtemplateList of template file names.
+    //
+    // The attribute holds pairs of whitespace-separated values: a URL
+    // pattern followed by the result-template file name to use for URLs
+    // matching that pattern.
+    //
+    HtConfiguration* config= HtConfiguration::config();
+    String templatePatterns = config->Find("template_patterns");
+    if (templatePatterns.empty())
+        return;
+
+    String pattern;
+    char *token = strtok(templatePatterns, " \t\r\n");
+    while (token)
+    {
+        // Second token of the pair is the template file name. A trailing
+        // pattern with no file name is ignored; the old code appended it
+        // to the pattern anyway, pairing it with an empty file name.
+        char *fileName = strtok(0, " \t\r\n");
+        if (!fileName)
+            break;
+        pattern << token << '|';
+        URLtemplateList.Add(new String(fileName));
+        token = strtok(0, " \t\r\n");
+    }
+    if (!pattern.empty())
+    {
+        pattern.chop(1);        // drop the trailing '|'
+        URLtemplate.Pattern(pattern);
+    }
+}
+
+//*****************************************************************************
+// If the star images need to depend on the URL of the match, we need
+// an efficient way to determine which image to use. To do this, we
+// will build a StringMatch object with all the URL patterns and also
+// a List parallel to that pattern that contains the actual images to
+// use for each URL.
+//
+void
+Display::setupImages()
+{
+    //
+    // Parse the star_patterns attribute into the URLimage matcher and the
+    // parallel URLimageList of image URLs.
+    //
+    // The attribute holds pairs of whitespace-separated values: a URL
+    // pattern followed by the URL of the star image to use for documents
+    // matching that pattern.
+    //
+    HtConfiguration* config= HtConfiguration::config();
+    String starPatterns = config->Find("star_patterns");
+    if (starPatterns.empty())
+        return;
+
+    String pattern;
+    char *token = strtok(starPatterns, " \t\r\n");
+    while (token)
+    {
+        // Second token of the pair is the image URL. A trailing pattern
+        // with no image is ignored; the old code appended it to the
+        // pattern anyway, pairing it with an empty image URL.
+        char *imageURL = strtok(0, " \t\r\n");
+        if (!imageURL)
+            break;
+        pattern << token << '|';
+        URLimageList.Add(new String(imageURL));
+        token = strtok(0, " \t\r\n");
+    }
+    if (!pattern.empty())
+    {
+        pattern.chop(1);        // drop the trailing '|'
+        URLimage.Pattern(pattern);
+    }
+}
+
+//*****************************************************************************
+String *
+Display::generateStars(DocumentRef *ref, int right)
+{
+    // Build the row of star images for one match, scaled by its score
+    // relative to the result set's min/max scores. "right" selects
+    // right-aligned stars (blank padding emitted first). Also sets the
+    // NSTARS template variable as a side effect. Caller owns the result.
+    int i;
+    String *result = new String();
+    HtConfiguration* config= HtConfiguration::config();
+    // Star images can be disabled; return an empty string then.
+    if (!config->Boolean("use_star_image", 1))
+        return result;
+
+    String image = config->Find("star_image");
+    const String blank = config->Find("star_blank");
+    double score;
+
+    // Normalize this document's score to [0,1] within the result set.
+    if (maxScore != 0 && maxScore != minScore)
+    {
+        score = (ref->DocScore() - minScore) / (maxScore - minScore);
+        if(debug) cerr << "generateStars: doc, min, max " << ref->DocScore() << ", " << minScore << ", " << maxScore <<endl;
+    }
+    else
+    {
+        // NOTE(review): this mutates the maxScore member as a side effect
+        // -- presumably seeding it from the first document when no score
+        // range is known yet; verify against callers before changing.
+        maxScore = ref->DocScore();
+        score = 1;
+    }
+    // Round to at least 1 star and at most maxStars.
+    int nStars = int(score * (maxStars - 1) + 0.5) + 1;
+
+    vars.Add("NSTARS", new String(form("%.d", nStars)));
+    if(debug) cerr << "generateStars: nStars " << nStars << " of " << maxStars <<endl;
+
+    // Right-aligned stars: emit the blank padding first.
+    if (right)
+    {
+        for (i = 0; i < maxStars - nStars; i++)
+        {
+            *result << "<img src=\"" << blank << "\" alt=\" \">";
+        }
+    }
+
+    // Use a URL-specific star image if star_patterns matches this URL;
+    // URLimageList is parallel to the pattern set (see setupImages).
+    int match = 0;
+    int length = 0;
+    int status;
+
+    if (URLimage.hasPattern())
+        status = URLimage.FindFirst(ref->DocURL(), match, length);
+    else
+        status = -1;
+
+    if (status >= 0 && match >= 0)
+    {
+        image = ((String*) URLimageList[match])->get();
+    }
+
+    for (i = 0; i < nStars; i++)
+    {
+        *result << "<img src=\"" << image << "\" alt=\"*\">";
+    }
+
+    // Left-aligned stars: blank padding goes after the stars.
+    if (!right)
+    {
+        for (i = 0; i < maxStars - nStars; i++)
+        {
+            *result << "<img src=\"" << blank << "\" alt=\" \">";
+        }
+    }
+
+    return result;
+}
+
+//*****************************************************************************
+String *
+Display::readFile(const String& filename)
+{
+    // Slurp the named file into a freshly allocated String, which the
+    // caller owns. A file that cannot be opened yields an empty String
+    // (plus a diagnostic on stderr when debugging).
+    String *contents = new String();
+    char line[1024];
+
+    FILE *fl = fopen(filename, "r");
+    if (fl)
+    {
+        while (fgets(line, sizeof(line), fl))
+            *contents << line;
+        fclose(fl);
+    }
+    else if (debug)
+        cerr << "readFile: Can't open " << filename << endl;
+    return contents;
+}
+
+//*****************************************************************************
+void
+Display::expandVariables(const String& str_arg)
+{
+    // Copy str_arg to stdout, expanding variable references through
+    // outputVariable(). Recognized forms: $NAME, $(NAME), ${NAME};
+    // a backslash escapes the next character. The optional prefix codes
+    // '%', '=' and '&' after the '$' are collected into the variable name
+    // and interpreted by outputVariable() as encoding selectors.
+    // Implemented as a character-at-a-time state machine.
+    const char* str = str_arg;
+    enum
+    {
+        StStart, StLiteral, StVarStart, StVarClose, StVarPlain, StGotVar
+    } state = StStart;
+    String var = "";
+
+    while (str && *str)
+    {
+        switch (state)
+        {
+        case StStart:
+            // Plain text; watch for an escape or a '$'.
+            if (*str == '\\')
+                state = StLiteral;
+            else if (*str == '$')
+                state = StVarStart;
+            else
+                cout << *str;
+            break;
+        case StLiteral:
+            // Character after a backslash is emitted verbatim.
+            cout << *str;
+            state = StStart;
+            break;
+        case StVarStart:
+            // Just after '$': prefix codes, an opening bracket, or the
+            // first character of a bare variable name.
+            if (*str == '%' || *str == '=')
+                var << *str;        // code for URL-encoded/decoded variable
+            else if (*str == '&')
+            {
+                var << *str;        // code for SGML-encoded variable
+                // Accept the SGML-escaped form "$&amp;" as equivalent
+                // to "$&" by skipping over the entity.
+                if (mystrncasecmp("&amp;", str, 5) == 0)
+                    str += 4;
+            }
+            else if (*str == '(' || *str == '{')
+                state = StVarClose;
+            else if (isalnum(*str) || *str == '_' || *str == '-')
+            {
+                var << *str;
+                state = StVarPlain;
+            }
+            else
+                state = StStart;    // lone '$': silently dropped
+            break;
+        case StVarClose:
+            // Inside $(...) or ${...}: accumulate until the closer.
+            if (*str == ')' || *str == '}')
+                state = StGotVar;
+            else if (isalnum(*str) || *str == '_' || *str == '-')
+                var << *str;
+            else
+                state = StStart;    // malformed reference: abandon it
+            break;
+        case StVarPlain:
+            // Bare $NAME: the name ends at the first non-name character.
+            if (isalnum(*str) || *str == '_' || *str == '-')
+                var << *str;
+            else
+            {
+                state = StGotVar;
+                continue;           // reprocess this char after expansion
+            }
+            break;
+        case StGotVar:
+            //
+            // We have a complete variable in var. Look it up and
+            // see if we can find a good replacement for it.
+            //
+            outputVariable(var);
+            var = "";
+            state = StStart;
+            continue;               // current char not yet consumed
+        }
+        str++;
+    }
+    if (state == StGotVar || state == StVarPlain)
+    {
+        //
+        // The end of string was reached, but we are still trying to
+        // put a variable together. Since we now have a complete
+        // variable, we will look up the value for it.
+        //
+        outputVariable(var);
+    }
+}
+
+//*****************************************************************************
+void
+Display::outputVariable(const String& var)
+{
+    // Resolve one variable reference collected by expandVariables() and
+    // write its value to stdout. var may start with '&', '%' or '='
+    // prefix codes selecting an output encoding; the rest is the name.
+    String *temp;
+    String value = "";
+    const char *ev, *name;
+
+    // We have a complete variable name in var. Look it up and
+    // see if we can find a good replacement for it, either in our
+    // vars dictionary or in the environment variables.
+    name = var;
+    while (*name == '&' || *name == '%' || *name == '=')
+        name++;                 // skip prefix codes to get the bare name
+    temp = (String *) vars[name];
+    if (temp)
+        value = *temp;
+    else
+    {
+        // Unknown template variable: fall back to the environment.
+        ev = getenv(name);
+        if (ev)
+            value = ev;
+    }
+    // Apply the prefix codes right-to-left: '%' URL-encodes the value,
+    // '&' decodes SGML entities, '=' URL-decodes.
+    // NOTE(review): when no codes are present, --name evaluates a pointer
+    // one before the buffer start before the comparison rejects it;
+    // works in practice but is technically undefined behavior.
+    while (--name >= var.get() && value.length())
+    {
+        if (*name == '%')
+            encodeURL(value);
+        else if(*name == '&')
+            value = HtSGMLCodec::instance()->decode(value);
+        else // (*name == '=')
+            decodeURL(value);
+    }
+    cout << value;
+}
+
+//*****************************************************************************
+List *
+Display::buildMatchList()
+{
+ HtConfiguration* config= HtConfiguration::config();
+ char *cpid;
+ String url;
+ ResultMatch *thisMatch;
+ SplitMatches matches(*config);
+ double backlink_factor = config->Double("backlink_factor");
+ double date_factor = config->Double("date_factor");
+ double backlink_score = 0;
+ double date_score = 0;
+ double base_score = 0;
+
+
+ // Additions made here by Mike Grommet ...
+
+ tm startdate; // structure to hold the startdate specified by the user
+ tm enddate; // structure to hold the enddate specified by the user
+ time_t now = time((time_t *)0); // fill in all fields for mktime
+ tm *lt = localtime(&now); // - Gilles's fix
+ startdate = *lt;
+ enddate = *lt;
+
+ time_t eternity = ~(1<<(sizeof(time_t)*8-1)); // will be the largest value holdable by a time_t
+ tm endoftime; // the time_t eternity will be converted into a tm, held by this variable
+
+ time_t timet_startdate;
+ time_t timet_enddate;
+ int monthdays[] = {31,28,31,30,31,30,31,31,30,31,30,31};
+
+ // boolean to test to see if we need to build date information or not
+ int dategiven = ((config->Value("startmonth")) ||
+ (config->Value("startday")) ||
+ (config->Value("startyear")) ||
+ (config->Value("endmonth")) ||
+ (config->Value("endday")) ||
+ (config->Value("endyear")));
+
+ // find the end of time
+ lt = gmtime(&eternity);
+ endoftime = *lt;
+
+ if(dategiven) // user specified some sort of date information
+ {
+ int reldate = ((config->Value("startmonth") < 0) ||
+ (config->Value("startday") < 0) ||
+ (config->Value("startyear") < 0));
+ int t;
+
+ // set up the startdate structure
+ // see man mktime for details on the tm structure
+ startdate.tm_sec = 0;
+ startdate.tm_min = 0;
+ startdate.tm_hour = 0;
+ startdate.tm_yday = 0;
+ startdate.tm_wday = 0;
+
+ // The concept here is that if a user did not specify a part of a date,
+ // then we will make assumptions...
+ // For instance, suppose the user specified Feb, 1999 as the start
+ // range, we take steps to make sure that the search range date starts
+ // at Feb 1, 1999,
+ // along these same lines: (these are in MM-DD-YYYY format)
+ // Startdates: Date Becomes
+ // 01-01 01-01-1970
+ // 01-1970 01-01-1970
+ // 04-1970 04-01-1970
+ // 1970 01-01-1970
+ // These things seem to work fine for start dates, as all months have
+ // the same first day however the ending date can't work this way.
+
+ if(config->Value("startday")) // form input specified a start day
+ {
+ t = config->Value("startday");
+ if (t < 0)
+ {
+ time_t then = now + (t * (24*60*60));
+ lt = localtime(&then);
+ startdate.tm_mday = lt->tm_mday;
+ startdate.tm_mon = lt->tm_mon;
+ startdate.tm_year = lt->tm_year;
+ }
+ else
+ startdate.tm_mday = t;
+ // tm days are 1 based, they are passed in as 1 based
+ }
+ else if (!reldate)
+ startdate.tm_mday = 1; // otherwise, no start day, default to 1
+
+ if(config->Value("startmonth")) // form input specified a start month
+ {
+ t = config->Value("startmonth");
+ if (t < 0)
+ startdate.tm_mon += t;
+ else
+ startdate.tm_mon = t - 1;
+ // tm months are zero based. They are passed in as 1 based
+ while (startdate.tm_mon < 0)
+ {
+ startdate.tm_mon += 12;
+ startdate.tm_year--;
+ }
+ }
+ else if (!reldate)
+ startdate.tm_mon = 0; // otherwise, no start month, default to 0
+
+ // year is handled a little differently... the tm_year structure
+ // wants the tm_year in a format of year - 1900.
+ // since we are going to convert these dates to a time_t,
+ // a time_t value of zero, the earliest possible date
+ // occurs Jan 1, 1970. If we allow dates < 1970, then we
+ // could get negative time_t values right???
+ // (barring minor timezone offsets west of GMT, where Epoch is 12-31-69)
+
+ if(config->Value("startyear")) // form input specified a start year
+ {
+ t = config->Value("startyear");
+ if (t < 0)
+ startdate.tm_year += t;
+ else
+ {
+ startdate.tm_year = config->Value("startyear") - 1900;
+ if (startdate.tm_year < 69-1900) // correct for 2-digit years 00-68
+ startdate.tm_year += 2000; // - Gilles's fix
+ if (startdate.tm_year < 0) // correct for 2-digit years 69-99
+ startdate.tm_year += 1900;
+ }
+ }
+ else if (!reldate)
+ startdate.tm_year = 1970-1900;
+ // otherwise, no start day, specify start at 1970
+
+ reldate = ((config->Value("endmonth") < 0) ||
+ (config->Value("endday") < 0) ||
+ (config->Value("endyear") < 0));
+
+ // set up the enddate structure
+ enddate.tm_sec = 59; // allow up to last second of end day
+ enddate.tm_min = 59; // - Gilles's fix
+ enddate.tm_hour = 23;
+ enddate.tm_yday = 0;
+ enddate.tm_wday = 0;
+
+ if(config->Value("endday") < 0) // form input specified relative end day
+ {
+ // relative end day must be done before month or year
+ t = config->Value("endday");
+ time_t then = now + (t * (24*60*60));
+ lt = localtime(&then);
+ enddate.tm_mday = lt->tm_mday;
+ enddate.tm_mon = lt->tm_mon;
+ enddate.tm_year = lt->tm_year;
+ }
+
+ if(config->Value("endmonth")) // form input specified an end month
+ {
+ t = config->Value("endmonth");
+ if (t < 0)
+ enddate.tm_mon += t;
+ else
+ enddate.tm_mon = t - 1;
+ // tm months are zero based. They are passed in as 1 based
+ while (enddate.tm_mon < 0)
+ {
+ enddate.tm_mon += 12;
+ enddate.tm_year--;
+ }
+ }
+ else if (!reldate)
+ enddate.tm_mon = 11; // otherwise, no end month, default to 11
+
+ if(config->Value("endyear")) // form input specified a end year
+ {
+ t = config->Value("endyear");
+ if (t < 0)
+ enddate.tm_year += t;
+ else
+ {
+ enddate.tm_year = config->Value("endyear") - 1900;
+ if (enddate.tm_year < 69-1900) // correct for 2-digit years 00-68
+ enddate.tm_year += 2000; // - Gilles's fix
+ if (enddate.tm_year < 0) // correct for 2-digit years 69-99
+ enddate.tm_year += 1900;
+ }
+ }
+ else if (!reldate)
+ enddate.tm_year = endoftime.tm_year;
+ // otherwise, no end year, specify end at the end of time allowable
+
+ // Months have different number of days, and this makes things more
+ // complicated than the startdate range.
+ // Following the example above, here is what we want to happen:
+ // Enddates: Date Becomes
+ // 04-31 04-31-endoftime.tm_year
+ // 05-1999 05-31-1999, may has 31 days... we want to search until the end of may so...
+ // 1999 12-31-1999, search until the end of the year
+
+ if(config->Value("endday") > 0) // form input specified an end day
+ {
+ enddate.tm_mday = config->Value("endday");
+ // tm days are 1 based, they are passed in as 1 based
+ }
+ else if (!reldate)
+ {
+ // otherwise, no end day, default to the end of the month
+ enddate.tm_mday = monthdays[enddate.tm_mon];
+ if (enddate.tm_mon == 1) // February, so check for leap year
+ if (((enddate.tm_year+1900) % 4 == 0 &&
+ (enddate.tm_year+1900) % 100 != 0) ||
+ (enddate.tm_year+1900) % 400 == 0)
+ enddate.tm_mday += 1; // Feb. 29 - Gilles's fix
+ }
+
+ // Convert the tm values into time_t values.
+ // Web servers specify modification times in GMT, but htsearch
+ // displays these modification times in the server's local time zone.
+ // For consistency, we would prefer to select based on this same
+ // local time zone. - Gilles's fix
+
+ timet_startdate = mktime(&startdate);
+ timet_enddate = mktime(&enddate);
+
+ // I'm not quite sure what behavior I want to happen if
+ // someone reverses the start and end dates, and one of them is invalid.
+ // for now, if there is a completely invalid date on the start or end
+ // date, I will force the start date to time_t 0, and the end date to
+ // the maximum that can be handled by a time_t.
+
+ if(timet_startdate < 0)
+ timet_startdate = 0;
+ if(timet_enddate < 0)
+ timet_enddate = eternity;
+
+ // what if the user did something really goofy like choose an end date
+ // that's before the start date
+
+ if(timet_enddate < timet_startdate) // if so, then swap them so they are in order
+ {
+ time_t timet_temp = timet_enddate;
+ timet_enddate = timet_startdate;
+ timet_startdate = timet_temp;
+ }
+ }
+ else // no date was specified, so plug in some defaults
+ {
+ timet_startdate = 0;
+ timet_enddate = eternity;
+ }
+
+ // ... MG
+
+
+ URLSeedScore adjustments(*config);
+
+ // If we knew where to pass it, this would be a good place to pass
+ // on errors from adjustments.ErrMsg().
+
+// Deal with all collections
+//
+ selected_collections->Start_Get();
+ Collection *collection= NULL;
+ while ((collection = (Collection *) selected_collections->Get_NextElement()))
+ {
+ ResultList *results = collection->getResultList();
+ if (results == NULL)
+ continue;
+
+ results->Start_Get();
+ while ((cpid = results->Get_Next()))
+ {
+ int id = atoi(cpid);
+
+ // DocumentRef *thisRef = docDB[id];
+
+ DocMatch *dm = results->find(cpid);
+ Collection *collection = NULL;
+ if (dm)
+ collection = dm->collection;
+ if (collection == NULL) continue;
+ DocumentRef *thisRef = collection->getDocumentRef(id);
+
+ //
+ // If it wasn't there, then ignore it
+ //
+ if (thisRef == 0)
+ {
+ continue;
+ }
+
+ url = thisRef->DocURL();
+ HtURLRewriter::instance()->replace(url);
+ if (!includeURL(url.get()))
+ {
+ // Get rid of it to free the memory!
+ delete thisRef;
+
+ continue;
+ }
+
+ // Code added by Mike Grommet for date search ranges
+ // check for valid date range. toss it out if it isn't relevant.
+ if ((timet_startdate > 0 || timet_enddate < eternity) &&
+ (thisRef->DocTime() < timet_startdate || thisRef->DocTime() > timet_enddate))
+ {
+ delete thisRef;
+ continue;
+ }
+
+ thisMatch = ResultMatch::create();
+ thisMatch->setID(id);
+ thisMatch->setCollection(collection);
+
+ //
+ // Assign the incomplete score to this match. This score was
+ // computed from the word database only, no excerpt context was
+ // known at that time, or info about the document itself,
+ // so this still needs to be done.
+ //
+
+ // Moved up: DocMatch *dm = results->find(cpid);
+ double score = dm->score;
+
+ // We need to scale based on date relevance and backlinks
+ // Other changes to the score can happen now
+ // Or be calculated by the result match in getScore()
+
+ // This formula derived through experimentation
+ // We want older docs to have smaller values and the
+ // ultimate values to be a reasonable size (max about 100)
+
+ base_score = score;
+ if (date_factor != 0.0)
+ {
+
+// Macro for calculating the date factor (31536000 is the number of
+// seconds in a 365 days year). The formula gives less weight
+// as the distance between the date document and the current time
+// increases (the absolute value is for documents with future date)
+#define DATE_FACTOR(df, n, dd) ((df) * 100 / (1+(double)(abs((n) - (dd)) / 31536000)))
+ date_score = DATE_FACTOR(date_factor, now, thisRef->DocTime());
+ score += date_score;
+ }
+
+ if (backlink_factor != 0.0)
+ {
+ int links = thisRef->DocLinks();
+ if (links == 0)
+ links = 1; // It's a hack, but it helps...
+
+ backlink_score = backlink_factor
+ * (thisRef->DocBackLinks() / (double)links);
+ score += backlink_score;
+ }
+
+ if (debug) {
+ cerr << thisRef->DocURL() << "\n";
+ }
+
+ thisMatch->setTime(thisRef->DocTime());
+ thisMatch->setTitle(thisRef->DocTitle());
+
+ score = adjustments.adjust_score(score, thisRef->DocURL());
+
+ score = log(1.0 + score);
+ thisMatch->setScore(score);
+ thisMatch->setAnchor(dm->anchor);
+
+ //
+ // Append this match to our list of matches.
+ //
+ if (score > 0.0)
+ matches.Add(thisMatch, thisRef->DocURL());
+
+ // Get rid of it to free the memory!
+ delete thisRef;
+
+ if (debug)
+ {
+ cerr << " base_score " << base_score << " date_score " << date_score << " backlink_score " << backlink_score << "\n";
+ cerr << " score " << score << "(" << thisMatch->getScore() << "), maxScore " << maxScore <<", minScore " << minScore << endl;
+ }
+
+ if (maxScore < score)
+ {if(debug) cerr << "Set maxScore = score" <<endl;
+ maxScore = score;
+ }
+ if (minScore > score && score > 0.0)
+ {if(debug) cerr << "Set minScore = score" <<endl;
+ minScore = score;
+ }
+ }
+ }
+
+ //
+ // Each sub-area is then sorted by relevance level.
+ //
+ List *matches_part; // Outside of loop to keep for-scope warnings away.
+ for (matches_part = matches.Get_First();
+ matches_part != 0;
+ matches_part = matches.Get_Next())
+ sort(matches_part);
+
+ // Then all sub-lists are concatenated and put in a new list.
+ return matches.JoinedLists();
+}
+
+//*****************************************************************************
+// Build the excerpt shown for one result match.  Returns a newly
+// allocated String owned by the caller.  On return, 'first' holds the
+// offset of the first matched word within the document head, 0 when the
+// top of the document is shown, or a negative value when no excerpt is
+// available.
+String *
+Display::excerpt(ResultMatch *match, DocumentRef *ref, String urlanchor, int fanchor, int &first)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    // It is necessary to keep alive the String you .get() a char * from,
+    // as long as you use the char *.
+
+    //String head_string;
+
+    char *head;
+    int use_meta_description=0;
+    Collection *collection = match->getCollection();
+
+    if (config->Boolean("use_meta_description",0)
+        && strlen(ref->DocMetaDsc()) != 0)
+    {
+        // Set the head to point to description
+        head = ref->DocMetaDsc();
+        use_meta_description = 1;
+    }
+    else
+    {
+        // docDB.ReadExcerpt(*ref);
+        // Pull the stored excerpt text into the DocumentRef.
+        collection->ReadExcerpt(*ref);
+        head = ref->DocHead(); // head points to the top
+    }
+
+    //head_string = HtSGMLCodec::instance()->decode(head);
+    //head = head_string.get();
+
+    int which, length;
+    char *temp = head;
+    String part;
+    String *text = new String("");
+
+    // The pattern matching all search words lives on the collection;
+    // without it no excerpt positioning is possible, so return empty.
+    StringMatch *allWordsPattern = NULL;
+    if (collection)
+        allWordsPattern = collection->getSearchWordsPattern();
+    if (!allWordsPattern)
+        return text;
+
+    // htsearch displays the description when:
+    // 1) a description has been found
+    // 2) the option "use_meta_description" is set to true
+    // If previous conditions are false and "excerpt_show_top" is set to true
+    // it shows the whole head. Else, it acts as default.
+
+    if (config->Boolean("excerpt_show_top", 0) || use_meta_description ||
+        !allWordsPattern->hasPattern())
+        first = 0;
+    else
+        first = allWordsPattern->FindFirstWord(head, which, length);
+
+    if (first < 0 && config->Boolean("no_excerpt_show_top"))
+        first = 0; // No excerpt, but we want to show the top.
+
+    if (first < 0)
+    {
+        //
+        // No excerpt available, don't show top, so display message
+        //
+        if (!config->Find("no_excerpt_text").empty())
+        {
+            *text << config->Find("no_excerpt_text");
+        }
+    }
+    else
+        if ( first == 0 || config->Value( "max_excerpts" ) == 1 )
+    {
+        // Single-excerpt case (or the match sits at the very top).
+        int headLength = strlen(head);
+        int length = config->Value("excerpt_length", 50);
+        char *start;
+        char *end;
+        WordType type(*config);
+
+        if (!config->Boolean("add_anchors_to_excerpt"))
+            // negate flag if it's on (anchor available)
+            fanchor = 0;
+
+        //
+        // Figure out where to start the excerpt. Basically we go back
+        // half the excerpt length from the first matched word
+        //
+        start = &temp[first] - length / 2;
+        if (start < temp)
+            start = temp;
+        else
+        {
+            *text << config->Find("start_ellipses");
+            // advance to a word boundary so we don't begin mid-word
+            while (*start && type.IsStrictChar(*start))
+                start++;
+        }
+
+        //
+        // Figure out the end of the excerpt.
+        //
+        end = start + length;
+        if (end > temp + headLength)
+        {
+            end = temp + headLength;
+            *text << hilight(match, start, urlanchor, fanchor);
+        }
+        else
+        {
+            // run out to the end of the current word before truncating
+            while (*end && type.IsStrictChar(*end))
+                end++;
+            // NOTE(review): truncates the head buffer in place and,
+            // unlike buildExcerpts(), never restores the clobbered
+            // character.  The DocumentRef is discarded after display,
+            // but confirm nothing reads the head afterwards.
+            *end = '\0';
+            *text << hilight(match, start, urlanchor, fanchor);
+            *text << config->Find("end_ellipses");
+        }
+    }
+    else
+    {
+        // Multiple excerpts requested; delegate to buildExcerpts().
+        *text = buildExcerpts( allWordsPattern, match, head, urlanchor, fanchor );
+    }
+
+    return text;
+}
+//
+//*****************************************************************************
+// Handle cases where multiple document excerpts are requested.
+//
+const String
+Display::buildExcerpts( StringMatch *allWordsPattern, ResultMatch* match, char *head, String urlanchor, int fanchor )
+{
+    // Build up to "max_excerpts" excerpts, one per successive match of
+    // the search-word pattern in 'head', separated by <br> tags.
+    // Returns the concatenated, highlighted excerpt text by value.
+    HtConfiguration* config= HtConfiguration::config();
+    if ( !config->Boolean( "add_anchors_to_excerpt" ) )
+    {
+        fanchor = 0;
+    }
+
+    int headLength = strlen( head );
+    int excerptNum = config->Value( "max_excerpts", 1 );
+    int excerptLength = config->Value( "excerpt_length", 50 );
+    int lastPos = 0;   // offset where the next pattern search resumes
+    int curPos = 0;    // absolute offset of the current match
+
+    String text;
+
+    for ( int i = 0; i < excerptNum; ++i )
+    {
+        int which, termLength;
+
+        int nextPos = allWordsPattern->FindFirstWord( head + lastPos,
+                                which, termLength );
+
+        if ( nextPos < 0 )
+        {
+            // Ran out of matching terms
+            break;
+        }
+        else
+        {
+            // Determine offset from beginning of head
+            curPos = lastPos + nextPos;
+        }
+
+        // Slip a break in since there is another excerpt coming
+        if ( i != 0 )
+        {
+            text << "<br>";
+        }
+
+        // Determine where excerpt starts
+        // (back up half the excerpt length from the matched word)
+        char *start = &head[curPos] - excerptLength / 2;
+
+        if ( start < head )
+        {
+            start = head;
+        }
+        else
+        {
+            text << config->Find("start_ellipses");
+
+            // skip forward to a word boundary
+            while ( *start && HtIsStrictWordChar( *start ) )
+            {
+                start++;
+            }
+        }
+
+        // Determine where excerpt ends
+        char *end = start + excerptLength;
+
+        if ( end > head + headLength )
+        {
+            end = head + headLength;
+
+            text << hilight( match, start, urlanchor, fanchor );
+        }
+        else
+        {
+            // run out to the end of the current word
+            while ( *end && HtIsStrictWordChar( *end ) )
+            {
+                end++;
+            }
+
+            // Save end char so that it can be restored
+            char endChar = *end;
+
+            *end = '\0';
+
+            text << hilight(match, start, urlanchor, fanchor);
+            text << config->Find("end_ellipses");
+
+            *end = endChar;
+        }
+
+        // No more words left to examine in head
+        if ( (lastPos = curPos + termLength) > headLength )
+            break;
+    }
+
+    return text;
+}
+
+//*****************************************************************************
+String
+Display::hilight(ResultMatch *match, const String& str_arg, const String& urlanchor, int fanchor)
+{
+    // Return a copy of str_arg with every search-word occurrence
+    // wrapped in the configured highlight markup; the first occurrence
+    // is additionally turned into an anchor link when 'fanchor' is set.
+    // All text is passed through the SGML codec on the way out.
+    HtConfiguration* config= HtConfiguration::config();
+    const String start_highlight = config->Find("start_highlight");
+    const String end_highlight = config->Find("end_highlight");
+    const String anchor_target = config->Find("anchor_target");
+    const char *str = str_arg;
+    String result;
+    int pos = 0;
+    int which, length;
+    WeightWord *ww;
+    int first = 1;
+    String s;
+// Encode a raw (pointer, length) chunk through the SGML codec; the
+// scratch String 's' keeps the data alive for the returned temporary.
+#define SGMLencodedChars(p, l) (s = 0, s.append(p, l), HtSGMLCodec::instance()->decode(s))
+
+    result = 0;
+    Collection *collection = match->getCollection();
+    StringMatch *allWordsPattern = NULL;
+    if (collection)
+        allWordsPattern = collection->getSearchWordsPattern();
+    List *searchWords = NULL;
+    if (collection)
+        searchWords = collection->getSearchWords();
+    if (!allWordsPattern || !searchWords)
+        return result;
+
+    while (allWordsPattern->hasPattern() &&
+           (pos = allWordsPattern->FindFirstWord(str, which, length)) >= 0)
+    {
+        //result.append(str, pos);
+        result << SGMLencodedChars(str, pos);
+        // NOTE(review): 'ww' is looked up but never used afterwards --
+        // confirm whether per-word markup was intended here.
+        ww = (WeightWord *) (*searchWords)[which];
+        result << start_highlight;
+        if (first && fanchor)
+        {
+            result << "<a ";
+            if (anchor_target.length() > 0)
+                result << "target=\"" << anchor_target << "\" ";
+            result << "href=\"" << urlanchor << "\">";
+        }
+        //result.append(str + pos, length);
+        result << SGMLencodedChars(str + pos, length);
+        if (first && fanchor)
+            result << "</a>";
+        result << end_highlight;
+        str += pos + length;
+        first = 0;
+    }
+    //result.append(str);
+    result << SGMLencodedChars(str, strlen(str));
+    return result;
+}
+
+//*****************************************************************************
+// Sort a list of ResultMatch objects in place, using the comparison
+// function the matches themselves supply (chosen from the 'sort'
+// configuration attribute).  A sort value starting with "rev" reverses
+// the resulting order.
+void
+Display::sort(List *matches)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    int numberOfMatches = matches->Count();
+    int i;
+
+    if (numberOfMatches <= 1)
+        return;
+
+    // Copy the elements into a plain array so qsort() can operate.
+    ResultMatch **array = new ResultMatch*[numberOfMatches];
+    for (i = 0; i < numberOfMatches; i++)
+    {
+        array[i] = (ResultMatch *)(*matches)[i];
+    }
+    // Empty the list without destroying the elements; they are re-added
+    // below in sorted order.
+    matches->Release();
+
+    qsort((char *) array, numberOfMatches, sizeof(ResultMatch *),
+          array[0]->getSortFun());
+
+    const String st = config->Find("sort");
+    if (!st.empty() && mystrncasecmp("rev", st, 3) == 0)
+    {
+        for (i = numberOfMatches; --i >= 0; )
+            matches->Add(array[i]);
+    }
+    else
+    {
+        for (i = 0; i < numberOfMatches; i++)
+            matches->Add(array[i]);
+    }
+    delete [] array;
+}
+
+//*****************************************************************************
+// Log one line per query to syslog: remote host, config name, match
+// method, the raw and logical word lists, match count / page size, the
+// page number and the referrer.  POSIX-only (compiled out under MSVC).
+void
+Display::logSearch(int page, List *matches)
+{
+//Note: This is Posix and dependent on a running syslogd..
+//does not work for Win32
+//TODO: Look into using native windows system logs instead
+#ifndef _MSC_VER /* _WIN32 */
+    HtConfiguration* config= HtConfiguration::config();
+    // Currently unused time_t t;
+    int nMatches = 0;
+    int level = LOG_LEVEL;
+    int facility = LOG_FACILITY;
+    // getenv() may return NULL; the pointers are const because they may
+    // be re-pointed at string literals below (a string literal does not
+    // convert to a writable char* in standard C++).
+    const char *host = getenv("REMOTE_HOST");
+    const char *ref = getenv("HTTP_REFERER");
+
+    if (host == NULL)
+        host = getenv("REMOTE_ADDR");
+    if (host == NULL)
+        host = "-";
+
+    if (ref == NULL)
+        ref = "-";
+
+    if (matches)
+        nMatches = matches->Count();
+
+    openlog("htsearch", LOG_PID, facility);
+    syslog(level, "%s [%s] (%s) [%s] [%s] (%d/%s) - %d -- %s\n",
+           host,
+           input->exists("config") ? input->get("config") : "default",
+           (const char*)config->Find("match_method"),
+           input->exists("words") ? input->get("words") : "",
+           logicalWords.get(),
+           nMatches, (const char*)config->Find("matches_per_page"),
+           page, ref
+           );
+#endif
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Display.h b/debian/htdig/htdig-3.2.0b6/htsearch/Display.h
new file mode 100644
index 00000000..2d144ab1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Display.h
@@ -0,0 +1,238 @@
+//
+// Display.h
+//
+// Display: Takes results of search and fills in the HTML templates
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Display.h,v 1.27 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _Display_h_
+#define _Display_h_
+
+#include "Object.h"
+#include "ResultList.h"
+#include "ResultMatch.h"
+#include "TemplateList.h"
+#include "cgi.h"
+#include "StringMatch.h"
+#include "List.h"
+#include "DocumentDB.h"
+#include "Database.h"
+#include "Dictionary.h"
+#include "HtRegex.h"
+
+// Display takes the merged search results, scores and sorts them, and
+// expands them through the HTML templates to produce the result pages.
+class Display : public Object
+{
+public:
+    //
+    // Construction/Destruction
+    //
+    //    Display(const String& docFile, const String& indexFile, const String& excerptFile);
+
+    Display(Dictionary *selected_collections);
+    ~Display();
+
+    void setStartTemplate(const String& templateName);
+    void setMatchTemplate(const String& templateName);
+    void setEndTemplate(const String& templateName);
+
+    // inline void setResults(ResultList *results);
+    // inline void setSearchWords(List *searchWords);
+    inline void setLimit(HtRegex *);
+    inline void setExclude(HtRegex *);
+    // inline void setAllWordsPattern(StringMatch *);
+    inline void setLogicalWords(char *);
+    inline void setOriginalWords(char *);
+    inline void setCGI(cgi *);
+
+    void display(int pageNumber);
+    void displayMatch(ResultMatch *match, DocumentRef *ref, int current);
+    void displayHTTPheaders();
+    void displayHeader();
+    void displayFooter();
+    void displayNomatch();
+    void displaySyntaxError(const String &);
+
+    int hasTemplateError() {return templateError;}
+
+protected:
+    //
+    // Multiple database support
+    //
+    Dictionary *selected_collections;
+
+    //
+    // Search Policy
+    // NOTE(review): no initialization of this pointer is visible in
+    // this header -- confirm it is set in Display.cc before use.
+    char *search_policy;
+
+    //
+    // The list of search results.
+    //
+    // ResultList *results;
+
+    //
+    // The database that contains documents.
+    //
+    // DocumentDB docDB;
+
+    //
+    // A list of words that we are searching for
+    //
+    // List *searchWords;
+
+    //
+    // Pattern that all result URLs must match or exclude
+    //
+    HtRegex *limitTo;
+    HtRegex *excludeFrom;
+
+    //
+    // Pattern of all the words
+    //
+    // StringMatch *allWordsPattern;
+
+    //
+    // Variables for substitution into text are stored in a dictionary
+    //
+    Dictionary vars;
+
+    //
+    // Since the creation of excerpts is somewhat time consuming, we will
+    // only compute them if they're actually going to be used. This is the
+    // flag that tells us if we will need the excerpt.
+    //
+    int needExcerpt;
+
+    //
+    // Since we might have errors we cannot recover from, this tells us
+    // what happened.
+    //
+    int templateError;
+
+    //
+    // To allow the result templates to be dependant on the match URL, we need
+    // the following:
+    //
+    StringMatch URLtemplate;
+    List URLtemplateList;
+
+    //
+    // To allow the star images to be dependant on the match URL, we need
+    // the following:
+    //
+    StringMatch URLimage;
+    List URLimageList;
+
+    //
+    // Maximum number of stars to display
+    //
+    int maxStars;
+    // Score range of the current match list, tracked while building it
+    // (presumably used to scale the star display -- see generateStars).
+    double maxScore;
+    double minScore;
+
+    //
+    // For display, we have different versions of the list of words.
+    //
+    String logicalWords;
+    String originalWords;
+
+    //
+    // To be able to recreate the URL that will get to us again, we need
+    // the info from the HTML form that called us.
+    //
+    cgi *input;
+
+    //
+    // Match output is done through templates. This is the interface to these
+    // templates.
+    //
+    TemplateList templates;
+    Template *currentTemplate;
+
+    //
+    // Methods...
+    //
+    List *buildMatchList();
+    void sort(List *);
+
+    int includeURL(const String&);
+    String *readFile(const String&);
+    void expandVariables(const String&);
+    void outputVariable(const String&);
+    String *excerpt(ResultMatch *match, DocumentRef *ref,
+                    String urlanchor, int fanchor, int &first);
+    const String buildExcerpts(StringMatch *allWordsPattern,
+                               ResultMatch *match, char *head,
+                               String urlanchor, int fanchor );
+    String hilight(ResultMatch *match, const String& str,
+                   const String& urlanchor, int fanchor);
+    void setupTemplates();
+    void setupImages();
+    String *generateStars(DocumentRef *, int);
+    void displayParsedFile(const String&);
+    void setVariables(int, List *);
+    void createURL(String &, int);
+    void logSearch(int, List *);
+};
+
+//*****************************************************************************
+// Trivial inline setters installing the URL include/exclude patterns
+// and the word/CGI context used while formatting results.
+inline void
+Display::setLimit(HtRegex *limit)
+{
+    limitTo = limit;
+}
+
+inline void
+Display::setExclude(HtRegex *exclude)
+{
+    excludeFrom = exclude;
+}
+
+#if 0
+// Disabled historical setters, kept for reference only.
+inline void
+Display::setAllWordsPattern(StringMatch *pattern)
+{
+    allWordsPattern = pattern;
+}
+
+inline void
+Display::setResults(ResultList *results)
+{
+    this->results = results;
+}
+
+inline void
+Display::setSearchWords(List *searchWords)
+{
+    this->searchWords = searchWords;
+}
+#endif
+
+inline void
+Display::setLogicalWords(char *s)
+{
+    // NOTE(review): the raw query words are stored into template
+    // variables as-is here; confirm they are HTML/SGML-escaped before
+    // template expansion to avoid script injection in result pages.
+    logicalWords = s;
+    vars.Add("LOGICAL_WORDS", new String(logicalWords));
+}
+
+inline void
+Display::setOriginalWords(char *s)
+{
+    originalWords = s;
+    vars.Add("WORDS", new String(originalWords));
+}
+
+inline void
+Display::setCGI(cgi *aCgi)
+{
+    input = aCgi;
+}
+
+#endif
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.cc b/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.cc
new file mode 100644
index 00000000..575c82aa
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.cc
@@ -0,0 +1,222 @@
+// DocMatch.cc
+//
+// DocMatch: Data object only. Contains information related to a given
+// document that was matched by a search. For instance, the
+// score of the document for this search.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: DocMatch.cc,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "DocMatch.h"
+#include "HtConfiguration.h"
+#include "HtWordReference.h"
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+//*******************************************************************************
+// DocMatch::DocMatch()
+//
+
+
+//*******************************************************************************
+// DocMatch::~DocMatch()
+//
+DocMatch::~DocMatch()
+{
+    // Free the owned location list; 'locations' is allocated with new
+    // in every constructor, so the previously empty destructor leaked
+    // it (together with its Location elements).
+    delete locations;
+}
+
+//
+// merge with another match
+// sets anchor to the lower value
+// merges location lists
+//
+void
+DocMatch::Merge(const DocMatch &match)
+{
+    // Keep the smaller (earlier) anchor of the two matches.
+    if(match.anchor < anchor)
+    {
+        anchor = match.anchor;
+    }
+    AddLocations(match.GetLocations());
+}
+
+//
+// merge another location list into ours, keeping the result ordered by
+// 'from' position and collapsing exact [from,to] duplicates into one
+// entry whose weight is the sum of both.  Elements of 'locs' are
+// copied; our own elements are transferred (or deleted when merged),
+// so nothing leaks.
+//
+void
+DocMatch::AddLocations(const List *locs)
+{
+    List *merge = new List;
+    ListCursor c;
+
+    locations->Start_Get();
+    locs->Start_Get(c);
+    Location *a = (Location *)locations->Get_Next();
+    Location *b = (Location *)locs->Get_Next(c);
+    while(a && b)
+    {
+        if(a->from < b->from)
+        {
+            // transfer ownership of our own element
+            merge->Add(a);
+            a = (Location *)locations->Get_Next();
+        }
+        else if(a->from > b->from)
+        {
+            merge->Add(new Location(*b));
+            b = (Location *)locs->Get_Next(c);
+        }
+        else // (a->from == b->from)
+        {
+            if(a->to < b->to)
+            {
+                // keep both, ours first; reuse 'a' instead of copying
+                // it (copying leaked the original element)
+                merge->Add(a);
+                merge->Add(new Location(*b));
+            }
+            else if(a->to > b->to)
+            {
+                merge->Add(new Location(*b));
+                merge->Add(a);
+            }
+            else // (a->to == b->to)
+            {
+                // identical span: combine the two weights
+                merge->Add(new Location(
+                        a->from,
+                        a->to,
+                        a->flags,
+                        a->weight + b->weight));
+                delete a;       // was leaked before
+            }
+            a = (Location *)locations->Get_Next();
+            b = (Location *)locs->Get_Next(c);
+        }
+    }
+    while(a)
+    {
+        merge->Add(a);
+        a = (Location *)locations->Get_Next();
+    }
+    while(b)
+    {
+        merge->Add(new Location(*b));
+        b = (Location *)locs->Get_Next(c);
+    }
+    // Release() empties the old list without destroying the elements
+    // (each was either transferred to 'merge' or deleted above).
+    locations->Release();
+    delete locations;
+    locations = merge;
+}
+
+//
+// set the location list
+//
+// Takes ownership of 'locs'; the previous list (and its Location
+// elements) is destroyed.
+void
+DocMatch::SetLocations(List *locs)
+{
+    delete locations;
+    locations = locs;
+}
+
+//
+// copy constructor, copies locations
+//
+DocMatch::DocMatch(const DocMatch &other)
+{
+    // Reset the score sentinel so GetScore() recomputes it for the
+    // copy's own location weights.
+    score = -1.0;
+    //score = other.score;
+    id = other.id;
+    anchor = other.anchor;
+    orMatches = other.orMatches;    // was left uninitialized
+    collection = other.collection;  // was left uninitialized
+    locations = new List;
+    AddLocations(other.GetLocations());
+}
+
+//
+// assign the given weight to every location in the list
+//
+void
+DocMatch::SetWeight(double weight)
+{
+    Location *entry;
+
+    locations->Start_Get();
+    while((entry = (Location *)locations->Get_Next()))
+    {
+        entry->weight = weight;
+    }
+}
+
+//
+// debug dump
+//
+// Print the match id, every location span/weight/flags, and the
+// computed score to stderr.
+void
+DocMatch::Dump()
+{
+    cerr << "DocMatch id: " << id << " {" << endl;
+    locations->Start_Get();
+    for(int i = 0; i < locations->Count(); i++)
+    {
+        Location *loc = (Location *)locations->Get_Next();
+        cerr << "location [" << loc->from;
+        cerr << ", " << loc->to << "] ";
+        cerr << "weight " << loc->weight;
+        cerr << " flags " << loc->flags;
+        cerr << endl;
+    }
+    // GetScore() computes and caches the score on first use.
+    cerr << "score: " << GetScore() << endl << "}" << endl;
+}
+
+// Compute (once) and return the match score: the weighted sum over all
+// locations of the configured per-context factors.
+double
+DocMatch::GetScore()
+{
+    HtConfiguration* config= HtConfiguration::config();
+    // The factors are read once and cached for the process lifetime
+    // (function-local statics); later configuration changes will not
+    // affect scoring.
+    static double text_factor = config->Double("text_factor", 1);
+    static double caps_factor = config->Double("caps_factor", 1);
+    static double title_factor = config->Double("title_factor", 1);
+    static double heading_factor = config->Double("heading_factor", 1);
+    static double keywords_factor = config->Double("keywords_factor", 1);
+    static double meta_desc_factor = config->Double("meta_description_factor", 1);
+    static double author_factor = config->Double("author_factor", 1);
+    static double description_factor = config->Double("description_factor", 1);
+    static double url_text_factor = config->Double("url_text_factor", 1);
+
+    // -1.0 is the "not yet computed" sentinel set by the constructors;
+    // the computed value is cached in 'score' afterwards.
+    if (score == -1.0)
+    {
+        score = 0.0;
+
+        double locresult = 0.0;
+        ListCursor c;
+        locations->Start_Get(c);
+        Location *loc = (Location *)locations->Get_Next(c);
+        while(loc)
+        {
+            locresult = 0.0;
+            // FLAG_TEXT is tested with == (plain body text carries no
+            // other flag bits); the rest are bitmask tests.
+            if (loc->flags == FLAG_TEXT) locresult += text_factor;
+            if (loc->flags & FLAG_CAPITAL) locresult += caps_factor;
+            if (loc->flags & FLAG_TITLE) locresult += title_factor;
+            if (loc->flags & FLAG_HEADING) locresult += heading_factor;
+            if (loc->flags & FLAG_KEYWORDS) locresult += keywords_factor;
+            if (loc->flags & FLAG_DESCRIPTION) locresult += meta_desc_factor;
+            if (loc->flags & FLAG_AUTHOR) locresult += author_factor;
+            if (loc->flags & FLAG_LINK_TEXT) locresult += description_factor;
+            if (loc->flags & FLAG_URL) locresult += url_text_factor;
+
+            score += loc->weight * locresult;
+            loc = (Location *)locations->Get_Next(c);
+        }
+    }
+    return score;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.h b/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.h
new file mode 100644
index 00000000..798aadb3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/DocMatch.h
@@ -0,0 +1,109 @@
+//
+// DocMatch.h
+//
+// DocMatch: Data object only. Contains information related to a given
+// document that was matched by a search. For instance, the
+// score of the document for this search.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: DocMatch.h,v 1.9 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _DocMatch_h_
+#define _DocMatch_h_
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "Object.h"
+#include "List.h"
+
+class Collection;
+
+//
+// an element of the DocMatch location list
+//
+// Describes one occurrence of a search word in a document: the
+// [from, to] position span, the context flags (title, heading, ...)
+// and the weight contributed by the matching query term.
+struct Location : public Object
+{
+    Location(int f, int t, unsigned int l, double w = 1.0) :
+        from(f), to(t), flags(l), weight(w) {}
+    Location(const Location &l) :
+        from(l.from), to(l.to), flags(l.flags), weight(l.weight) {}
+    int from;
+    int to;
+    unsigned int flags;
+    double weight;
+};
+
+//
+// an element of a ResultList
+//
+// Associates a document id with the list of word-hit Locations found
+// in it, a lazily computed score, and the anchor index of the first
+// hit.  Owns its location list.
+class DocMatch : public Object
+{
+public:
+    // default constructor
+    DocMatch() :
+        locations(new List),
+        score(-1.0),            // -1.0 == "score not yet computed"
+        id(0),
+        anchor(0),
+        collection(0) {}
+
+    // copy constructor
+    DocMatch(const DocMatch &);
+
+    // destructor
+    ~DocMatch();
+
+    // match join
+    void Merge(const DocMatch &);
+
+    // score accessor
+    double GetScore();
+    void SetScore(double);
+
+    // doc id accessors
+    int GetId() const { return id; }
+    void SetId(int x) { id = x; }
+
+    // anchor accessors
+    int GetAnchor() const { return anchor; }
+    void SetAnchor(int x) { anchor = x; }
+
+    // location list accessors
+    const List *GetLocations() const { return locations; }
+    void SetLocations(List *);
+    void AddLocations(const List *);
+
+    // add one location to the list
+    // use with caution -- does not ensure {ordered}
+    void AddLocation(Location *x) { locations->Add(x); }
+
+    // set weight of all locations
+    void SetWeight(double weight);
+
+    // debug
+    void Dump();
+
+private:
+    List *locations;
+// the rest should be private:
+// but is already used by the old htsearch
+public:
+
+    double score;
+    int id;
+    int anchor;
+    // NOTE(review): orMatches is not initialized by any constructor
+    // visible here -- confirm all readers set it first.
+    short int orMatches;
+    Collection *collection; // Multiple databases
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.cc
new file mode 100644
index 00000000..1bbb4b3a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.cc
@@ -0,0 +1,53 @@
+//
+// ExactWordQuery.cc
+//
+// ExactWordQuery: A Query tree leaf object. Wraps a database access
+// that generates ResultLists for word matches.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ExactWordQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "ExactWordQuery.h"
+#include "WordSearcher.h"
+
+//
+// the searcher object used by all instances
+// of ExactWord
+//
+// Definition of the static member; must be installed via SetSearcher()
+// before any query is evaluated, otherwise Evaluate() finds nothing.
+WordSearcher *
+ExactWordQuery::searcher = 0;
+
+//
+// set the weight of the matches to my weight
+//
+void
+ExactWordQuery::AdjustWeight(ResultList &results)
+{
+    // Propagate this query term's weight to every location in the list.
+    results.SetWeight(weight);
+}
+
+//
+// tell the searcher to fetch my word in the database
+// return 0 if no matches
+//
+ResultList *
+ExactWordQuery::Evaluate()
+{
+    ResultList *result = 0;
+    if(searcher)
+    {
+        result = searcher->Search(word);
+    }
+    // An empty, non-ignored list means "no match": free it and return
+    // 0.  Ignored words keep their (empty) list so the boolean
+    // operators can distinguish "ignored" from "not found".
+    if(result && !result->Count() && !result->IsIgnore())
+    {
+        delete result;
+        result = 0;
+    }
+    return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.h
new file mode 100644
index 00000000..fafcb878
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ExactWordQuery.h
@@ -0,0 +1,71 @@
+#ifndef _ExactWordQuery_h_
+#define _ExactWordQuery_h_
+
+//
+// ExactWordQuery.h
+//
+// ExactWordQuery: A Query tree leaf object. Wraps a database access
+// that generates ResultLists for word matches.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ExactWordQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "Query.h"
+
+class WordSearcher;
+
+// Query-tree leaf: evaluates to the ResultList of documents containing
+// exactly 'word', fetched through the shared WordSearcher.
+class ExactWordQuery : public Query
+{
+public:
+    // construct for word w
+    ExactWordQuery(const String &w) :
+        word(w), weight(1.0) {}
+
+    // destruct
+    ~ExactWordQuery() {}
+
+    // set the common db wrapper
+    // (must be called before any query is evaluated; Evaluate()
+    // returns no matches while the searcher is unset)
+    static void SetSearcher(WordSearcher *c) { searcher = c; }
+
+    // weight accessor
+    void SetWeight(double x) { weight = x; }
+    double GetWeight() const { return weight; }
+
+private:
+    // forbidden
+    // (private default construction; note it would leave 'weight'
+    // uninitialized if it were ever reachable)
+    ExactWordQuery() {}
+
+    // go search the db
+    ResultList *Evaluate();
+
+    // set my weight to the list
+    void AdjustWeight(ResultList &);
+
+    // unparse
+    String GetLogicalWords() const { return word; }
+
+    // unique cache index
+    // (used as the key for the query result cache)
+    String GetSignature() const
+        { return String("Exact:")+GetLogicalWords(); }
+
+    // i represent this
+    String word;
+
+    // my weight
+    double weight;
+
+    // db wrapper common to all word queries
+    static WordSearcher *searcher;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/FuzzyExpander.h b/debian/htdig/htdig-3.2.0b6/htsearch/FuzzyExpander.h
new file mode 100644
index 00000000..0544d814
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/FuzzyExpander.h
@@ -0,0 +1,46 @@
+#ifndef _FuzzyExpander_h_
+#define _FuzzyExpander_h_
+
+//
+// FuzzyExpander.h
+//
+// FuzzyExpander: (abstract) root of a family of query factories.
+// They make fuzzy queries for given words
+// and store word weights to results
+// by using the existing fuzzy algorithms
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: FuzzyExpander.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+//
+// for details about the basic architectural pattern see the book:
+// Design Patterns, by the infamous GoF
+// Factory pattern
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "htString.h"
+
+class Query;
+
+// abstract
+// Factory interface: a concrete expander builds the Query sub-tree
+// (exact or fuzzy variants) for a single search word.
+class FuzzyExpander
+{
+public:
+    FuzzyExpander() {}
+    virtual ~FuzzyExpander() {}
+
+    // generate a query for this word
+    virtual Query *MakeQuery(const String &word) = 0;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/GParser.cc b/debian/htdig/htdig-3.2.0b6/htsearch/GParser.cc
new file mode 100644
index 00000000..abf9dbac
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/GParser.cc
@@ -0,0 +1,134 @@
+//
+// GParser.cc
+//
+// GParser: An alternate boolean parser, does not use operator precedence.
+// -- but why is it called G? :-)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: GParser.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "GParser.h"
+#include "OrQuery.h"
+#include "NearQuery.h"
+#include "AndQuery.h"
+#include "NotQuery.h"
+
+// Parse one factor of a boolean query:
+//     factor := word | '"' phrase '"' | '(' expression ')'
+// Returns the parsed sub-tree, or 0 after recording the expected-token
+// message via Expected().
+Query *
+GParser::ParseFactor()
+{
+    Query *result = 0;
+
+    if(token.IsWord())
+    {
+        result = ParseWord();
+    }
+    else if(token.IsQuote())
+    {
+        token.Next();
+        result = ParsePhrase();
+        if(result)
+        {
+            if(token.IsQuote())
+            {
+                token.Next();
+            }
+            else
+            {
+                // unterminated phrase: discard what was parsed
+                Expected("closing \"");
+                delete result;
+                result = 0;
+            }
+        }
+    }
+    else if(token.IsLeftParen())
+    {
+        token.Next();
+        result = ParseExpression();
+        if(result)
+        {
+            if(token.IsRightParen())
+            {
+                token.Next();
+            }
+            else
+            {
+                // unbalanced parenthesis: discard the sub-expression
+                Expected(")");
+                delete result;
+                result = 0;
+            }
+        }
+    }
+    else
+    {
+        Expected("'(', '\"', or a word");
+    }
+    return result;
+}
+
+// Map an operator keyword ("or", "and", "not", "near") to a freshly
+// allocated operator node; returns 0 for an unrecognized keyword.  The
+// caller owns the result.  (A stray unconditional debug print to cerr
+// was removed from here.)
+OperatorQuery *
+GParser::MakeOperatorQuery(const String &op) const
+{
+    OperatorQuery *result = 0;
+    if(op == String("or"))
+    {
+        result = new OrQuery;
+    }
+    else if(op == String("and"))
+    {
+        result = new AndQuery;
+    }
+    else if(op == String("not"))
+    {
+        result = new NotQuery;
+    }
+    else if(op == String("near"))
+    {
+        result = new NearQuery;
+    }
+    return result;
+}
+
+
+// Parse a flat sequence of factors joined by or/and/not/near keywords.
+// There is no operator precedence: whenever the operator keyword
+// changes, the tree built so far becomes the first operand of a new
+// operator node, so "a and b or c" parses as ((a and b) or c).
+// Returns 0 (after freeing any partial tree) when a factor fails to
+// parse.  (An unused local 'List factors;' was removed.)
+Query *
+GParser::ParseExpression()
+{
+    Query *result = 0;
+    String op = "";
+
+    Query *factor = ParseFactor();
+    if(factor)
+    {
+        result = factor;
+    }
+    while(factor && (token.IsOr() || token.IsAnd() || token.IsNot() || token.IsNear()))
+    {
+        if(op != token.Value())
+        {
+            // operator changed: wrap the current tree in a new node
+            Query *previous = result;
+            result = MakeOperatorQuery(token.Value());
+            result->Add(previous);
+            op = token.Value();
+        }
+        token.Next();
+        factor = ParseFactor();
+        if(factor)
+        {
+            result->Add(factor);
+        }
+    }
+    if(!factor && result)
+    {
+        // a factor failed to parse; discard the partial expression
+        delete result;
+        result = 0;
+    }
+    return result;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/GParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/GParser.h
new file mode 100644
index 00000000..d66bdcd2
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/GParser.h
@@ -0,0 +1,47 @@
+#ifndef _GParser_h_
+#define _GParser_h_
+
+//
+// GParser.h
+//
+// GParser: An alternate boolean parser, does not use operator precedence.
+// -- but why is it called G? :-)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: GParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryParser.h"
+#include "BooleanLexer.h"
+
+class OperatorQuery;
+
+// Recursive-descent parser over a BooleanLexer token stream.  All binary
+// operators bind at the same level and chain left-to-right; parentheses
+// can still force grouping.
+class GParser : public QueryParser
+{
+public:
+ GParser() {}
+ ~GParser() {}
+
+private:
+ // apply the single-level syntax
+ Query *ParseExpression();
+
+ // apply the factor syntax (word, quoted phrase, or parenthesized
+ // sub-expression)
+ Query *ParseFactor();
+
+ // return the adequate operator for an operator keyword
+ OperatorQuery *MakeOperatorQuery(const String &op) const;
+
+ // give the parent access to the lexical analyzer
+ QueryLexer &Token() { return token; }
+
+ // the lexer
+ BooleanLexer token;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.cc b/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.cc
new file mode 100644
index 00000000..d372d22c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.cc
@@ -0,0 +1,215 @@
+//
+// HtURLSeedScore.cc
+//
+// URLSeedScore:
+// Holds a list of configured adjustments to be applied on a given
+// score and given URL.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: HtURLSeedScore.cc,v 1.6 2004/05/28 13:15:24 lha Exp $
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "StringList.h"
+#include "HtRegex.h"
+#include "HtURLSeedScore.h"
+
+#include <stdio.h>
+#include <ctype.h>
+
+// This class is only used in private members of URLSeedScore.
+// The OO-right thing would be to nest this inside the private
+// declaration of HtURLSeedScore, but that would cause portability
+// problems according to
+// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>.
+
+class ScoreAdjustItem : public Object
+{
+public:
+ // Construct from a string applicable to StringMatch, and a string to
+ // parse for a formula.
+ ScoreAdjustItem(String &, String &);
+
+ ~ScoreAdjustItem();
+
+ // Does this item match?
+ inline bool Match(const String &s) { return match.match(s, 1, 0) != 0; }
+
+ // Return the argument adjusted according to this item.
+ double adjust_score(double orig)
+ { return orig*my_mul_factor + my_add_constant; }
+
+ // Error in parsing? Message given here if non-empty string.
+ // NOTE(review): myErrMsg is static, i.e. shared by every instance, so
+ // this reflects the most recently constructed item -- check it
+ // immediately after construction.
+ String& ErrMsg() { return myErrMsg; }
+
+private:
+ double my_add_constant; // M in the ([*]N[ ]*)?[+]?M formula
+ double my_mul_factor; // N in the ([*]N[ ]*)?[+]?M formula
+ HtRegex match; // compiled url_seed_score regex
+
+ static String myErrMsg;
+
+ // These member functions are not supposed to be implemented, but
+ // mentioned here as private so the compiler will not generate them if
+ // someone puts in buggy code that would use them.
+ ScoreAdjustItem();
+ ScoreAdjustItem(const ScoreAdjustItem &);
+ void operator= (const ScoreAdjustItem &);
+};
+
+// Definition of myErrMsg.
+String ScoreAdjustItem::myErrMsg("");
+
+ScoreAdjustItem::ScoreAdjustItem(String &url_regex, String &formula)
+{
+    // Parse one (regex, formula) pair from the url_seed_score attribute.
+    // The formula has the form ([*]N[ ]*)?[+]?M: an optional
+    // multiplicative factor followed by an optional additive constant.
+    // On a parse error, myErrMsg is set and the object must be discarded.
+
+    // myErrMsg is shared (static) between all instances; clear any
+    // message left over from a previously failed item so that ErrMsg()
+    // reflects *this* construction only.
+    myErrMsg = "";
+
+    double mul_factor = 1;
+    double add_constant = 0;
+    bool factor_found = false;
+    bool constant_found = false;
+    int chars_so_far;
+    StringList l(url_regex.get(), '|');
+    match.setEscaped(l);
+
+    // FIXME: Missing method to check if the regex was in error.
+    // myErrMsg = form("%s is not a valid regex", url_regex.get());
+
+    char *s = formula.get();
+
+    // Parse the ([*]N[ ]*)?[+]?M format.
+    if (s[0] == '*')
+    {
+        // Skip past the '*'.
+        s++;
+
+        // There is a mul_factor. Let's parse it.
+        chars_so_far = 0;
+        sscanf(s, "%lf%n", &mul_factor, &chars_so_far);
+
+        // If '%lf' failed to match, then it will show up as either no
+        // assignment to chars_so_far, or as writing 0 there.
+        if (chars_so_far == 0)
+        {
+            // Report the complete formula, not just the tail after '*',
+            // for consistency with the other error messages.
+            myErrMsg = form("%s is not a valid adjustment formula",
+                            formula.get());
+            return;
+        }
+
+        // Skip past the number.
+        s += chars_so_far;
+
+        // Skip any whitespace.  Cast to unsigned char: passing a
+        // negative plain char to isspace() is undefined behavior.
+        while (isspace((unsigned char)*s))
+            s++;
+
+        // Eat any plus-sign; it's redundant if alone, and may come before a
+        // minus.
+        if (*s == '+')
+            s++;
+
+        factor_found = true;
+    }
+
+    // If there's anything here, it must be the additive constant.
+    if (*s)
+    {
+        chars_so_far = 0;
+        sscanf(s, "%lf%n", &add_constant, &chars_so_far);
+
+        // If '%lf' failed to match, then it will show up as either no
+        // assignment to chars_so_far, or as writing 0 there.
+        // We also need to check that it was the end of the input.
+        if (chars_so_far == 0 || s[chars_so_far] != 0)
+        {
+            myErrMsg = form("%s is not a valid adjustment formula",
+                            formula.get());
+            return;
+        }
+
+        constant_found = true;
+    }
+
+    // Either part must be there.
+    if (!factor_found && !constant_found)
+    {
+        myErrMsg = form("%s is not a valid formula", formula.get());
+        return;
+    }
+
+    my_add_constant = add_constant;
+    my_mul_factor = mul_factor;
+}
+
+ScoreAdjustItem::~ScoreAdjustItem()
+{
+ // Nothing to release explicitly; members clean up themselves.
+}
+
+URLSeedScore::URLSeedScore(Configuration &config)
+{
+    // Build the adjustment list from the url_seed_score configuration
+    // attribute, a whitespace-separated list of (regex, formula) pairs.
+    // On any error, myErrMsg is set and parsing stops at the bad pair.
+
+    // String literals are const; binding one to a plain char * is
+    // deprecated (and ill-formed in modern C++).
+    const char *config_item = "url_seed_score";
+
+    StringList sl(config[config_item], "\t \r\n");
+
+    myAdjustmentList = new List();
+
+    if (sl.Count() % 2)
+    {
+        myErrMsg = form("%s is not a list of pairs (odd number of items)",
+                        config_item);
+
+        // We *could* continue, but that just means the error will be harder
+        // to find, unless someone actually sees the error message.
+        return;
+    }
+
+    // Parse each as in TemplateList::createFromString.
+    for (int i = 0; i < sl.Count(); i += 2)
+    {
+        String url_regex = sl[i];
+        String adjust_formula = sl[i+1];
+
+        ScoreAdjustItem *adjust_item
+            = new ScoreAdjustItem(url_regex, adjust_formula);
+
+        if (adjust_item->ErrMsg().length() != 0)
+        {
+            // No point in continuing beyond the error; we might just
+            // overwrite the first error.
+            myErrMsg = form("While parsing %s: %s",
+                            config_item,
+                            adjust_item->ErrMsg().get());
+            // The failed item was never added to the list and would
+            // otherwise leak.
+            delete adjust_item;
+            return;
+        }
+
+        myAdjustmentList->Add(adjust_item);
+    }
+}
+
+URLSeedScore::~URLSeedScore()
+{
+ // NOTE(review): assumes the List destructor deletes the contained
+ // ScoreAdjustItem objects (htlib List owning its Objects) -- confirm.
+ delete myAdjustmentList;
+}
+
+double
+URLSeedScore::noninline_adjust_score(double orig_score, const String &url)
+{
+ // Apply the first configured adjustment whose regex matches the URL;
+ // when no item matches, the score is returned untouched.
+ ScoreAdjustItem *item;
+
+ myAdjustmentList->Start_Get();
+ while ((item = (ScoreAdjustItem *) myAdjustmentList->Get_Next()) != 0)
+ {
+ // Use the first match only.
+ if (item->Match(url))
+ return item->adjust_score(orig_score);
+ }
+
+ // No item matched.
+ return orig_score;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.h b/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.h
new file mode 100644
index 00000000..49f8e64e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/HtURLSeedScore.h
@@ -0,0 +1,55 @@
+//
+// HtURLSeedScore.h
+//
+// URLSeedScore: Constructed from a Configuration, see doc
+// for format of config item "url_seed_score".
+// Method "double adjust_score(double score, const String &url)"
+// returns an adjusted score, given the original score, or returns the
+// original score if there was no adjustment to do.
+//
+// $Id: HtURLSeedScore.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+#ifndef __HtURLSeedScore_h
+#define __HtURLSeedScore_h
+
+#include "Configuration.h"
+#include "List.h"
+
+class URLSeedScore
+{
+public:
+ // Parse the "url_seed_score" attribute of the given Configuration;
+ // check ErrMsg() afterwards for parse errors.
+ URLSeedScore(Configuration &);
+ ~URLSeedScore();
+
+ // Return the "adjusted" score. Use an inline method to avoid
+ // function-call overhead when this feature is unused.
+ double adjust_score(double score, const String& url)
+ {
+ return myAdjustmentList->Count() == 0
+ ? score : noninline_adjust_score(score, url);
+ }
+
+ // If an error was discovered during the parsing of
+ // the configuration, this member gives a
+ // nonempty String with an error message.
+ const String& ErrMsg() { return myErrMsg; }
+
+private:
+ // Scan the adjustment list and apply the first matching item.
+ double noninline_adjust_score(double score, const String& url);
+
+ // These member functions are not supposed to be implemented.
+ URLSeedScore();
+ URLSeedScore(const URLSeedScore &);
+ void operator= (const URLSeedScore &);
+
+ List *myAdjustmentList; // ScoreAdjustItem objects, owned by this object
+ String myErrMsg;
+};
+
+#endif /* __HtURLSeedScore_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.am b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.am
new file mode 100644
index 00000000..520c37a6
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.am
@@ -0,0 +1,35 @@
+
+include $(top_srcdir)/Makefile.config
+
+bindir = $(CGIBIN_DIR)
+
+LOCAL_DEFINES = -DCONFIG_DIR=\"$(CONFIG_DIR)\" -I$(top_srcdir)/htfuzzy
+
+bin_PROGRAMS = htsearch qtest
+
+htsearch_SOURCES = Display.cc DocMatch.cc ResultList.cc ResultMatch.cc \
+ Template.cc TemplateList.cc WeightWord.cc htsearch.cc \
+ parser.cc Collection.cc SplitMatches.cc HtURLSeedScore.cc
+noinst_HEADERS = Display.h DocMatch.h ResultList.h ResultMatch.h \
+ Template.h TemplateList.h WeightWord.h htsearch.h parser.h \
+ Collection.h SplitMatches.h HtURLSeedScore.h \
+ WordSearcher.h AndQuery.h AndQueryParser.h BooleanLexer.h \
+ BooleanQueryParser.h ExactWordQuery.h FuzzyExpander.h GParser.h \
+ NearQuery.h NotQuery.h OperatorQuery.h OrFuzzyExpander.h \
+ OrQuery.h OrQueryParser.h PhraseQuery.h Query.h QueryCache.h \
+ QueryLexer.h QueryParser.h SimpleLexer.h SimpleQueryParser.h \
+ VolatileCache.h
+htsearch_DEPENDENCIES = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+htsearch_LDFLAGS = $(PROFILING) ${extra_ldflags}
+htsearch_LDADD = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+
+qtest_SOURCES = DocMatch.cc ResultList.cc AndQuery.cc \
+ BooleanLexer.cc BooleanQueryParser.cc ExactWordQuery.cc \
+ GParser.cc NearQuery.cc NotQuery.cc OperatorQuery.cc \
+ OrFuzzyExpander.cc OrQuery.cc PhraseQuery.cc Query.cc \
+ QueryLexer.cc QueryParser.cc SimpleQueryParser.cc VolatileCache.cc \
+ WordSearcher.cc qtest.cc
+qtest_DEPENDENCIES = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+qtest_LDFLAGS = $(PROFILING) ${extra_ldflags}
+qtest_LDADD = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.in b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.in
new file mode 100644
index 00000000..c83b8e2c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.in
@@ -0,0 +1,519 @@
+# Makefile.in generated by automake 1.7.9 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# To compile with profiling do the following:
+#
+# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_triplet = @host@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+APACHE = @APACHE@
+APACHE_MODULES = @APACHE_MODULES@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CGIBIN_DIR = @CGIBIN_DIR@
+COMMON_DIR = @COMMON_DIR@
+CONFIG_DIR = @CONFIG_DIR@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATABASE_DIR = @DATABASE_DIR@
+DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+FIND = @FIND@
+GUNZIP = @GUNZIP@
+HAVE_SSL = @HAVE_SSL@
+HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@
+HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@
+HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@
+IMAGE_DIR = @IMAGE_DIR@
+IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
+MAKEINFO = @MAKEINFO@
+MV = @MV@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+RANLIB = @RANLIB@
+RRDTOOL = @RRDTOOL@
+SEARCH_DIR = @SEARCH_DIR@
+SEARCH_FORM = @SEARCH_FORM@
+SED = @SED@
+SENDMAIL = @SENDMAIL@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TAR = @TAR@
+TESTS_FALSE = @TESTS_FALSE@
+TESTS_TRUE = @TESTS_TRUE@
+TIME = @TIME@
+TIMEV = @TIMEV@
+USER = @USER@
+VERSION = @VERSION@
+YACC = @YACC@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+
+bindir = $(CGIBIN_DIR)
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+exec_prefix = @exec_prefix@
+extra_ldflags = @extra_ldflags@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \
+ -I$(top_srcdir)/include -I$(top_srcdir)/htlib \
+ -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \
+ -I$(top_srcdir)/htword \
+ -I$(top_srcdir)/db -I$(top_builddir)/db \
+ $(LOCAL_DEFINES) $(PROFILING)
+
+
+HTLIBS = $(top_builddir)/htnet/libhtnet.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/htlib/libht.la \
+ $(top_builddir)/htcommon/libcommon.la \
+ $(top_builddir)/htword/libhtword.la \
+ $(top_builddir)/db/libhtdb.la \
+ $(top_builddir)/htlib/libht.la
+
+
+LOCAL_DEFINES = -DCONFIG_DIR=\"$(CONFIG_DIR)\" -I$(top_srcdir)/htfuzzy
+
+bin_PROGRAMS = htsearch qtest
+
+htsearch_SOURCES = Display.cc DocMatch.cc ResultList.cc ResultMatch.cc \
+ Template.cc TemplateList.cc WeightWord.cc htsearch.cc \
+ parser.cc Collection.cc SplitMatches.cc HtURLSeedScore.cc
+
+noinst_HEADERS = Display.h DocMatch.h ResultList.h ResultMatch.h \
+ Template.h TemplateList.h WeightWord.h htsearch.h parser.h \
+ Collection.h SplitMatches.h HtURLSeedScore.h \
+ WordSearcher.h AndQuery.h AndQueryParser.h BooleanLexer.h \
+ BooleanQueryParser.h ExactWordQuery.h FuzzyExpander.h GParser.h \
+ NearQuery.h NotQuery.h OperatorQuery.h OrFuzzyExpander.h \
+ OrQuery.h OrQueryParser.h PhraseQuery.h Query.h QueryCache.h \
+ QueryLexer.h QueryParser.h SimpleLexer.h SimpleQueryParser.h \
+ VolatileCache.h
+
+htsearch_DEPENDENCIES = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+htsearch_LDFLAGS = $(PROFILING) ${extra_ldflags}
+htsearch_LDADD = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+
+qtest_SOURCES = DocMatch.cc ResultList.cc AndQuery.cc \
+ BooleanLexer.cc BooleanQueryParser.cc ExactWordQuery.cc \
+ GParser.cc NearQuery.cc NotQuery.cc OperatorQuery.cc \
+ OrFuzzyExpander.cc OrQuery.cc PhraseQuery.cc Query.cc \
+ QueryLexer.cc QueryParser.cc SimpleQueryParser.cc VolatileCache.cc \
+ WordSearcher.cc qtest.cc
+
+qtest_DEPENDENCIES = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+qtest_LDFLAGS = $(PROFILING) ${extra_ldflags}
+qtest_LDADD = $(top_builddir)/htfuzzy/libfuzzy.la $(HTLIBS)
+subdir = htsearch
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES =
+bin_PROGRAMS = htsearch$(EXEEXT) qtest$(EXEEXT)
+PROGRAMS = $(bin_PROGRAMS)
+
+am_htsearch_OBJECTS = Display.$(OBJEXT) DocMatch.$(OBJEXT) \
+ ResultList.$(OBJEXT) ResultMatch.$(OBJEXT) Template.$(OBJEXT) \
+ TemplateList.$(OBJEXT) WeightWord.$(OBJEXT) htsearch.$(OBJEXT) \
+ parser.$(OBJEXT) Collection.$(OBJEXT) SplitMatches.$(OBJEXT) \
+ HtURLSeedScore.$(OBJEXT)
+htsearch_OBJECTS = $(am_htsearch_OBJECTS)
+am_qtest_OBJECTS = DocMatch.$(OBJEXT) ResultList.$(OBJEXT) \
+ AndQuery.$(OBJEXT) BooleanLexer.$(OBJEXT) \
+ BooleanQueryParser.$(OBJEXT) ExactWordQuery.$(OBJEXT) \
+ GParser.$(OBJEXT) NearQuery.$(OBJEXT) NotQuery.$(OBJEXT) \
+ OperatorQuery.$(OBJEXT) OrFuzzyExpander.$(OBJEXT) \
+ OrQuery.$(OBJEXT) PhraseQuery.$(OBJEXT) Query.$(OBJEXT) \
+ QueryLexer.$(OBJEXT) QueryParser.$(OBJEXT) \
+ SimpleQueryParser.$(OBJEXT) VolatileCache.$(OBJEXT) \
+ WordSearcher.$(OBJEXT) qtest.$(OBJEXT)
+qtest_OBJECTS = $(am_qtest_OBJECTS)
+
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include
+depcomp =
+am__depfiles_maybe =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(htsearch_SOURCES) $(qtest_SOURCES)
+HEADERS = $(noinst_HEADERS)
+
+DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.in \
+ $(top_srcdir)/Makefile.config Makefile.am
+SOURCES = $(htsearch_SOURCES) $(qtest_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign htsearch/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ if test -f $$p \
+ || test -f $$p1 \
+ ; then \
+ f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f || exit 1; \
+ else :; fi; \
+ done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+ echo " rm -f $(DESTDIR)$(bindir)/$$f"; \
+ rm -f $(DESTDIR)$(bindir)/$$f; \
+ done
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+htsearch$(EXEEXT): $(htsearch_OBJECTS) $(htsearch_DEPENDENCIES)
+ @rm -f htsearch$(EXEEXT)
+ $(CXXLINK) $(htsearch_LDFLAGS) $(htsearch_OBJECTS) $(htsearch_LDADD) $(LIBS)
+qtest$(EXEEXT): $(qtest_OBJECTS) $(qtest_DEPENDENCIES)
+ @rm -f qtest$(EXEEXT)
+ $(CXXLINK) $(qtest_LDFLAGS) $(qtest_OBJECTS) $(qtest_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+ -rm -f *.tab.c
+
+.cc.o:
+ $(CXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+.cc.obj:
+ $(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+
+.cc.lo:
+ $(LTCXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+CTAGS = ctags
+CTAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$tags$$unique" \
+ || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique
+
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+ $(mkinstalldirs) $(distdir)/..
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkinstalldirs) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(HEADERS)
+
+installdirs:
+ $(mkinstalldirs) $(DESTDIR)$(bindir)
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-info-am
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am info info-am install install-am install-binPROGRAMS \
+ install-data install-data-am install-exec install-exec-am \
+ install-info install-info-am install-man install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-binPROGRAMS \
+ uninstall-info-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.win32 b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.win32
new file mode 100644
index 00000000..dfcc9edf
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Makefile.win32
@@ -0,0 +1,30 @@
+#
+# Makefile - makefile for htsearch
+#
+
+APP_NAME = Right Now Web CGI
+RNT_PRODUCT = rnw
+
+TARGET = $(BINDIR)/htsearch$(EXESFX)
+
+include ../Makedefs.win32
+
+# -----------------------------------------------------------------------------
+# add new executable members to this list
+
+
+CXXSRC = Display.cc DocMatch.cc ResultList.cc ResultMatch.cc \
+ Template.cc TemplateList.cc WeightWord.cc htsearch.cc parser.cc \
+ Collection.cc SplitMatches.cc HtURLSeedScore.cc
+
+CPPFLAGS += -DHAVE_CONFIG_H -I. -I../include -I../htlib -I../htcommon -I../htword -I../db -I../htnet
+
+LDLIBS = ../lib/$(ARCH)/libhtdb.lib ../lib/$(ARCH)/libcommon.lib ../lib/$(ARCH)/libht.lib ../lib/$(ARCH)/libhtword.lib ../lib/$(ARCH)/libhtnet.lib ../lib/$(ARCH)/libfuzzy.lib
+OTHERLIBS = ws2_32.lib L:/win32/lib/zlib114/zlib.lib
+
+DEPLIBS += $(LDLIBS)
+
+$(TARGET): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS) $(DEPLIBS)
+ $(EXELD) $(LDFLAGS) $(OBJS) $(LDLIBS) $(OTHERLIBS)
+
+include ../Makerules.win32
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.cc
new file mode 100644
index 00000000..52487fdc
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.cc
@@ -0,0 +1,143 @@
+//
+// NearQuery.cc
+//
+// NearQuery: An operator query that filters matches by proximity.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: NearQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "NearQuery.h"
+
+String
+NearQuery::OperatorString() const
+{
+ // Render this operator as "near/<distance>".
+ String repr;
+ repr << "near/";
+ repr << distance;
+ return repr;
+}
+
+//
+// l r nextTo
+// -----------------------
+// 0 0 0
+// 0 b 0
+// 0 x 0
+// a 0 0
+// a b near(a, b)
+// a x a
+// x 0 0
+// x b b
+// x x x
+//
+ResultList *
+NearQuery::Evaluate()
+{
+ // Evaluate both operands and keep only documents where the two words
+ // occur near enough to each other (see the combination table above).
+ // An ignored (stop-word) operand passes the other result through.
+ // Returns a newly allocated ResultList, or 0 when either operand is
+ // missing or produced no results.
+ // NOTE(review): the lists returned by GetResults() are not deleted
+ // here -- presumably they remain owned by the operand queries; confirm.
+ ResultList *result = 0;
+ Query *left = (Query *)operands[0];
+ Query *right = (Query *)operands[1];
+
+ if(left && right)
+ {
+ ResultList *l = left->GetResults();
+ if(l)
+ {
+ ResultList *r = right->GetResults();
+ if(r)
+ {
+ if(l->IsIgnore())
+ {
+ // Left operand is ignored: pass the right result through.
+ result = new ResultList(*r);
+ }
+ else if(r->IsIgnore())
+ {
+ // Right operand is ignored: pass the left result through.
+ result = new ResultList(*l);
+ }
+ else
+ {
+ result = Near(*l, *r);
+ }
+ }
+ }
+ }
+ return result;
+}
+
+ResultList *
+NearQuery::Near(const ResultList &l, const ResultList &r)
+{
+ // Intersect the two result lists by document id, keeping only the
+ // documents where at least one pair of word positions is near enough
+ // (as decided by MergeLocations).  Returns a newly allocated
+ // ResultList, or 0 when no document qualifies.
+ ResultList *result = 0;
+ DictionaryCursor c;
+ l.Start_Get(c);
+ DocMatch *match = (DocMatch *)l.Get_NextElement(c);
+ while(match)
+ {
+ // Is the same document present in the right-hand list?
+ DocMatch *confirm = r.find(match->GetId());
+ if(confirm)
+ {
+ List *locations = MergeLocations(
+ *match->GetLocations(),
+ *confirm->GetLocations());
+ if(locations)
+ {
+ // Lazily create the result list on the first qualifying document.
+ if(!result)
+ {
+ result = new ResultList;
+ }
+ DocMatch *copy = new DocMatch(*match);
+ copy->SetLocations(locations);
+ result->add(copy);
+ }
+ }
+ match = (DocMatch *)l.Get_NextElement(c);
+ }
+ return result;
+}
+
+//
+//: merge match positions in a 'near' operation
+// all combinations are tested; the pairs of positions near enough are kept
+//
+List *
+NearQuery::MergeLocations(const List &p, const List &q)
+{
+ // Compare every location in p against every location in q
+ // (O(|p|*|q|)) and collect the pairs whose word gap is at most
+ // `distance`; touching or overlapping ranges count as distance 0.
+ // Returns a newly allocated List of Location copies, or 0 when no
+ // pair is near enough.
+ List *result = 0;
+ ListCursor pc;
+ p.Start_Get(pc);
+ const Location *left = (const Location *)p.Get_Next(pc);
+ while(left)
+ {
+ ListCursor qc;
+ q.Start_Get(qc);
+ const Location *right = (const Location *)q.Get_Next(qc);
+ while(right)
+ {
+ // Gap between the two occurrences, whichever order they appear in.
+ int dist = right->from - left->to;
+ if(dist < 1)
+ {
+ dist = left->from - right->to;
+ if(dist < 1)
+ {
+ // The ranges touch or overlap.
+ dist = 0;
+ }
+ }
+ if(unsigned(dist) <= distance)
+ {
+ if(!result)
+ {
+ result = new List;
+ }
+ // NOTE(review): a location pairing with several counterparts is
+ // added once per pair -- presumably harmless for scoring; confirm.
+ result->Add(new Location(*left));
+ result->Add(new Location(*right));
+ }
+ right = (const Location *)q.Get_Next(qc);
+ }
+ left = (const Location *)p.Get_Next(pc);
+ }
+ return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.h
new file mode 100644
index 00000000..77de762b
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/NearQuery.h
@@ -0,0 +1,50 @@
+#ifndef _NearQuery_h_
+#define _NearQuery_h_
+
+//
+// NearQuery.h
+//
+// NearQuery: An operator query that filters matches by proximity.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: NearQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "OperatorQuery.h"
+
+class NearQuery : public OperatorQuery
+{
+public:
+ // binary fashion
+ NearQuery(Query *left, Query *right, unsigned int dist) :
+ distance(dist)
+ { Add(left); Add(right); }
+
+ // n-ary fashion -- will ignore operands for n>2
+ NearQuery(unsigned int dist = 10) :
+ distance(dist) {}
+
+private:
+ // get results from operands and filter
+ ResultList *Evaluate();
+
+ // create a result with neighboring matches
+ ResultList *Near(const ResultList &, const ResultList &);
+
+ // merge neighboring location lists
+ List *MergeLocations(const List &, const List &);
+
+ // render as "near/<distance>"
+ String OperatorString() const;
+ // maximum word gap allowed between the two operands' matches
+ unsigned int distance;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.cc
new file mode 100644
index 00000000..11a55c70
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.cc
@@ -0,0 +1,110 @@
+//
+// NotQuery.cc
+//
+// NotQuery: 'not' query operator (n-ary not!)
+// i.e. not(a, b, c, d...) == a except (b or c or d or...)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: NotQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+
+#include "NotQuery.h"
+//
+// l r not
+// -------------------------
+// 0 0 0
+// 0 b 0
+// 0 x 0
+// a 0 a
+// a b diff(a,b)
+// a x a
+// x 0 x
+// x b x
+// x x x
+//
+// note l is the first operand, r is the rest
+// i.e. l = 0 => not = 0
+// l = x => not = x
+// r = 0 => not = l
+// r = x => not = l
+// subtract otherwise
+//
+ResultList *
+NotQuery::Evaluate()
+{
+ operands.Start_Get();
+ Query *operand = (Query *) operands.Get_Next();
+ ResultList *result = 0;
+ // the first operand provides the 'positive' set; a null result
+ // short-circuits the whole operation (see truth table above)
+ ResultList *positive = operand->GetResults();
+ if(positive)
+ {
+ List negative;
+ // an 'ignored' positive skips gathering negatives entirely;
+ // the result then becomes a plain copy of it below
+ if(!positive->IsIgnore())
+ {
+ operand = (Query *) operands.Get_Next();
+ while(operand)
+ {
+ ResultList *next = operand->GetResults();
+ // null or ignored operands contribute nothing to subtract
+ if(next && !next->IsIgnore())
+ {
+ negative.Add(next);
+ }
+ operand = (Query *) operands.Get_Next();
+ }
+ }
+ if(negative.Count())
+ {
+ result = Subtract(*positive, negative);
+ // Release (not Destroy): the lists stay owned by their operands/cache
+ negative.Release();
+ }
+ else
+ {
+ result = new ResultList(*positive);
+ }
+ }
+ return result;
+}
+
+//
+// make a result list containing all matches in positive
+// whose docId is absent from every list in negatives
+// returns 0 (not an empty list) when no match survives
+//
+ResultList *
+NotQuery::Subtract(const ResultList &positive, const List &negatives)
+{
+ ResultList *result = 0;
+ DictionaryCursor pc;
+ positive.Start_Get(pc);
+ DocMatch *match = (DocMatch *)positive.Get_NextElement(pc);
+ while(match)
+ {
+ bool confirm = true;
+ ListCursor lc;
+ negatives.Start_Get(lc);
+ ResultList *negative = (ResultList *)negatives.Get_Next(lc);
+ // stop scanning as soon as the docId is found in any negative list
+ while(confirm && negative)
+ {
+ if(negative->exists(match->GetId()))
+ {
+ confirm = false;
+ }
+ negative = (ResultList *)negatives.Get_Next(lc);
+ }
+ if(confirm)
+ {
+ // the result list is created lazily, on the first survivor
+ if(!result)
+ {
+ result = new ResultList;
+ }
+ result->add(new DocMatch(*match));
+ }
+ match = (DocMatch *)positive.Get_NextElement(pc);
+ }
+ return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.h
new file mode 100644
index 00000000..0585d7ad
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/NotQuery.h
@@ -0,0 +1,42 @@
+#ifndef _NotQuery_h_
+#define _NotQuery_h_
+
+//
+// NotQuery.h
+//
+// NotQuery: 'not' query operator (n-ary not!)
+// i.e. not(a, b, c, d...) == a except (b or c or d or...)
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: NotQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "OperatorQuery.h"
+
+//
+// n-ary set difference: first operand minus the union of the rest
+//
+class NotQuery : public OperatorQuery
+{
+public:
+
+private:
+ // evaluate operands and operate
+ ResultList *Evaluate();
+
+ // create a difference of the operand results
+ ResultList *Subtract(const ResultList &, const List &);
+
+ // used by GetLogicalWords
+ String OperatorString() const { return String("not"); }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.cc
new file mode 100644
index 00000000..ebb7ba58
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.cc
@@ -0,0 +1,49 @@
+//
+// OperatorQuery.cc
+//
+// OperatorQuery: (abstract class) a query that combines result lists
+// returned by other queries kept in an operand list.
+// how they are combined is tbd by the concrete classes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OperatorQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "OperatorQuery.h"
+//
+// return a string with the query as a boolean expression:
+// "(op1 WORD op2 WORD ...)" with WORD = OperatorString()
+// descends recursively over the operands
+//
+String
+OperatorQuery::GetLogicalWords() const
+{
+ ListCursor c;
+ String out;
+ out << "(";
+ if(operands.Count())
+ {
+ operands.Start_Get(c);
+ out << ((Query *) operands.Get_Next(c))->GetLogicalWords();
+ Query *next = (Query *) operands.Get_Next(c);
+ while(next)
+ {
+ out << " " << OperatorString() << " ";
+ // NOTE(review): 'next' is always non-null here (it is the loop
+ // condition), so the "*nothing*" branch below is unreachable
+ if(next)
+ {
+ out << next->GetLogicalWords();
+ }
+ else
+ {
+ out << "*nothing*";
+ }
+ next = (Query *) operands.Get_Next(c);
+ }
+ }
+ out << ")";
+ return out;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.h
new file mode 100644
index 00000000..5c612ccf
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OperatorQuery.h
@@ -0,0 +1,68 @@
+#ifndef _OperatorQuery_h_
+#define _OperatorQuery_h_
+
+//
+// OperatorQuery.h
+//
+// OperatorQuery: (abstract class) a query that combines result lists
+// returned by other queries kept in an operand list.
+// how they are combined is tbd by the concrete classes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OperatorQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+//
+// for details about the basic architectural pattern see the book:
+// Design Patterns, by the infamous GoF
+// Interpreter pattern
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "Query.h"
+#include "List.h"
+
+// abstract base for queries that combine the ResultLists of child
+// queries; concrete subclasses define the combination in Evaluate()
+class OperatorQuery : public Query
+{
+public:
+ // operands are owned by this node and destroyed with it
+ virtual ~OperatorQuery()
+ {
+ operands.Destroy();
+ }
+
+ // add an operand to the operation (takes ownership)
+ void Add(Query *operand)
+ {
+ operands.Add(operand);
+ }
+
+protected:
+ OperatorQuery() {}
+
+ // get results from operands and combine them ad-hoc
+ virtual ResultList *Evaluate() = 0;
+
+ // keyword name of the operation
+ virtual String OperatorString() const = 0;
+
+ // human-readable unparsed string
+ virtual String GetLogicalWords() const;
+
+ // cache index
+ String GetSignature() const
+ { return String("Compound:")+GetLogicalWords(); }
+
+ // children query operands
+ List operands;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.cc b/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.cc
new file mode 100644
index 00000000..d288496d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.cc
@@ -0,0 +1,94 @@
+//
+// OrFuzzyExpander.cc
+//
+// OrFuzzyExpander: a concrete Fuzzy expander that makes a OR with
+// all the results returned by the applicable Fuzzies.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OrFuzzyExpander.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "OrFuzzyExpander.h"
+#include "Dictionary.h"
+#include "ExactWordQuery.h"
+#include "OrQuery.h"
+
+extern int debug;
+
+//
+// creates a query with a OrQuery with all the
+// distinct fuzzy results
+//
+// additionally, sets fuzzy scores for used words
+// returns 0 when no fuzzy produced any word
+//
+Query *
+OrFuzzyExpander::MakeQuery(const String &word)
+{
+ Query *result = 0;
+ // maps expanded word -> its (shared) ExactWordQuery, to deduplicate
+ Dictionary exacts;
+
+ // for each configured fuzzy
+ filters.Start_Get();
+ Fuzzy *fuzzy = (Fuzzy *)filters.Get_Next();
+ while(fuzzy)
+ {
+ // for each word expanded by fuzzy
+ List words;
+ String nonconst = word;
+ fuzzy->getWords(nonconst, words);
+ words.Start_Get();
+ String *w = (String *)words.Get_Next();
+ while(w)
+ {
+ // if not yet expanded by another fuzzy
+ // add it to the big Or
+ if(debug) cerr << "fuzzy " << word << "=" << *w << endl;
+ ExactWordQuery *exact = (ExactWordQuery *)exacts[*w];
+ if(!exact)
+ {
+ exact = new ExactWordQuery(*w);
+ exact->SetWeight(fuzzy->getWeight());
+ exacts.Add(*w, exact);
+ }
+ // otherwise, just adjust the weight
+ else
+ {
+ exact->SetWeight(
+ exact->GetWeight() +
+ fuzzy->getWeight());
+ }
+ w = (String *)words.Get_Next();
+ }
+ fuzzy = (Fuzzy *)filters.Get_Next();
+ }
+
+ // return the expanded query
+ // a single word or
+ // a Or with all the expanded words
+ exacts.Start_Get();
+ Query *exact = (Query *)exacts.Get_NextElement();
+ if(exact)
+ {
+ result = exact;
+ exact = (Query *)exacts.Get_NextElement();
+ }
+ if(exact)
+ {
+ Query *tmp = result;
+ result = new OrQuery;
+ result->Add(tmp);
+ while(exact)
+ {
+ result->Add(exact);
+ exact = (Query *)exacts.Get_NextElement();
+ }
+ }
+ // Release (not Destroy): the ExactWordQuery objects are now owned
+ // by the returned query tree (or returned directly as 'result')
+ exacts.Release();
+
+ return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.h b/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.h
new file mode 100644
index 00000000..4287f261
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OrFuzzyExpander.h
@@ -0,0 +1,49 @@
+#ifndef _OrFuzzyExpander_h_
+#define _OrFuzzyExpander_h_
+
+//
+// OrFuzzyExpander.h
+//
+// OrFuzzyExpander: a concrete Fuzzy expander that makes a OR with
+// all the results returned by the applicable Fuzzies.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OrFuzzyExpander.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "FuzzyExpander.h"
+#include "List.h"
+#include "Fuzzy.h"
+
+//
+// makes a Or query with all the fuzzy expansions
+//
+class Fuzzy;
+class OrFuzzyExpander : public FuzzyExpander
+{
+public:
+ OrFuzzyExpander() {}
+ // Release (not Destroy): the Fuzzy filters are owned by the caller
+ virtual ~OrFuzzyExpander() { filters.Release(); }
+
+ // use this filter
+ void Add(Fuzzy *filter) { filters.Add(filter); }
+
+
+private:
+ // generate a OrQuery with all fuzzies found
+ Query *MakeQuery(const String &word);
+
+ // Fuzzies to be used
+ List filters;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.cc
new file mode 100644
index 00000000..facbd9b0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.cc
@@ -0,0 +1,126 @@
+//
+// OrQuery.cc
+//
+// OrQuery: an operator query that merges all the results of its operands
+// i.e. does 'or' combination
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OrQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+
+#include "OrQuery.h"
+//
+// return a ResultList containing an Or of the results of the operands
+// evaluate all operands to do so
+//
+// l r or
+// ---------------------
+// 0 0 0
+// 0 b b
+// 0 x x
+// a 0 a
+// a b union(a,b)
+// a x a
+// x 0 x
+// x b b
+// x x x
+//
+// i.e. nulls and ignored are left out union
+//
+// Note that all operands are evaluated
+// Ignored operands are not included in the operation
+// the longer input result list is passed separately to Union
+//
+
+ResultList *
+OrQuery::Evaluate()
+{
+ ResultList *result = 0;
+ // 'longer' tracks the largest usable operand result so far;
+ // all other usable results go into 'shorter' (see Union comment)
+ ResultList *longer = 0;
+ List shorter;
+ int ignores = 0;
+ operands.Start_Get();
+ Query *operand = (Query *) operands.Get_Next();
+ while(operand)
+ {
+ ResultList *next = operand->GetResults();
+ if(next)
+ {
+ if(!next->IsIgnore())
+ {
+ // keep the biggest list aside, demote the previous holder
+ if(!longer || longer->Count() < next->Count())
+ {
+ if(longer)
+ {
+ shorter.Add(longer);
+ }
+ longer = next;
+ }
+ else
+ {
+ shorter.Add(next);
+ }
+ }
+ else
+ {
+ ignores++;
+ }
+ }
+ operand = (Query *) operands.Get_Next();
+ }
+ if(longer)
+ {
+ result = Union(*longer, shorter);
+ // Release (not Destroy): operand lists stay owned elsewhere
+ shorter.Release();
+ }
+ // all operands ignored => propagate an 'ignore' marker result
+ else if(ignores == operands.Count())
+ {
+ result = new ResultList;
+ result->Ignore();
+ }
+ return result;
+}
+
+//
+// copy unique DocMatches to the resulting list
+// matches with the same docId are merged
+// the longer list is assumed to be the first parameter
+// this is a modest optimisation (fewer per-match lookups)
+//
+ResultList *
+OrQuery::Union(const ResultList &longer, const List &lists)
+{
+ // seed the union with a copy of the longest input list
+ ResultList *result = new ResultList(longer);
+
+ ListCursor lc;
+ lists.Start_Get(lc);
+ ResultList *current = (ResultList *) lists.Get_Next(lc);
+ while(current)
+ {
+ DictionaryCursor c;
+ current->Start_Get(c);
+ DocMatch *match = (DocMatch *) current->Get_NextElement(c);
+ while(match)
+ {
+ // same document already present: merge; otherwise copy in
+ DocMatch *previous = result->find(match->GetId());
+ if(previous)
+ {
+ previous->Merge(*match);
+ }
+ else
+ {
+ DocMatch *copy = new DocMatch(*match);
+ result->add(copy);
+ }
+ match = (DocMatch *) current->Get_NextElement(c);
+ }
+ current = (ResultList *) lists.Get_Next(lc);
+ }
+ return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.h
new file mode 100644
index 00000000..c7f2c09c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OrQuery.h
@@ -0,0 +1,39 @@
+#ifndef _OrQuery_h_
+#define _OrQuery_h_
+
+//
+// OrQuery.h
+//
+// OrQuery: an operator query that merges all the results of its operands
+// i.e. does 'or' combination
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OrQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "OperatorQuery.h"
+
+// n-ary union: a document matches if any operand matches it
+class OrQuery : public OperatorQuery
+{
+public:
+
+private:
+ // evaluate operands and join results
+ ResultList *Evaluate();
+
+ // create a union of the operand results
+ ResultList *Union(const ResultList &longer, const List &shorter);
+
+ // used by GetLogicalWords
+ String OperatorString() const { return String("or"); }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/OrQueryParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/OrQueryParser.h
new file mode 100644
index 00000000..ec6a3337
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/OrQueryParser.h
@@ -0,0 +1,33 @@
+#ifndef _OrQueryParser_h_
+#define _OrQueryParser_h_
+
+//
+// OrQueryParser.h
+//
+// OrQueryParser: a query parser for 'any word' (or) queries
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: OrQueryParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "SimpleQueryParser.h"
+#include "OrQuery.h"
+
+// concrete SimpleQueryParser combining all words with OrQuery
+class OrQueryParser : public SimpleQueryParser
+{
+public:
+ OrQueryParser() {}
+
+private:
+ // factory hook: the combining operator for this parser flavour
+ OperatorQuery *MakeQuery()
+ {
+ return new OrQuery;
+ }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.cc b/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.cc
new file mode 100644
index 00000000..a42d97b3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.cc
@@ -0,0 +1,175 @@
+//
+// PhraseQuery.cc
+//
+// PhraseQuery: an operator query that filters sequenced word matches
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: PhraseQuery.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "PhraseQuery.h"
+
+//
+// evaluate operands and make a result with matches if some.
+// folds Near() over the operand results left to right, so only
+// documents containing the words in sequence survive
+//
+ResultList *
+PhraseQuery::Evaluate()
+{
+ ResultList *result = 0;
+
+ operands.Start_Get();
+ Query *next = (Query *)operands.Get_Next();
+ if(next)
+ {
+ result = (ResultList *)next->GetResults();
+ next = (Query *)operands.Get_Next();
+ }
+ // work on a private copy: the original list belongs to the operand/cache
+ if(result)
+ {
+ result = new ResultList(*result);
+ }
+ while(result && next)
+ {
+ // 'r' is borrowed from the operand -- never deleted here
+ ResultList *r = next->GetResults();
+ if(r)
+ {
+ // an ignored accumulator is simply replaced by the next list
+ if(result->IsIgnore())
+ {
+ delete result;
+ result = new ResultList(*r);
+ }
+ else if(!r->IsIgnore())
+ {
+ ResultList *tmp = result;
+ result = Near(*tmp, *r);
+ delete tmp;
+ }
+ next = (Query *)operands.Get_Next();
+ }
+ else
+ {
+ // one word missing => no phrase match at all
+ delete result;
+ result = 0;
+ }
+ }
+ return result;
+}
+
+// render the phrase as its quoted word sequence: "w1 w2 ..."
+String
+PhraseQuery::GetLogicalWords() const
+{
+ ListCursor c;
+ String out;
+ out << "\"";
+ if(operands.Count())
+ {
+ operands.Start_Get(c);
+ out << ((Query *) operands.Get_Next(c))->GetLogicalWords();
+ Query *next = (Query *) operands.Get_Next(c);
+ while(next)
+ {
+ out << " ";
+ // NOTE(review): 'next' is always non-null inside the loop,
+ // so the "*nothing*" branch below is unreachable
+ if(next)
+ {
+ out << next->GetLogicalWords();
+ }
+ else
+ {
+ out << "*nothing*";
+ }
+ next = (Query *) operands.Get_Next(c);
+ }
+ }
+ out << "\"";
+ return out;
+}
+
+//
+// return a resultlist containing matches that are contiguous,
+// i.e. documents present in both lists whose locations merge
+// into at least one phrase location; 0 when none do
+//
+ResultList *
+PhraseQuery::Near(const ResultList &l, const ResultList &r)
+{
+ ResultList *result = 0;
+ DictionaryCursor c;
+ l.Start_Get(c);
+ DocMatch *match = (DocMatch *)l.Get_NextElement(c);
+ while(match)
+ {
+ // the document must appear in the right-hand list too
+ DocMatch *confirm = r.find(match->GetId());
+ if(confirm)
+ {
+ List *locations = MergeLocations(
+ *match->GetLocations(),
+ *confirm->GetLocations());
+ // only documents with adjoining word positions survive
+ if(locations)
+ {
+ if(!result)
+ {
+ result = new ResultList;
+ }
+ DocMatch *copy = new DocMatch(*match);
+ copy->SetLocations(locations);
+ result->add(copy);
+ }
+ }
+ match = (DocMatch *)l.Get_NextElement(c);
+ }
+ return result;
+}
+
+
+//
+//: merge match positions in a 'next' operation
+// each position of left operand match is tested against right operand positions
+// if two contiguous positions are found, they are merged into a single one
+// beginning at the begin of the left operand
+// and ending and the end of the right operand
+// returns 0 when no pair of positions is contiguous
+//
+List *
+PhraseQuery::MergeLocations(const List &p, const List &q)
+{
+ List *result = 0;
+ ListCursor pc;
+ p.Start_Get(pc);
+ const Location *left = (const Location *)p.Get_Next(pc);
+ while(left)
+ {
+ ListCursor qc;
+ q.Start_Get(qc);
+ const Location *right = (const Location *)q.Get_Next(qc);
+ while(right)
+ {
+ // contiguity test: right phrase part starts just after left ends
+ if(left->to + 1 == right->from)
+ {
+ // weight of the merged span = length-weighted average
+ // of the two parts' weights
+ double prevsize = left->to - left->from + 1.0;
+ double addsize = right->to - right->from + 1.0;
+ double weight =
+ ((left->weight * prevsize) +
+ (right->weight * addsize)) /
+ (right->to - left->from + 1.0);
+
+ if(!result)
+ {
+ result = new List;
+ }
+
+ result->Add(new Location(
+ left->from,
+ right->to,
+ left->flags & right->flags,
+ weight));
+ // each left position pairs with at most one right position
+ break;
+ }
+ right = (const Location *)q.Get_Next(qc);
+ }
+ left = (const Location *)p.Get_Next(pc);
+ }
+ return result;
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.h b/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.h
new file mode 100644
index 00000000..c93ddeaa
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/PhraseQuery.h
@@ -0,0 +1,45 @@
+#ifndef _PhraseQuery_h_
+#define _PhraseQuery_h_
+
+//
+// PhraseQuery.h
+//
+// PhraseQuery: an operator query that filters sequenced word matches
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: PhraseQuery.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "OperatorQuery.h"
+
+// Phrase operator: operand words must match in sequence, at
+// strictly consecutive word positions
+class PhraseQuery : public OperatorQuery
+{
+public:
+ PhraseQuery() {}
+ ~PhraseQuery() {}
+
+private:
+ // get results from operands and filter
+ ResultList *Evaluate();
+
+ // create a result with neighboring matches
+ ResultList *Near(const ResultList &, const ResultList &);
+
+ // merge neighboring location lists, constructing phrase locations
+ List *MergeLocations(const List &, const List &);
+
+ // phrases have no infix keyword; words are just juxtaposed
+ String OperatorString() const { return ""; }
+
+ // quoted rendering, overrides the parenthesized default
+ String GetLogicalWords() const;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Query.cc b/debian/htdig/htdig-3.2.0b6/htsearch/Query.cc
new file mode 100644
index 00000000..0be01033
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Query.cc
@@ -0,0 +1,89 @@
+//
+// Query.cc
+//
+// Query: (abstract) a parsed, 'executable' digger database query
+// a query tree is formed by leaf objects (ExactWordQuery) and
+// node objects (OperatorQuery) derived from this class.
+// Query execution results are returned as ResultList objects.
+// Query evaluation is cached. Cache policy is delegated to the
+// QueryCache class family.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Query.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "Query.h"
+#include "VolatileCache.h"
+
+//
+// the in-memory query result cache. the default instance is
+// defined static so its destructor is called at program exit
+//
+VolatileCache theDefaultCache;
+
+// active cache policy; replaceable via Query::SetCache()
+QueryCache *
+Query::cache = &theDefaultCache;
+
+extern int debug;
+
+//
+// destructor -- no resources owned at this level; out-of-line so
+// the class has a single vtable anchor
+//
+Query::~Query()
+{
+}
+
+//
+// return a ResultList with the query results
+// results are initially fetched from the cache
+// if not cached, the query is evaluated
+// Weight of the results is adjusted at each invocation, as
+// the same result list may be shared by different queries
+// but different weights may be assigned to the word
+//
+// returns 0 for an empty (non-'ignore') result
+//
+ResultList *
+Query::GetResults()
+{
+ ResultList *result = 0;
+
+ // try to find in cache before trying eval
+ String signature;
+ if(cache)
+ {
+ signature = GetSignature();
+ result = cache->Lookup(signature);
+ }
+
+ // no cache or not in cache, evaluate
+ if(!result)
+ {
+ if(debug) cerr << "EVAL: " << signature << endl;
+ result = Evaluate();
+
+ // the cache takes ownership of the evaluated list (even if null)
+ if(cache)
+ {
+ cache->Add(signature, result);
+ }
+ }
+
+ // adjust if something found/returned
+ if(result)
+ {
+ if(result->Count())
+ {
+ AdjustWeight(*result);
+ }
+ else if(!result->IsIgnore())
+ {
+ // NOTE(review): the empty list is dropped without delete;
+ // fine when the cache owns it, but looks like a leak when
+ // cache is 0 -- TODO confirm against VolatileCache ownership
+ result = 0;
+ }
+ }
+ return result;
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Query.h b/debian/htdig/htdig-3.2.0b6/htsearch/Query.h
new file mode 100644
index 00000000..f6045ac8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Query.h
@@ -0,0 +1,77 @@
+#ifndef __Query_h__
+#define __Query_h__
+
+//
+// Query.h
+//
+// Query: (abstract) a parsed, 'executable' digger database query
+// a query tree is formed by leaf objects (ExactWordQuery) and
+// node objects (OperatorQuery) derived from this class.
+// Query execution results are returned as ResultList objects.
+// Query evaluation is cached. Cache policy is delegated to the
+// QueryCache class family.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Query.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+//
+// for details about the basic architectural patterns see the book:
+// Design Patterns, by the infamous GoF
+// Interpreter pattern
+// Factory pattern
+// Flyweight pattern
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "Object.h"
+#include "htString.h"
+#include "ResultList.h"
+
+class QueryCache;
+
+// abstract root of the query tree; see the file header for the
+// overall architecture (Interpreter/Factory/Flyweight patterns)
+class Query : public Object
+{
+public:
+ // destr
+ virtual ~Query();
+
+ // does nothing here -- hack for comfortable parser coding
+ virtual void Add(Query *) {}
+
+ // get a boolean-style query string
+ virtual String GetLogicalWords() const = 0;
+
+ // evaluate if necessary and return results (0 if empty)
+ ResultList *GetResults();
+
+ // set a cache policy (0 disables caching)
+ static void SetCache(QueryCache *c) { cache = c; }
+
+protected:
+ // get an unique cache index
+ virtual String GetSignature() const = 0;
+
+ Query() {}
+
+ // generate results
+ virtual ResultList *Evaluate() = 0;
+
+ // by default, nothing -- for use of leaf queries
+ virtual void AdjustWeight(ResultList &) {}
+
+private:
+ // the current cache object, if some
+ static QueryCache *cache;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/QueryCache.h b/debian/htdig/htdig-3.2.0b6/htsearch/QueryCache.h
new file mode 100644
index 00000000..fd9f53fa
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/QueryCache.h
@@ -0,0 +1,45 @@
+#ifndef _QueryCache_h_
+#define _QueryCache_h_
+
+//
+// QueryCache.h
+//
+// QueryCache: (abstract) interface for the current Query cache policy.
+// A cache stores ResultLists indexed by a signature string.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QueryCache.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+#include "Object.h"
+#include "htString.h"
+
+class ResultList;
+
+// abstract cache policy: maps a query signature string to its
+// (owned) ResultList; concrete policy in e.g. VolatileCache
+class QueryCache : public Object
+{
+public:
+ // destructor
+ virtual ~QueryCache() {}
+
+ // get cached result for a query signature (0 if absent)
+ virtual ResultList *Lookup(const String &signature) = 0;
+
+ // add result to be cached for a query signature
+ virtual void Add(const String &signature, ResultList *entry) = 0;
+
+protected:
+ // construction
+ QueryCache() {}
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.cc b/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.cc
new file mode 100644
index 00000000..ea57e3ce
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.cc
@@ -0,0 +1,84 @@
+//
+// QueryLexer.cc
+//
+// QueryLexer: (abstract) a lexical analyzer used by a QueryParser.
+// This class defines the common public interface of this
+// family of lexers. It implements a tokenizer, and also
+// the definition of the 'quote' and 'end' terminal symbols.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QueryLexer.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryLexer.h"
+#include "defaults.h"
+#include "WordType.h"
+
+
+extern int debug;
+
+// constructor: fetch the prefix-match marker(s) from the configuration
+QueryLexer::QueryLexer()
+{
+ HtConfiguration* config= HtConfiguration::config();
+ prefix_match = config->Find("prefix_match_character");
+}
+
+// install a new query string and position on its first token
+void
+QueryLexer::Set(const String &query_string)
+{
+ query = query_string;
+ current_char = 0;
+ Next();
+}
+
+// advance to the next token: either a single-character symbol
+// ( ) " / or a lowercased word; 'current' is left empty at end of query
+void
+QueryLexer::Next()
+{
+ HtConfiguration* config= HtConfiguration::config();
+ unsigned char text = query[current_char];
+ WordType type(*config);
+ current = "";
+
+ // skip non-word characters, but emit the recognized one-character
+ // symbols as tokens of their own
+ while (text
+ && !current.length()
+ && !type.IsStrictChar(text))
+ {
+ if (text == '(' || text == ')' || text == '\"' || text == '/')
+ {
+ current << text;
+ if (debug) cerr << "lexer symbol: " << current << endl;
+ }
+ text = query[++current_char];
+ }
+
+ // no symbol found: accumulate word characters (and any configured
+ // prefix-match character) into a word token
+ if (!current.length() && text)
+ {
+ // NOTE(review): mixed &&/|| without parentheses; since && binds
+ // tighter this parses as (IsChar && != '/') || prefix-match-char,
+ // which matches the apparent intent but triggers -Wparentheses
+ while (text
+ && (type.IsChar(text) && text != '/'
+ || prefix_match.indexOf(text, 0) != -1))
+ {
+ current << text;
+ text = query[++current_char];
+ }
+ }
+ current.lowercase();
+ if (debug) cerr << "lexer current word: " << current << endl;
+}
+
+// true when the whole query string has been consumed
+bool
+QueryLexer::IsEnd() const
+{
+ return current == String("");
+}
+
+// true when the current token is a double-quote symbol
+bool
+QueryLexer::IsQuote() const
+{
+ return current == String("\"");
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.h b/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.h
new file mode 100644
index 00000000..bbf57734
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/QueryLexer.h
@@ -0,0 +1,71 @@
+#ifndef _QueryLexer_h_
+#define _QueryLexer_h_
+
+//
+// QueryLexer.h
+//
+// QueryLexer: (abstract) a lexical analyzer used by a QueryParser.
+// This class defines the common public interface of this
+// family of lexers. It implements a tokenizer, and also
+// the definition of the 'quote' and 'end' terminal symbols.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QueryLexer.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "htString.h"
+
+// abstract tokenizer for query strings; subclasses decide which
+// tokens count as plain words (IsWord)
+class QueryLexer
+{
+public:
+ virtual ~QueryLexer() {}
+
+ // set the query string and advance to the first token
+ void Set(const String &query_string);
+
+ // advance to the next token
+ virtual void Next();
+
+ // is the current token a word?
+ virtual bool IsWord() const = 0;
+
+ // is the current token a quote sign?
+ bool IsQuote() const;
+
+ // is the current token end-of-query?
+ bool IsEnd() const;
+
+ // get the current token value (lowercased by Next)
+ const String &Value() const { return current; }
+
+ // get the full query string
+ const String &FullString() const { return query; }
+
+
+protected:
+ QueryLexer();
+
+ // the full query string
+ String query;
+
+ // the current token value
+ String current;
+
+ // the current position in the query string
+ int current_char;
+
+ // suffix string used by the 'prefix' fuzzy
+ String prefix_match;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.cc b/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.cc
new file mode 100644
index 00000000..ad74b8ba
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.cc
@@ -0,0 +1,134 @@
+//
+// QueryParser.cc
+//
+// QueryParser: (abstract) root of the family of classes that create
+// Query trees by analyzing query strings.
+// The main public interface consists on Parse(),
+// which does the job.
+// The subclasses must provide a lexer.
+// This class implements also the common behaviour needed to
+// parse single words and phrases.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QueryParser.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryParser.h"
+#include "Query.h"
+#include "htString.h"
+#include "ExactWordQuery.h"
+#include "PhraseQuery.h"
+#include "FuzzyExpander.h"
+
+extern int debug;
+
+FuzzyExpander *
+QueryParser::expander = 0;
+
+//
+// parse a query string
+//
+//
+// Parse a query string into a Query tree.
+// Returns the tree on success; returns 0 and sets the error string
+// (see Error()) when the string has trailing junk or a syntax error.
+Query *
+QueryParser::Parse(const String &query_string)
+{
+ error = "";
+ Token().Set(query_string);
+
+ Query *result = ParseExpression();
+ if(result && !Token().IsEnd())
+ {
+  Expected("end of query");
+  // free the partial tree -- it was leaked before when the query
+  // parsed but did not consume all input
+  delete result;
+  result = 0;
+ }
+ return result;
+}
+
+// parse one word
+// return a fuzzy word query
+//
+// Parse a single word token.
+// Yields a fuzzy-expanded query when an expansion policy is installed,
+// otherwise an exact-word query; consumes the token either way.
+Query *
+QueryParser::ParseWord()
+{
+ const String &word = Token().Value();
+ Query *result = expander
+     ? expander->MakeQuery(word)
+     : new ExactWordQuery(word);
+ Token().Next();
+ return result;
+}
+
+//
+// parse one word
+// return an exact query
+//
+// Parse a single word token into an exact (never fuzzy-expanded)
+// query node, consuming the token.
+Query *
+QueryParser::ParseExactWord()
+{
+ ExactWordQuery *exact = new ExactWordQuery(Token().Value());
+ Token().Next();
+ return exact;
+}
+
+//
+// phrase == word { word }
+//
+// phrase == word { word }
+// Collect exact words into a PhraseQuery until a quote or end-of-query.
+// Returns 0 (with the error string set) when no word could be read.
+Query *
+QueryParser::ParsePhrase()
+{
+ Query *result = 0;
+ Query *word = 0;
+ // first word -- required
+ if(!Token().IsEnd() && !Token().IsQuote())
+ {
+ word = ParseExactWord();
+ }
+ if(word)
+ {
+ result = new PhraseQuery;
+ result->Add(word);
+ // remaining words -- optional; stop on quote/end or word failure
+ while(word && !Token().IsEnd() && !Token().IsQuote())
+ {
+ word = ParseExactWord();
+ if(word)
+ {
+ result->Add(word);
+ }
+ }
+ }
+ // a failed word anywhere discards the whole phrase
+ if(!word && result)
+ {
+ delete result;
+ result = 0;
+ }
+ if(!result)
+ {
+ Expected("at least one word after \"");
+ }
+ return result;
+}
+
+void
+QueryParser::Expected(const String &what)
+{
+ error << "Expected " << what;
+ if(Token().IsEnd())
+ {
+ error << " at the end";
+ }
+ else
+ {
+ error << " instead of '" << Token().Value() << "'";
+ }
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.h
new file mode 100644
index 00000000..0af8ae30
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/QueryParser.h
@@ -0,0 +1,75 @@
+#ifndef _QueryParser_h_
+#define _QueryParser_h_
+
+//
+// QueryParser.h
+//
+// QueryParser: (abstract) root of the family of classes that create
+// Query trees by analyzing query strings.
+// The main public interface consists on Parse(),
+// which does the job.
+// The subclasses must provide a lexer.
+// This class implements also the common behaviour needed to
+// parse single words and phrases.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: QueryParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryLexer.h"
+
+class Query;
+class FuzzyExpander;
+
+// abstract
+// abstract
+// Template-method parser: Parse() drives the grammar supplied by a
+// subclass (ParseExpression) over the subclass-provided lexer (Token),
+// with shared word/phrase parsing and error reporting implemented here.
+class QueryParser
+{
+public:
+ virtual ~QueryParser() {}
+
+ // do it
+ Query *Parse(const String &query_string);
+
+ // contains a diagnostic if Parse() failed
+ const String &Error() const
+ { return error; }
+
+ // set a fuzzy word expansion policy
+ // (shared by all parsers; pass 0 to disable expansion)
+ static void SetFuzzyExpander(FuzzyExpander *x)
+ { expander = x; }
+
+protected:
+ QueryParser() {}
+
+ // apply a syntax -- tbd by derived classes
+ virtual Query *ParseExpression() = 0;
+
+ // access to the lexer -- provided by children
+ virtual QueryLexer &Token() = 0;
+
+ // parse one (fuzzy) word
+ Query *ParseWord();
+
+ // parse an exact word
+ Query *ParseExactWord();
+
+ // parse a phrase
+ Query *ParsePhrase();
+
+ // set the error string on syntax error
+ void Expected(const String &what);
+
+ // the current fuzzy expansion policy if some
+ static FuzzyExpander *expander;
+
+private:
+ // syntax error if some
+ String error;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.cc b/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.cc
new file mode 100644
index 00000000..969c7bb0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.cc
@@ -0,0 +1,151 @@
+//
+// ResultList.cc
+//
+// ResultList: A Dictionary indexed on the document id that holds
+// documents found for a search.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ResultList.cc,v 1.10 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "ResultList.h"
+#include "htString.h"
+
+
+//*****************************************************************************
+// ResultList::ResultList()
+//
+// Fresh result list: not marked as an "ignore" list.
+ResultList::ResultList()
+ : isIgnore(0)
+{
+}
+
+
+//*****************************************************************************
+// ResultList::~ResultList()
+//
+// Destructor. The explicit Destroy() of the contained DocMatch
+// objects is disabled -- NOTE(review): presumably ownership lies with
+// the Dictionary base or the callers; confirm who frees the matches.
+ResultList::~ResultList()
+{
+ //Destroy();
+}
+
+
+//*****************************************************************************
+//
+// Insert a match, keyed on its (stringified) document id.
+void
+ResultList::add(DocMatch *dm)
+{
+ String key;
+ key << dm->GetId();
+ Add(key, dm);
+}
+
+
+//*****************************************************************************
+//
+// Look up a match by numeric document id; 0 when absent.
+DocMatch *
+ResultList::find(int id) const
+{
+ String key;
+ key << id;
+ return (DocMatch *) Find(key);
+}
+
+
+//*****************************************************************************
+//
+// Look up a match by its id already in string form; 0 when absent.
+DocMatch *
+ResultList::find(char *id) const
+{
+ Object *obj = Find(id);
+ return (DocMatch *) obj;
+}
+
+
+//*****************************************************************************
+//
+// Remove the match stored under the given document id.
+void
+ResultList::remove(int id)
+{
+ String key;
+ key << id;
+ Remove(key);
+}
+
+
+//*****************************************************************************
+//
+// Non-zero when a match is stored under the given document id.
+int
+ResultList::exists(int id) const
+{
+ String key;
+ key << id;
+ return Exists(key);
+}
+
+
+//*****************************************************************************
+//
+// Collect every DocMatch into a freshly allocated vector.
+// The caller owns the vector itself, NOT the matches it refers to.
+HtVector *
+ResultList::elements()
+{
+ HtVector *list = new HtVector(Count() + 1);
+ char *key;
+
+ for (Start_Get(); (key = Get_Next()) != 0; )
+ {
+  list->Add(Find(key));
+ }
+ return list;
+}
+
+// Apply the given weight to every match in this list.
+void
+ResultList::SetWeight(double weight)
+{
+ HtVector *els = elements();
+ for(int i = 0; i < els->Count(); i++)
+ {
+  DocMatch *match = (DocMatch *)(*els)[i];
+  match->SetWeight(weight);
+ }
+ // Release() only detaches the elements (still owned by this list);
+ // the vector object itself was previously leaked.
+ els->Release();
+ delete els;
+}
+
+
+// Deep-copy constructor: duplicates every DocMatch so the new list
+// holds independent copies, and carries over the ignore flag.
+ResultList::ResultList(const ResultList &other)
+{
+ DictionaryCursor c;
+ isIgnore = other.isIgnore;
+ other.Start_Get(c);
+ DocMatch *match = (DocMatch *)other.Get_NextElement(c);
+ while(match)
+ {
+ add(new DocMatch(*match));
+ match = (DocMatch *)other.Get_NextElement(c);
+ }
+}
+
+// Debugging aid: write the flag, count and every match to stderr.
+void
+ResultList::Dump() const
+{
+ cerr << "ResultList {" << endl;
+ cerr << "Ignore: " << isIgnore << " Count: " << Count() << endl;
+ DictionaryCursor c;
+ Start_Get(c);
+ for (DocMatch *m = (DocMatch *)Get_NextElement(c);
+      m != 0;
+      m = (DocMatch *)Get_NextElement(c))
+ {
+  m->Dump();
+ }
+ cerr << "}" << endl;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.h b/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.h
new file mode 100644
index 00000000..5aa925ab
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ResultList.h
@@ -0,0 +1,50 @@
+//
+// ResultList.h
+//
+// ResultList: A Dictionary indexed on the document id that holds
+// documents found for a search.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ResultList.h,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _ResultList_h_
+#define _ResultList_h_
+
+#include "Dictionary.h"
+#include "DocMatch.h"
+#include "HtVector.h"
+
+// Dictionary of DocMatch objects keyed by stringified document id,
+// with convenience accessors taking numeric ids.
+class ResultList : public Dictionary
+{
+public:
+ ResultList();
+ ~ResultList();
+ ResultList(const ResultList &);
+
+ void add(DocMatch *);
+ void remove(int id);
+ DocMatch *find(int id) const;
+ DocMatch *find(char *id) const;
+ int exists(int id) const;
+
+ // new vector of the stored matches; caller owns the vector only
+ HtVector *elements();
+
+ void SetWeight(double weight);
+ bool IsIgnore() const { return isIgnore != 0; }
+ void Ignore() { isIgnore = 1; }
+
+ void Dump() const;
+//private:
+
+ // NOTE(review): left public because the access specifier above is
+ // commented out -- presumably some caller pokes it directly; confirm.
+ int isIgnore;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.cc b/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.cc
new file mode 100644
index 00000000..54e5f611
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.cc
@@ -0,0 +1,296 @@
+//
+// ResultMatch.cc
+//
+// ResultMatch: Contains information related to a given
+// document that was matched by a search. For instance, the
+// score of the document for this search. Similar to the
+// DocMatch class but designed for result display purposes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ResultMatch.cc,v 1.10 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "ResultMatch.h"
+
+// Definition of how to search
+ResultMatch::SortType ResultMatch::mySortType;
+
+//*****************************************************************************
+//
+// Construct with all members zeroed: previously score/anchor/id and
+// the collection pointer were left uninitialized, so the getters
+// returned garbage until the setters ran.
+ResultMatch::ResultMatch()
+    : score(0.0), anchor(0), id(0), collection(0)
+{
+}
+
+
+//*****************************************************************************
+//
+ResultMatch::~ResultMatch()
+{
+}
+
+
+//*****************************************************************************
+// Default-access-methods. Just dummies when that data is not used.
+// Default-access-methods: no-op fallbacks for subclasses that do not
+// carry the corresponding sort key (title/time).
+char *ResultMatch::getTitle()
+{ return ""; }
+
+time_t ResultMatch::getTime()
+{ return 0; }
+
+void ResultMatch::setTitle(char *)
+{ }
+
+void ResultMatch::setTime(time_t)
+{ }
+
+// Then for each sort-type, we derive a class, which will keep
+// any necessary additional piece of data, and return the compare-function.
+
+// We could have a real cute implementation with global
+// constructors registering a factory method with ResultMatch,
+// so it would just check a list and never need to be changed
+// when new search methods are introduced, but that seems futile.
+// It is more practical to just add search methods here and
+// change the createMatch method, last.
+
+
+//*****************************************************************************
+// Sort by score, descending: the higher-scored match sorts first.
+class ScoreMatch : public ResultMatch
+{
+ // This one needs no additional data
+public:
+ virtual ResultMatch::CmpFun getSortFun();
+ ScoreMatch();
+ ~ScoreMatch();
+private:
+ static int compare(const void *a1, const void *a2);
+};
+
+ScoreMatch::ScoreMatch() {}
+ScoreMatch::~ScoreMatch() {}
+
+// qsort-style comparison over ResultMatch** elements.
+int
+ScoreMatch::compare(const void *a1, const void *a2)
+{
+ ResultMatch *lhs = *((ResultMatch **) a1);
+ ResultMatch *rhs = *((ResultMatch **) a2);
+ double s1 = lhs->getScore();
+ double s2 = rhs->getScore();
+
+ if (s1 == s2)
+  return 0;
+ return (s1 < s2) ? 1 : -1;
+}
+
+ResultMatch::CmpFun
+ScoreMatch::getSortFun() { return compare; }
+
+//*****************************************************************************
+// Sort by time, most recent first.
+class TimeMatch : public ResultMatch
+{
+public:
+ virtual ResultMatch::CmpFun getSortFun();
+ virtual void setTime(time_t);
+ virtual time_t getTime();
+ TimeMatch();
+ ~TimeMatch();
+private:
+ // We need a time_t here, and to override the get/setTime methods.
+ time_t myTime;
+
+ static int compare(const void *a1, const void *a2);
+};
+
+// Initialize myTime so getTime() never reads an uninitialized value
+// (it previously did when setTime() had not yet been called).
+TimeMatch::TimeMatch() : myTime(0) {}
+TimeMatch::~TimeMatch() {}
+
+void
+TimeMatch::setTime(time_t t)
+{
+ myTime = t;
+}
+
+time_t TimeMatch::getTime()
+{
+ return myTime;
+}
+
+// qsort-style comparison. Compare explicitly rather than casting
+// (t2 - t1) to int, which could truncate/overflow when time_t is
+// wider than int.
+int
+TimeMatch::compare(const void *a1, const void *a2)
+{
+ ResultMatch *m1 = *((ResultMatch **) a1);
+ ResultMatch *m2 = *((ResultMatch **) a2);
+ time_t t1 = m1->getTime();
+ time_t t2 = m2->getTime();
+
+ if (t1 == t2)
+  return 0;
+ return (t1 < t2) ? 1 : -1;
+}
+
+ResultMatch::CmpFun
+TimeMatch::getSortFun() { return compare; }
+
+//*****************************************************************************
+// Sort by document id, ascending.
+class IDMatch : public ResultMatch
+{
+ // This one needs no additional data
+public:
+ virtual ResultMatch::CmpFun getSortFun();
+ IDMatch();
+ ~IDMatch();
+private:
+ static int compare(const void *a1, const void *a2);
+};
+
+IDMatch::IDMatch() {}
+IDMatch::~IDMatch() {}
+
+// qsort-style comparison. Explicit comparisons instead of (i1 - i2),
+// whose subtraction can overflow for extreme int values.
+int
+IDMatch::compare(const void *a1, const void *a2)
+{
+ ResultMatch *m1 = *((ResultMatch **) a1);
+ ResultMatch *m2 = *((ResultMatch **) a2);
+ int i1 = m1->getID();
+ int i2 = m2->getID();
+
+ if (i1 == i2)
+  return 0;
+ return (i1 < i2) ? -1 : 1;
+}
+
+ResultMatch::CmpFun
+IDMatch::getSortFun() { return compare; }
+
+//*****************************************************************************
+// Sort by title, case-insensitively ascending.
+class TitleMatch : public ResultMatch
+{
+public:
+ virtual ResultMatch::CmpFun getSortFun();
+ virtual void setTitle(char *t);
+ virtual char *getTitle();
+ TitleMatch();
+ ~TitleMatch();
+private:
+ // We need a String here, and to override the get/setTitle methods.
+ // It has to be a String, as the "char *" goes away shortly
+ // after creating the object.
+ String myTitle;
+
+ static int compare(const void *a1, const void *a2);
+};
+
+TitleMatch::TitleMatch() {}
+TitleMatch::~TitleMatch() {}
+
+void
+TitleMatch::setTitle(char *t)
+{
+ myTitle = t;
+}
+
+// returns a pointer into myTitle -- valid only while this object lives
+char *
+TitleMatch::getTitle()
+{
+ return myTitle;
+}
+
+// qsort-style comparison: case-insensitive title order, treating a
+// null title as the empty string.
+int
+TitleMatch::compare(const void *a1, const void *a2)
+{
+ ResultMatch *m1 = *((ResultMatch **) a1);
+ ResultMatch *m2 = *((ResultMatch **) a2);
+ char *t1 = m1->getTitle();
+ char *t2 = m2->getTitle();
+
+ if (!t1) t1 = "";
+ if (!t2) t2 = "";
+ return mystrcasecmp(t1, t2);
+}
+
+ResultMatch::CmpFun
+TitleMatch::getSortFun() { return compare; }
+
+//*****************************************************************************
+// Select the global sort order from a configuration keyword
+// ("score", "date"/"time", "title", "id", optionally prefixed with
+// "rev" for reverse order -- the prefix is stripped here, the reversal
+// itself is handled by the caller).
+// Returns 1 on success, 0 for an unrecognized keyword.
+int
+ResultMatch::setSortType(const String& sorttype)
+{
+ static const struct
+ {
+  // const: these point at string literals, which must never be written
+  const char *typest;
+  SortType type;
+ }
+ sorttypes[] =
+ {
+  {"score", SortByScore},
+  {"date", SortByTime},
+  {"time", SortByTime},
+  {"title", SortByTitle},
+  {"id", SortByID}
+ };
+ int i = 0;
+ const char *st = sorttype;
+ if (st && *st)
+ {
+  if (mystrncasecmp("rev", st, 3) == 0)
+   st += 3;
+  for (i = sizeof(sorttypes)/sizeof(sorttypes[0]); --i >= 0; )
+  {
+   if (mystrcasecmp(sorttypes[i].typest, st) == 0)
+   {
+    mySortType = sorttypes[i].type;
+    return 1;
+   }
+  }
+  return 0;
+ }
+ else
+ {
+  // If not specified, default to SortByScore
+  mySortType = SortByScore;
+  return 1;
+ }
+}
+
+//*****************************************************************************
+// Now here's the switchboard: a create-function that returns a
+// "new":ed object of the right class for what to compare.
+// To have the pairing managed in a (dynamically registered)
+// list may seem interesting, but since everything is here
+// anyway, there's little need but a small cuteness-factor.
+// We could also change the guts to use some kind of creator
+// object, if there would be a win.
+
+// Factory: instantiate the subclass matching the configured sort type.
+ResultMatch *
+ResultMatch::create()
+{
+ if (mySortType == ResultMatch::SortByTime)
+  return new TimeMatch();
+
+ if (mySortType == ResultMatch::SortByTitle)
+  return new TitleMatch();
+
+ if (mySortType == ResultMatch::SortByID)
+  return new IDMatch();
+
+ // SortByScore -- and the paper-over fallback for anything unexpected.
+ return new ScoreMatch();
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.h b/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.h
new file mode 100644
index 00000000..4cac3c5a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/ResultMatch.h
@@ -0,0 +1,89 @@
+//
+// ResultMatch.h
+//
+// ResultMatch: Contains information related to a given
+// document that was matched by a search. For instance, the
+// score of the document for this search. Similar to the
+// DocMatch class but designed for result display purposes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: ResultMatch.h,v 1.11 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _ResultMatch_h_
+#define _ResultMatch_h_
+
+#include "Object.h"
+#include "htString.h"
+
+class DocumentRef;
+class Collection;
+
+// Per-document display record with the configured sort behaviour
+// supplied by subclasses; instances are obtained through create().
+class ResultMatch : public Object
+{
+public:
+ //
+ // Construction/Destruction
+ //
+ ResultMatch();
+ ~ResultMatch();
+ // factory honoring the sort type chosen via setSortType()
+ static ResultMatch *create();
+ //
+ // Data access members
+ //
+ void setAnchor(int a) {anchor = a;}
+ void setID(int i) {id = i;}
+ void setScore(double s) {score = s;}
+
+ int getAnchor() {return anchor;}
+ double getScore() {return score;}
+ int getID() {return id;}
+
+ // Multiple database support
+ void setCollection(Collection *coll) { collection = coll; }
+ Collection *getCollection() { return collection; }
+
+ // choose the global sort order; returns 0 for unknown keywords
+ static int setSortType(const String& sorttype);
+
+ // A method for each type of data Display wants to cram in.
+ // Will only store the pieces necessary for the
+ // search-type as defined in setSortType, the others are dummies.
+ virtual char *getTitle();
+ virtual time_t getTime();
+
+ virtual void setTitle(char *title);
+ virtual void setTime(time_t t);
+
+ // This is likely to help weak compilers as well as the eye.
+ typedef int (*CmpFun)(const void *, const void *);
+
+ // The purpose of the derived classes is to define their own.
+ virtual CmpFun getSortFun() = 0;
+
+ // Sun's C++ compiler doesn't like private types used in other structs
+ // so make this public
+ enum SortType
+ {
+  SortByScore,
+  SortByTime,
+  SortByTitle,
+  SortByID
+ };
+
+private:
+ double score;
+ int anchor;
+ int id;
+ // non-owning; identifies which database this match came from
+ Collection *collection;
+
+ // process-wide sort selection shared by all matches
+ static SortType mySortType;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SimpleLexer.h b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleLexer.h
new file mode 100644
index 00000000..54fbd8ea
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleLexer.h
@@ -0,0 +1,29 @@
+#ifndef _SimpleLexer_h_
+#define _SimpleLexer_h_
+
+//
+// SimpleLexer.h
+//
+// SimpleLexer: query lexer for simple (no-keyword) queries
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: SimpleLexer.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryLexer.h"
+
+// Lexer with no keywords: every non-end token counts as a word, so
+// quote handling is the only structure SimpleQueryParser sees.
+class SimpleLexer : public QueryLexer
+{
+public:
+ SimpleLexer() : QueryLexer() {}
+
+ // everything is a word
+ bool IsWord() const { return !IsEnd(); }
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.cc b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.cc
new file mode 100644
index 00000000..ebe1901e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.cc
@@ -0,0 +1,96 @@
+//
+// SimpleQueryParser.cc
+//
+// SimpleQueryParser: (abstract) a family of parsers that generate queries
+// for strings with the syntax (word|phrase){(word|phrase)}
+// combining them in a single operator.
+// The operator to apply is tbd by concrete classes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: SimpleQueryParser.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "SimpleQueryParser.h"
+#include "OperatorQuery.h"
+
+//
+// expr == term { term }
+//
+//
+// expr == term { term }
+// A single term is returned as-is; several terms are combined under
+// the operator node supplied by MakeQuery().
+//
+Query *
+SimpleQueryParser::ParseExpression()
+{
+ Query *result = 0;
+ Query *term = ParseTerm();
+ if(term)
+ {
+ if(token.IsEnd())
+ {
+ result = term;
+ }
+ else
+ {
+ result = MakeQuery();
+ result->Add(term);
+ while(!token.IsEnd())
+ {
+ term = ParseTerm();
+ if(term)
+ {
+ result->Add(term);
+ }
+ }
+ }
+ }
+ // NOTE: 'term' here is the LAST term parsed -- if the final term
+ // failed, the whole expression (including earlier good terms) is
+ // discarded; mid-expression failures are otherwise tolerated.
+ if(!term)
+ {
+ delete result;
+ result = 0;
+ }
+ return result;
+}
+
+
+//
+// term == word | '"' phrase '"'
+//
+//
+// term == word | '"' phrase '"'
+// Returns 0 (with the error string set) on a malformed term.
+//
+Query *
+SimpleQueryParser::ParseTerm()
+{
+ Query *result = 0;
+
+ if(token.IsQuote())
+ {
+ // consume opening quote, then require phrase + closing quote
+ token.Next();
+ result = ParsePhrase();
+ if(result)
+ {
+ if(token.IsQuote())
+ {
+ token.Next();
+ }
+ else
+ {
+ Expected("closing \"");
+ delete result;
+ result = 0;
+ }
+ }
+ }
+ else if(token.IsWord())
+ {
+ // don't advance token here!
+ // (ParseWord consumes it after reading the value)
+ result = ParseWord();
+ }
+ else
+ {
+ Expected("a word or a quoted phrase");
+ }
+ return result;
+}
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.h b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.h
new file mode 100644
index 00000000..93ff08ee
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/SimpleQueryParser.h
@@ -0,0 +1,52 @@
+#ifndef _SimpleQueryParser_h_
+#define _SimpleQueryParser_h_
+
+//
+// SimpleQueryParser.h
+//
+// SimpleQueryParser: (abstract) a family of parsers that generate queries
+// for strings with the syntax (word|phrase){(word|phrase)}
+// combining them in a single operator.
+// The operator to apply is tbd by concrete classes.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: SimpleQueryParser.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "QueryParser.h"
+#include "SimpleLexer.h"
+
+// abstract
+class OperatorQuery;
+
+// Parser for keyword-free queries: a flat sequence of words and quoted
+// phrases, combined under one operator chosen by the subclass.
+class SimpleQueryParser : public QueryParser
+{
+public:
+ virtual ~SimpleQueryParser() {}
+
+protected:
+ SimpleQueryParser() {}
+
+ // get a combination query
+ // (the operator node the terms are added to)
+ virtual OperatorQuery *MakeQuery() = 0;
+
+private:
+ // apply expr == term { term }
+ Query *ParseExpression();
+
+ // apply term == word | phrase
+ Query *ParseTerm();
+
+ // let the parent access the lexer
+ QueryLexer &Token() { return token; }
+
+ // the used lexer
+ SimpleLexer token;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc
new file mode 100644
index 00000000..6d7f97d8
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.cc
@@ -0,0 +1,184 @@
+//
+// SplitMatches.cc
+//
+// SplitMatches:
+// Holds a list of lists with the matches, as specified in
+// search_results_order.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: SplitMatches.cc,v 1.6 2004/05/28 13:15:24 lha Exp $
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "StringList.h"
+#include "HtRegex.h"
+#include "SplitMatches.h"
+
+#include <stdio.h>
+#include <ctype.h>
+
+// This class is only used in private members of SplitMatches.
+// The OO-right thing would be to nest this inside the private
+// declaration of SplitMatches, but that would cause portability
+// problems according to
+// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>.
+//
+// It is used as a container for a key (String) and a list.
+//
+// Pairs one search_results_order pattern with the list of matches
+// filed under it. The magic "*" pattern installs no regex, so its
+// Match() always fails and its list serves as the fallback bucket.
+class MatchArea : public Object
+{
+public:
+ // Construct from a string applicable to StringMatch.
+ MatchArea(const String &);
+
+ ~MatchArea();
+
+ // Does this item match?
+ // Fail if template is empty, since explicit "*" maps to empty template
+ inline bool Match(char *s)
+ { return match.match(s, 0, 0) != 0; }
+
+ // Return the contained list.
+ List *MatchList() { return &myList; }
+
+private:
+ HtRegex match;
+ List myList;
+
+ // These member functions are not supposed to be implemented, but
+ // mentioned here as private so the compiler will not generate them if
+ // someone puts in buggy code that would use them.
+ MatchArea();
+ MatchArea(const MatchArea &);
+ void operator= (const MatchArea &);
+};
+
+// Build the regex from a '|'-separated pattern list. The catch-the-rest
+// pattern "*" is deliberately NOT installed, so Match() always returns
+// false for it and the area acts purely as the fallback bucket.
+MatchArea::MatchArea(const String &url_regex)
+{
+ const char *pattern = url_regex.get();
+ if (strcmp("*", pattern) != 0)
+ {
+  StringList alternatives(pattern, '|');
+  match.setEscaped(alternatives);
+ }
+}
+
+MatchArea::~MatchArea()
+{
+}
+
+// Build one MatchArea per pattern in search_results_order, ensuring a
+// catch-the-rest "*" area exists (appended if the config lacks one).
+SplitMatches::SplitMatches(Configuration &config)
+{
+ // const: points at a string literal, which must never be written
+ const char *config_item = "search_results_order";
+
+ StringList sl(config[config_item], "\t \r\n");
+
+ mySubAreas = new List();
+ myDefaultList = 0;
+
+ // Parse each as in TemplateList::createFromString.
+ for (int i = 0; i < sl.Count(); i++)
+ {
+  String sub_area_pattern = sl[i];
+  MatchArea *match_item = new MatchArea(sub_area_pattern);
+  mySubAreas->Add(match_item);
+
+  // If this is the magic catch-rest sub-area-pattern, we want to
+  // use its list-pointer to store all URLs that do not match
+  // anything else.
+  // We will iterate over a list where one of the patterns is
+  // known to not match, but that's a small penalty for keeping
+  // the code simple.
+  if (strcmp("*", sub_area_pattern.get()) == 0)
+   myDefaultList = match_item->MatchList();
+ }
+
+ // If we did not have a catch-the-rest pattern, install one at the
+ // end of the list.
+ if (myDefaultList == 0)
+ {
+  MatchArea *match_item = new MatchArea(String("*"));
+  mySubAreas->Add(match_item);
+
+  myDefaultList = match_item->MatchList();
+ }
+}
+
+// Free the area list (and with it every MatchArea).
+SplitMatches::~SplitMatches()
+{
+ // myDefaultList is a pointer to one of the items in mySubAreas and
+ // must not be explicitly deleted here.
+
+ delete mySubAreas;
+}
+
+// File a match under the first sub-area whose pattern matches its URL,
+// falling back to the catch-the-rest list.
+// Linear scan: fine for the handful of areas expected in practice;
+// break-even with a cleverer scheme would need hundreds of areas.
+void
+SplitMatches::Add(ResultMatch *match, char *url)
+{
+ MatchArea *area;
+
+ mySubAreas->Start_Get();
+ while ((area = (MatchArea *) mySubAreas->Get_Next()) != 0)
+ {
+  // Use the first match only.
+  if (area->Match(url))
+  {
+   area->MatchList()->Add(match);
+   return;
+  }
+ }
+
+ // No pattern matched -- use the catch-the-rest list.
+ myDefaultList->Add(match);
+}
+
+// Iterator step: the match-list of the next sub-area, or 0 when the
+// areas are exhausted.
+List *
+SplitMatches::Get_Next()
+{
+ MatchArea *area = (MatchArea *) mySubAreas->Get_Next();
+ return area ? area->MatchList() : 0;
+}
+
+// Concatenate all sub-area lists into one freshly allocated list,
+// destructively emptying the originals; the caller owns the result.
+List *
+SplitMatches::JoinedLists()
+{
+ List *all_areas = new List();
+ MatchArea *area;
+
+ mySubAreas->Start_Get();
+ while ((area = (MatchArea *) mySubAreas->Get_Next()) != 0)
+ {
+  // "Destructively" move the contents, leaving the sub-list empty.
+  all_areas->AppendList(*(area->MatchList()));
+ }
+
+ return all_areas;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.h b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.h
new file mode 100644
index 00000000..2d42a441
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/SplitMatches.h
@@ -0,0 +1,53 @@
+//
+// SplitMatches.h
+//
+// SplitMatches: Constructed from a Configuration, see doc
+// for format of config item "search_results_order".
+// Used to contain a number of ResultMatches, putting them in separate
+// lists depending on the URL with method Add.
+// Iterator methods Get_First and Get_Next returns the sub-lists.
+// Method Joined returns a new list with all the sub-lists
+// concatenated.
+//
+// $Id: SplitMatches.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 2000-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+#ifndef _splitmatches_h
+#define _splitmatches_h
+
+#include "Configuration.h"
+#include "ResultMatch.h"
+#include "List.h"
+
+// Buckets ResultMatches per search_results_order pattern; iterate the
+// buckets with Get_First/Get_Next or flatten them with JoinedLists().
+class SplitMatches
+{
+public:
+ SplitMatches(Configuration &);
+ ~SplitMatches();
+
+ // file a match under the first area whose pattern matches the URL
+ void Add(ResultMatch *, char *);
+ // new list of all matches in area order; caller owns the list
+ List *JoinedLists();
+ List *Get_First()
+ { mySubAreas->Start_Get(); return Get_Next(); }
+
+ List *Get_Next();
+
+private:
+ // These member functions are not supposed to be implemented.
+ SplitMatches();
+ SplitMatches(const SplitMatches &);
+ void operator= (const SplitMatches &);
+
+ // (Lists of) Matches for each sub-area regex.
+ List *mySubAreas;
+
+ // Matches for everything else.
+ // (points into mySubAreas -- never deleted separately)
+ List *myDefaultList;
+};
+
+#endif /* _splitmatches_h */
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Template.cc b/debian/htdig/htdig-3.2.0b6/htsearch/Template.cc
new file mode 100644
index 00000000..d1d48095
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Template.cc
@@ -0,0 +1,81 @@
+//
+// Template.cc
+//
+// Template: A template to set the display of the search results.
+// MatchTemplate is used for every match, Start and End templates
+// are used between the header and the first match and the
+// last match and the footer respectively.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Template.cc,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "Template.h"
+
+#include <stdio.h>
+
+
+//*****************************************************************************
+// Empty template set; fill via the setters or createFromFile().
+Template::Template()
+{
+}
+
+
+//*****************************************************************************
+Template::~Template()
+{
+}
+
+
+//*****************************************************************************
+// The start and end templates are created from the filename of the
+// main template by appending ".start" and ".end" to the filename
+// respectively.
+//
+// Load the three templates from disk: the main file supplies the
+// per-match template, and sibling files "<name>.start" / "<name>.end"
+// supply the start and end templates (missing files leave the
+// corresponding template untouched).
+void
+Template::createFromFile(const char *filename)
+{
+ String startFile;
+ startFile << filename << ".start";
+ readFile(startTemplate, (char*)startFile);
+
+ String endFile;
+ endFile << filename << ".end";
+ readFile(endTemplate, (char*)endFile);
+
+ readFile(matchTemplate, filename);
+}
+
+//*****************************************************************************
+// Append the contents of a file to a string. Nothing happens if the file
+// doesn't exist.
+//
+// Replace s with the contents of filename; s is left untouched when
+// the file cannot be opened.
+void
+Template::readFile(String &s, const char *filename) const
+{
+ FILE *fl = fopen(filename, "r");
+ if (!fl)
+  return;
+
+ char buffer[1000];
+ s = 0;
+ while (fgets(buffer, sizeof(buffer), fl))
+  s << buffer;
+ fclose(fl);
+}
+
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/Template.h b/debian/htdig/htdig-3.2.0b6/htsearch/Template.h
new file mode 100644
index 00000000..5a7e6af3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/Template.h
@@ -0,0 +1,54 @@
+//
+// Template.h
+//
+// Template: A template to set the display of the search results.
+// MatchTemplate is used for every match, Start and End templates
+// are used between the header and the first match and the
+// last match and the footer respectively.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Template.h,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _Template_h_
+#define _Template_h_
+
+#include "Object.h"
+#include "htString.h"
+
+//
+// This class holds information about output templates.
+// A template consists of three parts: the per-match text, and the
+// start/end text emitted before the first and after the last match.
+//
+class Template : public Object
+{
+public:
+    Template();
+    ~Template();
+
+    // Read-only access to the three template parts.
+    const String& getMatchTemplate() const { return matchTemplate; }
+    const String& getStartTemplate() const { return startTemplate; }
+    const String& getEndTemplate() const { return endTemplate; }
+
+    // Setters copy the given C string into the corresponding member.
+    void setMatchTemplate(const char *s) { matchTemplate = s; }
+    void setStartTemplate(const char *s) { startTemplate = s; }
+    void setEndTemplate(const char *s) { endTemplate = s; }
+
+    // Load matchTemplate from 'filename' and start/end templates from
+    // 'filename'.start / 'filename'.end (see Template.cc).
+    void createFromFile(const char *filename);
+
+protected:
+    String matchTemplate;
+    String startTemplate;
+    String endTemplate;
+
+private:
+    // Helper for createFromFile: read one file into a String.
+    void readFile(String &, const char *) const;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.cc b/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.cc
new file mode 100644
index 00000000..869f3fb1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.cc
@@ -0,0 +1,106 @@
+//
+// TemplateList.cc
+//
+//
+// TemplateList: As it sounds--a list of search result templates. Reads the
+// configuration and any template files from disk, then retrieves
+// the relevant template for display.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: TemplateList.cc,v 1.11 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "TemplateList.h"
+#include "URL.h"
+#include "QuotedStringList.h"
+
+//*****************************************************************************
+// Default constructor: the three parallel Lists start out empty.
+//
+TemplateList::TemplateList()
+{
+}
+
+
+//*****************************************************************************
+// Destructor.  The List members destroy the String/Template objects
+// they hold (List owns its elements in this codebase).
+//
+TemplateList::~TemplateList()
+{
+}
+
+
+//*****************************************************************************
+// Return the template that belongs to the given internal template
+// name. If no template can be found, NULL is returned.
+//
+// The lookup is case-insensitive and relies on internalNames and
+// templates being kept in lock-step by createFromString().
+//
+Template *
+TemplateList::get(const String& internalName)
+{
+    for (int i = 0; i < internalNames.Count(); i++)
+    {
+        const String *s = (const String *) internalNames[i];
+        if (mystrcasecmp(*s, internalName) == 0)
+            return (Template *) templates[i];   // parallel-list index
+    }
+    return 0;   // not found
+}
+
+
+//*****************************************************************************
+// Create a list of templates from a configuration string. The string
+// will have triplets of: display name, internal name, and filename.
+// There are two special cases for the internal name: builtin-long and
+// builtin-short. These will cause a hardcoded template to be
+// created. All other templates are read in from the specified
+// filename.
+//
+// Returns 1 on success, 0 if the string does not contain whole
+// triplets.  Note: the builtin-* match is actually done on the
+// *filename* field, not the internal name as the comment above says.
+//
+int
+TemplateList::createFromString(const String& str)
+{
+    QuotedStringList sl(str, "\t \r\n");
+    String display, internal, file;
+    Template *t;
+
+    if ( sl.Count() % 3) return 0; // Make sure we have a multiple of three
+
+    for (int i = 0; i < sl.Count(); i += 3)
+    {
+        display = sl[i];
+        decodeURL(display);             // display name may be URL-encoded
+        internal = sl[i + 1];
+        file = sl[i + 2];
+        // The three parallel lists take ownership of the new objects.
+        displayNames.Add(new String(display));
+        internalNames.Add(new String(internal));
+
+        t = new Template();
+
+        if (mystrcasecmp((char*)file, "builtin-long") == 0)
+        {
+            // Hard-coded long (detailed) result template.
+            String s;
+            s << "<dl><dt><strong><a href=\"$&(URL)\">$&(TITLE)</a></strong>";
+            s << "$(STARSLEFT)\n";
+            s << "</dt><dd>$(EXCERPT)<br>\n";
+            s << "<em><a href=\"$&(URL)\">$&(URL)</a></em>\n";
+            s << " <font size=\"-1\">$(MODIFIED), $(SIZE) bytes</font>\n";
+            s << "</dd></dl>\n";
+            t->setMatchTemplate((char*)s);
+        }
+        else if (mystrcasecmp((char*)file, "builtin-short") == 0)
+        {
+            // Hard-coded one-line result template.
+            t->setMatchTemplate("$(STARSRIGHT) <strong><a href=\"$&(URL)\">$&(TITLE)</a></strong><br>\n");
+        }
+        else
+        {
+            // Anything else is a template file on disk.
+            t->createFromFile((char*)file);
+        }
+        templates.Add(t);
+    }
+
+    return 1;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.h b/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.h
new file mode 100644
index 00000000..f6986fc0
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/TemplateList.h
@@ -0,0 +1,40 @@
+//
+// TemplateList.h
+//
+// TemplateList: As it sounds--a list of search result templates. Reads the
+// configuration and any template files from disk, then retrieves
+// the relevant template for display.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: TemplateList.h,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _TemplateList_h_
+#define _TemplateList_h_
+
+#include "Template.h"
+#include "Object.h"
+#include "List.h"
+
+class TemplateList : public Object
+{
+public:
+    TemplateList();
+    ~TemplateList();
+
+    // Parse a whitespace-separated list of
+    // (display name, internal name, filename) triplets; returns 0 on
+    // a malformed (non-triplet) string, 1 otherwise.
+    int createFromString(const String& str);
+    // Case-insensitive lookup by internal name; NULL if unknown.
+    Template *get(const String& internalName);
+
+    // Three parallel lists, index i describes one template.
+    // The Lists own the String/Template objects stored in them.
+    List displayNames;
+    List internalNames;
+    List templates;
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.cc b/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.cc
new file mode 100644
index 00000000..d3f21f5e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.cc
@@ -0,0 +1,77 @@
+//
+// VolatileCache.cc
+//
+// VolatileCache: the simplest non-persistent Query result cache.
+// This is default policy.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: VolatileCache.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "VolatileCache.h"
+#include "ResultList.h"
+
+//
+// a pseudo-constant empty result list
+// used to avoid null pointers in the cache
+//
+// A cached "no results" entry points at this shared sentinel instead of
+// NULL so Lookup() can distinguish "never searched" (NULL) from
+// "searched, nothing found" (empty).
+//
+ResultList theEmptyResult;
+
+ResultList * const
+VolatileCache::empty = &theEmptyResult;
+
+// Shared verbosity flag defined in htsearch.cc.
+extern int debug;
+
+//
+// find a cache entry
+//
+// Returns NULL when the signature was never cached; may return the
+// shared 'empty' sentinel for a cached negative result.
+//
+ResultList *
+VolatileCache::Lookup(const String &signature)
+{
+    ResultList *result = (ResultList *)cache[signature];
+    return result;
+}
+
+//
+// add a cache entry
+//
+// Takes ownership of 'entry'.  A NULL entry is stored as the shared
+// 'empty' sentinel.  If the signature was already cached, the previous
+// entry is deleted first (unless it is the sentinel, which is never
+// deleted).
+//
+void
+VolatileCache::Add(const String &signature, ResultList *entry)
+{
+    ResultList *previous = (ResultList *)cache[signature];
+    if(previous && previous != empty)
+    {
+        delete previous;    // replace: free the old owned entry
+    }
+    if(!entry)
+    {
+        entry = empty;      // normalize NULL to the shared sentinel
+    }
+    cache.Add(signature, entry);
+}
+
+//
+// clear the in-memory cache
+// avoids deletion of the shared 'empty' element
+//
+VolatileCache::~VolatileCache()
+{
+    if(debug) cerr << "query CLEAR: entries=" << cache.Count() << endl;
+    // Walk all values, deleting every owned ResultList; the 'empty'
+    // sentinel is shared (file-scope static) and must survive.
+    cache.Start_Get();
+    ResultList *kill = (ResultList *)cache.Get_NextElement();
+    while(kill)
+    {
+        if(kill != empty)
+        {
+            delete kill;
+        }
+        kill = (ResultList *)cache.Get_NextElement();
+    }
+    cache.Release();    // drop the dictionary entries themselves
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.h b/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.h
new file mode 100644
index 00000000..c57d09d3
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/VolatileCache.h
@@ -0,0 +1,44 @@
+#ifndef _VolatileCache_h_
+#define _VolatileCache_h_
+
+//
+// VolatileCache.h
+//
+// VolatileCache: the simplest non-persistent Query result cache.
+// This is default policy.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: VolatileCache.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "QueryCache.h"
+#include "Dictionary.h"
+
+class VolatileCache : public QueryCache
+{
+public:
+    // cons & destr
+    VolatileCache() {}
+    // Deletes all owned ResultLists (but not the shared 'empty' marker).
+    ~VolatileCache();
+
+    // get cached result from in-memory cache
+    // (NULL = not cached; may return the shared empty sentinel)
+    ResultList *Lookup(const String &signature);
+
+    // add result to in-memory cache; takes ownership of 'entry',
+    // NULL is stored as the shared empty sentinel
+    void Add(const String &signature, ResultList *entry);
+
+private:
+    // signature -> ResultList*; values owned except 'empty'
+    Dictionary cache;
+    // shared sentinel representing "cached, no results"
+    static ResultList * const empty;
+};
+
+#endif
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.cc b/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.cc
new file mode 100644
index 00000000..49eb2e03
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.cc
@@ -0,0 +1,146 @@
+//
+// WeightWord.cc
+//
+// WeightWord: Contains the information necessary for a particular search word
+// including the resulting weight (scaling factor) and
+// whether the word should be hidden (ignored).
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WeightWord.cc,v 1.10 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "WeightWord.h"
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+//***************************************************************************
+// WeightWord::WeightWord()
+//
+// Default constructor: neutral weight, no flags set except
+// FLAGS_MATCH_ONE (match in any document field).
+//
+WeightWord::WeightWord()
+{
+    weight = 1;
+    records = 0;
+    isExact = 0;
+    isHidden = 0;
+    isIgnore = 0;
+
+    flags = FLAGS_MATCH_ONE;
+}
+
+
+//***************************************************************************
+// WeightWord::WeightWord(WeightWord *ww)
+//
+// Copy-from-pointer constructor.  Note that isIgnore is deliberately
+// reset to 0 rather than copied, and 'records' is copied as a raw
+// pointer (no deep copy — ownership stays with the original).
+//
+WeightWord::WeightWord(WeightWord *ww)
+{
+    weight = ww->weight;
+    records = ww->records;
+    isExact = ww->isExact;
+    isHidden = ww->isHidden;
+    flags = ww->flags;
+    word = ww->word;
+    isIgnore = 0;
+}
+
+
+//***************************************************************************
+// WeightWord::WeightWord(char *word, double weight)
+//
+// Construct from a word and weight; the word is lowercased by set().
+//
+WeightWord::WeightWord(char *word, double weight)
+{
+    records = 0;
+    isExact = 0;
+    isHidden = 0;
+    isIgnore = 0;
+
+    // allow a match with any field
+    flags = FLAGS_MATCH_ONE;
+
+    set(word);
+    this->weight = weight;
+}
+
+//***************************************************************************
+// WeightWord::WeightWord(char *word, double weight, unsigned int f)
+//
+// Construct with explicit field flags; also mirrors the EXACT/HIDDEN/
+// IGNORE bits into the legacy short-int members.
+//
+WeightWord::WeightWord(char *word, double weight, unsigned int f)
+{
+    records = 0;
+
+    flags = f;
+    // if no fields specified, allow a match with any field
+    // (XOR acts as a set here because the bits are known to be clear)
+    if (!(flags & FLAGS_MATCH_ONE))
+        flags ^= FLAGS_MATCH_ONE;
+
+    // ideally, these flags should all just be stored in a uint...
+    isExact = ((flags & FLAG_EXACT) != 0);
+    isHidden = ((flags & FLAG_HIDDEN) != 0);
+    isIgnore = ((flags & FLAG_IGNORE) != 0);
+
+    set(word);
+    this->weight = weight;
+}
+
+
+//***************************************************************************
+// WeightWord::~WeightWord()
+//
+// NOTE(review): 'records' is not deleted here — presumably owned
+// elsewhere; confirm before changing.
+//
+WeightWord::~WeightWord()
+{
+}
+
+
+//***************************************************************************
+// void WeightWord::set(char *word)
+//
+// Store the word, lowercased.  The old "exact:"/"hidden:" prefix
+// parsing is retained below under #if 0 for reference only; prefix
+// handling now happens elsewhere (see colonPrefix in htsearch.cc).
+//
+void WeightWord::set(char *word)
+{
+#if 0
+    isExact = 0;
+    isHidden = 0;
+    while (strchr(word, ':'))
+    {
+        //
+        // This word contains modifiers.
+        //
+        if (mystrncasecmp(word, "exact:", 6) == 0)
+        {
+            word += 6;
+            isExact = 1;
+        }
+        else if (mystrncasecmp(word, "hidden:", 7) == 0)
+        {
+            word += 7;
+            isHidden = 1;
+        }
+        else
+        {
+            //
+            // There is a ':' but not a valid attribute.  It must be part
+            // of the word we are searching for.
+            //
+            break;
+        }
+
+    }
+#endif
+    this->word = word;
+    this->word.lowercase();
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.h b/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.h
new file mode 100644
index 00000000..313ea362
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/WeightWord.h
@@ -0,0 +1,50 @@
+//
+// WeightWord.h
+//
+// WeightWord: Contains the information necessary for a particular search word
+// including the resulting weight (scaling factor) and
+// whether the word should be hidden (ignored).
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WeightWord.h,v 1.8 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifndef _WeightWord_h_
+#define _WeightWord_h_
+
+#include "htString.h"
+#include "WordRecord.h"
+#include "HtWordReference.h" // for FLAG_...
+
+class WeightWord : public Object
+{
+public:
+    //
+    // Construction/Destruction
+    //
+    WeightWord();
+    WeightWord(char *word, double weight);
+    WeightWord(char *word, double weight, unsigned int flags);
+    WeightWord(WeightWord *);   // copies all but isIgnore (reset to 0)
+
+    virtual ~WeightWord();
+
+    // Store 'word' lowercased.
+    void set(char *word);
+
+    String word;            // the (lowercased) search word
+    double weight;          // scaling factor for scoring
+    WordRecord *records;    // raw pointer, not owned by this class
+    unsigned int flags;     // FLAG_* field-restriction bits
+    // Legacy mirrors of individual flag bits:
+    short int isExact;
+    short int isHidden;     // hidden words are excluded from display
+    short int isIgnore;     // too short / bad words, skipped in search
+};
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.cc b/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.cc
new file mode 100644
index 00000000..7e1669f7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.cc
@@ -0,0 +1,109 @@
+//
+// WordSearcher.cc
+//
+// WordSearcher: a simple word database readonly-access wrapper
+// generates ResultLists for the Query framework.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordSearcher.cc,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#include "WordSearcher.h"
+#include "WordType.h"
+#include "ResultList.h"
+#include "HtWordReference.h"
+#include "defaults.h"
+
+// Shared verbosity flag defined in htsearch.cc.
+extern int debug;
+
+//
+// constructor, opens the database
+//
+// Opens the word database read-only using the global configuration.
+// NOTE(review): the Open() return value is not checked here — a missing
+// database surfaces later; confirm callers validate the path first.
+//
+WordSearcher::WordSearcher(const String &filename) :
+    references(*(HtConfiguration::config()))
+{
+    references.Open(filename, O_RDONLY);
+}
+
+//
+// gather results for a word, either from db or ignored
+//
+// Returns a new ResultList owned by the caller.  Words rejected by the
+// word-type normalizer come back as an empty list flagged Ignore();
+// otherwise the database is consulted (which may return NULL when the
+// word has no references at all).
+//
+ResultList *
+WordSearcher::Search(const String &word)
+{
+    ResultList *result = 0;
+    if(IsIgnore(word))
+    {
+        if(debug) cerr << "IGNORE: " << word << endl;
+        result = new ResultList;
+        result->Ignore();   // mark as "ignored", not "no matches"
+    }
+    else
+    {
+        result = Fetch(word);
+    }
+    return result;
+}
+
+//
+// see if word must be ignored
+//
+// A word is ignored when WordType::Normalize reports any error/flag
+// (non-zero), e.g. too short or otherwise invalid per configuration.
+// Works on a copy so the caller's word is untouched.
+//
+bool
+WordSearcher::IsIgnore(const String &word)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    String copy = word;
+    WordType type(*config);
+    return 0 != type.Normalize(copy);
+}
+
+//
+// gather all references in the db, construct a ResultList
+//
+// References come back ordered so that all entries for one document are
+// adjacent; a new DocMatch is started whenever the DocID changes, and
+// every reference contributes a Location to the current match.
+// Returns NULL when the word has no references.
+// NOTE(review): 'refs' obtained from references[word] is never deleted
+// here — verify HtWordList ownership semantics (possible leak).
+//
+ResultList *
+WordSearcher::Fetch(const String &word)
+{
+    if(debug) cerr << "FETCH: " << word << endl;
+    ResultList *result = 0;
+    List *refs = references[word];
+
+    if(refs && refs->Count())
+    {
+        if(debug) cerr << "REFERENCES: " << refs->Count() << endl;
+        result = new ResultList;
+        DocMatch *match = new DocMatch;
+
+        // Seed the first match from the first reference...
+        refs->Start_Get();
+        HtWordReference *ref = (HtWordReference *)refs->Get_Next();
+        match->SetId(ref->DocID());
+        match->SetAnchor(ref->Anchor());
+        result->add(match);
+        unsigned int current = ref->DocID();
+        if(debug) cerr << "At: " << ref->DocID() << endl;
+        // ...then walk all references (including the first again, which
+        // falls through to AddLocation since its DocID == current).
+        while(ref)
+        {
+            if(ref->DocID() != current)
+            {
+                if(debug) cerr << "At: "<<ref->DocID()<< endl;
+                match = new DocMatch;
+                match->SetId(ref->DocID());
+                match->SetAnchor(ref->Anchor());
+                result->add(match);
+                current = ref->DocID();
+            }
+            if(debug) cerr << "@ "<<ref->Location()<< endl;
+            match->AddLocation(
+                new Location(
+                    ref->Location(),
+                    ref->Location(),
+                    ref->Flags()));
+            ref = (HtWordReference *)refs->Get_Next();
+        }
+    }
+    return result;
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.h b/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.h
new file mode 100644
index 00000000..2ef656c9
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/WordSearcher.h
@@ -0,0 +1,49 @@
+#ifndef _WordSearcher_h_
+#define _WordSearcher_h_
+
+//
+// WordSearcher.h
+//
+// WordSearcher: a simple word database readonly-access wrapper
+// generates ResultLists for the Query framework.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordSearcher.h,v 1.4 2004/05/28 13:15:24 lha Exp $
+//
+
+#if HAVE_CONFIG_H
+#include "htconfig.h"
+#endif
+
+#include "htString.h"
+#include "HtWordList.h"
+
+class ResultList;
+
+class WordSearcher
+{
+public:
+    // constructor; opens 'filename' as a read-only word database
+    WordSearcher(const String &filename);
+
+    // fetch results for one exact word
+    // (caller owns the returned ResultList; NULL = no references,
+    //  an Ignore()-flagged list = word rejected by the normalizer)
+    ResultList *Search(const String &word);
+
+private:
+    // word is to be ignored (fails WordType normalization)
+    bool IsIgnore(const String &word);
+
+    // fetch results in database
+    ResultList *Fetch(const String &word);
+
+    // the database wrapper
+    HtWordList references;
+};
+
+
+#endif /* _WordSearcher_h_ */
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc
new file mode 100644
index 00000000..8c410784
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.cc
@@ -0,0 +1,957 @@
+//
+// htsearch.cc
+//
+// htsearch: The main search CGI. Parses the CGI input, reads the config files
+// and calls the necessary code to put together the result lists
+// and the final display.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: htsearch.cc,v 1.72 2004/05/28 13:15:24 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "htsearch.h"
+#include "WeightWord.h"
+#include "parser.h"
+#include "Display.h"
+#include "../htfuzzy/Fuzzy.h"
+#include "cgi.h"
+#include "WordRecord.h"
+#include "HtWordList.h"
+#include "StringList.h"
+#include "IntObject.h"
+#include "HtURLCodec.h"
+#include "HtURLRewriter.h"
+#include "WordContext.h"
+#include "HtRegex.h"
+#include "Collection.h"
+
+#include <time.h>
+#include <ctype.h>
+#include <signal.h>
+
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#elif HAVE_GETOPT_LOCAL
+#include <getopt_local.h>
+#endif
+
+typedef void (*SIGNAL_HANDLER) (...);
+
+// Forward declarations for functions defined later in this file or in
+// other htsearch translation units.
+// ResultList *htsearch(const String&, List &, Parser *);
+void htsearch(Collection *, List &, Parser *);
+
+void setupWords(char *, List &, int, Parser *, String &);
+void createLogicalWords(List &, String &, String &);
+void reportError(char *);
+void convertToBoolean(List &words);
+void doFuzzy(WeightWord *, List &, List &);
+void addRequiredWords(List &, StringList &);
+void usage();
+
+int debug = 0;                     // verbosity, bumped by -v/-d
+int minimum_word_length = 3;       // overridden by config attribute
+StringList boolean_keywords;       // localized "and"/"or"/"not"
+
+StringList collectionList; // List of databases to search on
+
+// recognised word prefixes (for field-restricted search and per-word fuzzy
+// algorithms) in *descending* alphabetical order.
+// Don't use a dictionary structure, as setup time outweighs the saving.
+struct {char *name; unsigned int flag; } colonPrefix [] =
+{
+    { "url",	FLAG_URL },
+    { "title",	FLAG_TITLE },
+    { "text",	FLAG_PLAIN },	// FLAG_TEXT is 0, i.e. *no* flag...
+    { "link",	FLAG_LINK_TEXT },
+    { "keyword",	FLAG_KEYWORDS },
+    { "hidden",	FLAG_HIDDEN },
+    { "heading",	FLAG_HEADING },
+    { "exact",	FLAG_EXACT },
+    { "descr",	FLAG_DESCRIPTION },
+//  { "cap",	FLAG_CAPITAL },
+    { "author",	FLAG_AUTHOR },
+    { "", 0 },   // sentinel terminates the scan
+};
+
+//*****************************************************************************
+// int main()
+//
+// CGI entry point.  Parses command-line options and CGI input, then for
+// each requested collection (config=): reads its configuration, parses
+// the search words, runs the search, and finally renders all collected
+// results through Display.
+//
+int
+main(int ac, char **av)
+{
+    int c;
+    extern char *optarg;
+    int override_config=0;
+    // List searchWords;
+    List *searchWords = NULL;
+    String configFile = DEFAULT_CONFIG_FILE;
+    int pageNumber = 1;
+    HtRegex limit_to;
+    HtRegex exclude_these;
+    String logicalWords;
+    String origPattern;
+    String logicalPattern;
+    // StringMatch searchWordsPattern;
+    StringMatch *searchWordsPattern = NULL;
+    StringList requiredWords;
+    int i;
+    Dictionary selected_collections; // Multiple database support
+
+    //
+    // Parse command line arguments
+    //
+    while ((c = getopt(ac, av, "c:dv")) != -1)
+    {
+        switch (c)
+        {
+            case 'c':
+                // The default is obviously to do this securely
+                // but if people want to shoot themselves in the foot...
+#ifndef ALLOW_INSECURE_CGI_CONFIG
+                // -c is honored only for command-line (non-CGI) runs.
+                if (!getenv("REQUEST_METHOD"))
+                {
+#endif
+                configFile = optarg;
+                override_config=1;
+#ifndef ALLOW_INSECURE_CGI_CONFIG
+                }
+#endif
+                break;
+            case 'v':
+                debug++;
+                break;
+            case 'd':
+                // -d and -v are synonyms: both raise verbosity.
+                debug++;
+                break;
+            case '?':
+                usage();
+                break;
+        }
+    }
+
+    //
+    // The total search can NEVER take more than 5 minutes.
+    //
+#ifndef _MSC_VER /* _WIN32 */
+    alarm(5 * 60);
+#endif
+
+    //
+    // Parse the CGI parameters.
+    //
+    char none[] = "";
+    cgi input(optind < ac ? av[optind] : none);
+
+    // Multiple databases may be specified for search.
+    // Identify all databases specified with the "config=" parameter.
+    if (input.exists("config"))
+    {
+        collectionList.Create(input["config"], " \t\001|");
+    }
+    if (collectionList.Count() == 0)
+        collectionList.Add(""); // use default config
+    String errorMsg = "";
+    String originalWords = input["words"];
+    originalWords.chop(" \t\r\n");
+
+    HtConfiguration* config= HtConfiguration::config();
+
+    // Iterate over all specified collections (databases)
+    // (the loop stops early as soon as a parser error is recorded)
+    for (int cInd=0; errorMsg.empty() && cInd < collectionList.Count(); cInd++)
+    {
+        // Each collection is handled in an iteration. Reset the following so
+        // that we start with a clean slate.
+        //
+        logicalWords = 0;
+        origPattern = 0;
+        logicalPattern = 0;
+        // NOTE(review): ownership of these two appears to pass to the
+        // Collection below via setSearchWords/setSearchWordsPattern —
+        // confirm they are freed there, otherwise each iteration leaks.
+        searchWords = new List;
+        searchWordsPattern = new StringMatch;
+
+        char *config_name = collectionList[cInd];
+        if (config_name && config_name[0] == '\0')
+            config_name = NULL; // use default config
+
+        //
+        // Setup the configuration database. First we read the compiled defaults.
+        // Then we override those with defaults read in from the configuration
+        // file, and finally we override some attributes with information we
+        // got from the HTML form.
+        //
+        config->Defaults(&defaults[0]);
+        // To allow . in filename while still being 'secure',
+        // e.g. htdig-f.q.d.n.conf
+        if (!override_config && config_name
+            && (strstr(config_name, "./") == NULL))
+        {
+            char *configDir = getenv("CONFIG_DIR");
+            if (configDir)
+            {
+                configFile = configDir;
+            }
+            else
+            {
+                configFile = CONFIG_DIR;
+            }
+            if (strlen(config_name) == 0)
+                configFile = DEFAULT_CONFIG_FILE;
+            else
+                configFile << '/' << config_name << ".conf";
+        }
+        if (access((char*)configFile, R_OK) < 0)
+        {
+            reportError(form("Unable to read configuration file '%s'",
+                             configFile.get()));
+        }
+        config->Read(configFile);
+
+        // Initialize htword library (key description + wordtype...)
+        WordContext::Initialize(*config);
+
+        if (input.exists("method"))
+            config->Add("match_method", input["method"]);
+        if (input.exists("format"))
+            config->Add("template_name", input["format"]);
+
+        if (input.exists("matchesperpage"))
+        {
+            // minimum check for a valid int value of "matchesperpage" cgi variable
+            if (atoi(input["matchesperpage"]) > 0)
+                config->Add("matches_per_page", input["matchesperpage"]);
+        }
+
+        if (input.exists("page"))
+            pageNumber = atoi(input["page"]);
+        if (input.exists("config"))
+            config->Add("config", input["config"]);
+        if (input.exists("restrict"))
+            config->Add("restrict", input["restrict"]);
+        if (input.exists("exclude"))
+            config->Add("exclude", input["exclude"]);
+        if (input.exists("keywords"))
+            config->Add("keywords", input["keywords"]);
+        requiredWords.Create(config->Find("keywords"), " \t\r\n\001");
+        if (input.exists("sort"))
+            config->Add("sort", input["sort"]);
+
+        // Changes added 3-31-99, by Mike Grommet
+        // Check form entries for starting date, and ending date
+        // Each date consists of a month, day, and year
+
+        if (input.exists("startmonth"))
+            config->Add("startmonth", input["startmonth"]);
+        if (input.exists("startday"))
+            config->Add("startday", input["startday"]);
+        if (input.exists("startyear"))
+            config->Add("startyear", input["startyear"]);
+
+        if (input.exists("endmonth"))
+            config->Add("endmonth", input["endmonth"]);
+        if (input.exists("endday"))
+            config->Add("endday", input["endday"]);
+        if (input.exists("endyear"))
+            config->Add("endyear", input["endyear"]);
+
+        // END OF CHANGES BY MIKE GROMMET
+
+
+        minimum_word_length = config->Value("minimum_word_length", minimum_word_length);
+
+        // Copy whitelisted CGI variables straight into the config.
+        StringList form_vars(config->Find("allow_in_form"), " \t\r\n");
+        for (i= 0; i < form_vars.Count(); i++)
+        {
+            if (input.exists(form_vars[i]))
+                config->Add(form_vars[i], input[form_vars[i]]);
+        }
+
+        //
+        // Compile the URL limit patterns.
+        //
+
+        if (config->Find("restrict").length())
+        {
+            // Create a temporary list from either the configuration
+            // file or the input parameter
+            StringList l(config->Find("restrict"), " \t\r\n\001|");
+            limit_to.setEscaped(l);
+            String u = l.Join('|');
+            config->Add("restrict", u); // re-create the config attribute
+        }
+        if (config->Find("exclude").length())
+        {
+            // Create a temporary list from either the configuration
+            // file or the input parameter
+            StringList l(config->Find("exclude"), " \t\r\n\001|");
+            exclude_these.setEscaped(l);
+            String u = l.Join('|');
+            config->Add("exclude", u); // re-create the config attribute
+        }
+
+        //
+        // Check url_part_aliases and common_url_parts for
+        // errors.
+        String url_part_errors = HtURLCodec::instance()->ErrMsg();
+
+        if (url_part_errors.length() != 0)
+            reportError(form("Invalid url_part_aliases or common_url_parts: %s",
+                             url_part_errors.get()));
+
+        // for htsearch, use search_rewrite_rules attribute for HtURLRewriter.
+        config->AddParsed("url_rewrite_rules", "${search_rewrite_rules}");
+        url_part_errors = HtURLRewriter::instance()->ErrMsg();
+        if (url_part_errors.length() != 0)
+            reportError(form("Invalid url_rewrite_rules: %s",
+                             url_part_errors.get()));
+
+        // Load boolean_keywords from configuration
+        // they should be placed in this order:
+        //    0    1    2
+        //    and  or   not
+        boolean_keywords.Destroy();
+        boolean_keywords.Create(config->Find("boolean_keywords"),
+                                "| \t\r\n\001");
+        if (boolean_keywords.Count() != 3)
+            reportError("boolean_keywords attribute should have three entries");
+
+        Parser *parser = new Parser();
+
+        //
+        // Parse the words to search for from the argument list.
+        // This will produce a list of WeightWord objects.
+        //
+        setupWords(originalWords, *searchWords,
+                   strcmp(config->Find("match_method"), "boolean") == 0,
+                   parser, origPattern);
+
+        //
+        // Convert the list of WeightWord objects to a pattern string
+        // that we can compile.
+        //
+        createLogicalWords(*searchWords, logicalWords, logicalPattern);
+
+        //
+        // Assemble the full pattern for excerpt matching and highlighting
+        //
+        origPattern += logicalPattern;
+        searchWordsPattern->IgnoreCase();
+        searchWordsPattern->IgnorePunct();
+        searchWordsPattern->Pattern(logicalPattern); // this should now be enough
+        //searchWordsPattern.Pattern(origPattern);
+        //if (debug > 2)
+        //  cout << "Excerpt pattern: " << origPattern << "\n";
+
+        //
+        // If required keywords were given in the search form, we will
+        // modify the current searchWords list to include the required
+        // words.
+        //
+        if (requiredWords.Count() > 0)
+        {
+            addRequiredWords(*searchWords, requiredWords);
+        }
+
+        //
+        // Perform the actual search. The function htsearch() is used for this.
+        // The Dictionary it returns is then passed on to the Display object to
+        // actually render the results in HTML.
+        //
+        const String word_db = config->Find("word_db");
+        if (access(word_db, R_OK) < 0)
+        {
+            reportError(form("Unable to read word database file '%s'\nDid you run htdig?",
+                             word_db.get()));
+        }
+        // ResultList *results = htsearch((char*)word_db, searchWords, parser);
+
+        String doc_index = config->Find("doc_index");
+        if (access((char*)doc_index, R_OK) < 0)
+        {
+            reportError(form("Unable to read document index file '%s'\nDid you run htdig?",
+                             doc_index.get()));
+        }
+
+        const String doc_db = config->Find("doc_db");
+        if (access(doc_db, R_OK) < 0)
+        {
+            reportError(form("Unable to read document database file '%s'\nDid you run htdig?",
+                             doc_db.get()));
+        }
+
+        const String doc_excerpt = config->Find("doc_excerpt");
+        if (access(doc_excerpt, R_OK) < 0)
+        {
+            reportError(form("Unable to read document excerpts '%s'\nDid you run htdig?",
+                             doc_excerpt.get()));
+        }
+
+        // Multiple database support
+        Collection *collection = new Collection((char*)configFile,
+            word_db.get(), doc_index.get(), doc_db.get(), doc_excerpt.get());
+
+        // Perform search within the collection. Each collection stores its
+        // own result list.
+        htsearch(collection, *searchWords, parser);
+        collection->setSearchWords(searchWords);
+        collection->setSearchWordsPattern(searchWordsPattern);
+        selected_collections.Add(configFile, collection);
+
+        if (parser->hadError())
+            errorMsg = parser->getErrorMessage();
+
+        delete parser;
+    }
+
+    // Render everything gathered above (all collections at once).
+    // Display display(doc_db, 0, doc_excerpt);
+    Display display(&selected_collections);
+    if (display.hasTemplateError())
+    {
+        reportError(form("Unable to read template file '%s'\nDoes it exist?",
+                         (const char*)config->Find("template_name")));
+        return 0;
+    }
+    display.setOriginalWords(originalWords);
+    // display.setResults(results);
+    // display.setSearchWords(&searchWords);
+    display.setLimit(&limit_to);
+    display.setExclude(&exclude_these);
+    // display.setAllWordsPattern(searchWordsPattern);
+    display.setCGI(&input);
+    display.setLogicalWords(logicalWords);
+    if (!errorMsg.empty())
+        display.displaySyntaxError(errorMsg);
+    else
+        display.display(pageNumber);
+
+    // delete results;
+    // delete parser;
+    return 0;
+}
+
+//*****************************************************************************
+// void createLogicalWords(List &searchWords, String &logicalWords, String &wm)
+//
+// Walk the parsed WeightWord list and build two strings:
+//  - logicalWords: the query re-spelled with the localized boolean
+//    keywords (for display back to the user);
+//  - wm: a '|'-separated pattern of the real words, used for excerpt
+//    highlighting (phrase members are joined with spaces instead).
+// Hidden words are skipped in the display string; ignored (bad/short)
+// words are kept in the highlight pattern only inside phrases.
+//
+void
+createLogicalWords(List &searchWords, String &logicalWords, String &wm)
+{
+    String pattern;
+    int i;
+    int wasHidden = 0;
+    int inPhrase = 0;
+
+    for (i = 0; i < searchWords.Count(); i++)
+    {
+        WeightWord *ww = (WeightWord *) searchWords[i];
+        if (!ww->isHidden)
+        {
+            // Map operator tokens to their localized keywords.
+            if (strcmp((char*)ww->word, "&") == 0 && wasHidden == 0)
+                logicalWords << ' ' << boolean_keywords[AND] << ' ';
+            else if (strcmp((char*)ww->word, "|") == 0 && wasHidden == 0)
+                logicalWords << ' ' << boolean_keywords[OR] << ' ';
+            else if (strcmp((char*)ww->word, "!") == 0 && wasHidden == 0)
+                logicalWords << ' ' << boolean_keywords[NOT] << ' ';
+            else if (strcmp((char*)ww->word, "\"") == 0 && wasHidden == 0)
+            {
+                // Phrase delimiter: toggle phrase mode, trimming the
+                // trailing space before the closing quote.
+                if (inPhrase)
+                    logicalWords.chop(' ');
+                inPhrase = !inPhrase;
+                logicalWords << "\"";
+            }
+            else if (wasHidden == 0)
+            {
+                logicalWords << ww->word;
+                if (inPhrase)
+                    logicalWords << " ";
+            }
+            wasHidden = 0;
+        }
+        else
+            wasHidden = 1;
+        // generate patterns to search for and highlight in excerpt
+        if (ww->weight > 0 // Ignore boolean syntax stuff
+            && (!ww->isIgnore || inPhrase)) // Ignore bad/short words
+        { // but highlight them in phrases
+            char spacer = inPhrase ? ' ' : '|';
+            if (wm.length())
+                wm << spacer;
+            wm << ww->word;
+            if (!ww->isIgnore) // ignore bad/short words for searching
+            {
+                if (pattern.length())
+                    pattern << spacer;
+                pattern << ww->word;
+            }
+        }
+    }
+
+    if (debug)
+    {
+        cerr << "LogicalWords: " << logicalWords << endl;
+        cerr << "Pattern: " << pattern << endl;
+        cerr << "Highlight Pattern: " << wm << endl;
+    }
+}
+
+// dumpWords -- Debug helper: print each word in the list together with its
+// isHidden flag, in the form  msg: 'word:0 word:1 '.  No-op unless the global
+// debug level is non-zero.
+// NOTE(review): the default argument `char *msg = ""` binds a string literal
+// to a non-const char*, which is deprecated/ill-formed in modern C++; also a
+// default argument on a definition conflicts if one is repeated in a
+// declaration elsewhere -- TODO confirm against the header.
+void
+dumpWords(List &words, char *msg = "")
+{
+ if (debug)
+ {
+ cerr << msg << ": '";
+ for (int i = 0; i < words.Count(); i++)
+ {
+ WeightWord *ww = (WeightWord *) words[i];
+ cerr << ww->word << ':' << ww->isHidden << ' ';
+ }
+ cerr << "'\n";
+ }
+}
+
+//*****************************************************************************
+// void setupWords(char *allWords, List &searchWords,
+// int boolean, Parser *parser, String &originalPattern)
+//
+// Tokenize the raw query string into WeightWord objects, insert implicit
+// boolean operators when the query is not already boolean, then run each
+// word through the configured fuzzy algorithms, appending the final token
+// stream to searchWords.
+//   allWords        (in)  raw query text from the CGI input
+//   searchWords     (out) resulting WeightWord token list
+//   boolean         (in)  non-zero if the user wrote an explicit boolean query
+//   parser          (in)  used only to syntax-check a boolean token list
+//   originalPattern (out) '|'-separated list of the words as typed
+void
+setupWords(char *allWords, List &searchWords, int boolean, Parser *parser,
+ String &originalPattern)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ List tempWords;
+ int i;
+
+ //
+ // Parse the words we need to search for. It should be a list of words
+ // with optional 'and' and 'or' between them. The list of words
+ // will be put in the searchWords list and at the same time in the
+ // String pattern separated with '|'.
+ //
+
+ //
+ // Convert the string to a list of WeightWord objects. The special
+ // characters '(' and ')' will be put into their own WeightWord objects.
+ //
+ unsigned char *pos = (unsigned char*) allWords;
+ unsigned char t;
+ String word;
+ const String prefix_suffix = config->Find("prefix_match_character");
+
+ while (*pos)
+ {
+ while (1)
+ {
+ if (debug > 3)
+ cerr << "setupWords: " << pos << endl;
+ t = *pos++;
+ if (isspace(t))
+ {
+ continue;
+ }
+ else if (t == '"')
+ {
+ // Phrase delimiter: weight -1 marks syntax tokens.
+ tempWords.Add(new WeightWord("\"", -1.0));
+ break;
+ }
+ else if (boolean && (t == '(' || t == ')'))
+ {
+ char s[2];
+ s[0] = t;
+ s[1] = '\0';
+ tempWords.Add(new WeightWord(s, -1.0));
+ break;
+ }
+ else if (HtIsWordChar(t) ||
+ (strchr(prefix_suffix, t) != NULL) ||
+ (t >= 161 && t <= 255))
+ {
+ unsigned int fieldFlag = 0;
+ word = 0;
+ do // while recognised prefix, followed by ':'
+ {
+ while (t && (HtIsWordChar(t) ||
+ (strchr(prefix_suffix, t) != NULL) ||
+ (t >= 161 && t <= 255)))
+ {
+ word << (char) t;
+ t = *pos++;
+ }
+ if (debug > 2)
+ cerr << "word: " << word << endl;
+ if (t == ':') // e.g. "author:word" to search
+ { // only in author
+ word.lowercase();
+ t = *pos++;
+ if (t && (HtIsWordChar (t) ||
+ (strchr(prefix_suffix, t) != NULL) ||
+ (t >= 161 && t <= 255)))
+ {
+ int i, cmp;
+ const char *w = word.get();
+ // linear search of known prefixes, with "" flag.
+ // NOTE(review): the loop has no explicit bound; it
+ // presumably relies on a sentinel entry in
+ // colonPrefix[] that compares >= 0 -- TODO confirm.
+ for (i = 0; (cmp = mystrcasecmp (w, colonPrefix[i].name)) < 0; i++)
+ ;
+ if (debug > 2)
+ cerr << "field: "<< colonPrefix[i].name << endl;
+ if (cmp == 0) // if prefix found...
+ {
+ fieldFlag |= colonPrefix [i].flag;
+ word = 0;
+ }
+ }
+ }
+ } while (!word.length() && t);
+ pos--;
+ if (!t && !word.length()) // query ended with junk chars
+ break;
+
+ if (boolean && (mystrcasecmp(word.get(), "+") == 0
+ || mystrcasecmp(word.get(), boolean_keywords[AND]) == 0))
+ {
+ tempWords.Add(new WeightWord("&", -1.0));
+ }
+ else if (boolean &&
+ mystrcasecmp(word.get(), boolean_keywords[OR]) == 0)
+ {
+ tempWords.Add(new WeightWord("|", -1.0));
+ }
+ else if (boolean && (mystrcasecmp(word.get(), "-") == 0
+ || mystrcasecmp(word.get(), boolean_keywords[NOT]) == 0))
+ {
+ tempWords.Add(new WeightWord("!", -1.0));
+ }
+ else
+ {
+ // Add word to excerpt matching list
+ originalPattern << word << "|";
+ WeightWord *ww = new WeightWord(word, 1.0, fieldFlag);
+ if(HtWordNormalize(word) & WORD_NORMALIZE_NOTOK)
+ ww->isIgnore = 1;
+ tempWords.Add(ww);
+ }
+ break;
+ }
+ }
+ }
+
+ dumpWords(tempWords, "tempWords");
+
+ //
+ // If the user specified boolean expression operators, the whole
+ // expression has to be syntactically correct. If not, we need
+ // to report a syntax error.
+ //
+ if (boolean)
+ {
+ if (!parser->checkSyntax(&tempWords))
+ {
+ // On syntax error, hand the raw tokens to the caller unchanged
+ // (fuzzy expansion is skipped); Release() transfers ownership
+ // without deleting the elements.
+ for (i = 0; i < tempWords.Count(); i++)
+ {
+ searchWords.Add(tempWords[i]);
+ }
+ tempWords.Release();
+ return;
+// reportError("Syntax error");
+ }
+ }
+ else
+ {
+ convertToBoolean(tempWords);
+ }
+
+ dumpWords(tempWords, "Boolean");
+
+ //
+ // We need to assign weights to the words according to the search_algorithm
+ // configuration attribute.
+ // For algorithms other than exact, we need to also do word lookups.
+ //
+ StringList algs(config->Find("search_algorithm"), " \t");
+ List algorithms;
+ String name, weight;
+ double fweight;
+ Fuzzy *fuzzy = 0;
+
+ //
+ // Generate the list of algorithms to use and associate the given
+ // weights with them.
+ //
+ for (i = 0; i < algs.Count(); i++)
+ {
+ // Each entry is "name:weight"; both parts are optional.
+ name = strtok(algs[i], ":");
+ weight = strtok(0, ":");
+ if (name.length() == 0)
+ name = "exact";
+ if (weight.length() == 0)
+ weight = "1";
+ fweight = atof((char*)weight);
+
+ fuzzy = Fuzzy::getFuzzyByName(name, *config);
+ if (fuzzy)
+ {
+ if (debug > 1)
+ cerr << "Adding algorithm " << name.get() << endl;
+ fuzzy->setWeight(fweight);
+ fuzzy->openIndex();
+ algorithms.Add(fuzzy);
+ } else if (debug)
+ cerr << "Unknown fuzzy search algorithm " << name.get() << endl;
+ }
+
+ dumpWords(searchWords, "initial");
+
+ //
+ // For each of the words, apply all the algorithms.
+ //
+ int in_phrase = 0; // If we get into a phrase, we don't want to fuzz.
+ for (i = 0; i < tempWords.Count(); i++)
+ {
+ WeightWord *ww = (WeightWord *) tempWords[i];
+ if (ww->weight > 0 && !ww->isIgnore && !in_phrase)
+// I think that should be:
+// if (ww->weight > 0 && !ww->isIgnore && !in_phrase && !ww->isExact)
+ {
+ //
+ // Apply all the algorithms to the word.
+ //
+ if (debug)
+ cerr << "Fuzzy on: " << ww->word << endl;
+ doFuzzy(ww, searchWords, algorithms);
+ delete ww;
+ }
+ else if (ww->word.length() == 1 && ww->word[0] == '"')
+ {
+ in_phrase = !in_phrase;
+ if (debug)
+ cerr << "Add: " << ww->word << endl;
+ searchWords.Add(ww);
+ }
+ else
+ {
+ //
+ // This is '(', ')', '&', or '|'. These will be automatically
+ // transfered to the searchWords list.
+ //
+ if (debug)
+ cerr << "Add: " << ww->word << endl;
+ searchWords.Add(ww);
+ }
+ dumpWords(searchWords, "searchWords");
+ }
+ tempWords.Release();
+}
+
+
+//*****************************************************************************
+// doFuzzy -- Expand one search word through every configured fuzzy algorithm
+// and append the expansion to searchWords.
+//   ww          (in)  the word to expand (caller retains/frees it)
+//   searchWords (out) receives either "( w1 | w2 | ... )", a single word, or
+//                     the original word with a tiny weight if nothing matched
+//   algorithms  (in)  list of Fuzzy* produced by setupWords
+// Each substitute inherits the isExact/isHidden/flags of the original but
+// takes the algorithm's weight.
+void
+doFuzzy(WeightWord *ww, List &searchWords, List &algorithms)
+{
+ List fuzzyWords;
+ List weightWords;
+ Fuzzy *fuzzy = 0;
+ WeightWord *newWw = 0;
+ String *word = 0;
+
+ algorithms.Start_Get();
+ while ((fuzzy = (Fuzzy *) algorithms.Get_Next()))
+ {
+ if (debug > 1)
+ cerr << " " << fuzzy->getName();
+ fuzzy->getWords(ww->word, fuzzyWords);
+ fuzzyWords.Start_Get();
+ while ((word = (String *) fuzzyWords.Get_Next()))
+ {
+ if (debug > 1)
+ cerr << " " << word->get();
+ // (should be a "copy with changed weight" constructor...)
+ newWw = new WeightWord(word->get(), fuzzy->getWeight());
+ newWw->isExact = ww->isExact;
+ newWw->isHidden = ww->isHidden;
+ newWw->flags = ww->flags;
+ weightWords.Add(newWw);
+ }
+ if (debug > 1)
+ cerr << endl;
+ fuzzyWords.Destroy();
+ }
+
+ //
+ // We now have a list of substitute words. They need to be added
+ // to the searchWords.
+ //
+ if (weightWords.Count())
+ {
+ // Multiple substitutes become an OR group wrapped in parentheses.
+ if (weightWords.Count() > 1)
+ searchWords.Add(new WeightWord("(", -1.0));
+ for (int i = 0; i < weightWords.Count(); i++)
+ {
+ if (i > 0)
+ searchWords.Add(new WeightWord("|", -1.0));
+ searchWords.Add(weightWords[i]);
+ }
+ if (weightWords.Count() > 1)
+ searchWords.Add(new WeightWord(")", -1.0));
+ }
+ else // if no fuzzy matches, add exact word, but give it tiny weight
+ {
+ searchWords.Add(new WeightWord(ww->word.get(), 0.000001));
+ }
+ weightWords.Release();
+}
+
+
+//*****************************************************************************
+// void convertToBoolean(List &words)
+//
+// Turn a plain word list into a boolean token list by inserting '&' (when
+// match_method is "and") or '|' between consecutive words. No operator is
+// inserted while inside a quoted phrase. The list is rebuilt in place;
+// Release() is used so the WeightWord elements themselves are not deleted.
+void
+convertToBoolean(List &words)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ List list;
+ int i;
+ int do_and = strcmp(config->Find("match_method"), "and") == 0;
+ int in_phrase = 0;
+
+ String quote = "\"";
+
+ if (words.Count() == 0)
+ return;
+ list.Add(words[0]);
+
+ // We might start off with a phrase match
+ if (((WeightWord *) words[0])->word == quote)
+ in_phrase = 1;
+
+ for (i = 1; i < words.Count(); i++)
+ {
+ if (do_and && !in_phrase)
+ list.Add(new WeightWord("&", -1.0));
+ else if (!in_phrase)
+ list.Add(new WeightWord("|", -1.0));
+
+ if (((WeightWord *) words[i])->word == quote)
+ in_phrase = !in_phrase;
+
+ list.Add(words[i]);
+ }
+ words.Release();
+
+ for (i = 0; i < list.Count(); i++)
+ {
+ words.Add(list[i]);
+ }
+ list.Release();
+}
+
+
+//*****************************************************************************
+// Dictionary *htsearch(char *wordfile, List &searchWords, Parser *parser)
+// This returns a dictionary indexed by document ID and containing a
+// List of HtWordReference objects.
+//
+// Run the parsed query against one collection. The ResultList is always
+// allocated (empty when there are no search words) and handed to the
+// collection via setResultList -- presumably the Collection takes ownership
+// and frees it; TODO confirm in Collection.cc.
+void
+htsearch(Collection *collection, List &searchWords, Parser *parser)
+{
+ //
+ // Pick the database type we are going to use
+ //
+ ResultList *matches = new ResultList;
+ if (searchWords.Count() > 0)
+ {
+ // parser->setDatabase(wordfile);
+ parser->setCollection(collection);
+ parser->parse(&searchWords, *matches);
+ }
+
+ collection->setResultList(matches);
+ // return matches;
+}
+
+
+//*****************************************************************************
+// Modify the search words list to include the required words as well.
+// This is done by putting the existing search words in parenthesis and
+// appending the required words separated with "and".
+// With config attribute any_keywords set, the required words are OR'ed
+// together instead of AND'ed. The static caches the Boolean() lookup on
+// first call.
+void
+addRequiredWords(List &searchWords, StringList &requiredWords)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ static int any_keywords = config->Boolean("any_keywords", 0);
+ if (requiredWords.Count() == 0)
+ return;
+ if (searchWords.Count() > 0)
+ {
+ // Existing query becomes "( query ) &" ahead of the required words.
+ searchWords.Insert(new WeightWord("(", -1.0), 0);
+ searchWords.Add(new WeightWord(")", -1.0));
+ searchWords.Add(new WeightWord("&", -1.0));
+ }
+ if (requiredWords.Count() == 1)
+ {
+ searchWords.Add(new WeightWord(requiredWords[0], 1.0));
+ }
+ else
+ {
+ searchWords.Add(new WeightWord("(", -1.0));
+ searchWords.Add(new WeightWord(requiredWords[0], 1.0));
+ for (int i = 1; i < requiredWords.Count(); i++)
+ {
+ if (any_keywords)
+ searchWords.Add(new WeightWord("|", -1.0));
+ else
+ searchWords.Add(new WeightWord("&", -1.0));
+ searchWords.Add(new WeightWord(requiredWords[i], 1.0));
+ }
+ searchWords.Add(new WeightWord(")", -1.0));
+ }
+}
+
+
+//*****************************************************************************
+// Report an error. Since we don' know if we are running as a CGI or not,
+// we will assume this is the first thing returned by a CGI program.
+//
+// Emits a complete HTML error page (including the Content-type header and
+// the configured maintainer address) and terminates the process with
+// exit(1) -- this function does not return.
+void
+reportError(char *msg)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ cout << "Content-type: text/html\r\n\r\n";
+ cout << "<html><head><title>htsearch error</title></head>\n";
+ cout << "<body bgcolor=\"#ffffff\">\n";
+ cout << "<h1>ht://Dig error</h1>\n";
+ cout << "<p>htsearch detected an error. Please report this to the\n";
+ cout << "webmaster of this site by sending an e-mail to:\n";
+ cout << "<a href=\"mailto:" << config->Find("maintainer") << "\">";
+ cout << config->Find("maintainer") << "</a>\n";
+ cout << "The error message is:</p>\n";
+ cout << "<pre>\n" << msg << "\n</pre>\n</body></html>\n";
+ exit(1);
+}
+
+//*****************************************************************************
+// void usage()
+// Display program usage information--assumes we're running from a cmd line
+//
+// Prints the option summary to stdout and exits with status 0 (never
+// returns).
+void usage()
+{
+ cout << "usage: htsearch [-v][-d][-c configfile] [query_string]\n";
+ cout << "This program is part of ht://Dig " << VERSION << "\n\n";
+ cout << "Options:\n";
+ cout << "\t-v -d\tVerbose mode. This increases the verbosity of the\n";
+ cout << "\t\tprogram. Using more than 2 is probably only useful\n";
+ cout << "\t\tfor debugging purposes. The default verbose mode\n";
+ cout << "\t\tgives a progress on what it is doing and where it is.\n\n";
+ cout << "\t-c configfile\n";
+ cout << "\t\tUse the specified configuration file instead on the\n";
+ cout << "\t\tdefault.\n\n";
+ cout << "\tquery_string\tA CGI-style query string can be given as a single\n";
+ cout << "\t\targument, and is only used if the REQUEST_METHOD environment\n";
+ cout << "\t\tvariable is not set. If no query_string is given, and\n";
+ cout << "\t\tREQUEST_METHOD is not set, htsearch will prompt for the query.\n\n";
+ exit(0);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.h b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.h
new file mode 100644
index 00000000..59133e38
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/htsearch.h
@@ -0,0 +1,71 @@
+//
+// htsearch.h
+//
+// htsearch: The main search CGI. Parses the CGI input, reads the config files
+// and calls the necessary code to put together the result lists
+// and the final display.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: htsearch.h,v 1.16 2004/05/28 13:15:25 lha Exp $
+//
+
+#ifndef _htsearch_h_
+#define _htsearch_h_
+
+#include "List.h"
+#include "StringList.h"
+#include "Dictionary.h"
+#include "DocumentRef.h"
+#include "Database.h"
+#include "good_strtok.h"
+#include "DocumentDB.h"
+#include "htString.h"
+#include "HtConfiguration.h"
+#include "ResultMatch.h"
+#include "ResultList.h"
+#include "HtWordReference.h"
+#include "StringMatch.h"
+#include "defaults.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <unistd.h>
+#endif
+
+extern int n_matches;
+extern int do_and;
+extern int do_short;
+extern StringList fields;
+
+#ifndef _MSC_VER /* _WIN32 */
+extern StringMatch limit_to;
+#endif
+
+extern StringMatch URLimage;
+extern List URLimageList;
+extern StringMatch wm;
+extern Database *dbf;
+extern String logicalWords;
+extern String originalWords;
+extern int debug;
+extern StringList collectionList;
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/parser.cc b/debian/htdig/htdig-3.2.0b6/htsearch/parser.cc
new file mode 100644
index 00000000..3ed1531c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/parser.cc
@@ -0,0 +1,918 @@
+//
+// parser.cc
+//
+// parser: Parses a boolean expression tree, retrieving and scoring
+// the resulting document list
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: parser.cc,v 1.36 2004/06/11 16:50:33 grdetil Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "parser.h"
+#include "HtPack.h"
+#include "Collection.h"
+#include "Dictionary.h"
+#include "QuotedStringList.h"
+
+#define WORD 1000
+#define DONE 1001
+
+QuotedStringList boolean_syntax_errors;
+enum ErrorIndices { EXPECTED, SEARCH_WORD, AT_END, INSTEAD_OF, END_OF_EXPR, QUOTE };
+
+//*****************************************************************************
+// Parser constructor -- bind the word database wrapper to the global
+// configuration and reset the parse state (no token list, empty result,
+// no current token, syntax assumed valid until proven otherwise).
+Parser::Parser() :
+ words(*(HtConfiguration::config()))
+{
+ tokens = 0;
+ result = 0;
+ current = 0;
+ valid = 1;
+}
+
+
+//*****************************************************************************
+// int Parser::checkSyntax(List *tokenList)
+// As the name of the function implies, we will only perform a syntax check
+// on the list of tokens.
+//
+// Returns non-zero when the token list parses cleanly (fullexpr is run with
+// output disabled). Also (re)loads the boolean_syntax_errors message table
+// from the configuration, padding a missing sixth entry for backward
+// compatibility. Note the local declaration of reportError below -- it
+// exits the process if the attribute has the wrong number of entries.
+int
+Parser::checkSyntax(List *tokenList)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ void reportError(char *);
+ // Load boolean_syntax_errors from configuration
+ // they should be placed in this order:
+ // 0 1 2 3 4
+ // Expected "a search word" "at the end" "instead of" "end of expression"
+ // 5
+ // "a closing quote"
+ boolean_syntax_errors.Destroy();
+ boolean_syntax_errors.Create(config->Find("boolean_syntax_errors"), "| \t\r\n\001");
+ if (boolean_syntax_errors.Count() == 5)
+ { // for backward compatibility
+ boolean_syntax_errors.Add (new String ("a closing quote"));
+ if (debug)
+ cerr << "Parser::checkSyntax() : boolean_syntax_errors should have six entries\n";
+ } else if (boolean_syntax_errors.Count() != 6)
+ reportError("boolean_syntax_errors attribute should have six entries");
+ tokens = tokenList;
+ valid = 1;
+ fullexpr(0);
+ return valid;
+}
+
+//*****************************************************************************
+/* Called by: Parser::parse(List*, ResultList&), checkSyntax(List*) */
+/* Inputs: output -- if zero, simply check syntax */
+/* otherwise, list matching documents in head of "stack" */
+// Top-level production: parse one whole expression and require that every
+// token was consumed; leftover tokens are a syntax error.
+void
+Parser::fullexpr(int output)
+{
+ tokens->Start_Get();
+ lookahead = lexan();
+ expr(output);
+ if (valid && lookahead != DONE)
+ {
+ setError(boolean_syntax_errors[END_OF_EXPR]);
+ }
+}
+
+//*****************************************************************************
+// Lexer: advance to the next token in the list and classify it. Operator
+// tokens return their own character code ('&', '|', '!', '(', ')', '"');
+// anything else is a search WORD; end of list is DONE. Side effect: the
+// member `current` points at the token just consumed (0 at end).
+int
+Parser::lexan()
+{
+ current = (WeightWord *) tokens->Get_Next();
+ if (!current)
+ return DONE;
+ else if (mystrcasecmp((char*)current->word, "&") == 0)
+ return '&';
+ else if (mystrcasecmp((char*)current->word, "|") == 0)
+ return '|';
+ else if (mystrcasecmp((char*)current->word, "!") == 0)
+ return '!';
+ else if (mystrcasecmp((char*)current->word, "(") == 0)
+ return '(';
+ else if (mystrcasecmp((char*)current->word, ")") == 0)
+ return ')';
+ else if (mystrcasecmp((char*)current->word, "\"") == 0)
+ return '"';
+ else
+ return WORD;
+}
+
+//*****************************************************************************
+// Attempt to deal with expressions in the form
+// term | term | term ...
+/* Called by: Parser::fullexpr(int), factor(int) */
+/* Inputs: output -- if zero, simply check syntax */
+// Each '|' pops two result lists and pushes their union (perform_or).
+// Two adjacent WORDs with no operator in between are a syntax error; the
+// expected-token message is built from boolean_keywords (note that
+// boolean_keywords[OR] doubles as the English connective in the message).
+void
+Parser::expr(int output)
+{
+ term(output);
+ while (1)
+ {
+ if (match('|'))
+ {
+ term(output);
+ if (output)
+ {
+ if(debug) cerr << "or--" << endl;
+ perform_or();
+ if(debug) cerr << "stack:" << stack.Size() << endl;
+ }
+ }
+ else
+ break;
+ }
+ if (valid && lookahead == WORD)
+ {
+ String expected = "'";
+ expected << boolean_keywords[AND] << "' "<< boolean_keywords[OR] <<" '"
+ << boolean_keywords[OR] << "'";
+ setError(expected.get());
+ }
+}
+
+//*****************************************************************************
+// Attempt to deal with terms in the form
+// factor & factor & factor ...
+/* Called by: Parser::expr(int) */
+/* Inputs: output -- if zero, simply check syntax */
+// '&' and '!' bind at the same precedence level here: each one pops two
+// lists and pushes the intersection (perform_and) or difference
+// (perform_not) respectively.
+void
+Parser::term(int output)
+{
+
+ factor(output);
+ if(debug) cerr << "term:factor" << endl;
+ while (1)
+ {
+ if(match('&'))
+ {
+ factor(output);
+ if(output)
+ {
+ if(debug) cerr << "and--" << endl;
+ perform_and();
+ if(debug) cerr << "stack:" << stack.Size() << endl;
+ }
+ }
+ else if(match('!'))
+ {
+ factor(output);
+ if(output)
+ {
+ if(debug) cerr << "not--" << endl;
+ perform_not();
+ if(debug) cerr << "stack:" << stack.Size() << endl;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+}
+
+//*****************************************************************************
+/* Gather and score a (possibly bracketed) boolean expression */
+/* Called by: Parser::term(int) */
+/* Inputs: output -- if zero, simply check syntax */
+// factor := '"' phrase | '(' expr ')' | WORD
+// A bare WORD is looked up and its scored result list pushed onto the
+// stack (perform_push); anything else at this point is a syntax error.
+void
+Parser::factor(int output)
+{
+ if(match('"'))
+ {
+ phrase(output);
+ }
+ else if (match('('))
+ {
+ expr(output);
+ if (match(')'))
+ {
+ return;
+ }
+ else
+ {
+ setError("')'");
+ }
+ }
+ else if (lookahead == WORD)
+ {
+ if (output)
+ {
+ perform_push();
+ }
+ lookahead = lexan();
+ }
+ else
+ {
+ setError(boolean_syntax_errors[SEARCH_WORD]);
+// setError("a search word, a quoted phrase, a boolean expression between ()");
+ }
+}
+
+//*****************************************************************************
+/* Gather and score a quoted phrase */
+/* Called by: Parser::factor(int) */
+/* Inputs: output -- if zero, simply check syntax */
+// Consumes tokens up to the closing '"', accumulating matching word
+// occurrences in wordList (perform_phrase) and multiplying the phrase
+// weight by each word's weight. Operators inside a phrase are demoted to
+// ignored words. A missing closing quote is a syntax error (QUOTE).
+// On the closing quote the accumulated list is scored against all fields.
+void
+Parser::phrase(int output)
+{
+ List *wordList = 0;
+ double weight = 1.0;
+
+ while (1)
+ {
+ if (match('"'))
+ {
+ if (output)
+ {
+ if(!wordList) wordList = new List;
+ if(debug) cerr << "scoring phrase" << endl;
+ score(wordList, weight, FLAGS_MATCH_ONE); // look in all fields
+ }
+ break;
+ }
+ else if (lookahead == WORD)
+ {
+ weight *= current->weight;
+ if (output)
+ perform_phrase(wordList);
+
+ lookahead = lexan();
+ }
+ else if (lookahead == DONE)
+ {
+ setError(boolean_syntax_errors[QUOTE]);
+ break;
+ }
+ else
+ {
+ // skip '&' '|' and '!' in the phrase
+ current->isIgnore = 1;
+ if (output)
+ perform_phrase(wordList);
+ lookahead = lexan ();
+ }
+ } // end while
+ if(wordList) delete wordList;
+}
+
+//*****************************************************************************
+// match -- If the lookahead token is t, consume it (advancing the lexer)
+// and return 1; otherwise leave the lookahead untouched and return 0.
+int
+Parser::match(int t)
+{
+ if (lookahead == t)
+ {
+ lookahead = lexan();
+ return 1;
+ }
+ else
+ return 0;
+}
+
+//*****************************************************************************
+// setError -- Record the first syntax error encountered: mark the parse
+// invalid and build a message of the form
+//   "Expected <expected> instead of '<token>' [or '<operator keyword>']"
+// (or "... at the end" when input is exhausted). Subsequent calls are
+// no-ops so only the first error is reported.
+void
+Parser::setError(char *expected)
+{
+ if (valid)
+ {
+ valid = 0;
+ error = 0;
+ error << boolean_syntax_errors[EXPECTED] << ' ' << expected;
+ if (lookahead == DONE || !current)
+ {
+ error << ' ' << boolean_syntax_errors[AT_END];
+ }
+ else
+ {
+ error << ' ' << boolean_syntax_errors[INSTEAD_OF] << " '"
+ << current->word.get() << "'";
+ // If the offending token is an operator, also name it with the
+ // user-visible boolean keyword.
+ switch (lookahead)
+ {
+ case '&': error << ' ' << boolean_keywords[OR] << " '"
+ << boolean_keywords[AND] << "'";
+ break;
+ case '|': error << ' ' << boolean_keywords[OR] << " '"
+ << boolean_keywords[OR] << "'";
+ break;
+ case '!': error << ' ' << boolean_keywords[OR] << " '"
+ << boolean_keywords[NOT] << "'";
+ break;
+ }
+ }
+ if (debug) cerr << "Syntax error: " << error << endl;
+ }
+}
+
+//*****************************************************************************
+// Perform a lookup of the current word and push the result onto the stack
+//
+// Three cases:
+//  * the bare prefix-match character (wildcard): every document in the
+//    doc db matches, each scored with the word's weight;
+//  * an ignored (bad/short) word: push an empty list flagged isIgnore so
+//    the boolean operators can treat it as "don't care";
+//  * a normal word: truncate to maximum_word_length, lowercase, look it
+//    up in the word db and score the occurrence list.
+void
+Parser::perform_push()
+{
+ HtConfiguration* config= HtConfiguration::config();
+ static int maximum_word_length = config->Value("maximum_word_length", 12);
+ String temp = current->word.get();
+ char *p;
+
+ if(debug)
+ cerr << "perform_push @"<< stack.Size() << ": " << temp << endl;
+
+ String wildcard = config->Find("prefix_match_character");
+ if (!wildcard.get())
+ wildcard = "*";
+ if (temp == wildcard)
+ {
+ if (debug) cerr << "Wild card search\n";
+ ResultList *list = new ResultList;
+ String doc_db = config->Find("doc_db");
+ DocumentDB docdb;
+ docdb.Read(doc_db);
+ List *docs = docdb.DocIDs();
+
+ //
+ // Traverse all the known documents
+ //
+ DocumentRef *ref;
+ IntObject *id;
+ DocMatch *dm;
+ docs->Start_Get();
+ while ((id = (IntObject *) docs->Get_Next()))
+ {
+ ref = docdb[id->Value()];
+ if (debug)
+ cerr << (ref ? "Wildcard match" : "Wildcard empty") << endl;
+ if (ref)
+ {
+ dm = new DocMatch;
+ dm->score = current->weight;
+ dm->id = ref->DocID();
+ dm->orMatches = 1;
+ dm->anchor = 0;
+ list->add(dm);
+ }
+ delete ref;
+ }
+ delete docs;
+ stack.push(list);
+
+ return;
+ }
+
+ // Must be after wildcard: "*" is "isIgnore" because it is too short.
+ if (current->isIgnore)
+ {
+ if(debug) cerr << "ignore: " << temp << " @" << stack.Size() << endl;
+ //
+ // This word needs to be ignored. Make it so.
+ //
+ ResultList *list = new ResultList;
+ list->isIgnore = 1;
+ stack.push(list);
+ return;
+ }
+
+ temp.lowercase();
+ p = temp.get();
+ if (temp.length() > maximum_word_length)
+ p[maximum_word_length] = '\0';
+
+ List* result = words[p];
+ score(result, current->weight, current->flags);
+ delete result;
+}
+
+//*****************************************************************************
+// BUG: Phrases containing "bad words" can have *any* "bad word" in that
+// position. Words less than minimum_word_length ignored entirely,
+// as they are not indexed.
+//
+// Incrementally intersect the running phrase match list (oldWords) with the
+// occurrences of the current word, keeping only pairs where the new word
+// appears exactly one position (plus any intervening ignored words) after
+// an existing match. On the first non-ignored word, oldWords is created and
+// seeded with all of that word's occurrences.
+// NOTE(review): `ignoredWords` is function-static, so its count leaks
+// across phrases (and calls) if a phrase ends on an ignored word -- it is
+// only reset after an intersection pass. TODO confirm intended.
+void
+Parser::perform_phrase(List * &oldWords)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ static int maximum_word_length = config->Value("maximum_word_length", 12);
+ String temp = current->word.get();
+ char *p;
+ List *newWords = 0;
+ HtWordReference *oldWord, *newWord;
+
+ // how many words ignored since last checked word?
+ static int ignoredWords = 0;
+
+ // if the query is empty, no further effort is needed
+ if(oldWords && oldWords->Count() == 0)
+ {
+ if(debug) cerr << "phrase not found, skip" << endl;
+ return;
+ }
+
+ if(debug) cerr << "phrase current: " << temp << endl;
+ if (current->isIgnore)
+ {
+ //
+ // This word needs to be ignored. Make it so.
+ //
+ if (temp.length() >= config->Value ("minimum_word_length") && oldWords)
+ ignoredWords++;
+ if(debug) cerr << "ignoring: " << temp << endl;
+ return;
+ }
+
+ temp.lowercase();
+ p = temp.get();
+ if (temp.length() > maximum_word_length)
+ p[maximum_word_length] = '\0';
+
+ // NOTE(review): newWords is dereferenced immediately below; this assumes
+ // the word-db lookup never returns a null List -- TODO confirm.
+ newWords = words[p];
+ if(debug) cerr << "new words count: " << newWords->Count() << endl;
+
+ // If we don't have a prior list of words, we want this one...
+ if (!oldWords)
+ {
+ oldWords = new List;
+ if(debug) cerr << "phrase adding first: " << temp << endl;
+ newWords->Start_Get();
+ while ((newWord = (HtWordReference *) newWords->Get_Next()))
+ {
+ oldWords->Add(newWord);
+ }
+ if(debug) cerr << "old words count: " << oldWords->Count() << endl;
+ return;
+ }
+
+ // OK, now we have a previous list in wordList and a new list
+ List *results = new List;
+
+ // Index the new occurrences by "docID-location" for O(1) adjacency checks.
+ Dictionary newDict(5000);
+
+ String nid;
+ newWords->Start_Get();
+ while ((newWord = (HtWordReference *) newWords->Get_Next()))
+ {
+ nid = "";
+ int did = newWord->DocID();
+ nid << did;
+ nid << "-";
+ int loc = newWord->Location();
+ nid << loc;
+ if (! newDict.Exists(nid)) {
+ newDict.Add(nid, (Object *)newWord);
+ } else {
+// cerr << "perform_phrase: NewWords Duplicate: " << nid << "\n";
+// Double addition is a problem if you don't want your original objects deleted
+ }
+ }
+
+ String oid;
+ oldWords->Start_Get();
+ while ((oldWord = (HtWordReference *) oldWords->Get_Next()))
+ {
+ oid = "";
+ int did = oldWord->DocID();
+ oid << did;
+ oid << "-";
+ int loc = oldWord->Location();
+ // Look for the new word 1 + ignoredWords positions after this match.
+ oid << loc + ignoredWords+1;
+ if (newDict.Exists(oid))
+ {
+ newWord = (HtWordReference *)newDict.Find(oid);
+
+ HtWordReference *result = new HtWordReference(*oldWord);
+
+ result->Flags(oldWord->Flags() & newWord->Flags());
+ result->Location(newWord->Location());
+
+ results->Add(result);
+ }
+ }
+ ignoredWords = 0; // most recent word is not a non-ignored word
+
+ newDict.Release();
+
+ if(debug) cerr << "old words count: " << oldWords->Count() << endl;
+ if(debug) cerr << "results count: " << results->Count() << endl;
+ oldWords->Destroy();
+ results->Start_Get();
+ while ((newWord = (HtWordReference *) results->Get_Next()))
+ {
+ oldWords->Add(newWord);
+ }
+ if(debug) cerr << "old words count: " << oldWords->Count() << endl;
+ results->Release();
+ delete results;
+
+ newWords->Destroy();
+ delete newWords;
+
+}
+
+//*****************************************************************************
+// Allocate scores based on words in wordList.
+// Fields within which the word must appear are specified in flags
+// (see HtWordReference.h).
+// A null/empty wordList pushes a null pointer on the stack (no matches).
+// Otherwise a ResultList is pushed (possibly left empty if every entry is
+// filtered out by the field flags). Each occurrence contributes the sum of
+// the per-field factors, scaled by `weight` and divided by the total number
+// of occurrences; multiple occurrences in one document are folded into a
+// single DocMatch whose score accumulates and whose anchor is the minimum.
+void
+Parser::score(List *wordList, double weight, unsigned int flags)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ DocMatch *dm;
+ HtWordReference *wr;
+ static double text_factor = config->Double("text_factor", 1);
+ static double caps_factor = config->Double("caps_factor", 1);
+ static double title_factor = config->Double("title_factor", 1);
+ static double heading_factor = config->Double("heading_factor", 1);
+ static double keywords_factor = config->Double("keywords_factor", 1);
+ static double meta_description_factor = config->Double("meta_description_factor", 1);
+ static double author_factor = config->Double("author_factor", 1);
+ static double description_factor = config->Double("description_factor", 1);
+ double wscore;
+ int docanchor;
+ int word_count;
+
+ if (!wordList || wordList->Count() == 0)
+ {
+ // We can't score an empty list, so push a null pointer...
+ if(debug) cerr << "score: empty list, push 0 @" << stack.Size() << endl;
+
+ stack.push(0);
+ return;
+ }
+
+ ResultList *list = new ResultList;
+ if(debug) cerr << "score: push @" << stack.Size() << endl;
+ stack.push(list);
+ // We're now guaranteed to have a non-empty list
+ // We'll use the number of occurences of this word for scoring
+ word_count = wordList->Count();
+
+ wordList->Start_Get();
+ while ((wr = (HtWordReference *) wordList->Get_Next()))
+ {
+ //
+ // ******* Compute the score for the document
+ //
+
+ // If word not in one of the required fields, skip the entry.
+ // Plain text sets no flag in dbase, so treat it separately.
+ if (!(wr->Flags() & flags) && (wr->Flags() || !(flags & FLAG_PLAIN)))
+ {
+ if (debug > 2)
+ cerr << "Flags " << wr->Flags() << " lack " << flags << endl;
+ continue;
+ }
+
+ wscore = 0.0;
+ if (wr->Flags() == FLAG_TEXT) wscore += text_factor;
+ if (wr->Flags() & FLAG_CAPITAL) wscore += caps_factor;
+ if (wr->Flags() & FLAG_TITLE) wscore += title_factor;
+ if (wr->Flags() & FLAG_HEADING) wscore += heading_factor;
+ if (wr->Flags() & FLAG_KEYWORDS) wscore += keywords_factor;
+ if (wr->Flags() & FLAG_DESCRIPTION) wscore += meta_description_factor;
+ if (wr->Flags() & FLAG_AUTHOR) wscore += author_factor;
+ if (wr->Flags() & FLAG_LINK_TEXT) wscore += description_factor;
+ wscore *= weight;
+ wscore = wscore / (double)word_count;
+ docanchor = wr->Anchor();
+ dm = list->find(wr->DocID());
+ if (dm)
+ {
+ wscore += dm->score;
+ if (dm->anchor < docanchor)
+ docanchor = dm->anchor;
+ // We wish to *update* this, not add a duplicate
+ list->remove(wr->DocID());
+ }
+
+ dm = new DocMatch;
+ dm->id = wr->DocID();
+ dm->score = wscore;
+ dm->orMatches = 1; // how many "OR" terms this doc has
+ dm->anchor = docanchor;
+ list->add(dm);
+ }
+}
+
+
+//*****************************************************************************
+// The top two entries in the stack need to be ANDed together.
+//
+// a b a and b
+// 0 0 0
+// 0 1 0
+// 0 x 0
+// 1 0 0
+// 1 1 intersect(a,b)
+// 1 x a
+// x 0 0
+// x 1 b
+// x x x
+//
+// (0 = null list, 1 = real result list, x = isIgnore list.)
+// Pops two ResultLists and pushes their intersection: a document survives
+// only if present in both; its scores are summed, its orMatches averaged,
+// and the smaller anchor kept. Either operand being isIgnore yields the
+// other operand unchanged. Both popped lists are freed.
+void
+Parser::perform_and()
+{
+ ResultList *l1 = (ResultList *) stack.pop();
+ ResultList *l2 = (ResultList *) stack.pop();
+ int i;
+ DocMatch *dm, *dm2, *dm3;
+ HtVector *elements;
+
+ if(!(l2 && l1))
+ {
+ if(debug) cerr << "and: at least one empty operator, pushing 0 @" << stack.Size() << endl;
+ stack.push(0);
+ if(l1) delete l1;
+ if(l2) delete l2;
+ return;
+ }
+
+ //
+ // If either of the arguments is set to be ignored, we will use the
+ // other as the result.
+ // remember l2 and l1, l2 not l1
+
+ if (l1->isIgnore && l2->isIgnore)
+ {
+ if(debug) cerr << "and: ignoring all, pushing ignored list @" << stack.Size() << endl;
+ ResultList *result = new ResultList;
+ result->isIgnore = 1;
+ delete l1; delete l2;
+ stack.push(result);
+ return;
+ }
+ else if (l1->isIgnore)
+ {
+ if(debug) cerr << "and: ignoring l1, pushing l2 @" << stack.Size() << endl;
+ stack.push(l2);
+ delete l1;
+ return;
+ }
+ else if (l2->isIgnore)
+ {
+ if(debug) cerr << "and: ignoring l2, pushing l2 @" << stack.Size() << endl;
+ stack.push(l1);
+ delete l2;
+ return;
+ }
+
+ ResultList *result = new ResultList;
+ stack.push(result);
+ elements = l2->elements();
+
+ if(debug)
+ cerr << "perform and: " << elements->Count() << " " << l1->elements()->Count() << " ";
+
+ for (i = 0; i < elements->Count(); i++)
+ {
+ dm = (DocMatch *) (*elements)[i];
+ dm2 = l1->find(dm->id);
+ if (dm2)
+ {
+ //
+ // Duplicate document. Add scores and average "OR-matches" count
+ //
+ dm3 = new DocMatch;
+// "if (dm2)" means "?:" operator not needed...
+// dm3->score = dm->score + (dm2 ? dm2->score : 0);
+// dm3->orMatches = (dm->orMatches + (dm2 ? dm2->orMatches : 0))/2;
+ dm3->score = dm->score + dm2->score;
+ dm3->orMatches = (dm->orMatches + dm2->orMatches)/2;
+ dm3->id = dm->id;
+ dm3->anchor = dm->anchor;
+// if (dm2 && dm2->anchor < dm3->anchor)
+ if (dm2->anchor < dm3->anchor)
+ dm3->anchor = dm2->anchor;
+ result->add(dm3);
+ }
+ }
+ if(debug)
+ cerr << result->elements()->Count() << endl;
+
+ elements->Release();
+ delete elements;
+ delete l1;
+ delete l2;
+}
+
+// a b a not b
+// 0 0 0
+// 0 1 0
+// 0 x 0
+// 1 0 a
+// 1 1 intersect(a,not b)
+// 1 x a
+// x 0 x
+// x 1 x
+// x x x
+void
+Parser::perform_not()
+{
+ ResultList *l1 = (ResultList *) stack.pop();
+ ResultList *l2 = (ResultList *) stack.pop();
+ int i;
+ DocMatch *dm, *dm2, *dm3;
+ HtVector *elements;
+
+
+ if(!l2)
+ {
+ if(debug) cerr << "not: no positive term, pushing 0 @" << stack.Size() << endl;
+ // Should probably be interpreted as "* not l1"
+ stack.push(0);
+ if(l1) delete l1;
+ return;
+ }
+ if(!l1 || l1->isIgnore || l2->isIgnore)
+ {
+ if(debug) cerr << "not: no negative term, pushing positive @" << stack.Size() << endl;
+ stack.push(l2);
+ if(l1) delete l1;
+ return;
+ }
+
+ ResultList *result = new ResultList;
+ if(debug) cerr << "not: pushing result @" << stack.Size() << endl;
+ stack.push(result);
+ elements = l2->elements();
+
+ if(debug)
+ cerr << "perform not: " << elements->Count() << " " << l1->elements()->Count() << " ";
+
+ for (i = 0; i < elements->Count(); i++)
+ {
+ dm = (DocMatch *) (*elements)[i];
+ dm2 = l1->find(dm->id);
+ if (!dm2)
+ {
+ // Document is in l2 (positive term) but not in l1 (negated
+ // term), so it survives the NOT and is copied to the result.
+ //
+ dm3 = new DocMatch;
+ dm3->score = dm->score;
+ dm3->orMatches = dm->orMatches;
+ dm3->id = dm->id;
+ dm3->anchor = dm->anchor;
+ result->add(dm3);
+ }
+ }
+ if(debug)
+ cerr << result->elements()->Count() << endl;
+
+ elements->Release();
+ delete elements;
+ delete l1;
+ delete l2;
+}
+
+//*****************************************************************************
+// The top two entries in the stack need to be ORed together.
+//
+void
+Parser::perform_or()
+{
+ ResultList *l1 = (ResultList *) stack.pop();
+ ResultList *result = (ResultList *) stack.peek();
+ int i;
+ DocMatch *dm, *dm2;
+ HtVector *elements;
+
+ //
+ // If either of the arguments is not present, we will use the other as
+ // the result (a missing operand means an earlier step pushed 0).
+ //
+ if (!l1 && result)
+ {
+ if(debug) cerr << "or: no 2nd operand" << endl;
+ return; // result in top of stack
+ }
+ else if (l1 && !result)
+ {
+ if(debug) cerr << "or: no 1st operand" << endl;
+ stack.pop();
+ stack.push(l1);
+ return;
+ }
+ else if (!l1 && !result)
+ {
+ if(debug) cerr << "or: no operands" << endl;
+ stack.pop();
+ stack.push(0); // empty result
+ return;
+ }
+
+ //
+ // If either of the arguments is set to be ignored, we will use the
+ // other as the result.
+ //
+ if (l1->isIgnore)
+ {
+ delete l1;
+ return;
+ }
+ else if (result->isIgnore)
+ {
+ result = (ResultList *) stack.pop();
+ stack.push(l1);
+ delete result;
+ return;
+ }
+
+ elements = l1->elements();
+ if(debug)
+ cerr << "perform or: " << elements->Count() << " " << result->elements()->Count() << " ";
+ for (i = 0; i < elements->Count(); i++)
+ {
+ dm = (DocMatch *) (*elements)[i];
+ dm2 = result->find(dm->id);
+ if (dm2)
+ {
+ //
+ // Already in result: add scores, add "OR-match" counts, keep
+ // the smaller (better) anchor.
+ dm2->score += dm->score;
+ dm2->orMatches += dm->orMatches;
+ if (dm->anchor < dm2->anchor)
+ dm2->anchor = dm->anchor;
+ }
+ else
+ {
+ dm2 = new DocMatch;
+ dm2->score = dm->score;
+ dm2->orMatches = dm->orMatches;
+ dm2->id = dm->id;
+ dm2->anchor = dm->anchor;
+ result->add(dm2);
+ }
+ }
+ if(debug)
+ cerr << result->elements()->Count() << endl;
+ elements->Release();
+ delete elements;
+ delete l1;
+}
+
+//*****************************************************************************
+// void Parser::parse(List *tokenList, ResultList &resultMatches)
+//
+void
+Parser::parse(List *tokenList, ResultList &resultMatches)
+{
+ HtConfiguration* config= HtConfiguration::config();
+ tokens = tokenList;
+ DocumentRef *ref = NULL;
+
+ fullexpr(1);
+
+ ResultList *result = (ResultList *) stack.pop();
+ if (!result) // Ouch!
+ {
+// It seems we now end up here on a syntax error, so don't clear anything!
+// valid = 0;
+// error = 0;
+// error << "Expected to have something to parse!";
+ return;
+ }
+ HtVector *elements = result->elements();
+ DocMatch *dm;
+
+ // multimatch_factor gives extra weight to matching documents which
+ // contain more than one "OR" term. This is applied after the whole
+ // document is parsed, so multiple matches don't give exponentially
+ // increasing weights
+ double multimatch_factor = config->Double("multimatch_factor");
+
+ for (int i = 0; i < elements->Count(); i++)
+ {
+ dm = (DocMatch *) (*elements)[i];
+ ref = collection->getDocumentRef(dm->GetId());
+ if(ref && ref->DocState() == Reference_normal)
+ {
+ dm->collection = collection; // back reference so the match knows its source collection
+ if (dm->orMatches > 1)
+ dm->score *= 1+multimatch_factor;
+ resultMatches.add(dm);
+ }
+ }
+ elements->Release();
+ result->Release();
+ delete elements;
+ delete result;
+}
+
+void
+Parser::setCollection(Collection *coll)
+{
+ if (coll)
+ words.Open(coll->getWordFile(), O_RDONLY);
+ collection = coll;
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/parser.h b/debian/htdig/htdig-3.2.0b6/htsearch/parser.h
new file mode 100644
index 00000000..8f510d8c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/parser.h
@@ -0,0 +1,78 @@
+//
+// parser.h
+//
+// parser: Parses a boolean expression tree, retrieving and scoring
+// the resulting document list
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: parser.h,v 1.18 2004/05/28 13:15:25 lha Exp $
+//
+
+#ifndef _parser_h_
+#define _parser_h_
+
+#include "htsearch.h"
+#include "WeightWord.h"
+#include "ResultList.h"
+#include "DocMatch.h"
+#include "Database.h"
+#include "htString.h"
+#include "Stack.h"
+#include "HtWordList.h"
+#include <ctype.h>
+
+class Collection;
+
+class Parser
+{
+public:
+ Parser();
+
+ int checkSyntax(List *);
+ void parse(List *, ResultList &);
+
+ // superseded by setCollection(): // void setDatabase(const String& db) { words.Open(db, O_RDONLY); }
+ void setCollection(Collection *collection);
+ char *getErrorMessage() {return error.get();}
+ int hadError() {return valid == 0;}
+
+protected:
+ void fullexpr(int);
+ int lexan();
+ void phrase(int);
+ void expr(int);
+ void term(int);
+ void factor(int);
+ int match(int);
+ void setError(char *);
+ void perform_push();
+ void perform_and();
+ void perform_not();
+ void perform_or();
+ void perform_phrase(List * &);
+
+ void score(List *, double weight, unsigned int flags);
+
+ List *tokens; // token stream handed to parse()
+ List *result;
+ WeightWord *current;
+ int lookahead;
+ int valid; // 0 after a syntax error (see hadError())
+ Stack stack; // operand stack of ResultList* built by perform_*()
+ String error;
+ Collection *collection; // Active collection (multiple database support)
+
+ HtWordList words;
+};
+
+extern StringList boolean_keywords;
+enum KeywordIndices { AND, OR, NOT }; // indices into boolean_keywords
+
+#endif
+
+
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/parsetest.cc b/debian/htdig/htdig-3.2.0b6/htsearch/parsetest.cc
new file mode 100644
index 00000000..63165377
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/parsetest.cc
@@ -0,0 +1,175 @@
+//
+// parsetest.cc
+//
+// parsetest: A program to test the ParseTree classes as replacement for the current
+// parsing code
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: parsetest.cc,v 1.4 2004/05/28 13:15:25 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "cgi.h"
+#include "defaults.h"
+#include "ParseTree.h"
+#include "AndParseTree.h"
+#include "OrParseTree.h"
+#include "ExactParseTree.h"
+#include "WordContext.h"
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+void reportError(char *msg);
+void usage();
+
+int debug = 0; // verbosity level, raised by -v/-d
+
+//*****************************************************************************
+// int main()
+//
+int
+main(int ac, char **av)
+{
+ int c;
+ extern char *optarg;
+ int override_config=0; // set by -c; currently unread
+
+ String configFile = DEFAULT_CONFIG_FILE;
+
+
+ //
+ // Parse command line arguments
+ //
+ while ((c = getopt(ac, av, "c:dv")) != -1)
+ {
+ switch (c)
+ {
+ case 'c':
+ configFile = optarg;
+ override_config = 1;
+ break;
+ case 'v':
+ debug++;
+ break;
+ case 'd':
+ debug++;
+ break;
+ case '?':
+ usage();
+ break;
+ }
+ }
+
+ //
+ // Parse the CGI parameters.
+ //
+ char none[] = "";
+ cgi input(optind < ac ? av[optind] : none);
+
+ String originalWords = input["words"];
+ originalWords.chop(" \t\r\n");
+
+ // Set up the config
+ config.Defaults(&defaults[0]);
+
+ if (access((char*)configFile, R_OK) < 0)
+ {
+ reportError(form("Unable to find configuration file '%s'",
+ configFile.get()));
+ }
+
+ config.Read(configFile);
+
+ // Initialize htword library (key description + wordtype...)
+ WordContext::Initialize(config);
+
+ ParseTree *testParse;
+
+ testParse = new ParseTree;
+ if ( testParse->Parse(originalWords) != NOTOK)
+ {
+ cout << "Parsing as a boolean query... " << endl;
+ cout << "Initial Query:" << testParse->GetQuery() << endl;
+ cout << "Logical Words:" << testParse->GetLogicalWords() << endl;
+ }
+ else
+ cout << "Parsing as a boolean query FAILED" << endl;
+ delete testParse;
+
+ testParse = new AndParseTree;
+ if ( testParse->Parse(originalWords) != NOTOK)
+ {
+ cout << "Parsing as an AND query... " << endl;
+ cout << "Initial Query:" << testParse->GetQuery() << endl;
+ cout << "Logical Words:" << testParse->GetLogicalWords() << endl;
+ }
+ else
+ cout << "Parsing as an AND query FAILED" << endl;
+ delete testParse;
+
+ testParse = new OrParseTree;
+ if ( testParse->Parse(originalWords) != NOTOK)
+ {
+ cout << "Parsing as an OR query... " << endl;
+ cout << "Initial Query:" << testParse->GetQuery() << endl;
+ cout << "Logical Words:" << testParse->GetLogicalWords() << endl;
+ }
+ else
+ cout << "Parsing as an OR query FAILED" << endl;
+ delete testParse;
+
+ testParse = new ExactParseTree;
+ if ( testParse->Parse(originalWords) != NOTOK)
+ {
+ cout << "Parsing as an EXACT query... " << endl;
+ cout << "Initial Query:" << testParse->GetQuery() << endl;
+ cout << "Logical Words:" << testParse->GetLogicalWords() << endl;
+ }
+ else
+ cout << "Parsing as an EXACT query FAILED" << endl;
+ delete testParse;
+
+}
+
+//*****************************************************************************
+// void usage()
+// Display program usage information--assumes we're running from a cmd line
+//
+void usage()
+{
+ cout << "usage: parsetest [-v][-d][-c configfile]\n";
+ cout << "This program is part of ht://Dig " << VERSION << "\n\n";
+ cout << "Options:\n";
+ cout << "\t-v -d\tVerbose mode. This increases the verbosity of the\n";
+ cout << "\t\tprogram. Using more than 2 is probably only useful\n";
+ cout << "\t\tfor debugging purposes. The default verbose mode\n";
+ cout << "\t\tgives a progress on what it is doing and where it is.\n\n";
+ cout << "\t-c configfile\n";
+ cout << "\t\tUse the specified configuration file instead of the\n";
+ cout << "\t\tdefault.\n\n";
+ exit(0);
+}
+
+//*****************************************************************************
+// Report a fatal error on stdout and exit(1).
+//
+void reportError(char *msg)
+{
+ cout << "parsetest: " << msg << "\n\n";
+ exit(1);
+}
diff --git a/debian/htdig/htdig-3.2.0b6/htsearch/qtest.cc b/debian/htdig/htdig-3.2.0b6/htsearch/qtest.cc
new file mode 100644
index 00000000..36a6d8c7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htsearch/qtest.cc
@@ -0,0 +1,252 @@
+//
+// qtest.cc
+//
+// qtest: A program to test the Query classes as replacement for the current
+// parsing code
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: qtest.cc,v 1.5 2004/05/28 13:15:25 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "cgi.h"
+#include "defaults.h"
+#include "WordContext.h"
+
+#ifdef HAVE_STD
+#include <iostream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <iostream.h>
+#endif /* HAVE_STD */
+
+#include "QueryParser.h"
+#include "Query.h"
+#include "ResultList.h"
+#include "Exact.h"
+#include "Accents.h"
+#include "Prefix.h"
+#include "WordSearcher.h"
+#include "OrFuzzyExpander.h"
+#include "ExactWordQuery.h"
+#include "OrQueryParser.h"
+#include "AndQueryParser.h"
+#include "BooleanQueryParser.h"
+#include "GParser.h"
+
+// If we have this, we probably want it.
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+void reportError(char *msg);
+void usage();
+
+int debug = 0; // verbosity level, raised by -v/-d
+
+void
+ParseAndGet(QueryParser &parser, const String &string);
+
+//*****************************************************************************
+// int main()
+//
+int
+main(int ac, char **av)
+{
+ int c;
+ extern char *optarg;
+ int override_config=0; // set by -c; currently unread
+ String configFile = DEFAULT_CONFIG_FILE;
+
+ bool doall = true,
+ doand = false,
+ door = false,
+ dobool = false,
+ dogeoffs = false;
+
+ //
+ // Parse command line arguments
+ //
+ while ((c = getopt(ac, av, "c:dvkaobg")) != -1) // NOTE: 'k' is accepted but has no case below
+ {
+ switch (c)
+ {
+ case 'c':
+ configFile = optarg;
+ override_config = 1;
+ break;
+ case 'v':
+ debug++;
+ break;
+ case 'd':
+ debug++;
+ break;
+ case 'a':
+ doall = false;
+ doand = true;
+ break;
+ case 'o':
+ doall = false;
+ door = true;
+ break;
+ case 'b':
+ doall = false;
+ dobool = true;
+ break;
+ case 'g':
+ doall = false;
+ dogeoffs = true;
+ break;
+ case '?':
+ usage();
+ break;
+ }
+ }
+
+ //
+ // Parse the CGI parameters.
+ //
+ char none[] = "";
+ cgi input(optind < ac ? av[optind] : none);
+
+ String originalWords = input["words"];
+ originalWords.chop(" \t\r\n");
+
+ HtConfiguration* config= HtConfiguration::config();
+ // Set up the config
+ config->Defaults(&defaults[0]);
+
+ if (access((char*)configFile, R_OK) < 0)
+ {
+ reportError(form("Unable to find configuration file '%s'",
+ configFile.get()));
+ }
+
+ config->Read(configFile);
+
+ // Initialize htword library (key description + wordtype...)
+ WordContext::Initialize(*config);
+
+ OrFuzzyExpander exp;
+ Exact exact(*config);
+ exact.setWeight(1.0);
+ exact.openIndex();
+ exp.Add(&exact);
+ Accents accents(*config);
+ accents.setWeight(0.7);
+ accents.openIndex();
+ exp.Add(&accents);
+ Prefix prefix(*config);
+ prefix.setWeight(0.7);
+ prefix.openIndex();
+ exp.Add(&prefix);
+ QueryParser::SetFuzzyExpander(&exp);
+
+ WordSearcher searcher(config->Find("word_db"));
+ ExactWordQuery::SetSearcher(&searcher);
+
+ // -- plug in your preferred QueryCache implementation here
+ //QueryCache *cache = new XXX;
+ //Query::SetCache(cache);
+
+ OrQueryParser o;
+ BooleanQueryParser b;
+ GParser g;
+ AndQueryParser a;
+
+ if(doall || doand)
+ {
+ cout << "Trying and..." << endl;
+ ParseAndGet(a, originalWords);
+ }
+
+ if(doall || door)
+ {
+ cout << "Trying or..." << endl;
+ ParseAndGet(o, originalWords);
+ }
+
+ if(doall || dobool)
+ {
+ cout << "Trying boolean..." << endl;
+ ParseAndGet(b, originalWords);
+ }
+
+ if(doall || dogeoffs)
+ {
+ cout << "Trying no-precedence-boolean..." << endl;
+ ParseAndGet(g, originalWords);
+ }
+}
+
+void
+ParseAndGet(QueryParser &parser, const String &query)
+{
+ Query *q = parser.Parse(query);
+ if(q)
+ {
+ cout << "Parsed: " << q->GetLogicalWords() << endl;
+ ResultList *l = q->GetResults();
+ if(l)
+ {
+ cout << "Evaluated with " << l->Count() << " matches" << endl;
+ if(debug) l->Dump();
+ }
+ else
+ {
+ cout << "No matches" << endl;
+ }
+ }
+ else
+ {
+ cerr << "syntax error: " << flush << parser.Error() << endl;
+ }
+ delete q; // NULL-safe when parsing failed
+}
+
+
+//*****************************************************************************
+// void usage()
+// Display program usage information--assumes we're running from a cmd line
+//
+void usage()
+{
+ cout << "usage: qtest [-a][-o][-b][-g][-v][-d][-c configfile]\n";
+ cout << "This program is part of ht://Dig " << VERSION << "\n\n";
+ cout << "Options:\n";
+ cout << "\t-v -d\tVerbose mode. This increases the verbosity of the\n";
+ cout << "\t\tprogram. Using more than 2 is probably only useful\n";
+ cout << "\t\tfor debugging purposes. The default verbose mode\n";
+ cout << "\t\tgives a progress on what it is doing and where it is.\n\n";
+ cout << "\t-c configfile\n";
+ cout << "\t\tUse the specified configuration file instead of the\n";
+ cout << "\t\tdefault.\n\n";
+ cout << "\t-a\tPerform only and/all parsing\n\n";
+ cout << "\t-o\tPerform only or/any parsing\n\n";
+ cout << "\t-b\tPerform only boolean parsing\n\n";
+ cout << "\t-g\tPerform only no-precedence-boolean parsing\n\n";
+ exit(0);
+}
+
+//*****************************************************************************
+// Report a fatal error on stdout and exit(1).
+//
+void reportError(char *msg)
+{
+ cout << "qtest: " << msg << "\n\n";
+ exit(1);
+}