summaryrefslogtreecommitdiffstats
path: root/indexlib/tests
diff options
context:
space:
mode:
Diffstat (limited to 'indexlib/tests')
-rw-r--r--indexlib/tests/Makefile.am9
-rw-r--r--indexlib/tests/configure.in.in8
-rw-r--r--indexlib/tests/create-test.cpp28
-rw-r--r--indexlib/tests/ifile-test.cpp156
-rwxr-xr-xindexlib/tests/large-scale/do-test.zsh55
-rw-r--r--indexlib/tests/large-scale/generate.py51
-rw-r--r--indexlib/tests/leafdatavector-test.cpp129
-rw-r--r--indexlib/tests/match-test.cpp99
-rw-r--r--indexlib/tests/mempool-test.cpp53
-rw-r--r--indexlib/tests/memvector-test.cpp258
-rwxr-xr-xindexlib/tests/run-tests.zsh52
-rw-r--r--indexlib/tests/slow-test.cpp13
-rw-r--r--indexlib/tests/stringarray-test.cpp104
-rw-r--r--indexlib/tests/stringset-test.cpp194
-rw-r--r--indexlib/tests/testdriver.cpp61
-rw-r--r--indexlib/tests/tokenizer-test.cpp69
16 files changed, 1339 insertions, 0 deletions
diff --git a/indexlib/tests/Makefile.am b/indexlib/tests/Makefile.am
new file mode 100644
index 000000000..468c69c64
--- /dev/null
+++ b/indexlib/tests/Makefile.am
@@ -0,0 +1,9 @@
+if enable_indexlib_unittests
+TESTDRIVER = testdriver
+else
+TESTDRIVER =
+endif
+noinst_PROGRAMS = $(TESTDRIVER)
+testdriver_SOURCES = testdriver.cpp create-test.cpp ifile-test.cpp leafdatavector-test.cpp match-test.cpp mempool-test.cpp memvector-test.cpp slow-test.cpp stringarray-test.cpp stringset-test.cpp tokenizer-test.cpp
+testdriver_CXXFLAGS = -I.. $(all_includes)
+testdriver_LDADD = ../libindex.la -lboost_unit_test_framework -lz
diff --git a/indexlib/tests/configure.in.in b/indexlib/tests/configure.in.in
new file mode 100644
index 000000000..5b1811045
--- /dev/null
+++ b/indexlib/tests/configure.in.in
@@ -0,0 +1,8 @@
+AC_ARG_ENABLE(indexlib-unittests,
+ [ --enable-indexlib-unittests Enables indexlib's unittests (used for debugging only, needs boost::unit_test)],
+ [case "${enableval}" in
+ yes) indexlib_unittests=true ;;
+ no) indexlib_unittests=false;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-indexlib-unittests) ;;
+ esac],[indexlib_unittests=false])
+ AM_CONDITIONAL(enable_indexlib_unittests, test x$indexlib_unittests = xtrue)
diff --git a/indexlib/tests/create-test.cpp b/indexlib/tests/create-test.cpp
new file mode 100644
index 000000000..beeb7d5f9
--- /dev/null
+++ b/indexlib/tests/create-test.cpp
@@ -0,0 +1,28 @@
+#include <boost/test/unit_test.hpp>
+#include "create.h"
+#include "index.h"
+
+using namespace ::boost::unit_test;
+
+// Tests for indexlib::create().
+namespace create_test {
+
+// Path handed to indexlib::create() and indexlib::remove(). It ends in a run
+// of slashes -- NOTE(review): looks intentional (path handling), but confirm.
+const char* fname = "create-test-delete-me/////";
+
+// Calls indexlib::remove() on fname (presumably deleting a leftover index).
+void cleanup() {
+ indexlib::remove( fname );
+}
+
+// Creates an index at fname and checks that the returned auto_ptr is non-null.
+void simple() {
+ cleanup();
+ std::auto_ptr<indexlib::index> ptr = indexlib::create( fname );
+ BOOST_CHECK( ptr.get() );
+}
+
+// Builds the "Create tests" suite, which holds the single case simple().
+test_suite* get_suite() {
+ test_suite* test = BOOST_TEST_SUITE( "Create tests" );
+ test->add( BOOST_TEST_CASE( &simple ) );
+ return test;
+}
+
+}
+
diff --git a/indexlib/tests/ifile-test.cpp b/indexlib/tests/ifile-test.cpp
new file mode 100644
index 000000000..9b9f92832
--- /dev/null
+++ b/indexlib/tests/ifile-test.cpp
@@ -0,0 +1,156 @@
+#include <boost/test/unit_test.hpp>
+#include "ifile.h"
+#include <string>
+#include <stdarg.h>
+
+using namespace ::boost::unit_test;
+namespace ifile_test {
+//using indexlib::detail::ifile;
+const char* fname = "ifile-test-delete-me";
+// Calls ifile::remove() on fname; every test in this namespace invokes it first.
+void cleanup()
+{
+    ifile::remove( fname );
+}
+
+// Searches ifi for str and compares the hits against the expected document
+// names given in the variadic tail.
+//
+// The tail is a list of `const char*` names in the order the results are
+// expected, terminated by a null `const char*`.  Returns true only if every
+// result matches its expected name and there are no extra or missing results.
+inline
+bool check_results( const ifile& ifi, const char* str, ... ) {
+    // Run the search before va_start so nothing can skip the matching va_end.
+    const std::vector<unsigned> res = ifi.search( str )->list();
+
+    va_list args;
+    va_start( args, str );
+    bool ok = true;
+    unsigned i = 0;
+    for ( const char* s = va_arg( args, const char* ); s != 0; s = va_arg( args, const char* ) ) {
+        // More expected names than results, or a name mismatch: stop here but
+        // still fall through to va_end (the original returned without it).
+        if ( i == res.size() || std::string( s ) != ifi.lookup_docname( res[ i ] ) ) {
+            ok = false;
+            break;
+        }
+        ++i;
+    }
+    va_end( args );
+    return ok && i == res.size();
+}
+
+
+// Returns how many results a search for str yields on ifi.
+inline
+unsigned count_results( const ifile& ifi, const char* str ) {
+    const std::vector<unsigned> hits = ifi.search( str )->list();
+    return hits.size();
+}
+
+// Adds two one-word documents and checks that each word is found in exactly
+// one document, with the expected result id (0, then 1) and document name.
+void simple() {
+ cleanup();
+ ifile ifi( fname );
+ ifi.add( "this", "doc" );
+ BOOST_CHECK_EQUAL( ifi.search( "this" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "this" )->list()[ 0 ], 0 );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "this" )->list()[ 0 ] ), "doc" );
+ ifi.add( "that", "doc2" );
+ // Adding a second document must not disturb the results for the first.
+ BOOST_CHECK_EQUAL( ifi.search( "this" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "this" )->list()[ 0 ], 0 );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "this" )->list()[ 0 ] ), "doc" );
+
+ BOOST_CHECK_EQUAL( ifi.search( "that" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "that" )->list()[ 0 ], 1 );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( ifi.search( "that" )->list()[ 0 ] ), "doc2" );
+}
+
+// Checks that ndocs() tracks the number of added documents (2, then 4) and
+// that lookup_docname() returns the names in insertion order for ids 0..3.
+void ndocs() {
+ cleanup();
+ ifile ifi( fname );
+ ifi.add( "one", "one" );
+ ifi.add( "one", "two" );
+ BOOST_CHECK_EQUAL( ifi.ndocs(), 2 );
+
+ ifi.add( "one", "three" );
+ ifi.add( "one", "four" );
+
+ BOOST_CHECK_EQUAL( ifi.ndocs(), 4 );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( 0 ), std::string( "one" ) );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( 1 ), std::string( "two" ) );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( 2 ), std::string( "three" ) );
+ BOOST_CHECK_EQUAL( ifi.lookup_docname( 3 ), std::string( "four" ) );
+}
+
+// Checks that a word in the middle of space-separated text is searchable.
+void space() {
+ cleanup();
+ ifile ifi( fname );
+
+ ifi.add( "one two three", "doc" );
+ BOOST_CHECK_EQUAL( ifi.search( "two" )->list().size(), 1 );
+}
+
+// Checks that purely numeric and mixed alphanumeric tokens are searchable.
+// NOTE(review): this case is commented out in get_suite() in this file, so it
+// does not currently run.
+void numbers() {
+ cleanup();
+ ifile ifi( fname );
+
+ ifi.add( "one 123 123456789 four444 five", "doc" );
+ BOOST_CHECK_EQUAL( ifi.search( "123" )->list().size(), 1 );
+ BOOST_CHECK_EQUAL( ifi.search( "123456789" )->list().size(), 1 );
+ BOOST_CHECK_EQUAL( ifi.search( "four444" )->list().size(), 1 );
+ BOOST_CHECK_EQUAL( ifi.search( "five" )->list().size(), 1 );
+}
+
+// Checks prefix matching: a query matches every document that has a word
+// starting with it ("l", "long" hit both docs; "longw"/"longn" hit one each).
+void partial() {
+ cleanup();
+ ifile ifi( fname );
+ ifi.add( "longword", "doc_0" );
+
+ BOOST_CHECK_EQUAL( ifi.search( "l" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "long" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "longword" )->list().size(), 1u );
+
+ // Same check as the line above, repeated.
+ BOOST_CHECK_EQUAL( ifi.search( "longword" )->list().size(), 1u );
+
+ ifi.add( "longnord", "doc_1" );
+ BOOST_CHECK_EQUAL( ifi.search( "l" )->list().size(), 2u );
+ BOOST_CHECK_EQUAL( ifi.search( "long" )->list().size(), 2u );
+ BOOST_CHECK_EQUAL( ifi.search( "longw" )->list().size(), 1u );
+ BOOST_CHECK_EQUAL( ifi.search( "longn" )->list().size(), 1u );
+}
+
+// Indexes seven documents and checks result counts for single- and multi-word
+// queries.  The expected counts imply that a multi-word query only matches
+// documents containing all of its words, in any order.
+void several() {
+ cleanup();
+ ifile ifi( fname );
+ ifi.add( "one two three four", "0" );
+ ifi.add( "two three four", "1" );
+ ifi.add( "something else", "2" );
+ ifi.add( "something two", "3" );
+ ifi.add( "two something four", "4" );
+ ifi.add( "else something", "5" );
+ ifi.add( "else four", "6" );
+
+ BOOST_CHECK_EQUAL( count_results( ifi, "one" ), 1u );
+ BOOST_CHECK_EQUAL( count_results( ifi, "one two three four" ), 1u );
+ BOOST_CHECK_EQUAL( count_results( ifi, "two three four" ), 2u );
+
+ BOOST_CHECK_EQUAL( count_results( ifi, "one two" ), 1u );
+ BOOST_CHECK_EQUAL( count_results( ifi, "one" ), 1u );
+
+ BOOST_CHECK_EQUAL( count_results( ifi, "something else" ), 2u );
+ BOOST_CHECK_EQUAL( count_results( ifi, "something two" ), 2u );
+}
+
+// Indexes three documents, removes document "1" and checks that searches only
+// report the two documents that are left.
+void remove_doc() {
+    cleanup();
+    ifile ifi( fname );
+    ifi.add( "one two three four", "0" );
+    ifi.add( "two three four", "1" );
+    ifi.add( "three four five", "2" );
+    ifi.remove_doc( "1" );
+
+    // Typed terminator for check_results(): it reads its variadic tail as
+    // `const char*`, and a bare NULL is not guaranteed to be passed as one.
+    const char* const end = 0;
+
+    BOOST_CHECK( check_results( ifi, "one", "0", end ) );
+    BOOST_CHECK( check_results( ifi, "two", "0", end ) );
+    BOOST_CHECK( check_results( ifi, "three", "0", "2", end ) );
+    // "four" occurs in documents "0" and "2", neither of which was removed
+    // (the original expected 0, contradicting the "three" check above).
+    BOOST_CHECK_EQUAL( count_results( ifi, "four" ), 2u );
+}
+
+// Builds the "Ifile tests" suite from the test cases in this namespace.
+test_suite* get_suite() {
+    test_suite* test = BOOST_TEST_SUITE( "Ifile tests" );
+    test->add( BOOST_TEST_CASE( &simple ) );
+    test->add( BOOST_TEST_CASE( &ndocs ) );
+    test->add( BOOST_TEST_CASE( &space ) );
+    //test->add( BOOST_TEST_CASE( &numbers ) );
+    test->add( BOOST_TEST_CASE( &partial ) );
+    test->add( BOOST_TEST_CASE( &several ) );
+    // The removal test is named remove_doc; this namespace defines no test
+    // called `remove`, which the original line referred to.
+    test->add( BOOST_TEST_CASE( &remove_doc ) );
+    return test;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/large-scale/do-test.zsh b/indexlib/tests/large-scale/do-test.zsh
new file mode 100755
index 000000000..b8d47b45d
--- /dev/null
+++ b/indexlib/tests/large-scale/do-test.zsh
@@ -0,0 +1,55 @@
+#!/usr/bin/env zsh
+
+# SET INPUT FILE BELOW
+inputfile=$1
+inputfile=ulyss12.txt
+
+indexlibadmin=../../indexlibadmin
+index=index
+
+rm -rf index
+mkdir index
+
+# Without an input file, print a hint on stderr and exit with status 1.
+if test -z $inputfile; then
+ cat <<-END 1>&2
+ This test needs a large input file as a seed.
+
+ You might consider using http://www.gutenberg.org/ as a starting point to get a file.
+
+ Please edit this script ($0) to set the input file.
+END
+ exit 1
+fi
+
+rm -rf output
+mkdir output/
+
+rm -rf tmp
+mkdir tmp/
+
+python generate.py < $inputfile
+
+# Reset the index, then add every generated text file to it.
+$indexlibadmin remove $index
+for t in output/text_*; do
+ $indexlibadmin add $index $t
+done
+
+
+# For every generated word list: search the index for its words (output goes to
+# tmp/got), source the matching words_N.script (generate.py writes those so
+# that they leave the expected file list in tmp/expected), and compare the
+# two.  The first mismatch prints a unified diff and aborts with status 1.
+for w in output/words_*.list; do
+ $indexlibadmin search $index "`cat $w`" >tmp/got 2>/dev/null
+ source output/`basename $w list`script
+ if ! diff -q tmp/got tmp/expected; then
+ cat <<-END
+ Pattern $w was wrong!
+
+ Diff:
+ END
+ diff -u tmp/got tmp/expected
+ echo "End of Diff."
+ exit 1
+ fi
+done
+
+rm -f tmp/got tmp/expected tmp/pat
+rmdir tmp
+
diff --git a/indexlib/tests/large-scale/generate.py b/indexlib/tests/large-scale/generate.py
new file mode 100644
index 000000000..3a66df3be
--- /dev/null
+++ b/indexlib/tests/large-scale/generate.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+import random
+import re
+
+def init_chain(infile):
+ chain = {}
+ last = ('','')
+ for line in infile:
+ for word in line.split():
+ if not chain.has_key(last):
+ chain[last]=[]
+ chain[last].append(word)
+ last=(last[1],word)
+ chain[last]=None
+ return chain
+
+def output(chain,length,outputfile):
+ last = ('','')
+ start=2000
+ for i in range(length+start):
+ if chain[last] is None:
+ break
+ word = random.choice(chain[last])
+ last=(last[1],word)
+ if i > start:
+ outputfile.write(word)
+ outputfile.write(' ')
+ outputfile.write("\n")
+
+def get_words(chain,nwords,outputfile,scriptfile):
+ scriptfile.write("(for f in output/text_*; echo $f) > tmp/so_far\n")
+ for i in range(nwords):
+ word='1'
+ while re.compile("\d").search(word):
+ word=random.choice(random.choice(chain.keys()))
+ word=re.sub(r'\W','',word)
+ outputfile.write(word+"\n")
+ scriptfile.write("grep -i -E -e '(\W|^)%s' -l output/text_* >tmp/part_%s\n" % (word,word))
+ scriptfile.write("perl -e '($file1, $file2) = @ARGV; open F2, $file2; while (<F2>) {$h2{$_}++}; open F1, $file1; while (<F1>) {if ($h2{$_}) {print $_; $h2{$_} = 0;}}' tmp/part_%s tmp/so_far >tmp/so_far_\n" % word) # From scriptome
+ scriptfile.write("mv tmp/so_far_ tmp/so_far\n")
+ scriptfile.write("rm tmp/part_%s\n" % word)
+ scriptfile.write("mv tmp/so_far tmp/expected\n")
+
+
+chain=init_chain(file("/dev/stdin"))
+for i in range(10000):
+ output(chain,2000,file("output/text_"+str(i+1),'w'))
+
+
+for i in range(1000):
+ get_words(chain,random.randint(1,5),file("output/words_%s.list"%str(i+1),'w'),file("output/words_%s.script"%str(i+1),'w'))
diff --git a/indexlib/tests/leafdatavector-test.cpp b/indexlib/tests/leafdatavector-test.cpp
new file mode 100644
index 000000000..8763327e6
--- /dev/null
+++ b/indexlib/tests/leafdatavector-test.cpp
@@ -0,0 +1,129 @@
+#include <boost/test/unit_test.hpp>
+#include <map>
+#include "leafdatavector.h"
+
+using namespace ::boost::unit_test;
+namespace leafdatavector_test {
+
+const char* fname = "leafdatavector-test-delete-me";
+// Calls leafdatavector::remove() on fname; every test here invokes it first.
+void cleanup()
+{
+    leafdatavector::remove( fname );
+}
+
+// Adds two values under key 0 and checks the size and first element of get( 0 ).
+void simple() {
+ cleanup();
+ leafdatavector f( fname );
+ f.add( 0, 1 );
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 1u );
+ BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 1u );
+ f.add( 0, 2 );
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+}
+
+// Checks that a value added through one leafdatavector object is still there
+// when a second object is opened on the same fname after the first is gone.
+void persistent() {
+ cleanup();
+ {
+ leafdatavector f( fname );
+ f.add( 0, 1 );
+ }
+ {
+ leafdatavector f( fname );
+ BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 1u );
+ }
+}
+
+// Interleaves additions to keys 0, 1 and 2 and checks the per-key counts
+// (6, 3, 2) as well as the exact contents stored under key 1.
+void complicated() {
+ cleanup();
+ leafdatavector f( fname );
+
+ f.add( 0, 1 );
+ f.add( 0, 3 );
+ f.add( 1, 3 );
+ f.add( 0, 2 );
+ f.add( 0, 4 );
+ f.add( 1, 8 );
+ f.add( 2, 4 );
+ f.add( 1, 5 );
+ f.add( 2, 5 );
+ f.add( 0, 5 );
+ f.add( 0, 9 );
+
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 6u );
+ BOOST_CHECK_EQUAL( f.get( 1 ).size(), 3u );
+ BOOST_CHECK_EQUAL( f.get( 2 ).size(), 2u );
+ // Sort a copy so the element checks do not depend on storage order.
+ std::vector<unsigned> one = f.get( 1 );
+ std::sort( one.begin(), one.end() );
+ BOOST_CHECK_EQUAL( one.size(), 3u );
+ BOOST_CHECK_EQUAL( one[ 0 ], 3u );
+ BOOST_CHECK_EQUAL( one[ 1 ], 5u );
+ BOOST_CHECK_EQUAL( one[ 2 ], 8u );
+}
+
+// Checks that adding a value already present under a key does not increase
+// the number of values stored for that key.
+void unique() {
+ cleanup();
+ leafdatavector f( fname );
+
+ f.add( 0, 1 );
+ f.add( 0, 1 );
+ f.add( 0, 1 );
+
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 1u );
+
+ f.add( 0, 4 );
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+
+ f.add( 0, 1 );
+ f.add( 0, 4 );
+
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 2u );
+
+}
+
+// Adds 32 values to each of 259 keys and checks that every key reports as
+// many values as were added to it.
+void large() {
+    cleanup();
+    leafdatavector f( fname );
+    // Plain `unsigned` instead of the non-standard `uint` typedef, matching
+    // the std::vector<unsigned> used elsewhere in this file.
+    std::map<unsigned, unsigned> counts;
+
+    for ( unsigned i = 0; i != 32; ++i ) {
+        for ( unsigned j = 0; j != 256 + 3; ++j ) {
+            // For a fixed j this grows strictly with i, so no value is ever
+            // added twice to the same key.
+            unsigned ref = i * ( j + 51 ) / 13 + i % 75 + j + 3;
+            f.add( j, ref );
+            ++counts[ j ];
+        }
+    }
+    for ( std::map<unsigned, unsigned>::const_iterator first = counts.begin(), past = counts.end();
+            first != past; ++first ) {
+        BOOST_CHECK_EQUAL( first->second, f.get( first->first ).size() );
+    }
+
+}
+
+// Checks that the value 0 can be stored alongside other values and that
+// get( 0 ) returns 0, 1, 3 in that order.
+void one_zero() {
+ cleanup();
+ leafdatavector f( fname );
+
+ f.add( 0, 0 );
+ f.add( 0, 1 );
+ f.add( 0, 3 );
+
+ BOOST_CHECK_EQUAL( f.get( 0 ).size(), 3u );
+ BOOST_CHECK_EQUAL( f.get( 0 )[ 0 ], 0u );
+ BOOST_CHECK_EQUAL( f.get( 0 )[ 1 ], 1u );
+ BOOST_CHECK_EQUAL( f.get( 0 )[ 2 ], 3u );
+
+}
+
+
+// Builds the "leafdatavector tests" suite with all six cases defined above.
+test_suite* get_suite()
+{
+    test_suite* suite = BOOST_TEST_SUITE( "leafdatavector tests" );
+    suite->add( BOOST_TEST_CASE( &simple ) );
+    suite->add( BOOST_TEST_CASE( &persistent ) );
+    suite->add( BOOST_TEST_CASE( &complicated ) );
+    suite->add( BOOST_TEST_CASE( &unique ) );
+    suite->add( BOOST_TEST_CASE( &large ) );
+    suite->add( BOOST_TEST_CASE( &one_zero ) );
+    return suite;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/match-test.cpp b/indexlib/tests/match-test.cpp
new file mode 100644
index 000000000..16b8a8e95
--- /dev/null
+++ b/indexlib/tests/match-test.cpp
@@ -0,0 +1,99 @@
+#include <boost/test/unit_test.hpp>
+#include "match.h"
+
+using namespace ::boost::unit_test;
+namespace match_test {
+using indexlib::Match;
+
+// Empty placeholder; each test in this namespace still calls it first.
+void cleanup() {}
+
+// Checks substring matching for the pattern "pat": texts where the three
+// letters are not contiguous must not match, texts containing "pat" anywhere
+// must.
+void simple() {
+ cleanup();
+ Match m( "pat" );
+ BOOST_CHECK_EQUAL( m.process( "not here" ), false );
+ BOOST_CHECK_EQUAL( m.process( "p a t" ), false );
+ BOOST_CHECK_EQUAL( m.process( "pa t" ), false );
+
+
+ BOOST_CHECK_EQUAL( m.process( "pat" ), true );
+ BOOST_CHECK_EQUAL( m.process( "pattern" ), true );
+ BOOST_CHECK_EQUAL( m.process( " pat " ), true );
+ BOOST_CHECK_EQUAL( m.process( "zpat patx ipato " ), true );
+}
+
+// Edge cases: a non-empty pattern must not match the empty text, while the
+// empty pattern is expected to match both the empty and a non-empty text.
+void empty() {
+ cleanup();
+ {
+ Match m( "pat" );
+ BOOST_CHECK( !m.process( "" ) );
+ }
+ {
+ Match m( "" );
+ BOOST_CHECK( m.process( "" ) );
+ BOOST_CHECK( m.process( "string" ) );
+ }
+}
+
+
+// Same kind of check as simple(), but passing std::string objects for both
+// the pattern and the text.
+void string() {
+ cleanup();
+ Match m( std::string( "pat" ) );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "not here" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "here pattern" ) ), true );
+}
+
+// With the caseinsensitive flag cleared, only a text containing the pattern
+// in its exact case may match.
+// NOTE(review): the flags argument is ~Match::caseinsensitive, i.e. every bit
+// except that one is set -- confirm no other flag bits are affected.
+void casesensitive() {
+ cleanup();
+ Match m( std::string( "pat" ), ~Match::caseinsensitive );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "PAT" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "aPATa" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pAt" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pattern" ) ), true );
+}
+
+// With Match::caseinsensitive set, the pattern must match regardless of the
+// letter case used in the text.
+void caseinsensitive() {
+ cleanup();
+ Match m( std::string( "pat" ), Match::caseinsensitive );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "PAT" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "aPATa" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pAt" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "pattern" ) ), true );
+}
+
+
+// Exercises long patterns (34 and then 32 characters): exact, prefixed and
+// embedded occurrences must match, texts with one character altered must not.
+void verylarge() {
+ cleanup();
+ Match m( std::string( "pat0123456789012345678901234567890" ) );
+
+ BOOST_CHECK_EQUAL( m.process( std::string( "pat0123456789012345678901234567890" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat0123456789012345678901234567890" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat0123456789012345678901234567890xxxxxxxx" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x6789012345678901234567890xxxxxxxx" ) ), false );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x678901234567890123456789xxxxxxxxx" ) ), false );
+
+ // Replace the matcher with one for a 32-character pattern.
+ m = Match( std::string( "12345678901234567890123456789012" ) );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat012345678901234567890123456789012xxxxxxxxx" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat012345678901234567890123456789012" ) ), true );
+ BOOST_CHECK_EQUAL( m.process( std::string( "xxxxxxpat01234x678901234567890123456789xxxxxxxxx" ) ), false );
+}
+
+
+
+
+// Builds the "Match tests" suite with all six cases defined above.
+test_suite* get_suite()
+{
+    test_suite* suite = BOOST_TEST_SUITE( "Match tests" );
+    suite->add( BOOST_TEST_CASE( &simple ) );
+    suite->add( BOOST_TEST_CASE( &empty ) );
+    suite->add( BOOST_TEST_CASE( &string ) );
+    suite->add( BOOST_TEST_CASE( &casesensitive ) );
+    suite->add( BOOST_TEST_CASE( &caseinsensitive ) );
+    suite->add( BOOST_TEST_CASE( &verylarge ) );
+    return suite;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/mempool-test.cpp b/indexlib/tests/mempool-test.cpp
new file mode 100644
index 000000000..a0895243c
--- /dev/null
+++ b/indexlib/tests/mempool-test.cpp
@@ -0,0 +1,53 @@
+#include <boost/test/unit_test.hpp>
+#include "mempool.h"
+#include "leafdata.h"
+
+using namespace ::boost::unit_test;
+namespace mempool_test {
+const char* fname = "mempool-test-delete-me";
+// Unlinks the file named by fname; both tests here call this first.
+void cleanup()
+{
+    ::unlink( fname );
+}
+
+// Allocates 32 blocks of 16 bytes, frees them all, allocates 32 blocks again
+// and checks that the pool did not grow, i.e. that freed space was reused.
+void deallocate() {
+    cleanup();
+    mempool<leaf_data_pool_traits> pool( std::auto_ptr<memory_manager>( new mmap_manager( fname ) ) );
+
+    std::vector<leafdataptr> pointers;
+    for ( int i = 0; i != 32; ++i ) {
+        pointers.push_back( pool.allocate( 16 ) );
+        leafdata::init( pointers.back() );
+    }
+    const unsigned size = pool.size();
+
+    // Index with the vector's own size_type; the original compared a signed
+    // int against the unsigned pointers.size().
+    for ( std::vector<leafdataptr>::size_type i = 0; i != pointers.size(); ++i ) {
+        pool.deallocate(pointers.at(i));
+    }
+
+    for ( int i = 0; i != 32; ++i ) {
+        pointers.push_back( pool.allocate( 16 ) );
+        leafdata::init( pointers.back() );
+    }
+    BOOST_CHECK_EQUAL( size, pool.size() );
+}
+
+// Requests blocks around and above 4096 bytes.  There are no explicit checks:
+// the test only requires that the allocations complete.
+void large()
+{
+    cleanup();
+    mempool<leaf_data_pool_traits> arena( std::auto_ptr<memory_manager>( new mmap_manager( fname ) ) );
+
+    arena.allocate( 4095 );
+    arena.allocate( 4097 );
+    arena.allocate( 4096*2 );
+    arena.allocate( 4096*4 );
+    arena.allocate( 4096*8 );
+}
+
+// Builds the "Mempool Tests" suite with the two cases defined above.
+test_suite* get_suite()
+{
+    test_suite* suite = BOOST_TEST_SUITE( "Mempool Tests" );
+    suite->add( BOOST_TEST_CASE( &deallocate ) );
+    suite->add( BOOST_TEST_CASE( &large ) );
+    return suite;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/memvector-test.cpp b/indexlib/tests/memvector-test.cpp
new file mode 100644
index 000000000..60023a4d1
--- /dev/null
+++ b/indexlib/tests/memvector-test.cpp
@@ -0,0 +1,258 @@
+#include <boost/test/unit_test.hpp>
+#include <boost/format.hpp>
+#include <iostream>
+using namespace ::boost::unit_test;
+
+#include <unistd.h>
+#include "memvector.h"
+
+namespace memvector_test {
+
+const char* fname = "test.vector-delete-me";
+// Calls memvector<uint32_t>::remove() on fname; the tests here invoke it first.
+void cleanup()
+{
+    memvector<uint32_t>::remove( fname );
+}
+// Checks that size() reports the number of pushed elements.
+void test_size() {
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 3 );
+ test.push_back( 4 );
+ BOOST_CHECK_EQUAL( test.size(), 4u );
+}
+
+// Pushes 20 values derived from the loop index (converted to T) and checks
+// that operator[] returns the same values.  Instantiated for several element
+// types in get_suite().
+template <typename T>
+void test_put_recover() {
+ cleanup();
+ memvector<T> test( fname );
+ for ( int i = 0; i != 20; ++i ) {
+ test.push_back( T( i*13 + i*i*45 + 23 ) );
+ }
+ for ( int i = 0; i != 20; ++i ) {
+ BOOST_CHECK_EQUAL( test[ i ], T( i*13 + i*i*45 + 23 ) );
+ }
+}
+
+// Checks that resize( 50 ) on a one-element vector makes size() report 50.
+void resize() {
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.resize( 50 );
+ BOOST_CHECK_EQUAL( test.size(), 50u );
+}
+
+
+// Fills a vector, lets it go out of scope, reopens the same fname and checks
+// that both the size and the five elements are still there.
+void test_persistent() {
+ cleanup();
+ {
+ memvector<uint32_t> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 3 );
+ test.push_back( 4 );
+ test.push_back( 5 );
+ }
+ {
+ memvector<uint32_t> test( fname );
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ for ( unsigned i = 0; i != test.size(); ++i )
+ BOOST_CHECK_EQUAL( test[ i ], i + 1 );
+ }
+}
+
+// Inserts 13 at position 2 of four 12s and checks the resulting five elements.
+// NOTE(review): get_suite() in this file does not register this case.
+void test_insert() {
+ cleanup();
+ memvector<uint16_t> test( fname );
+ test.push_back( 12 );
+ test.push_back( 12 );
+ test.push_back( 12 );
+ test.push_back( 12 );
+
+ test.insert( test.begin() + 2, 13 );
+
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ BOOST_CHECK_EQUAL( test[ 0 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 13u );
+ BOOST_CHECK_EQUAL( test[ 3 ], 12u );
+ BOOST_CHECK_EQUAL( test[ 4 ], 12u );
+}
+
+// Checks reading and writing through memvector<unsigned>::iterator, and that
+// the begin()/end() distance keeps matching size() as elements are appended.
+// NOTE(review): get_suite() in this file does not register this case.
+void test_iterator() {
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 1 );
+ test.push_back( 2 );
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 1u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 2u );
+
+ BOOST_CHECK_EQUAL( *test.begin(), 1u );
+ BOOST_CHECK_EQUAL( *( test.begin() + 1 ), 2u );
+
+ memvector<unsigned>::iterator iter = test.begin();
+
+ BOOST_CHECK_EQUAL( *iter, 1u );
+
+ BOOST_CHECK( test.begin() == iter );
+
+ // A write through the iterator must be visible through every access path.
+ *iter= 3;
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 3u );
+ BOOST_CHECK_EQUAL( *iter, 3u );
+ BOOST_CHECK_EQUAL( *test.begin(), 3u );
+
+ ++iter;
+
+ BOOST_CHECK_EQUAL( *iter, 2u );
+
+ *iter = 5;
+
+ BOOST_CHECK_EQUAL( *iter, 5u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 5u );
+
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() );
+ test.push_back( 5 );
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() );
+ test.push_back( 5 );
+ BOOST_CHECK_EQUAL( std::distance( test.begin(), test.end() ) , test.size() );
+}
+
+// Walks a four-element vector with ++, += and -- and checks the value seen at
+// each position as well as the comparisons against begin() and end().
+// NOTE(review): get_suite() in this file does not register this case.
+void test_iteration() {
+ cleanup();
+ memvector<unsigned> test( fname );
+
+ test.push_back( 1 );
+ test.push_back( 2 );
+ test.push_back( 5 );
+ test.push_back( 3 );
+
+ memvector<unsigned>::const_iterator iter = test.begin();
+
+ BOOST_CHECK( iter == test.begin() );
+ BOOST_CHECK( iter != test.end() );
+
+ BOOST_CHECK_EQUAL( *iter, 1u );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, 2u );
+ iter += 2;
+ BOOST_CHECK_EQUAL( *iter, 3u );
+ // NOTE(review): this assigns through a const_iterator -- confirm that
+ // memvector's const_iterator is meant to allow writes.
+ *iter = 7;
+ BOOST_CHECK_EQUAL( *iter, 7u );
+ --iter;
+ BOOST_CHECK_EQUAL( *iter, 5u );
+ BOOST_CHECK( iter != test.end() );
+ iter += 2;
+ BOOST_CHECK( iter == test.end() );
+}
+
+// Runs std::min_element, std::max_element and std::sort over the vector's
+// iterators and checks the positions found and the final sorted contents.
+// NOTE(review): get_suite() in this file does not register this case.
+void test_sort() {
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 10 );
+ test.push_back( 0 );
+ test.push_back( 14 );
+ test.push_back( 8 );
+ test.push_back( 12 );
+ test.push_back( 5 );
+ test.push_back( 4 );
+ test.push_back( 3 );
+
+
+ // The minimum (0) sits at index 1 and the maximum (14) at index 2.
+ BOOST_CHECK_EQUAL( *std::min_element( test.begin(), test.end() ), 0 );
+ BOOST_CHECK( std::min_element( test.begin(), test.end() ) == test.begin() + 1 );
+ BOOST_CHECK_EQUAL( *std::max_element( test.begin(), test.end() ), 14 );
+ BOOST_CHECK( std::max_element( test.begin(), test.end() ) == test.begin() + 2 );
+
+ std::sort( test.begin(), test.end() );
+ BOOST_CHECK_EQUAL( test[ 0 ], 0 );
+ BOOST_CHECK_EQUAL( test[ 1 ], 3 );
+ BOOST_CHECK_EQUAL( test[ 2 ], 4 );
+ BOOST_CHECK_EQUAL( test[ 3 ], 5 );
+ BOOST_CHECK_EQUAL( test[ 4 ], 8 );
+ BOOST_CHECK_EQUAL( test[ 5 ], 10 );
+ BOOST_CHECK_EQUAL( test[ 6 ], 12 );
+ BOOST_CHECK_EQUAL( test[ 7 ], 14 );
+}
+
+// Checks that memvector<unsigned>::remove() discards stored data: a vector
+// opened on the same fname afterwards must be empty.
+void remove() {
+ {
+ cleanup();
+ memvector<unsigned> test( fname );
+ test.push_back( 1 );
+ BOOST_CHECK_EQUAL( test.size(), 1 );
+ }
+ // The first vector has gone out of scope before the data is removed.
+ memvector<unsigned>::remove( fname );
+ memvector<unsigned> test( fname );
+ BOOST_CHECK_EQUAL( test.size(), 0 );
+}
+
+// Checks that assigning through operator[] overwrites the stored element.
+void assign() {
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test[ 0 ] = 3;
+ BOOST_CHECK_EQUAL( test[ 0 ], 3u );
+}
+
+// Erases an element from the middle and then the last element, checking the
+// remaining contents and the size after each step.
+void erase() {
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test.push_back( 4 );
+ test.push_back( 8 );
+ test.push_back( 16 );
+ test.push_back( 32 );
+
+ BOOST_CHECK_EQUAL( test.size(), 5u );
+ // Drop index 1 (the value 4); later elements move down by one.
+ test.erase( test.begin() + 1 );
+
+ BOOST_CHECK_EQUAL( test[ 0 ], 2u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 8u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 16u );
+ BOOST_CHECK_EQUAL( test[ 3 ], 32u );
+ BOOST_CHECK_EQUAL( test.size(), 4u );
+
+ // Drop index 3, which is now the last element (32).
+ test.erase( test.begin() + 3 );
+ BOOST_CHECK_EQUAL( test[ 0 ], 2u );
+ BOOST_CHECK_EQUAL( test[ 1 ], 8u );
+ BOOST_CHECK_EQUAL( test[ 2 ], 16u );
+ BOOST_CHECK_EQUAL( test.size(), 3u );
+
+}
+
+// Checks that clear() leaves the vector with size 0.
+void clear() {
+ cleanup();
+ memvector<uint32_t> test( fname );
+ test.push_back( 2 );
+ test.push_back( 4 );
+ test.push_back( 8 );
+ test.push_back( 16 );
+ test.push_back( 32 );
+
+ test.clear();
+
+
+ BOOST_CHECK_EQUAL( test.size(), 0u );
+}
+
+// Builds the "Memvector tests" suite.
+// NOTE(review): test_insert, test_iterator, test_iteration and test_sort are
+// defined in this file but are not added here -- confirm whether that is
+// deliberate.
+test_suite* get_suite()
+{
+    test_suite* suite = BOOST_TEST_SUITE( "Memvector tests" );
+    suite->add( BOOST_TEST_CASE( &test_size ) );
+    suite->add( BOOST_TEST_CASE( &test_put_recover<uint32_t> ) );
+    suite->add( BOOST_TEST_CASE( &test_put_recover<uint16_t> ) );
+    suite->add( BOOST_TEST_CASE( &test_put_recover<uint8_t> ) );
+    suite->add( BOOST_TEST_CASE( &resize ) );
+    suite->add( BOOST_TEST_CASE( &test_persistent ) );
+    suite->add( BOOST_TEST_CASE( &remove ) );
+    suite->add( BOOST_TEST_CASE( &assign ) );
+    suite->add( BOOST_TEST_CASE( &erase ) );
+    suite->add( BOOST_TEST_CASE( &clear ) );
+    return suite;
+}
+
+} // namespace
+
+
diff --git a/indexlib/tests/run-tests.zsh b/indexlib/tests/run-tests.zsh
new file mode 100755
index 000000000..46a10c7b4
--- /dev/null
+++ b/indexlib/tests/run-tests.zsh
@@ -0,0 +1,52 @@
+#!/usr/bin/env zsh
+
+index=delete-me
+files=(
+ one ' On October 11th 2005, the KDE Project released KOffice 1.4.2. KOffice is a free light-weight yet feature rich office solution that integrates with KDE, supports the OASIS OpenDocument file format as does OpenOffice.org 2 and provides filters for other office suites such as Microsoft Office. Read the KOffice 1.4.2 Release Notes.'
+ two 'KDE is a powerful Free Software graphical desktop environment for Linux and Unix workstations. It combines ease of use, contemporary functionality, and outstanding graphical design with the technological superiority of the Unix operating system. More... '
+ three 'The YaKuake Package for Debian sarge and sid.
+ Yet Another Kuake aka YaKuake VERSION 2.6
+ http://www.kde-look.org/content/show.php?content=29153
+
+ have fun!
+
+ Thx OldKid for compile on debian amd64.
+
+
+'
+ numbers '123456789'
+)
+expected=( \
+ kde "onetwothree"
+ noshow "Empty results"
+ poWeRFuL 'two'
+ 'kde BUT debian' "onetwo"
+ debian 'three'
+ '12345678' 'numbers'
+ )
+driver=./indexlibadmin
+unittests=./testdriver
+
+# Run the compiled unit tests first.
+$unittests
+
+echo "Running tests on the command line..."
+
+# Create the index directory and add every (name, text) pair from $files,
+# feeding the text to the driver on stdin.
+mkdir $index
+for name data in $files; do
+ $driver add $index $name - <<<$data
+done
+
+for q res in $expected ; do
+ $driver search $index $q | tr -d '\n' | read got
+ if test $res != $got; then
+ echo "ERROR in test '$q'"
+ echo "EXPECTED:"
+ echo -$res-
+ echo "GOT:"
+ echo -$got-
+ fi
+done
+
+rm -rf $index
+
+echo "done."
diff --git a/indexlib/tests/slow-test.cpp b/indexlib/tests/slow-test.cpp
new file mode 100644
index 000000000..05b687913
--- /dev/null
+++ b/indexlib/tests/slow-test.cpp
@@ -0,0 +1,13 @@
+#include "slow.h"
+
+#include <boost/test/unit_test.hpp>
+
+using namespace ::boost::unit_test;
+// Scaffolding for tests of `slow`; only the file name and the cleanup helper
+// are defined so far, no test cases.
+namespace slow_test {
+const char* fname = "slow.test-delete-me";
+
+// Calls slow::remove() on fname.
+void cleanup()
+{
+    slow::remove( fname );
+}
+
+}
diff --git a/indexlib/tests/stringarray-test.cpp b/indexlib/tests/stringarray-test.cpp
new file mode 100644
index 000000000..d0f5ecefa
--- /dev/null
+++ b/indexlib/tests/stringarray-test.cpp
@@ -0,0 +1,104 @@
+#include <boost/test/unit_test.hpp>
+using namespace ::boost::unit_test;
+
+#include <unistd.h>
+#include "stringarray.h"
+
+namespace stringarray_test {
+
+const char* fname = "test.stringarray-delete-me";
+// Calls stringarray::remove() on fname.
+void cleanup()
+{
+    stringarray::remove( fname );
+}
+
+// Adds the same string four times.  The size check is commented out, so this
+// currently only verifies that the calls complete.
+void test_size() {
+ stringarray test( fname );
+ test.add( "one" );
+ test.add( "one" );
+ test.add( "one" );
+ test.add( "one" );
+ //BOOST_CHECK_EQUAL( test.size(), 4 );
+ cleanup();
+}
+
+// Checks that add() returns consecutive indices starting at 0 and that get()
+// returns each string under its index.
+void test_put_recover() {
+    // Start from a clean file, like the other test files do: the index checks
+    // below fail if an earlier, aborted run left data behind.
+    cleanup();
+    stringarray test( fname );
+    BOOST_CHECK_EQUAL( test.add( "one" ), 0 );
+    BOOST_CHECK_EQUAL( test.add( "two" ), 1 );
+    BOOST_CHECK_EQUAL( test.add( "three" ), 2 );
+    BOOST_CHECK_EQUAL( test.add( "four" ), 3 );
+
+    BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+    BOOST_CHECK_EQUAL( test.get( 1 ), "two" );
+    BOOST_CHECK_EQUAL( test.get( 2 ), "three" );
+    BOOST_CHECK_EQUAL( test.get( 3 ), "four" );
+
+    cleanup();
+}
+
+// Adds four strings, lets the array go out of scope, reopens the same fname
+// and checks that all four strings can still be read back.
+void test_persistent() {
+    // Start from a clean file, like the other test files do: the index checks
+    // below fail if an earlier, aborted run left data behind.
+    cleanup();
+    {
+        stringarray test( fname );
+        BOOST_CHECK_EQUAL( test.add( "one" ), 0 );
+        BOOST_CHECK_EQUAL( test.add( "two" ), 1 );
+        BOOST_CHECK_EQUAL( test.add( "three" ), 2 );
+        BOOST_CHECK_EQUAL( test.add( "four" ), 3 );
+    }
+    {
+        stringarray test( fname );
+
+        //BOOST_CHECK_EQUAL( test.size(), 4 );
+        BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+        BOOST_CHECK_EQUAL( test.get( 1 ), "two" );
+        BOOST_CHECK_EQUAL( test.get( 2 ), "three" );
+        BOOST_CHECK_EQUAL( test.get( 3 ), "four" );
+
+    }
+    cleanup();
+}
+
+// Checks get_cstr(): the returned C strings must compare equal to what was
+// added under each index (and unequal to a different string).
+void cstr() {
+    // Start from a clean file, like the other test files do: index 0 would
+    // otherwise refer to whatever an earlier, aborted run left behind.
+    cleanup();
+    stringarray test( fname );
+
+    test.add( "one" );
+    test.add( "two" );
+    test.add( "three" );
+    test.add( "four" );
+
+    BOOST_CHECK( !strcmp( test.get_cstr( 0 ), "one" ) );
+    BOOST_CHECK( strcmp( test.get_cstr( 0 ), "not one" ) );
+    BOOST_CHECK( !strcmp( test.get_cstr( 1 ), "two" ) );
+    BOOST_CHECK( !strcmp( test.get_cstr( 2 ), "three" ) );
+    BOOST_CHECK( !strcmp( test.get_cstr( 3 ), "four" ) );
+
+    cleanup();
+}
+
+// Erases index 1 of four strings and checks that the following string moves
+// into its place and that size() drops to 3.
+void erase() {
+    // Start from a clean file, like the other test files do: the size check
+    // below fails if an earlier, aborted run left data behind.
+    cleanup();
+    stringarray test( fname );
+
+    test.add( "one" );
+    test.add( "two" );
+    test.add( "three" );
+    test.add( "four" );
+
+    test.erase( 1 );
+    BOOST_CHECK_EQUAL( test.get( 0 ), "one" );
+    BOOST_CHECK_EQUAL( test.get( 1 ), "three" );
+    BOOST_CHECK_EQUAL( test.size(), 3u );
+    cleanup();
+}
+
+
+// Builds the suite for the stringarray tests defined above.
+test_suite* get_suite() {
+    // The name used to be "Memvector tests", copied from memvector-test.cpp,
+    // which made the two suites indistinguishable in test reports.
+    test_suite* test = BOOST_TEST_SUITE( "Stringarray tests" );
+    test->add( BOOST_TEST_CASE( &test_size ) );
+    test->add( BOOST_TEST_CASE( &test_put_recover ) );
+    test->add( BOOST_TEST_CASE( &test_persistent ) );
+    test->add( BOOST_TEST_CASE( &cstr ) );
+    test->add( BOOST_TEST_CASE( &erase ) );
+    return test;
+
+}
+
+} //namespace
diff --git a/indexlib/tests/stringset-test.cpp b/indexlib/tests/stringset-test.cpp
new file mode 100644
index 000000000..56d326950
--- /dev/null
+++ b/indexlib/tests/stringset-test.cpp
@@ -0,0 +1,194 @@
+#include <boost/test/unit_test.hpp>
+#include "stringset.h"
+
+using namespace ::boost::unit_test;
+namespace stringset_test {
+
+// Name passed to every stringset constructed by the tests in this file.
+const char* fname = "stringset-test-delete-me";
+// Calls stringset::remove() on fname; every test invokes this first so that
+// it does not see entries left behind by an earlier test.
+// NOTE(review): presumably remove() deletes the set's backing storage — confirm in stringset.h.
+void cleanup() {
+ stringset::remove( fname );
+}
+
+// Basic membership test: count() must be non-zero for strings that were
+// added and zero for strings that were not.
+void simple() {
+ cleanup();
+ stringset set( fname );
+ set.add( "string1" );
+ set.add( "string2" );
+
+ // Present entries.
+ BOOST_CHECK( set.count( "string1" ) );
+ BOOST_CHECK( set.count( "string2" ) );
+
+ // Absent entries, including one sharing a prefix with the stored strings.
+ BOOST_CHECK( !set.count( "string3" ) );
+ BOOST_CHECK( !set.count( "other" ) );
+}
+
+// A stringset created right after cleanup() must report empty().
+void empty() {
+ cleanup();
+ stringset set( fname );
+ BOOST_CHECK( set.empty() );
+}
+
+
+// Entries added through one stringset instance must still be found by a
+// second instance constructed with the same fname after the first one has
+// been destroyed.
+void persistent() {
+ cleanup();
+ {
+ // Writer scope: populate the set and confirm the entries are visible.
+ const char* const words[] = { "string", "victor", "file" };
+ stringset set( fname );
+ for ( unsigned idx = 0; idx != sizeof( words ) / sizeof( words[ 0 ] ); ++idx ) {
+     set.add( words[ idx ] );
+ }
+
+ BOOST_CHECK( set.count( "string" ) );
+ BOOST_CHECK( set.count( "victor" ) );
+ BOOST_CHECK( set.count( "file" ) );
+ }
+ {
+ // Reader scope: a fresh instance must find the same entries.
+ stringset set( fname );
+ BOOST_CHECK( set.count( "string" ) );
+ BOOST_CHECK( set.count( "victor" ) );
+ BOOST_CHECK( set.count( "file" ) );
+ }
+}
+
+// Iterator basics on a one-element set: begin() dereferences to the stored
+// string, differs from end(), and reaches end() after a single increment.
+void iterator() {
+ cleanup();
+ stringset set( fname );
+ set.add( "string" );
+
+ stringset::const_iterator iter = set.begin();
+
+ BOOST_CHECK_EQUAL( std::string( "string" ), *iter );
+ // A second call to begin() must compare equal to the saved iterator.
+ BOOST_CHECK_EQUAL( set.begin(), iter );
+ BOOST_CHECK( !( set.end() == iter ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( set.end(), iter );
+}
+
+// Iteration must visit the entries in sorted order ("one", "two", "wlast")
+// even though they were inserted in a different order.
+void order() {
+ cleanup();
+ stringset set( fname );
+
+ // Deliberately unsorted insertion sequence.
+ const char* const unsorted[] = { "two", "wlast", "one" };
+ for ( unsigned idx = 0; idx != sizeof( unsorted ) / sizeof( unsorted[ 0 ] ); ++idx ) {
+     set.add( unsorted[ idx ] );
+ }
+
+ stringset::const_iterator iter = set.begin();
+
+ BOOST_CHECK_EQUAL( *iter, std::string( "one" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, std::string( "two" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( *iter, std::string( "wlast" ) );
+ ++iter;
+ BOOST_CHECK_EQUAL( iter, set.end() );
+}
+
+// order_of() is expected to report an entry's position in sorted order, so
+// positions shift when a string that sorts earlier is added, and a missing
+// string yields unsigned( -1 ).
+// The expected values use unsigned literals to match the unsigned result
+// (see the unsigned( -1 ) sentinel) and avoid mixed-sign comparisons.
+void order_of() {
+ cleanup();
+ stringset set( fname );
+ set.add( "one" );
+ BOOST_CHECK_EQUAL( set.order_of( "one" ), 0u );
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), unsigned( -1 ) );
+ set.add( "two" );
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), 1u );
+ // "before" sorts ahead of both existing entries and pushes them back by one.
+ set.add( "before" );
+ BOOST_CHECK_EQUAL( set.order_of( "two" ), 2u );
+ BOOST_CHECK_EQUAL( set.order_of( "one" ), 1u );
+ BOOST_CHECK_EQUAL( set.order_of( "before" ), 0u );
+}
+
+// id_of() is expected to report the id an entry received when it was added:
+// ids follow insertion order and stay unchanged when a string that sorts
+// earlier is added later. A missing string yields unsigned( -1 ).
+// The expected values use unsigned literals to match the unsigned result
+// (see the unsigned( -1 ) sentinel) and avoid mixed-sign comparisons.
+void id_of() {
+ cleanup();
+ stringset set( fname );
+ set.add( "one" );
+ BOOST_CHECK_EQUAL( set.id_of( "one" ), 0u );
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), unsigned( -1 ) );
+ set.add( "two" );
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), 1u );
+ // "before" sorts first but was inserted last, so it gets the next free id.
+ set.add( "before" );
+ BOOST_CHECK_EQUAL( set.id_of( "two" ), 1u );
+ BOOST_CHECK_EQUAL( set.id_of( "one" ), 0u );
+ BOOST_CHECK_EQUAL( set.id_of( "before" ), 2u );
+}
+
+// add() is expected to return consecutive values starting at 0 in insertion
+// order, regardless of where the string sorts ("before" still gets 2).
+void add_return() {
+ cleanup();
+ stringset set( fname );
+ BOOST_CHECK_EQUAL( set.add( "one" ), 0 );
+ BOOST_CHECK_EQUAL( set.add( "two" ), 1 );
+ BOOST_CHECK_EQUAL( set.add( "before" ), 2 );
+}
+
+// lower_bound() must return the first entry that does not sort before the
+// key: "ab" lands on "aba", and "abz" (past every "ab…" entry) on "acc".
+void lower() {
+ cleanup();
+ stringset set( fname );
+
+ const char* const words[] = { "aab", "aac", "aba", "abc", "acc" };
+ for ( unsigned idx = 0; idx != sizeof( words ) / sizeof( words[ 0 ] ); ++idx ) {
+     set.add( words[ idx ] );
+ }
+
+ BOOST_CHECK_EQUAL( std::string( *set.lower_bound( "ab" ) ), "aba" );
+ BOOST_CHECK_EQUAL( std::string( *set.lower_bound( "abz" ) ), "acc" );
+}
+
+// upper_lower() returns a pair of iterators; the checks below expect the pair
+// to delimit exactly the entries that start with the given prefix.
+void lower_upper() {
+ cleanup();
+ stringset set( fname );
+
+ const char* const words[] = { "aab", "aac", "aba", "abc", "acc" };
+ for ( unsigned idx = 0; idx != sizeof( words ) / sizeof( words[ 0 ] ); ++idx ) {
+     set.add( words[ idx ] );
+ }
+
+ // The aliases follow the order in the member's name: `upper` is .first and
+ // `lower` is .second. In the checks `upper` is the start of the range and
+ // `lower` its end. Both are references, so they track every reassignment
+ // of `limits`.
+ std::pair<stringset::const_iterator,stringset::const_iterator> limits;
+ stringset::const_iterator& upper = limits.first;
+ stringset::const_iterator& lower = limits.second;
+
+ // Prefix "ab": two matches, "aba" then "abc".
+ limits = set.upper_lower( "ab" );
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 2u );
+ BOOST_CHECK_EQUAL( std::string( *upper ), "aba" );
+ ++upper;
+ BOOST_CHECK_EQUAL( std::string( *upper ), "abc" );
+ ++upper;
+ BOOST_CHECK( upper == lower );
+
+ // Prefix "abc": exactly one match.
+ limits = set.upper_lower( "abc" );
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 1u );
+ BOOST_CHECK_EQUAL( std::string( *upper ), "abc" );
+
+ // Prefix "abz": no match, so the range is empty.
+ limits = set.upper_lower( "abz" );
+ BOOST_CHECK_EQUAL( std::distance( upper, lower ), 0u );
+}
+
+// After clear() a populated set must report a size of zero.
+void clear() {
+ cleanup();
+ stringset set( fname );
+
+ const char* const words[] = { "string1", "string2", "one", "two", "three" };
+ for ( unsigned idx = 0; idx != sizeof( words ) / sizeof( words[ 0 ] ); ++idx ) {
+     set.add( words[ idx ] );
+ }
+
+ set.clear();
+ BOOST_CHECK_EQUAL( set.size(), 0u );
+}
+
+// Assembles the "Stringset tests" suite from the test cases defined in this
+// file and returns it to the caller.
+test_suite* get_suite() {
+ test_suite* const suite = BOOST_TEST_SUITE( "Stringset tests" );
+
+ // Registration order is the order in which the cases were written above.
+ suite->add( BOOST_TEST_CASE( &simple ) );
+ suite->add( BOOST_TEST_CASE( &empty ) );
+ suite->add( BOOST_TEST_CASE( &persistent ) );
+ suite->add( BOOST_TEST_CASE( &iterator ) );
+ suite->add( BOOST_TEST_CASE( &order ) );
+ suite->add( BOOST_TEST_CASE( &order_of ) );
+ suite->add( BOOST_TEST_CASE( &id_of ) );
+ suite->add( BOOST_TEST_CASE( &add_return ) );
+ suite->add( BOOST_TEST_CASE( &lower ) );
+ suite->add( BOOST_TEST_CASE( &lower_upper ) );
+ suite->add( BOOST_TEST_CASE( &clear ) );
+
+ return suite;
+}
+
+} // namespace
+
diff --git a/indexlib/tests/testdriver.cpp b/indexlib/tests/testdriver.cpp
new file mode 100644
index 000000000..db11e0366
--- /dev/null
+++ b/indexlib/tests/testdriver.cpp
@@ -0,0 +1,61 @@
+/* This file is part of indexlib.
+ * Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
+ *
+ * Indexlib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation and available as file
+ * GPL_V2 which is distributed along with indexlib.
+ *
+ * Indexlib is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of this program with any edition of
+ * the Qt library by Trolltech AS, Norway (or with modified versions
+ * of Qt that use the same license as Qt), and distribute linked
+ * combinations including the two. You must obey the GNU General
+ * Public License in all respects for all of the code used other than
+ * Qt. If you modify this file, you may extend this exception to
+ * your version of the file, but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from
+ * your version.
+ */
+#include <boost/test/unit_test.hpp>
+#include "logfile.h"
+using namespace ::boost::unit_test;
+
+// Forward declarations of the suite factories registered below.
+namespace memvector_test { test_suite* get_suite(); }
+namespace stringarray_test { test_suite* get_suite(); }
+namespace match_test { test_suite* get_suite(); }
+namespace stringset_test { test_suite* get_suite(); }
+namespace leafdatavector_test { test_suite* get_suite(); }
+namespace ifile_test { test_suite* get_suite(); }
+namespace mempool_test { test_suite* get_suite(); }
+// tokenizer-test.cpp defines its factory inside indexlib::tests, so the
+// declaration must name the same nested namespace; a plain
+// ::tokenizer_test::get_suite would be left undefined at link time.
+namespace indexlib { namespace tests { namespace tokenizer_test { test_suite* get_suite(); } } }
+namespace create_test { test_suite* get_suite(); }
+
+// Entry point called by the Boost.Test runner: builds the master suite out
+// of the per-component suites and returns it.
+// argc/argv are part of the signature required by Boost.Test and are not
+// used here.
+test_suite* init_unit_test_suite( int argc, char* argv[] )
+{
+ // NOTE(review): presumably sends indexlib's log output to unittest.log — confirm in logfile.h.
+ redirectlog( "unittest.log" );
+ test_suite* test = BOOST_TEST_SUITE( "Master test suite" );
+
+ test->add( memvector_test::get_suite() );
+ test->add( stringarray_test::get_suite() );
+ test->add( match_test::get_suite() );
+ test->add( stringset_test::get_suite() );
+ test->add( leafdatavector_test::get_suite() );
+ test->add( ifile_test::get_suite() );
+ test->add( mempool_test::get_suite() );
+ test->add( indexlib::tests::tokenizer_test::get_suite() );
+ test->add( create_test::get_suite() );
+
+ return test;
+}
+
diff --git a/indexlib/tests/tokenizer-test.cpp b/indexlib/tests/tokenizer-test.cpp
new file mode 100644
index 000000000..372859d90
--- /dev/null
+++ b/indexlib/tests/tokenizer-test.cpp
@@ -0,0 +1,69 @@
+#include <boost/test/unit_test.hpp>
+#include "tokenizer.h"
+#include <cassert>
+
+using namespace ::boost::unit_test;
+namespace indexlib { namespace tests { namespace tokenizer_test {
+
+using indexlib::detail::tokenizer;
+using indexlib::detail::get_tokenizer;
+
+// Feeds the "latin-1:european" tokenizer a string with punctuation, mixed
+// case and accented letters. The expected tokens are upper-cased, free of
+// punctuation, and have the accents folded to plain letters. Both lists are
+// sorted first, so the order in which tokens are produced does not matter.
+void simple() {
+ std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+ // Report a missing tokenizer through Boost.Test: assert() is compiled out
+ // under NDEBUG and would otherwise abort the whole test driver.
+ BOOST_REQUIRE( tokenizer.get() );
+ std::vector<std::string> tokens = tokenizer->string_to_words( "one ,as, ''#`:ThReE, ááàçé" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "AS" );
+ expected.push_back( "THREE" );
+ expected.push_back( "AAACE" );
+ std::sort( tokens.begin(), tokens.end() );
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ // Compare only the common prefix so that a size mismatch (already reported
+ // above) cannot cause an out-of-range access.
+ typedef std::vector<std::string>::size_type size_type;
+ for ( size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+     BOOST_CHECK_EQUAL( expected[ i ], tokens[ i ] );
+ }
+}
+
+// Newline characters must separate words: "one\ntwo\nthree" is expected to
+// yield the three upper-cased tokens. Both lists are sorted before the
+// comparison, so token order is not checked.
+void with_newlines() {
+ std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+ // Report a missing tokenizer through Boost.Test: assert() is compiled out
+ // under NDEBUG and would otherwise abort the whole test driver.
+ BOOST_REQUIRE( tokenizer.get() );
+ std::vector<std::string> tokens = tokenizer->string_to_words( "one\ntwo\nthree" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "TWO" );
+ expected.push_back( "THREE" );
+ std::sort( tokens.begin(), tokens.end() );
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ // Compare only the common prefix; a size mismatch is reported above.
+ typedef std::vector<std::string>::size_type size_type;
+ for ( size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+     BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
+// Digit-only words must be kept as tokens, including a leading zero:
+// "one 012 123 four" is expected to yield ONE, 012, 123 and FOUR. Both lists
+// are sorted before the comparison, so token order is not checked.
+void with_numbers() {
+ std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+ // Report a missing tokenizer through Boost.Test: assert() is compiled out
+ // under NDEBUG and would otherwise abort the whole test driver.
+ BOOST_REQUIRE( tokenizer.get() );
+ std::vector<std::string> tokens = tokenizer->string_to_words( "one 012 123 four" );
+ std::vector<std::string> expected;
+ expected.push_back( "ONE" );
+ expected.push_back( "012" );
+ expected.push_back( "123" );
+ expected.push_back( "FOUR" );
+ std::sort( tokens.begin(), tokens.end() );
+ std::sort( expected.begin(), expected.end() );
+ BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+ // Compare only the common prefix; a size mismatch is reported above.
+ typedef std::vector<std::string>::size_type size_type;
+ for ( size_type i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+     BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+ }
+}
+
+// Assembles the "Tokenizer tests" suite from the three test cases defined in
+// this file and returns it to the caller.
+test_suite* get_suite() {
+ test_suite* const suite = BOOST_TEST_SUITE( "Tokenizer tests" );
+
+ suite->add( BOOST_TEST_CASE( &simple ) );
+ suite->add( BOOST_TEST_CASE( &with_newlines ) );
+ suite->add( BOOST_TEST_CASE( &with_numbers ) );
+
+ return suite;
+}
+
+}}} //namespaces