beagle r4577 - in trunk/beagle: . Util beagled beagled/Lucene.Net beagled/Lucene.Net/Analysis beagled/Lucene.Net/Analysis/Standard beagled/Lucene.Net/Document beagled/Lucene.Net/Index beagled/Lucene.Net/QueryParser beagled/Lucene.Net/Search beagled/Lucene.Net/Search/Spans beagled/Lucene.Net/Store beagled/Lucene.Net/Util beagled/Lucene.Net/upstream-changes beagled/Snowball.Net/Lucene.Net/Analysis/Snowball beagled/Snowball.Net/upstream-changes
- From: dbera svn gnome org
- To: svn-commits-list gnome org
- Subject: beagle r4577 - in trunk/beagle: . Util beagled beagled/Lucene.Net beagled/Lucene.Net/Analysis beagled/Lucene.Net/Analysis/Standard beagled/Lucene.Net/Document beagled/Lucene.Net/Index beagled/Lucene.Net/QueryParser beagled/Lucene.Net/Search beagled/Lucene.Net/Search/Spans beagled/Lucene.Net/Store beagled/Lucene.Net/Util beagled/Lucene.Net/upstream-changes beagled/Snowball.Net/Lucene.Net/Analysis/Snowball beagled/Snowball.Net/upstream-changes
- Date: Mon, 3 Mar 2008 22:03:28 +0000 (GMT)
Author: dbera
Date: Mon Mar 3 22:03:27 2008
New Revision: 4577
URL: http://svn.gnome.org/viewvc/beagle?rev=4577&view=rev
Log:
Merge beagle-lucene2_1 branch to trunk. In other words, we are switching to lucene-2.1. Thanks to Nirbheek for testing. The branch looks stable and has lots of optimizations on top of lucene-2.1, which itself brings better performance (lock-less commits, faster, built-in field-selector). Further testing should be done with trunk, and fixes will go directly here.
Added:
trunk/beagle/beagled/Lucene.Net/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/
trunk/beagle/beagled/Lucene.Net/ABOUT.txt
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/ABOUT.txt
trunk/beagle/beagled/Lucene.Net/Analysis/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/
trunk/beagle/beagled/Lucene.Net/Analysis/Analyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Analyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/CharTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/CharTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/KeywordAnalyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/KeywordAnalyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/KeywordTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/KeywordTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/LengthFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/LengthFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/LetterTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/LetterTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/LowerCaseFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/LowerCaseFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/LowerCaseTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/LowerCaseTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs
trunk/beagle/beagled/Lucene.Net/Analysis/PorterStemFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/PorterStemFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/PorterStemmer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/PorterStemmer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/SimpleAnalyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/SimpleAnalyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/Standard/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Standard/
trunk/beagle/beagled/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/Standard/StandardFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Standard/StandardFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/StopAnalyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/StopAnalyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/StopFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/StopFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/Token.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Token.cs
trunk/beagle/beagled/Lucene.Net/Analysis/TokenFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/TokenFilter.cs
trunk/beagle/beagled/Lucene.Net/Analysis/TokenStream.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/TokenStream.cs
trunk/beagle/beagled/Lucene.Net/Analysis/Tokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/Tokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/WhitespaceAnalyzer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/WhitespaceAnalyzer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/WhitespaceTokenizer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/WhitespaceTokenizer.cs
trunk/beagle/beagled/Lucene.Net/Analysis/WordlistLoader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Analysis/WordlistLoader.cs
trunk/beagle/beagled/Lucene.Net/AssemblyInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/AssemblyInfo.cs
trunk/beagle/beagled/Lucene.Net/Document/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/
trunk/beagle/beagled/Lucene.Net/Document/DateField.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/DateField.cs
trunk/beagle/beagled/Lucene.Net/Document/DateTools.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/DateTools.cs
trunk/beagle/beagled/Lucene.Net/Document/Document.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/Document.cs
trunk/beagle/beagled/Lucene.Net/Document/Field.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/Field.cs
trunk/beagle/beagled/Lucene.Net/Document/NumberTools.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Document/NumberTools.cs
trunk/beagle/beagled/Lucene.Net/HISTORY.txt
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/HISTORY.txt
trunk/beagle/beagled/Lucene.Net/Index/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/
trunk/beagle/beagled/Lucene.Net/Index/CompoundFileReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/CompoundFileReader.cs
trunk/beagle/beagled/Lucene.Net/Index/CompoundFileWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/CompoundFileWriter.cs
trunk/beagle/beagled/Lucene.Net/Index/DocumentWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/DocumentWriter.cs
trunk/beagle/beagled/Lucene.Net/Index/FieldInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/FieldInfo.cs
trunk/beagle/beagled/Lucene.Net/Index/FieldInfos.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/FieldInfos.cs
trunk/beagle/beagled/Lucene.Net/Index/FieldsReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/FieldsReader.cs
trunk/beagle/beagled/Lucene.Net/Index/FieldsWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/FieldsWriter.cs
trunk/beagle/beagled/Lucene.Net/Index/FilterIndexReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/FilterIndexReader.cs
trunk/beagle/beagled/Lucene.Net/Index/IndexFileNameFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/IndexFileNameFilter.cs
trunk/beagle/beagled/Lucene.Net/Index/IndexFileNames.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/IndexFileNames.cs
trunk/beagle/beagled/Lucene.Net/Index/IndexModifier.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/IndexModifier.cs
trunk/beagle/beagled/Lucene.Net/Index/IndexReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/IndexReader.cs
trunk/beagle/beagled/Lucene.Net/Index/IndexWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/IndexWriter.cs
trunk/beagle/beagled/Lucene.Net/Index/MultiReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/MultiReader.cs
trunk/beagle/beagled/Lucene.Net/Index/MultipleTermPositions.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/MultipleTermPositions.cs
trunk/beagle/beagled/Lucene.Net/Index/ParallelReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/ParallelReader.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentInfo.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentInfos.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentInfos.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentMergeInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentMergeInfo.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentMergeQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentMergeQueue.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentMerger.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentMerger.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentReader.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentTermDocs.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentTermDocs.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentTermEnum.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentTermEnum.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentTermPositionVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentTermPositionVector.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentTermPositions.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentTermPositions.cs
trunk/beagle/beagled/Lucene.Net/Index/SegmentTermVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/SegmentTermVector.cs
trunk/beagle/beagled/Lucene.Net/Index/Term.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/Term.cs
trunk/beagle/beagled/Lucene.Net/Index/TermBuffer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermBuffer.cs
trunk/beagle/beagled/Lucene.Net/Index/TermDocs.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermDocs.cs
trunk/beagle/beagled/Lucene.Net/Index/TermEnum.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermEnum.cs
trunk/beagle/beagled/Lucene.Net/Index/TermFreqVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermFreqVector.cs
trunk/beagle/beagled/Lucene.Net/Index/TermInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermInfo.cs
trunk/beagle/beagled/Lucene.Net/Index/TermInfosReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermInfosReader.cs
trunk/beagle/beagled/Lucene.Net/Index/TermInfosWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermInfosWriter.cs
trunk/beagle/beagled/Lucene.Net/Index/TermPositionVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermPositionVector.cs
trunk/beagle/beagled/Lucene.Net/Index/TermPositions.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermPositions.cs
trunk/beagle/beagled/Lucene.Net/Index/TermVectorOffsetInfo.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermVectorOffsetInfo.cs
trunk/beagle/beagled/Lucene.Net/Index/TermVectorsReader.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermVectorsReader.cs
trunk/beagle/beagled/Lucene.Net/Index/TermVectorsWriter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Index/TermVectorsWriter.cs
trunk/beagle/beagled/Lucene.Net/LICENSE.txt
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/LICENSE.txt
trunk/beagle/beagled/Lucene.Net/LucenePackage.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/LucenePackage.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/
trunk/beagle/beagled/Lucene.Net/QueryParser/CharStream.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/CharStream.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/FastCharStream.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/FastCharStream.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/MultiFieldQueryParser.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/MultiFieldQueryParser.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/ParseException.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/ParseException.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/QueryParser.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/QueryParser.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/QueryParserConstants.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/QueryParserConstants.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/QueryParserTokenManager.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/QueryParserTokenManager.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/Token.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/Token.cs
trunk/beagle/beagled/Lucene.Net/QueryParser/TokenMgrError.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/QueryParser/TokenMgrError.cs
trunk/beagle/beagled/Lucene.Net/Search/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/
trunk/beagle/beagled/Lucene.Net/Search/BooleanClause.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/BooleanClause.cs
trunk/beagle/beagled/Lucene.Net/Search/BooleanQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/BooleanQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/BooleanScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/BooleanScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/BooleanScorer2.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/BooleanScorer2.cs
trunk/beagle/beagled/Lucene.Net/Search/CachingWrapperFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/CachingWrapperFilter.cs
trunk/beagle/beagled/Lucene.Net/Search/ConjunctionScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ConjunctionScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/ConstantScoreQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ConstantScoreQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/ConstantScoreRangeQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ConstantScoreRangeQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/DefaultSimilarity.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/DefaultSimilarity.cs
trunk/beagle/beagled/Lucene.Net/Search/DisjunctionMaxQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/DisjunctionMaxQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/DisjunctionMaxScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/DisjunctionMaxScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/DisjunctionSumScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/DisjunctionSumScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/ExactPhraseScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ExactPhraseScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/Explanation.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Explanation.cs
trunk/beagle/beagled/Lucene.Net/Search/FieldCache.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FieldCache.cs
trunk/beagle/beagled/Lucene.Net/Search/FieldCacheImpl.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FieldCacheImpl.cs
trunk/beagle/beagled/Lucene.Net/Search/FieldDoc.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FieldDoc.cs
trunk/beagle/beagled/Lucene.Net/Search/FieldDocSortedHitQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FieldDocSortedHitQueue.cs
trunk/beagle/beagled/Lucene.Net/Search/FieldSortedHitQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FieldSortedHitQueue.cs
trunk/beagle/beagled/Lucene.Net/Search/Filter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Filter.cs
trunk/beagle/beagled/Lucene.Net/Search/FilteredQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FilteredQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/FilteredTermEnum.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FilteredTermEnum.cs
trunk/beagle/beagled/Lucene.Net/Search/FuzzyQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FuzzyQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/FuzzyTermEnum.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/FuzzyTermEnum.cs
trunk/beagle/beagled/Lucene.Net/Search/Hit.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Hit.cs
trunk/beagle/beagled/Lucene.Net/Search/HitCollector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/HitCollector.cs
trunk/beagle/beagled/Lucene.Net/Search/HitIterator.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/HitIterator.cs
trunk/beagle/beagled/Lucene.Net/Search/HitQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/HitQueue.cs
trunk/beagle/beagled/Lucene.Net/Search/Hits.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Hits.cs
trunk/beagle/beagled/Lucene.Net/Search/IndexSearcher.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/IndexSearcher.cs
trunk/beagle/beagled/Lucene.Net/Search/MatchAllDocsQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/MatchAllDocsQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/MultiPhraseQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/MultiPhraseQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/MultiSearcher.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/MultiSearcher.cs
trunk/beagle/beagled/Lucene.Net/Search/MultiTermQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/MultiTermQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/NonMatchingScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/NonMatchingScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/ParallelMultiSearcher.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ParallelMultiSearcher.cs
trunk/beagle/beagled/Lucene.Net/Search/PhrasePositions.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/PhrasePositions.cs
trunk/beagle/beagled/Lucene.Net/Search/PhraseQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/PhraseQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/PhraseQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/PhraseQueue.cs
trunk/beagle/beagled/Lucene.Net/Search/PhraseScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/PhraseScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/PrefixQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/PrefixQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Query.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Query.cs
trunk/beagle/beagled/Lucene.Net/Search/QueryFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/QueryFilter.cs
trunk/beagle/beagled/Lucene.Net/Search/QueryTermVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/QueryTermVector.cs
trunk/beagle/beagled/Lucene.Net/Search/RangeFilter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/RangeFilter.cs
trunk/beagle/beagled/Lucene.Net/Search/RangeQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/RangeQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/RemoteSearchable.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/RemoteSearchable.cs
trunk/beagle/beagled/Lucene.Net/Search/ReqExclScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ReqExclScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/ReqOptSumScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ReqOptSumScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/ScoreDoc.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ScoreDoc.cs
trunk/beagle/beagled/Lucene.Net/Search/ScoreDocComparator.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/ScoreDocComparator.cs
trunk/beagle/beagled/Lucene.Net/Search/Scorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Scorer.cs
trunk/beagle/beagled/Lucene.Net/Search/Searchable.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Searchable.cs
trunk/beagle/beagled/Lucene.Net/Search/Searcher.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Searcher.cs
trunk/beagle/beagled/Lucene.Net/Search/Similarity.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Similarity.cs
trunk/beagle/beagled/Lucene.Net/Search/SimilarityDelegator.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/SimilarityDelegator.cs
trunk/beagle/beagled/Lucene.Net/Search/SloppyPhraseScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/SloppyPhraseScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/Sort.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Sort.cs
trunk/beagle/beagled/Lucene.Net/Search/SortComparator.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/SortComparator.cs
trunk/beagle/beagled/Lucene.Net/Search/SortComparatorSource.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/SortComparatorSource.cs
trunk/beagle/beagled/Lucene.Net/Search/SortField.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/SortField.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/
trunk/beagle/beagled/Lucene.Net/Search/Spans/NearSpans.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/NearSpans.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanFirstQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanFirstQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanNearQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanNearQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanNotQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanNotQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanOrQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanOrQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanTermQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanTermQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/SpanWeight.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/SpanWeight.cs
trunk/beagle/beagled/Lucene.Net/Search/Spans/Spans.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Spans/Spans.cs
trunk/beagle/beagled/Lucene.Net/Search/TermQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TermQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/TermScorer.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TermScorer.cs
trunk/beagle/beagled/Lucene.Net/Search/TopDocCollector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TopDocCollector.cs
trunk/beagle/beagled/Lucene.Net/Search/TopDocs.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TopDocs.cs
trunk/beagle/beagled/Lucene.Net/Search/TopFieldDocCollector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TopFieldDocCollector.cs
trunk/beagle/beagled/Lucene.Net/Search/TopFieldDocs.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/TopFieldDocs.cs
trunk/beagle/beagled/Lucene.Net/Search/Weight.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/Weight.cs
trunk/beagle/beagled/Lucene.Net/Search/WildcardQuery.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/WildcardQuery.cs
trunk/beagle/beagled/Lucene.Net/Search/WildcardTermEnum.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Search/WildcardTermEnum.cs
trunk/beagle/beagled/Lucene.Net/SharpZipLibAdapter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/SharpZipLibAdapter.cs
trunk/beagle/beagled/Lucene.Net/Store/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/
trunk/beagle/beagled/Lucene.Net/Store/BufferedIndexInput.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/BufferedIndexInput.cs
trunk/beagle/beagled/Lucene.Net/Store/BufferedIndexOutput.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/BufferedIndexOutput.cs
trunk/beagle/beagled/Lucene.Net/Store/Directory.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/Directory.cs
trunk/beagle/beagled/Lucene.Net/Store/FSDirectory.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/FSDirectory.cs
trunk/beagle/beagled/Lucene.Net/Store/IndexInput.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/IndexInput.cs
trunk/beagle/beagled/Lucene.Net/Store/IndexOutput.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/IndexOutput.cs
trunk/beagle/beagled/Lucene.Net/Store/Lock.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/Lock.cs
trunk/beagle/beagled/Lucene.Net/Store/MMapDirectory.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/MMapDirectory.cs
trunk/beagle/beagled/Lucene.Net/Store/RAMDirectory.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/RAMDirectory.cs
trunk/beagle/beagled/Lucene.Net/Store/RAMFile.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/RAMFile.cs
trunk/beagle/beagled/Lucene.Net/Store/RAMInputStream.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/RAMInputStream.cs
trunk/beagle/beagled/Lucene.Net/Store/RAMOutputStream.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Store/RAMOutputStream.cs
trunk/beagle/beagled/Lucene.Net/SupportClass.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/SupportClass.cs
trunk/beagle/beagled/Lucene.Net/Util/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/
trunk/beagle/beagled/Lucene.Net/Util/BitVector.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/BitVector.cs
trunk/beagle/beagled/Lucene.Net/Util/Constants.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/Constants.cs
trunk/beagle/beagled/Lucene.Net/Util/Parameter.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/Parameter.cs
trunk/beagle/beagled/Lucene.Net/Util/PriorityQueue.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/PriorityQueue.cs
trunk/beagle/beagled/Lucene.Net/Util/SmallFloat.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/SmallFloat.cs
trunk/beagle/beagled/Lucene.Net/Util/StringHelper.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/StringHelper.cs
trunk/beagle/beagled/Lucene.Net/Util/ToStringUtils.cs
- copied unchanged from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/Util/ToStringUtils.cs
trunk/beagle/beagled/Lucene.Net/upstream-changes/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Lucene.Net/upstream-changes/
trunk/beagle/beagled/Snowball.Net/upstream-changes/
- copied from r4574, /branches/beagle-lucene2_1/beagled/Snowball.Net/upstream-changes/
Modified:
trunk/beagle/ (props changed)
trunk/beagle/Util/PullingReader.cs
trunk/beagle/Util/StringFu.cs
trunk/beagle/beagled/BuildIndex.cs
trunk/beagle/beagled/LuceneCommon.cs
trunk/beagle/beagled/LuceneIndexingDriver.cs
trunk/beagle/beagled/LuceneQueryingDriver.cs
trunk/beagle/beagled/Makefile.am
trunk/beagle/beagled/NoiseFilter.cs
trunk/beagle/beagled/Snowball.Net/Lucene.Net/Analysis/Snowball/SnowballFilter.cs
Modified: trunk/beagle/Util/PullingReader.cs
==============================================================================
--- trunk/beagle/Util/PullingReader.cs (original)
+++ trunk/beagle/Util/PullingReader.cs Mon Mar 3 22:03:27 2008
@@ -56,6 +56,7 @@
done = ! pull (pullBuffer, neededSize - pullBuffer.Length);
} catch (Exception e) {
Logger.Log.Debug (e, "Caught exception pulling text from {0}", pull);
+ done = true;
}
}
}
@@ -88,8 +89,7 @@
if (done && pullBuffer.Length < count)
count = pullBuffer.Length;
- for (int i = 0; i < count; ++i)
- buffer [index + i] = pullBuffer [i];
+ pullBuffer.CopyTo (0, buffer, index, count);
pullBuffer.Remove (0, count);
return count;
Modified: trunk/beagle/Util/StringFu.cs
==============================================================================
--- trunk/beagle/Util/StringFu.cs (original)
+++ trunk/beagle/Util/StringFu.cs Mon Mar 3 22:03:27 2008
@@ -26,6 +26,7 @@
using System;
using System.Collections;
+using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
@@ -459,9 +460,14 @@
static public string HexEscape (string str)
{
- StringBuilder builder = new StringBuilder ();
+ int index = -1;
+ if ((index = str.IndexOfAny (CharsToQuote)) == -1)
+ return str;
+
+ StringBuilder builder = new StringBuilder (str, 0, index, str.Length << 1);
- foreach (char c in str) {
+ for (; index < str.Length; ++ index) {
+ char c = str [index];
if (ArrayFu.IndexOfChar (CharsToQuote, c) != -1)
builder.Append (Uri.HexEscape (c));
@@ -491,23 +497,26 @@
/// </returns>
static public string HexUnescape (string str)
{
- ArrayList bytes = new ArrayList ();
- byte[] sub_bytes;
int i, pos = 0;
+ if ((i = str.IndexOf ('%')) == -1)
+ return str;
- while ((i = str.IndexOf ('%', pos)) != -1) {
+ List<byte> bytes = new List<byte> (str.Length);
+ byte[] sub_bytes;
+
+ do {
sub_bytes = Encoding.UTF8.GetBytes (str.Substring (pos, i - pos));
bytes.AddRange (sub_bytes);
pos = i;
char unescaped = Uri.HexUnescape (str, ref pos);
- bytes.Add ((byte) unescaped);
- }
+ bytes.Add (Convert.ToByte (unescaped));
+ } while ((i = str.IndexOf ('%', pos)) != -1);
sub_bytes = Encoding.UTF8.GetBytes (str.Substring (pos, str.Length - pos));
bytes.AddRange (sub_bytes);
- return Encoding.UTF8.GetString ((byte[]) bytes.ToArray (typeof (byte)));
+ return Encoding.UTF8.GetString (bytes.ToArray ());
}
// These strings should never be exposed to the user.
Modified: trunk/beagle/beagled/BuildIndex.cs
==============================================================================
--- trunk/beagle/beagled/BuildIndex.cs (original)
+++ trunk/beagle/beagled/BuildIndex.cs Mon Mar 3 22:03:27 2008
@@ -102,7 +102,7 @@
static Queue pending_directories = new Queue ();
static IndexerRequest pending_request;
- const int BATCH_SIZE = 30;
+ const int BATCH_SIZE = Lucene.Net.Index.IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
/////////////////////////////////////////////////////////
Modified: trunk/beagle/beagled/LuceneCommon.cs
==============================================================================
--- trunk/beagle/beagled/LuceneCommon.cs (original)
+++ trunk/beagle/beagled/LuceneCommon.cs Mon Mar 3 22:03:27 2008
@@ -26,6 +26,7 @@
using System;
using System.Collections;
+using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
@@ -42,6 +43,9 @@
using Lucene.Net.QueryParsers;
using LNS = Lucene.Net.Search;
+using SF.Snowball.Ext;
+using SnowballProgram = SF.Snowball.SnowballProgram;
+
using Beagle.Util;
namespace Beagle.Daemon {
@@ -101,7 +105,7 @@
private Lucene.Net.Store.Directory secondary_store = null;
// Flush if more than this number of requests
- public const int RequestFlushThreshold = 37; // a total arbitrary magic number
+ public const int RequestFlushThreshold = Lucene.Net.Index.IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; // Use same value as Lucene's flush threshold
//////////////////////////////////////////////////////////////////////////////
@@ -379,7 +383,7 @@
// Create a new store.
Lucene.Net.Store.Directory store;
- store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);
+ store = Lucene.Net.Store.FSDirectory.GetDirectory (path, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
// Create an empty index in that store.
IndexWriter writer;
@@ -437,8 +441,14 @@
reader.Close ();
// Create stores for our indexes.
- primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
- secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
+ // Use separate lock factories since each lock factory is tied to the index directory
+ if (read_only_mode) {
+ primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
+ secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
+ } else {
+ primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
+ secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
+ }
}
////////////////////////////////////////////////////////////////
@@ -471,12 +481,12 @@
}
// FIXME: This assumes everything being indexed is in English!
- internal class BeagleAnalyzer : StandardAnalyzer {
+ public class BeagleAnalyzer : StandardAnalyzer {
+ const string DEFAULT_STEMMER_LANGUAGE = "English";
private char [] buffer = new char [2];
private bool strip_extra_property_info = false;
private bool tokenize_email_hostname = false;
- const string DEFAULT_STEMMER = "English";
public BeagleAnalyzer (bool is_indexing_analyzer)
{
@@ -533,7 +543,10 @@
|| fieldName == "PropertyText"
|| is_text_prop) {
outstream = new NoiseEmailHostFilter (outstream, tokenize_email_hostname);
- outstream = new SnowballFilter (outstream, DEFAULT_STEMMER);
+ // Sharing Stemmer is not thread safe.
+ // Currently our underlying lucene indexing is not done in multiple threads.
+ StemmerInfo stemmer_info = GetStemmer (DEFAULT_STEMMER_LANGUAGE);
+ outstream = new SnowballFilter (outstream, stemmer_info.Stemmer, stemmer_info.StemMethod);
}
return outstream;
@@ -1033,17 +1046,42 @@
// Access to the stemmer and list of stop words
//
- static SF.Snowball.Ext.EnglishStemmer stemmer = new SF.Snowball.Ext.EnglishStemmer ();
+ private static Dictionary<string, StemmerInfo> stemmer_table = new Dictionary<string, StemmerInfo> ();
+
+ class StemmerInfo {
+ internal SnowballProgram Stemmer;
+ internal System.Reflection.MethodInfo StemMethod;
+ }
+
+ private static StemmerInfo GetStemmer (System.String name)
+ {
+ if (! stemmer_table.ContainsKey (name)) {
+ StemmerInfo stemmer_info = new StemmerInfo ();
+
+ // Taken from Snowball/SnowballFilter.cs
+ System.Type stemClass = System.Type.GetType ("SF.Snowball.Ext." + name + "Stemmer", true);
+ SnowballProgram stemmer = (SnowballProgram) System.Activator.CreateInstance (stemClass);
+ // why doesn't the SnowballProgram class have an (abstract?) stem method?
+ System.Reflection.MethodInfo stemMethod = stemClass.GetMethod ("Stem", (new System.Type [0] == null) ? new System.Type [0] : (System.Type []) new System.Type [0]);
+
+ stemmer_info.Stemmer = stemmer;
+ stemmer_info.StemMethod = stemMethod;
+ stemmer_table [name] = stemmer_info;
+ }
+
+ return stemmer_table [name];
+ }
+
+ private static SF.Snowball.Ext.EnglishStemmer default_stemmer = new SF.Snowball.Ext.EnglishStemmer ();
static public string Stem (string str)
{
string stemmed_str;
- lock (stemmer) {
- stemmer.SetCurrent (str);
- stemmer.Stem ();
- stemmed_str = stemmer.GetCurrent ();
- stemmer.SetCurrent (String.Empty);
+ lock (default_stemmer) {
+ default_stemmer.SetCurrent (str);
+ default_stemmer.Stem ();
+ stemmed_str = default_stemmer.GetCurrent ();
}
return stemmed_str;
@@ -1332,11 +1370,11 @@
if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
LNS.BooleanQuery sub_query;
sub_query = new LNS.BooleanQuery ();
- sub_query.Add (ym_query, true, false);
- sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
- top_level_query.Add (sub_query, false, false);
+ sub_query.Add (ym_query, LNS.BooleanClause.Occur.MUST);
+ sub_query.Add (NewDayQuery (field_name, d1, d2), LNS.BooleanClause.Occur.MUST);
+ top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
} else {
- top_level_query.Add (ym_query, false, false);
+ top_level_query.Add (ym_query, LNS.BooleanClause.Occur.SHOULD);
}
} else {
@@ -1345,9 +1383,9 @@
if (d1 > 1) {
LNS.BooleanQuery sub_query;
sub_query = new LNS.BooleanQuery ();
- sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
- sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
- top_level_query.Add (sub_query, false, false);
+ sub_query.Add (NewYearMonthQuery (field_name, y1, m1), LNS.BooleanClause.Occur.MUST);
+ sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), LNS.BooleanClause.Occur.MUST);
+ top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
++m1;
if (m1 == 13) {
@@ -1360,9 +1398,9 @@
if (d2 < DateTime.DaysInMonth (y2, m2)) {
LNS.BooleanQuery sub_query;
sub_query = new LNS.BooleanQuery ();
- sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
- sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
- top_level_query.Add (sub_query, false, false);
+ sub_query.Add (NewYearMonthQuery (field_name, y2, m2), LNS.BooleanClause.Occur.MUST);
+ sub_query.Add (NewDayQuery (field_name, 1, d2), LNS.BooleanClause.Occur.MUST);
+ top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
--m2;
if (m2 == 0) {
@@ -1374,7 +1412,7 @@
// Generate the query for the "middle" of our period, if it is non-empty
if (y1 < y2 || ((y1 == y2) && m1 <= m2))
top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
- false, false);
+ LNS.BooleanClause.Occur.SHOULD);
}
return top_level_query;
@@ -1432,14 +1470,14 @@
LNS.Query subquery;
subquery = StringToQuery ("Text", part.Text, term_list);
if (subquery != null) {
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
added_subquery = true;
}
// FIXME: HotText is ignored for now!
// subquery = StringToQuery ("HotText", part.Text);
// if (subquery != null) {
- // p_query.Add (subquery, false, false);
+ // p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
// added_subquery = true;
// }
}
@@ -1448,10 +1486,10 @@
LNS.Query subquery;
subquery = StringToQuery ("PropertyText", part.Text, term_list);
if (subquery != null) {
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
// Properties can live in either index
if (! only_build_primary_query)
- s_query.Add (subquery.Clone () as LNS.Query, false, false);
+ s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
added_subquery = true;
}
@@ -1482,10 +1520,10 @@
if (term_list != null)
term_list.Add (term);
subquery = new LNS.TermQuery (term);
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
// Properties can live in either index
if (! only_build_primary_query)
- s_query.Add (subquery.Clone () as LNS.Query, false, false);
+ s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
} else {
// Reset these so we return a null query
p_query = null;
@@ -1515,26 +1553,26 @@
// Search text content
term = new Term ("Text", query_string_lower);
subquery = new LNS.WildcardQuery (term);
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
term_list.Add (term);
// Search text properties
term = new Term ("PropertyText", query_string_lower);
subquery = new LNS.WildcardQuery (term);
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
// Properties can live in either index
if (! only_build_primary_query)
- s_query.Add (subquery.Clone () as LNS.Query, false, false);
+ s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
term_list.Add (term);
// Search property keywords
term = new Term ("PropertyKeyword", query_string_lower);
term_list.Add (term);
subquery = new LNS.WildcardQuery (term);
- p_query.Add (subquery, false, false);
+ p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
// Properties can live in either index
if (! only_build_primary_query)
- s_query.Add (subquery.Clone () as LNS.Query, false, false);
+ s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
primary_query = p_query;
if (! only_build_primary_query)
@@ -1587,9 +1625,9 @@
term_list, query_part_hook,
out p_subq, out s_subq, out sub_hit_filter);
if (p_subq != null)
- p_query.Add (p_subq, false, false);
+ p_query.Add (p_subq, LNS.BooleanClause.Occur.SHOULD);
if (s_subq != null)
- s_query.Add (s_subq, false, false);
+ s_query.Add (s_subq, LNS.BooleanClause.Occur.SHOULD);
if (sub_hit_filter != null) {
if (or_hit_filter == null)
or_hit_filter = new OrHitFilter ();
@@ -1678,7 +1716,7 @@
int cursor = 0;
if (extra_requirement != null) {
- top_query.Add (extra_requirement, true, false);
+ top_query.Add (extra_requirement, LNS.BooleanClause.Occur.MUST);
++cursor;
}
@@ -1690,7 +1728,7 @@
LNS.BooleanQuery bq;
bq = new LNS.BooleanQuery ();
bottom_queries.Add (bq);
- top_query.Add (bq, false, false);
+ top_query.Add (bq, LNS.BooleanClause.Occur.SHOULD);
}
}
@@ -1708,7 +1746,7 @@
cursor = 0;
}
- target.Add (subquery, false, false);
+ target.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
}
return top_query;
Modified: trunk/beagle/beagled/LuceneIndexingDriver.cs
==============================================================================
--- trunk/beagle/beagled/LuceneIndexingDriver.cs (original)
+++ trunk/beagle/beagled/LuceneIndexingDriver.cs Mon Mar 3 22:03:27 2008
@@ -215,19 +215,19 @@
term = new Term ("Uri", uri_str);
// For property changes, only secondary index is modified
- secondary_reader.Delete (term);
+ secondary_reader.DeleteDocuments (term);
// Now remove from everywhere else (if asked to remove or if asked to add, in which case
// we first remove and then add)
// So we also need to remove child documents
if (indexable.Type != IndexableType.PropertyChange) {
- num_delete = primary_reader.Delete (term);
+ num_delete = primary_reader.DeleteDocuments (term);
// When we delete an indexable, also delete any children.
// FIXME: Shouldn't we also delete any children of children, etc.?
term = new Term ("ParentUri", uri_str);
- num_delete += primary_reader.Delete (term);
- secondary_reader.Delete (term);
+ num_delete += primary_reader.DeleteDocuments (term);
+ secondary_reader.DeleteDocuments (term);
}
// If this is a strict removal (and not a deletion that
@@ -270,6 +270,10 @@
text_cache.BeginTransaction ();
IndexWriter primary_writer, secondary_writer;
+ // FIXME: Lock obtain time-out can happen here; if that happens,
+ // an exception will be thrown and this method will break in the middle
+ // leaving IndexWriters unclosed! Same for any Lucene.Net-index modification
+ // methods.
primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
secondary_writer = null;
Modified: trunk/beagle/beagled/LuceneQueryingDriver.cs
==============================================================================
--- trunk/beagle/beagled/LuceneQueryingDriver.cs (original)
+++ trunk/beagle/beagled/LuceneQueryingDriver.cs Mon Mar 3 22:03:27 2008
@@ -190,12 +190,12 @@
case QueryPartLogic.Prohibited:
if (primary_prohibited_part_query == null)
primary_prohibited_part_query = new LNS.BooleanQuery ();
- primary_prohibited_part_query.Add (primary_part_query, false, false);
+ primary_prohibited_part_query.Add (primary_part_query, LNS.BooleanClause.Occur.SHOULD);
if (secondary_part_query != null) {
if (secondary_prohibited_part_query == null)
secondary_prohibited_part_query = new LNS.BooleanQuery ();
- secondary_prohibited_part_query.Add (secondary_part_query, false, false);
+ secondary_prohibited_part_query.Add (secondary_part_query, LNS.BooleanClause.Occur.SHOULD);
}
if (part_hit_filter != null) {
@@ -514,8 +514,7 @@
// Only generate results if we got some matches
if (primary_matches != null && primary_matches.ContainsTrue ()) {
GenerateQueryResults (primary_reader,
- primary_searcher,
- secondary_searcher,
+ secondary_reader,
primary_matches,
result,
term_list,
@@ -572,7 +571,7 @@
LNS.BooleanQuery combined_query;
combined_query = new LNS.BooleanQuery ();
foreach (LNS.Query query in primary_queries)
- combined_query.Add (query, true, false);
+ combined_query.Add (query, LNS.BooleanClause.Occur.MUST);
LuceneBitArray matches;
matches = new LuceneBitArray (primary_searcher, combined_query);
@@ -708,7 +707,7 @@
foreach (Term term in term_list) {
double idf;
- idf = similarity.Ldf (reader.DocFreq (term), reader.MaxDoc ());
+ idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());
int hit_count;
hit_count = hits_by_id.Count;
@@ -744,12 +743,11 @@
//
// Two arrays we need for quickly creating lucene documents and check if they are valid
- static string[] fields_timestamp_uri = { "Timestamp", "Uri" };
- static string[] fields_uri = {"Uri"};
+ static FieldSelector fields_timestamp_uri = new MapFieldSelector (new string[] {"Uri", "Timestamp"});
+ static FieldSelector fields_uri = new MapFieldSelector (new string[] {"Uri"});
private static void GenerateQueryResults (IndexReader primary_reader,
- LNS.IndexSearcher primary_searcher,
- LNS.IndexSearcher secondary_searcher,
+ IndexReader secondary_reader,
BetterBitArray primary_matches,
IQueryResult result,
ICollection query_term_list,
@@ -787,8 +785,7 @@
if (primary_matches.TrueCount > max_results)
final_list_of_hits = ScanRecentDocs (primary_reader,
- primary_searcher,
- secondary_searcher,
+ secondary_reader,
primary_matches,
hits_by_id,
max_results,
@@ -797,8 +794,7 @@
if (final_list_of_hits == null)
final_list_of_hits = FindRecentResults (primary_reader,
- primary_searcher,
- secondary_searcher,
+ secondary_reader,
primary_matches,
hits_by_id,
max_results,
@@ -889,8 +885,7 @@
// for all of them.
private static ArrayList ScanRecentDocs (IndexReader primary_reader,
- LNS.IndexSearcher primary_searcher,
- LNS.IndexSearcher secondary_searcher,
+ IndexReader secondary_reader,
BetterBitArray primary_matches,
Dictionary<int, Hit> hits_by_id,
int max_results,
@@ -909,8 +904,8 @@
Term term;
TermDocs secondary_term_docs = null;
- if (secondary_searcher != null)
- secondary_term_docs = secondary_searcher.Reader.TermDocs ();
+ if (secondary_reader != null)
+ secondary_term_docs = secondary_reader.TermDocs ();
do {
term = enumerator.Term ();
@@ -926,13 +921,13 @@
int doc_id = docs.Doc ();
if (primary_matches.Get (doc_id)) {
- Document doc = primary_searcher.Doc (doc_id);
+ Document doc = primary_reader.Document (doc_id);
// If we have a UriFilter, apply it.
if (uri_filter != null) {
Uri uri;
uri = GetUriFromDocument (doc);
if (uri_filter (uri)) {
- Hit hit = CreateHit (doc, secondary_searcher, secondary_term_docs);
+ Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs);
hits_by_id [doc_id] = hit;
// Add the result, last modified first
results.Add (hit);
@@ -971,8 +966,7 @@
}
private static ArrayList FindRecentResults (IndexReader primary_reader,
- LNS.IndexSearcher primary_searcher,
- LNS.IndexSearcher secondary_searcher,
+ IndexReader secondary_reader,
BetterBitArray primary_matches,
Dictionary<int, Hit> hits_by_id,
int max_results,
@@ -994,8 +988,8 @@
else
all_docs = new ArrayList (primary_matches.TrueCount);
- if (secondary_searcher != null)
- term_docs = secondary_searcher.Reader.TermDocs ();
+ if (secondary_reader != null)
+ term_docs = secondary_reader.TermDocs ();
for (int match_index = primary_matches.Count; ; match_index --) {
// Walk across the matches backwards, since newer
@@ -1007,7 +1001,7 @@
count++;
- doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
+ doc = primary_reader.Document (match_index, fields_timestamp_uri);
// Check the timestamp --- if we have already reached our
// limit, we might be able to reject it immediately.
@@ -1033,7 +1027,7 @@
// Get the actual hit now
// doc was created with only 2 fields, so first get the complete lucene document for primary document
- Hit hit = CreateHit (primary_searcher.Doc (match_index), secondary_searcher, term_docs);
+ Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs);
hits_by_id [match_index] = hit;
// Add the document to the appropriate data structure.
@@ -1063,12 +1057,12 @@
}
private static Hit CreateHit ( Document primary_doc,
- LNS.IndexSearcher secondary_searcher,
+ IndexReader secondary_reader,
TermDocs term_docs)
{
Hit hit = DocumentToHit (primary_doc);
- if (secondary_searcher == null)
+ if (secondary_reader == null)
return hit;
// Get the stringified version of the URI
@@ -1078,7 +1072,7 @@
// Move to the first (and only) matching term doc
term_docs.Next ();
- Document secondary_doc = secondary_searcher.Doc (term_docs.Doc ());
+ Document secondary_doc = secondary_reader.Document (term_docs.Doc ());
// If we are using the secondary index, now we need to
// merge the properties from the secondary index
Modified: trunk/beagle/beagled/Makefile.am
==============================================================================
--- trunk/beagle/beagled/Makefile.am (original)
+++ trunk/beagle/beagled/Makefile.am Mon Mar 3 22:03:27 2008
@@ -87,208 +87,9 @@
############################################################
-lucenedir = $(srcdir)/Lucene.Net
+include $(srcdir)/Lucene.Net/Makefile.include
-LUCENE_1_9_CSFILES = \
- $(lucenedir)/Analysis/Standard/CharStream.cs \
- $(lucenedir)/Analysis/Standard/FastCharStream.cs \
- $(lucenedir)/Analysis/Standard/ParseException.cs \
- $(lucenedir)/Analysis/Standard/StandardAnalyzer.cs \
- $(lucenedir)/Analysis/Standard/StandardFilter.cs \
- $(lucenedir)/Analysis/Standard/StandardTokenizer.cs \
- $(lucenedir)/Analysis/Standard/StandardTokenizerConstants.cs \
- $(lucenedir)/Analysis/Standard/StandardTokenizerTokenManager.cs \
- $(lucenedir)/Analysis/Standard/Token.cs \
- $(lucenedir)/Analysis/Standard/TokenMgrError.cs \
- $(lucenedir)/Analysis/Analyzer.cs \
- $(lucenedir)/Analysis/CharTokenizer.cs \
- $(lucenedir)/Analysis/ISOLatin1AccentFilter.cs \
- $(lucenedir)/Analysis/KeywordAnalyzer.cs \
- $(lucenedir)/Analysis/KeywordTokenizer.cs \
- $(lucenedir)/Analysis/LengthFilter.cs \
- $(lucenedir)/Analysis/LetterTokenizer.cs \
- $(lucenedir)/Analysis/LowerCaseFilter.cs \
- $(lucenedir)/Analysis/LowerCaseTokenizer.cs \
- $(lucenedir)/Analysis/PerFieldAnalyzerWrapper.cs \
- $(lucenedir)/Analysis/PorterStemFilter.cs \
- $(lucenedir)/Analysis/PorterStemmer.cs \
- $(lucenedir)/Analysis/SimpleAnalyzer.cs \
- $(lucenedir)/Analysis/StopAnalyzer.cs \
- $(lucenedir)/Analysis/StopFilter.cs \
- $(lucenedir)/Analysis/Token.cs \
- $(lucenedir)/Analysis/TokenFilter.cs \
- $(lucenedir)/Analysis/Tokenizer.cs \
- $(lucenedir)/Analysis/TokenStream.cs \
- $(lucenedir)/Analysis/WhitespaceAnalyzer.cs \
- $(lucenedir)/Analysis/WhitespaceTokenizer.cs \
- $(lucenedir)/Analysis/WordlistLoader.cs \
- $(lucenedir)/Document/DateField.cs \
- $(lucenedir)/Document/DateTools.cs \
- $(lucenedir)/Document/Document.cs \
- $(lucenedir)/Document/Field.cs \
- $(lucenedir)/Document/NumberTools.cs \
- $(lucenedir)/Index/CompoundFileReader.cs \
- $(lucenedir)/Index/CompoundFileWriter.cs \
- $(lucenedir)/Index/DocumentWriter.cs \
- $(lucenedir)/Index/FieldInfo.cs \
- $(lucenedir)/Index/FieldInfos.cs \
- $(lucenedir)/Index/FieldsReader.cs \
- $(lucenedir)/Index/FieldsWriter.cs \
- $(lucenedir)/Index/FilterIndexReader.cs \
- $(lucenedir)/Index/IndexFileNameFilter.cs \
- $(lucenedir)/Index/IndexFileNames.cs \
- $(lucenedir)/Index/IndexModifier.cs \
- $(lucenedir)/Index/IndexReader.cs \
- $(lucenedir)/Index/IndexWriter.cs \
- $(lucenedir)/Index/MultipleTermPositions.cs \
- $(lucenedir)/Index/MultiReader.cs \
- $(lucenedir)/Index/ParallelReader.cs \
- $(lucenedir)/Index/SegmentInfo.cs \
- $(lucenedir)/Index/SegmentInfos.cs \
- $(lucenedir)/Index/SegmentMergeInfo.cs \
- $(lucenedir)/Index/SegmentMergeQueue.cs \
- $(lucenedir)/Index/SegmentMerger.cs \
- $(lucenedir)/Index/SegmentReader.cs \
- $(lucenedir)/Index/SegmentTermDocs.cs \
- $(lucenedir)/Index/SegmentTermEnum.cs \
- $(lucenedir)/Index/SegmentTermPositions.cs \
- $(lucenedir)/Index/SegmentTermPositionVector.cs \
- $(lucenedir)/Index/SegmentTermVector.cs \
- $(lucenedir)/Index/Term.cs \
- $(lucenedir)/Index/TermBuffer.cs \
- $(lucenedir)/Index/TermDocs.cs \
- $(lucenedir)/Index/TermEnum.cs \
- $(lucenedir)/Index/TermFreqVector.cs \
- $(lucenedir)/Index/TermInfo.cs \
- $(lucenedir)/Index/TermInfosReader.cs \
- $(lucenedir)/Index/TermInfosWriter.cs \
- $(lucenedir)/Index/TermPositions.cs \
- $(lucenedir)/Index/TermPositionVector.cs \
- $(lucenedir)/Index/TermVectorOffsetInfo.cs \
- $(lucenedir)/Index/TermVectorsReader.cs \
- $(lucenedir)/Index/TermVectorsWriter.cs \
- $(lucenedir)/QueryParser/CharStream.cs \
- $(lucenedir)/QueryParser/FastCharStream.cs \
- $(lucenedir)/QueryParser/MultiFieldQueryParser.cs \
- $(lucenedir)/QueryParser/ParseException.cs \
- $(lucenedir)/QueryParser/QueryParser.cs \
- $(lucenedir)/QueryParser/QueryParserConstants.cs \
- $(lucenedir)/QueryParser/QueryParserTokenManager.cs \
- $(lucenedir)/QueryParser/Token.cs \
- $(lucenedir)/QueryParser/TokenMgrError.cs \
- $(lucenedir)/Search/Regex/RegexQuery.cs \
- $(lucenedir)/Search/Regex/RegexTermEnum.cs \
- $(lucenedir)/Search/Regex/SpanRegexQuery.cs \
- $(lucenedir)/Search/Spans/NearSpans.cs \
- $(lucenedir)/Search/Spans/SpanFirstQuery.cs \
- $(lucenedir)/Search/Spans/SpanNearQuery.cs \
- $(lucenedir)/Search/Spans/SpanNotQuery.cs \
- $(lucenedir)/Search/Spans/SpanOrQuery.cs \
- $(lucenedir)/Search/Spans/SpanQuery.cs \
- $(lucenedir)/Search/Spans/Spans.cs \
- $(lucenedir)/Search/Spans/SpanScorer.cs \
- $(lucenedir)/Search/Spans/SpanTermQuery.cs \
- $(lucenedir)/Search/Spans/SpanWeight.cs \
- $(lucenedir)/Search/BooleanClause.cs \
- $(lucenedir)/Search/BooleanQuery.cs \
- $(lucenedir)/Search/BooleanScorer.cs \
- $(lucenedir)/Search/BooleanScorer2.cs \
- $(lucenedir)/Search/CachingWrapperFilter.cs \
- $(lucenedir)/Search/ConjunctionScorer.cs \
- $(lucenedir)/Search/ConstantScoreQuery.cs \
- $(lucenedir)/Search/ConstantScoreRangeQuery.cs \
- $(lucenedir)/Search/DateFilter.cs \
- $(lucenedir)/Search/DefaultSimilarity.cs \
- $(lucenedir)/Search/DisjunctionMaxQuery.cs \
- $(lucenedir)/Search/DisjunctionMaxScorer.cs \
- $(lucenedir)/Search/DisjunctionSumScorer.cs \
- $(lucenedir)/Search/ExactPhraseScorer.cs \
- $(lucenedir)/Search/Explanation.cs \
- $(lucenedir)/Search/FieldCache.cs \
- $(lucenedir)/Search/FieldCacheImpl.cs \
- $(lucenedir)/Search/FieldDoc.cs \
- $(lucenedir)/Search/FieldDocSortedHitQueue.cs \
- $(lucenedir)/Search/FieldSortedHitQueue.cs \
- $(lucenedir)/Search/Filter.cs \
- $(lucenedir)/Search/FilteredQuery.cs \
- $(lucenedir)/Search/FilteredTermEnum.cs \
- $(lucenedir)/Search/FuzzyQuery.cs \
- $(lucenedir)/Search/FuzzyTermEnum.cs \
- $(lucenedir)/Search/Hit.cs \
- $(lucenedir)/Search/HitCollector.cs \
- $(lucenedir)/Search/HitIterator.cs \
- $(lucenedir)/Search/HitQueue.cs \
- $(lucenedir)/Search/Hits.cs \
- $(lucenedir)/Search/IndexSearcher.cs \
- $(lucenedir)/Search/MatchAllDocsQuery.cs \
- $(lucenedir)/Search/MultiPhraseQuery.cs \
- $(lucenedir)/Search/MultiSearcher.cs \
- $(lucenedir)/Search/MultiTermQuery.cs \
- $(lucenedir)/Search/NonMatchingScorer.cs \
- $(lucenedir)/Search/ParallelMultiSearcher.cs \
- $(lucenedir)/Search/PhrasePositions.cs \
- $(lucenedir)/Search/PhrasePrefixQuery.cs \
- $(lucenedir)/Search/PhraseQuery.cs \
- $(lucenedir)/Search/PhraseQueue.cs \
- $(lucenedir)/Search/PhraseScorer.cs \
- $(lucenedir)/Search/PrefixQuery.cs \
- $(lucenedir)/Search/Query.cs \
- $(lucenedir)/Search/QueryFilter.cs \
- $(lucenedir)/Search/QueryTermVector.cs \
- $(lucenedir)/Search/RangeFilter.cs \
- $(lucenedir)/Search/RangeQuery.cs \
- $(lucenedir)/Search/ReqExclScorer.cs \
- $(lucenedir)/Search/ReqOptSumScorer.cs \
- $(lucenedir)/Search/ScoreDoc.cs \
- $(lucenedir)/Search/ScoreDocComparator.cs \
- $(lucenedir)/Search/Scorer.cs \
- $(lucenedir)/Search/Searchable.cs \
- $(lucenedir)/Search/Searcher.cs \
- $(lucenedir)/Search/Similarity.cs \
- $(lucenedir)/Search/SimilarityDelegator.cs \
- $(lucenedir)/Search/SloppyPhraseScorer.cs \
- $(lucenedir)/Search/Sort.cs \
- $(lucenedir)/Search/SortComparator.cs \
- $(lucenedir)/Search/SortComparatorSource.cs \
- $(lucenedir)/Search/SortField.cs \
- $(lucenedir)/Search/TermQuery.cs \
- $(lucenedir)/Search/TermScorer.cs \
- $(lucenedir)/Search/TopDocs.cs \
- $(lucenedir)/Search/TopFieldDocs.cs \
- $(lucenedir)/Search/Weight.cs \
- $(lucenedir)/Search/WildcardQuery.cs \
- $(lucenedir)/Search/WildcardTermEnum.cs \
- $(lucenedir)/Store/BufferedIndexInput.cs \
- $(lucenedir)/Store/BufferedIndexOutput.cs \
- $(lucenedir)/Store/Directory.cs \
- $(lucenedir)/Store/FSDirectory.cs \
- $(lucenedir)/Store/IndexInput.cs \
- $(lucenedir)/Store/IndexOutput.cs \
- $(lucenedir)/Store/InputStream.cs \
- $(lucenedir)/Store/Lock.cs \
- $(lucenedir)/Store/MMapDirectory.cs \
- $(lucenedir)/Store/OutputStream.cs \
- $(lucenedir)/Store/RAMDirectory.cs \
- $(lucenedir)/Store/RAMFile.cs \
- $(lucenedir)/Store/RAMInputStream.cs \
- $(lucenedir)/Store/RAMOutputStream.cs \
- $(lucenedir)/Util/BitVector.cs \
- $(lucenedir)/Util/Constants.cs \
- $(lucenedir)/Util/Parameter.cs \
- $(lucenedir)/Util/PriorityQueue.cs \
- $(lucenedir)/Util/SmallFloat.cs \
- $(lucenedir)/Util/StringHelper.cs \
- $(lucenedir)/Util/ToStringUtils.cs \
- $(lucenedir)/LucenePackage.cs \
- $(lucenedir)/SharpZipLibAdapter.cs \
- $(lucenedir)/SupportClass.cs
-
-# Stuff we don't build because we don't use it and it
-# introduces additional library dependencies.
-IGNORED_LUCENE_CSFILES = \
- $(lucenedir)/Search/RemoteSearchable.cs
-
-LUCENE_CSFILES = $(LUCENE_1_9_CSFILES)
+LUCENE_CSFILES = $(LUCENE_2_1_CSFILES)
############################################################
Modified: trunk/beagle/beagled/NoiseFilter.cs
==============================================================================
--- trunk/beagle/beagled/NoiseFilter.cs (original)
+++ trunk/beagle/beagled/NoiseFilter.cs Mon Mar 3 22:03:27 2008
@@ -38,7 +38,7 @@
// 1. Removes words which are potential noise like dhyhy8ju7q9
// 2. Splits email addresses into meaningful tokens
// 3. Splits hostnames into subparts
- class NoiseEmailHostFilter : TokenFilter {
+ public class NoiseEmailHostFilter : TokenFilter {
private bool tokenize_email_hostname;
@@ -131,13 +131,13 @@
// Someone might like to search for emails, hostnames and
// phone numbers (which fall under type NUM)
private static readonly string tokentype_email
- = LNSA.StandardTokenizerConstants.tokenImage [LNSA.StandardTokenizerConstants.EMAIL];
+ = LNSA.StandardTokenizerImpl.TOKEN_TYPES [LNSA.StandardTokenizerImpl.EMAIL];
private static readonly string tokentype_host
- = LNSA.StandardTokenizerConstants.tokenImage [LNSA.StandardTokenizerConstants.HOST];
+ = LNSA.StandardTokenizerImpl.TOKEN_TYPES [LNSA.StandardTokenizerImpl.HOST];
private static readonly string tokentype_number
- = LNSA.StandardTokenizerConstants.tokenImage [LNSA.StandardTokenizerConstants.NUM];
+ = LNSA.StandardTokenizerImpl.TOKEN_TYPES [LNSA.StandardTokenizerImpl.NUM];
private static readonly string tokentype_alphanum
- = LNSA.StandardTokenizerConstants.tokenImage [LNSA.StandardTokenizerConstants.ALPHANUM];
+ = LNSA.StandardTokenizerImpl.TOKEN_TYPES [LNSA.StandardTokenizerImpl.ALPHANUM];
private bool ProcessToken (ref Lucene.Net.Analysis.Token token)
{
@@ -166,10 +166,10 @@
if (begin == 0)
return ! IsNoise (text);
token = new Lucene.Net.Analysis.Token (
- token.TermText ().Remove (0, begin),
- token.StartOffset (),
+ text.Remove (0, begin),
+ begin,
token.EndOffset (),
- token.Type ());
+ type);
return true;
} else if (type == tokentype_email) {
if (tokenize_email_hostname)
@@ -184,27 +184,46 @@
return ! IsNoise (token.TermText ());
}
- private Queue parts = new Queue ();
- private Lucene.Net.Analysis.Token token;
+ // State for creating smaller tokens from larger email/hostname tokens
+ private string[] parts = null;
+ private int parts_index = -1;
+ private int last_end_offset = -1;
+ private string token_type = null;
public override Lucene.Net.Analysis.Token Next ()
{
- if (parts.Count != 0) {
- string part = (string) parts.Dequeue ();
- Lucene.Net.Analysis.Token part_token;
- // FIXME: Searching for google.com will not match www.google.com.
- // If we decide to allow google-style "abcd.1234" which means
- // "abcd 1234" as a consequtive phrase, then adjusting
- // the startOffset and endOffset would enable matching
- // google.com to www.google.com
- part_token = new Lucene.Net.Analysis.Token (part,
- token.StartOffset (),
- token.EndOffset (),
- token.Type ());
- part_token.SetPositionIncrement (0);
- return part_token;
+ if (parts != null) {
+ if (++parts_index < parts.Length) {
+ string part = parts [parts_index];
+ Lucene.Net.Analysis.Token part_token;
+ // FIXME: Searching for google.com will not match www.google.com.
+ // If we decide to allow google-style "abcd.1234" which means
+ // "abcd 1234" as a consecutive phrase, then adjusting
+ // the startOffset and endOffset would enable matching
+ // google.com to www.google.com
+ int start_offset = (parts_index == 0 && token_type == tokentype_email ?
+ 0 :
+ last_end_offset + 1); // assuming only one separator
+ int end_offset = start_offset + part.Length;
+ part_token = new Lucene.Net.Analysis.Token (part,
+ start_offset,
+ end_offset,
+ token_type);
+ part_token.SetPositionIncrement (0);
+ last_end_offset = (parts_index == 0 && token_type == tokentype_email ?
+ -1 :
+ end_offset); // assuming only one separator
+ return part_token;
+ } else {
+ // clear the array
+ parts = null;
+ parts_index = -1;
+ last_end_offset = -1;
+ token_type = null;
+ }
}
+ Token token;
while ( (token = token_stream.Next ()) != null) {
//Console.WriteLine ("Found token: [{0}]", token.TermText ());
if (ProcessToken (ref token))
@@ -213,42 +232,52 @@
return null;
}
- char[] replace_array = { '@', '.', '-', '_', '+' };
+ private static readonly char[] replace_array = { '@', '.', '-', '_', '+' };
+
private void ProcessEmailToken (Lucene.Net.Analysis.Token token)
{
+ token_type = tokentype_email;
+
string email = token.TermText ();
- string[] tmp = email.Split (replace_array);
- int l = tmp.Length;
+ parts = email.Split (replace_array);
+ if (parts.Length == 1) // safety check
+ return;
- // store username part as a large token
int index_at = email.IndexOf ('@');
- tmp [l-1] = email.Substring (0, index_at);
-
- foreach (string s in tmp)
- parts.Enqueue (s);
-
+ // store username part as a large token
+ // and also remove the final tld part
+ Array.Copy (parts, 0, parts, 1, parts.Length - 1);
+ parts [0] = email.Substring (0, index_at);
}
private void ProcessURLToken (Lucene.Net.Analysis.Token token)
{
+ token_type = tokentype_host;
+
string hostname = token.TermText ();
- string[] host_parts = hostname.Split ('.');
+ parts = hostname.Split ('.');
+
+ if (parts [0] != "www")
+ return;
// remove initial www
- int begin_index = (host_parts [0] == "www" ? 1 : 0);
+ Array.Copy (parts, 1, parts, 0, parts.Length - 1);
+ Array.Resize (ref parts, parts.Length - 1);
// FIXME: Remove final tld
// Any string of form "<alnum> '.')+<alnum>" has type HOST
// Removing last token might remove important words from non-host
// string of that form. To fix that, we need to match against the
// huge list of TLDs.
- for (int i = begin_index; i < host_parts.Length; ++i)
- parts.Enqueue (host_parts [i]);
-
}
}
-#if false
+#if Noisefilter
public class AnalyzerTest {
+ public static void Main ()
+ {
+ Analyze (Console.In);
+ }
+
public static void Analyze (TextReader reader)
{
Lucene.Net.Analysis.Token lastToken = null;
Modified: trunk/beagle/beagled/Snowball.Net/Lucene.Net/Analysis/Snowball/SnowballFilter.cs
==============================================================================
--- trunk/beagle/beagled/Snowball.Net/Lucene.Net/Analysis/Snowball/SnowballFilter.cs (original)
+++ trunk/beagle/beagled/Snowball.Net/Lucene.Net/Analysis/Snowball/SnowballFilter.cs Mon Mar 3 22:03:27 2008
@@ -60,7 +60,13 @@
throw new System.SystemException(e.ToString());
}
}
-
+
+ public SnowballFilter(TokenStream in_Renamed, SnowballProgram stemmer, System.Reflection.MethodInfo stemMethod) : base(in_Renamed)
+ {
+ this.stemmer = stemmer;
+ this.stemMethod = stemMethod;
+ }
+
/// <summary>Returns the next input Token, after being stemmed </summary>
public override Token Next()
{
@@ -81,5 +87,12 @@
newToken.SetPositionIncrement(token.GetPositionIncrement());
return newToken;
}
+
+ public override void Close()
+ {
+ // In case stemmer was shared
+ stemmer.SetCurrent(String.Empty);
+ base.Close();
+ }
}
-}
\ No newline at end of file
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]