Browse Source

add search query parser (#1652)

* add search query parser

* add some search tests; use consistent analyzers for indexing and searching
pull/1653/head
Jason Dove 2 years ago committed by GitHub
parent
commit
60bb369d0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      CHANGELOG.md
  2. 24
      ErsatzTV.Infrastructure.Tests/Search/SearchQueryParserTests.cs
  3. 15
      ErsatzTV.Infrastructure/Search/CustomAnalyzer.cs
  4. 4
      ErsatzTV.Infrastructure/Search/ElasticSearchIndex.cs
  5. 55
      ErsatzTV.Infrastructure/Search/LuceneSearchIndex.cs
  6. 71
      ErsatzTV.Infrastructure/Search/SearchQueryParser.cs
  7. 1
      ErsatzTV/Services/EmbyService.cs
  8. 1
      ErsatzTV/Services/JellyfinService.cs
  9. 1
      ErsatzTV/Services/PlexService.cs
  10. 5
      ErsatzTV/Services/ScannerService.cs

1
CHANGELOG.md

@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). @@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Fix multi-collection editor improperly disabling collections/smart collections that haven't already been added to the multi-collection
- Fix path replacement logic when media server paths use inconsistent casing (e.g. `\\SERVERNAME` and `\\ServerName`)
- Fix *many* search queries, including actors with the name `Will`
- Fix sqlite `database is locked` error that would crash ETV on startup after search index corruption
### Changed
- Log search index updates under scanner category at debug level, to indicate a potential cause for the UI being out of date

24
ErsatzTV.Infrastructure.Tests/Search/SearchQueryParserTests.cs

@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
using ErsatzTV.Infrastructure.Search;
using FluentAssertions;
using Lucene.Net.Search;
using NUnit.Framework;
namespace ErsatzTV.Infrastructure.Tests.Search;
/// <summary>
/// Tests for <see cref="SearchQueryParser"/>.
/// </summary>
public class SearchQueryParserTests
{
    /// <summary>
    /// Checks the string form of the query produced by
    /// <see cref="SearchQueryParser.ParseQuery(string)"/> for a set of inputs.
    /// </summary>
    [TestFixture]
    public class ParseQuery
    {
        /// <summary>
        /// Parses <paramref name="input"/> and compares the parsed query's
        /// <c>ToString()</c> output with <paramref name="expected"/>.
        /// </summary>
        /// <param name="input">The raw search query text.</param>
        /// <param name="expected">The expected string form of the parsed query.</param>
        [TestCase("actor:\"Will Smith\"", "actor:\"will smith\"")]
        [TestCase("tag:\"Will Smith\"", "tag:\"will smith\"")]
        [TestCase("library_id:4", "library_id:4")]
        [TestCase("content_rating:\"TV-14\"", "content_rating:TV-14")]
        [TestCase("content_rating:\"test PHRASE\"", "content_rating:\"test PHRASE\"")]
        public void Test(string input, string expected)
        {
            Query parsed = SearchQueryParser.ParseQuery(input);
            string actual = parsed.ToString();

            actual.Should().Be(expected);
        }
    }
}

15
ErsatzTV.Infrastructure/Search/CustomAnalyzer.cs

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Util;
namespace ErsatzTV.Infrastructure.Search;
/// <summary>
/// Analyzer that tokenizes with a <see cref="WhitespaceTokenizer"/> and passes the
/// tokens through a <see cref="LowerCaseFilter"/>.
/// </summary>
/// <param name="matchVersion">Lucene version handed to the tokenizer and the filter.</param>
public sealed class CustomAnalyzer(LuceneVersion matchVersion) : Analyzer
{
    /// <summary>
    /// Builds the token stream components for a field: a whitespace tokenizer over
    /// <paramref name="reader"/> followed by a lower-case filter.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>Components whose source is the tokenizer and whose result is the filter.</returns>
    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        var source = new WhitespaceTokenizer(matchVersion, reader);
        var lowerCased = new LowerCaseFilter(matchVersion, source);

        return new TokenStreamComponents(source, lowerCased);
    }
}

4
ErsatzTV.Infrastructure/Search/ElasticSearchIndex.cs

@ -46,7 +46,7 @@ public class ElasticSearchIndex : ISearchIndex @@ -46,7 +46,7 @@ public class ElasticSearchIndex : ISearchIndex
return exists.IsValidResponse;
}
public int Version => 42;
public int Version => 43;
public async Task<bool> Initialize(
ILocalFileSystem localFileSystem,
@ -162,7 +162,7 @@ public class ElasticSearchIndex : ISearchIndex @@ -162,7 +162,7 @@ public class ElasticSearchIndex : ISearchIndex
var items = new List<MinimalElasticSearchItem>();
var totalCount = 0;
Query parsedQuery = LuceneSearchIndex.ParseQuery(query);
Query parsedQuery = SearchQueryParser.ParseQuery(query);
SearchResponse<MinimalElasticSearchItem> response = await _client.SearchAsync<MinimalElasticSearchItem>(
s => s.Index(IndexName)

55
ErsatzTV.Infrastructure/Search/LuceneSearchIndex.cs

@ -11,12 +11,8 @@ using ErsatzTV.FFmpeg; @@ -11,12 +11,8 @@ using ErsatzTV.FFmpeg;
using ErsatzTV.FFmpeg.Format;
using LanguageExt.UnsafeValueAccess;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Sandbox.Queries;
using Lucene.Net.Search;
using Lucene.Net.Store;
@ -30,7 +26,7 @@ namespace ErsatzTV.Infrastructure.Search; @@ -30,7 +26,7 @@ namespace ErsatzTV.Infrastructure.Search;
public sealed class LuceneSearchIndex : ISearchIndex
{
private const LuceneVersion AppLuceneVersion = LuceneVersion.LUCENE_48;
internal const LuceneVersion AppLuceneVersion = LuceneVersion.LUCENE_48;
internal const string IdField = "id";
internal const string TypeField = "type";
@ -114,7 +110,7 @@ public sealed class LuceneSearchIndex : ISearchIndex @@ -114,7 +110,7 @@ public sealed class LuceneSearchIndex : ISearchIndex
return Task.FromResult(directoryExists && fileExists);
}
public int Version => 42;
public int Version => 43;
public async Task<bool> Initialize(
ILocalFileSystem localFileSystem,
@ -138,7 +134,7 @@ public sealed class LuceneSearchIndex : ISearchIndex @@ -138,7 +134,7 @@ public sealed class LuceneSearchIndex : ISearchIndex
}
_directory = FSDirectory.Open(FileSystemLayout.SearchIndexFolder);
var analyzer = new StandardAnalyzer(AppLuceneVersion);
Analyzer analyzer = SearchQueryParser.AnalyzerWrapper();
var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer)
{ OpenMode = OpenMode.CREATE_OR_APPEND };
_writer = new IndexWriter(_directory, indexConfig);
@ -220,7 +216,7 @@ public sealed class LuceneSearchIndex : ISearchIndex @@ -220,7 +216,7 @@ public sealed class LuceneSearchIndex : ISearchIndex
using DirectoryReader reader = _writer.GetReader(true);
var searcher = new IndexSearcher(reader);
int hitsLimit = limit == 0 ? searcher.IndexReader.MaxDoc : skip + limit;
Query parsedQuery = ParseQuery(query);
Query parsedQuery = SearchQueryParser.ParseQuery(query);
// TODO: figure out if this is actually needed
// var filter = new DuplicateFilter(TitleAndYearField);
var sort = new Sort(new SortField(SortTitleField, SortFieldType.STRING));
@ -295,7 +291,7 @@ public sealed class LuceneSearchIndex : ISearchIndex @@ -295,7 +291,7 @@ public sealed class LuceneSearchIndex : ISearchIndex
{
using (var d = FSDirectory.Open(folder))
{
using (var analyzer = new StandardAnalyzer(AppLuceneVersion))
using (Analyzer analyzer = SearchQueryParser.AnalyzerWrapper())
{
var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer)
{ OpenMode = OpenMode.CREATE_OR_APPEND };
@ -1372,47 +1368,6 @@ public sealed class LuceneSearchIndex : ISearchIndex @@ -1372,47 +1368,6 @@ public sealed class LuceneSearchIndex : ISearchIndex
doc.Get(TypeField, CultureInfo.InvariantCulture),
Convert.ToInt32(doc.Get(IdField, CultureInfo.InvariantCulture), CultureInfo.InvariantCulture));
/// <summary>
/// Parses a search string into a Lucene <see cref="Query"/>.
/// A <see cref="SimpleAnalyzer"/> is the default; the fields listed below use a
/// <see cref="KeywordAnalyzer"/>, and the plot field uses a <see cref="StandardAnalyzer"/>.
/// </summary>
/// <param name="query">The raw search text.</param>
/// <returns>The parsed query.</returns>
internal static Query ParseQuery(string query)
{
    using var simpleAnalyzer = new SimpleAnalyzer(AppLuceneVersion);

    // per-field overrides of the default analyzer
    var perFieldAnalyzers = new Dictionary<string, Analyzer>
    {
        { IdField, new KeywordAnalyzer() },
        { LibraryIdField, new KeywordAnalyzer() },
        { LibraryFolderIdField, new KeywordAnalyzer() },
        { TypeField, new KeywordAnalyzer() },
        { TagField, new KeywordAnalyzer() },
        { ShowTagField, new KeywordAnalyzer() },
        { ContentRatingField, new KeywordAnalyzer() },
        { ShowContentRatingField, new KeywordAnalyzer() },
        { StateField, new KeywordAnalyzer() },
        { PlotField, new StandardAnalyzer(AppLuceneVersion) }
    };

    using var analyzerWrapper = new PerFieldAnalyzerWrapper(simpleAnalyzer, perFieldAnalyzers);

    // terms without an explicit field are searched against the title field
    QueryParser queryParser = new CustomMultiFieldQueryParser(AppLuceneVersion, [TitleField], analyzerWrapper);
    queryParser.AllowLeadingWildcard = true;

    Query parsed = ParseQuery(query, queryParser);
    Serilog.Log.Logger.Debug("Search query parsed from [{Query}] to [{ParsedQuery}]", query, parsed.ToString());
    return parsed;
}
/// <summary>
/// Parses the trimmed search text with <paramref name="parser"/>. If that raises a
/// <see cref="ParseException"/>, the trimmed text is escaped and parsed again.
/// </summary>
/// <param name="searchQuery">The raw search text.</param>
/// <param name="parser">The parser to run the text through.</param>
/// <returns>The parsed query.</returns>
private static Query ParseQuery(string searchQuery, QueryParser parser)
{
    try
    {
        return parser.Parse(searchQuery.Trim());
    }
    catch (ParseException)
    {
        // second attempt: escape the query syntax characters and parse again
        return parser.Parse(QueryParserBase.Escape(searchQuery.Trim()));
    }
}
private static void AddStatistics(Document doc, List<MediaVersion> mediaVersions)
{
foreach (MediaVersion version in mediaVersions)

71
ErsatzTV.Infrastructure/Search/SearchQueryParser.cs

@ -0,0 +1,71 @@ @@ -0,0 +1,71 @@
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers.Classic;
using Query = Lucene.Net.Search.Query;
namespace ErsatzTV.Infrastructure.Search;
/// <summary>
/// Builds the per-field analyzer for search and parses search text into Lucene
/// queries using that analyzer.
/// </summary>
public static class SearchQueryParser
{
    /// <summary>
    /// Creates a <see cref="PerFieldAnalyzerWrapper"/> whose default is a
    /// <see cref="CustomAnalyzer"/>, with a <see cref="KeywordAnalyzer"/> for the
    /// fields listed below and a <see cref="StandardAnalyzer"/> for the plot field.
    /// </summary>
    /// <returns>A new analyzer wrapper; each call builds a fresh instance.</returns>
    internal static Analyzer AnalyzerWrapper()
    {
        // These analyzers are referenced by the wrapper that is returned, so they must
        // not be disposed when this method exits (no `using` here). Otherwise every
        // caller would receive a wrapper around already-disposed analyzers.
        // NOTE(review): disposing the returned wrapper presumably does not dispose
        // the analyzers it wraps - confirm against the Lucene.NET AnalyzerWrapper docs.
        var defaultAnalyzer = new CustomAnalyzer(LuceneSearchIndex.AppLuceneVersion);
        var keywordAnalyzer = new KeywordAnalyzer();

        var customAnalyzers = new Dictionary<string, Analyzer>
        {
            // StringField should use KeywordAnalyzer
            { LuceneSearchIndex.IdField, keywordAnalyzer },
            { LuceneSearchIndex.TypeField, keywordAnalyzer },
            { LuceneSearchIndex.SortTitleField, keywordAnalyzer },
            { LuceneSearchIndex.LibraryIdField, keywordAnalyzer },
            { LuceneSearchIndex.TitleAndYearField, keywordAnalyzer },
            { LuceneSearchIndex.JumpLetterField, keywordAnalyzer },
            { LuceneSearchIndex.StateField, keywordAnalyzer },
            { LuceneSearchIndex.ContentRatingField, keywordAnalyzer },
            { LuceneSearchIndex.ReleaseDateField, keywordAnalyzer },
            { LuceneSearchIndex.AddedDateField, keywordAnalyzer },
            { LuceneSearchIndex.TraktListField, keywordAnalyzer },
            { LuceneSearchIndex.ShowContentRatingField, keywordAnalyzer },
            { LuceneSearchIndex.LibraryFolderIdField, keywordAnalyzer },
            { LuceneSearchIndex.VideoCodecField, keywordAnalyzer },
            { LuceneSearchIndex.VideoDynamicRange, keywordAnalyzer },
            { LuceneSearchIndex.PlotField, new StandardAnalyzer(LuceneSearchIndex.AppLuceneVersion) }
        };

        return new PerFieldAnalyzerWrapper(defaultAnalyzer, customAnalyzers);
    }

    /// <summary>
    /// Parses a search string into a Lucene <see cref="Query"/> using the analyzer from
    /// <see cref="AnalyzerWrapper"/>. Terms without an explicit field are searched
    /// against the title field, and leading wildcards are allowed.
    /// </summary>
    /// <param name="query">The raw search text.</param>
    /// <returns>The parsed query; it is also written to the debug log.</returns>
    public static Query ParseQuery(string query)
    {
        using Analyzer analyzerWrapper = AnalyzerWrapper();

        QueryParser parser = new CustomMultiFieldQueryParser(
            LuceneSearchIndex.AppLuceneVersion,
            [LuceneSearchIndex.TitleField],
            analyzerWrapper);
        parser.AllowLeadingWildcard = true;

        Query result = ParseQuery(query, parser);
        Serilog.Log.Logger.Debug("Search query parsed from [{Query}] to [{ParsedQuery}]", query, result.ToString());
        return result;
    }

    /// <summary>
    /// Parses the trimmed search text with <paramref name="parser"/>. If that raises a
    /// <see cref="ParseException"/>, the trimmed text is escaped and parsed again.
    /// </summary>
    /// <param name="searchQuery">The raw search text.</param>
    /// <param name="parser">The parser to run the text through.</param>
    /// <returns>The parsed query.</returns>
    private static Query ParseQuery(string searchQuery, QueryParser parser)
    {
        // trim once; both attempts work on the same text
        string trimmed = searchQuery.Trim();

        try
        {
            return parser.Parse(trimmed);
        }
        catch (ParseException)
        {
            // second attempt: escape the query syntax characters and parse again
            return parser.Parse(QueryParserBase.Escape(trimmed));
        }
    }
}

1
ErsatzTV/Services/EmbyService.cs

@ -32,6 +32,7 @@ public class EmbyService : BackgroundService @@ -32,6 +32,7 @@ public class EmbyService : BackgroundService
await Task.Yield();
await _systemStartup.WaitForDatabase(stoppingToken);
await _systemStartup.WaitForSearchIndex(stoppingToken);
if (stoppingToken.IsCancellationRequested)
{
return;

1
ErsatzTV/Services/JellyfinService.cs

@ -32,6 +32,7 @@ public class JellyfinService : BackgroundService @@ -32,6 +32,7 @@ public class JellyfinService : BackgroundService
await Task.Yield();
await _systemStartup.WaitForDatabase(stoppingToken);
await _systemStartup.WaitForSearchIndex(stoppingToken);
if (stoppingToken.IsCancellationRequested)
{
return;

1
ErsatzTV/Services/PlexService.cs

@ -32,6 +32,7 @@ public class PlexService : BackgroundService @@ -32,6 +32,7 @@ public class PlexService : BackgroundService
await Task.Yield();
await _systemStartup.WaitForDatabase(stoppingToken);
await _systemStartup.WaitForSearchIndex(stoppingToken);
if (stoppingToken.IsCancellationRequested)
{
return;

5
ErsatzTV/Services/ScannerService.cs

@ -17,14 +17,17 @@ public class ScannerService : BackgroundService @@ -17,14 +17,17 @@ public class ScannerService : BackgroundService
private readonly ChannelReader<IScannerBackgroundServiceRequest> _channel;
private readonly ILogger<ScannerService> _logger;
private readonly IServiceScopeFactory _serviceScopeFactory;
private readonly SystemStartup _systemStartup;
public ScannerService(
ChannelReader<IScannerBackgroundServiceRequest> channel,
IServiceScopeFactory serviceScopeFactory,
SystemStartup systemStartup,
ILogger<ScannerService> logger)
{
_channel = channel;
_serviceScopeFactory = serviceScopeFactory;
_systemStartup = systemStartup;
_logger = logger;
}
@ -32,6 +35,8 @@ public class ScannerService : BackgroundService @@ -32,6 +35,8 @@ public class ScannerService : BackgroundService
{
await Task.Yield();
await _systemStartup.WaitForDatabase(stoppingToken);
await _systemStartup.WaitForSearchIndex(stoppingToken);
try
{
_logger.LogInformation("Scanner service started");

Loading…
Cancel
Save