Tokenize regexp search requests, taking the regexp context (groups, character classes, escapes) into account

This commit is contained in:
DarkPhoenix
2020-04-08 13:24:48 +03:00
parent 5707914ad5
commit b3278ca9ec
6 changed files with 83 additions and 10 deletions

View File

@@ -39,7 +39,12 @@ class ReadOnlyException(Exception):
def re_fn(expr, item):
    """Case-insensitive regex search used as a DB custom function.

    Returns True when `expr` matches somewhere in `item`; an invalid
    user-supplied pattern is treated as "no match" rather than an error.
    """
    try:
        reg = re.compile(expr, re.IGNORECASE)
    except re.error:
        # Only pattern-compilation failures are expected here; catching
        # re.error (instead of a bare except) lets real errors propagate.
        return False
    return reg.search(item) is not None

View File

@@ -170,8 +170,8 @@ class ItemView(Display):
def scheduleSearch(self, event=None):
self.searchTimer.Stop() # Cancel any pending timers
search = self.marketBrowser.search.GetLineText(0)
# Make sure we do not count wildcard as search symbol
realsearch = search.replace("*", "")
# Make sure we do not count wildcards as search symbol
realsearch = search.replace('*', '').replace('?', '')
# Re-select market group if search query has zero length
if len(realsearch) == 0:
self.selectionMade('search')

View File

@@ -86,8 +86,8 @@ class NavigationPanel(SFItem.SFBrowserItem):
def OnScheduleSearch(self, event):
search = self.BrowserSearchBox.GetValue()
# Make sure we do not count wildcard as search symbol
realsearch = search.replace("*", "")
# Make sure we do not count wildcards as search symbol
realsearch = search.replace('*', '').replace('?', '')
minChars = 1 if isStringCjk(realsearch) else 3
if len(realsearch) >= minChars:
self.lastSearch = search

View File

@@ -302,8 +302,8 @@ class ItemView(d.Display):
sMkt = Market.getInstance()
search = self.searchBox.GetLineText(0)
# Make sure we do not count wildcard as search symbol
realsearch = search.replace("*", "")
# Make sure we do not count wildcards as search symbol
realsearch = search.replace('*', '').replace('?', '')
# Show nothing if query is too short
if len(realsearch) < 3:
self.clearSearch()

View File

@@ -192,8 +192,8 @@ class ItemView(d.Display):
sMkt = Market.getInstance()
search = self.searchBox.GetLineText(0)
# Make sure we do not count wildcard as search symbol
realsearch = search.replace("*", "")
# Make sure we do not count wildcards as search symbol
realsearch = search.replace('*', '').replace('?', '')
# Show nothing if query is too short
if len(realsearch) < 3:
self.clearSearch()

View File

@@ -18,6 +18,7 @@
# ===============================================================================
import queue
import re
import threading
from collections import OrderedDict
from itertools import chain
@@ -41,6 +42,10 @@ pyfalog = Logger(__name__)
mktRdy = threading.Event()
class RegexTokenizationError(Exception):
    """Raised when a regex search request cannot be split into tokens
    (e.g. brace nesting is inconsistent); callers fall back to normal
    tokenization of the request."""
    pass
class ShipBrowserWorkerThread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
@@ -90,6 +95,7 @@ class ShipBrowserWorkerThread(threading.Thread):
class SearchWorkerThread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.name = "SearchWorker"
@@ -138,7 +144,10 @@ class SearchWorkerThread(threading.Thread):
else:
filters = [None]
requestTokens = request.split()
if request.strip().startswith('re:'):
requestTokens = self._prepareRequestRegex(request[3:])
else:
requestTokens = self._prepareRequestNormal(request)
requestTokens = self.jargonLoader.get_jargon().apply(requestTokens)
all_results = set()
@@ -166,6 +175,65 @@ class SearchWorkerThread(threading.Thread):
def stop(self):
    # Request worker shutdown; the flag is presumably polled by the
    # thread's run loop (not visible in this chunk) — confirm there.
    self.running = False
def _prepareRequestNormal(self, request):
# Escape regexp-specific symbols, and un-escape whitespaces
request = re.escape(request)
request = re.sub(r'\\(?P<ws>\s+)', '\g<ws>', request)
# Imitate wildcard search
request = re.sub(r'\\\*', r'\\w*', request)
request = re.sub(r'\\\?', r'\\w?', request)
tokens = request.split()
return tokens
def _prepareRequestRegex(self, request):
roundLvl = 0
squareLvl = 0
nextEscaped = False
tokens = []
currentToken = ''
def verifyErrors():
if squareLvl not in (0, 1):
raise RegexTokenizationError('Square braces level is {}'.format(squareLvl))
if roundLvl < 0:
raise RegexTokenizationError('Round braces level is {}'.format(roundLvl))
try:
for char in request:
thisEscaped = nextEscaped
nextEscaped = False
if thisEscaped:
currentToken += char
elif char == '\\':
nextEscaped = True
elif char == '[':
currentToken += char
squareLvl += 1
elif char == ']':
currentToken += char
squareLvl -= 1
elif char == '(' and squareLvl == 0:
currentToken += char
roundLvl += 1
elif char == ')' and squareLvl == 0:
currentToken += char
roundLvl -= 1
elif char.isspace() and roundLvl == squareLvl == 0:
if currentToken:
tokens.append(currentToken)
currentToken = ''
else:
currentToken += char
verifyErrors()
else:
if currentToken:
tokens.append(currentToken)
# Treat request as normal string if regex tokenization fails
except RegexTokenizationError:
tokens = self._prepareRequestNormal(request)
return tokens
class Market:
instance = None