Added first categorization function

This commit is contained in:
Vivek Teega 2022-01-05 09:41:34 +00:00
parent 8fcd54beb3
commit 6ed3eaed09

25
test.py
View File

@ -46,21 +46,13 @@ def findrules(rawstring, special_characters):
return wordList
def findFirstCategorization(parsed_list, searchpatterns):
    """Return the first entry of ``searchpatterns`` that matches ``parsed_list``.

    ``searchpatterns`` is a two-level mapping: categorization name ->
    {key -> pattern}.  Each pattern is handed, together with
    ``parsed_list``, to ``checkSearchPattern``; the first pattern for which
    that call returns a truthy value wins.  Categories and keys are tried in
    the mappings' own iteration order.

    NOTE(review): the exact matching rule lives in ``checkSearchPattern``,
    which is defined elsewhere in this file -- confirm its contract there.

    Args:
        parsed_list: The parsed tokens to classify.
        searchpatterns: Mapping of categorization -> mapping of key ->
            pattern.

    Returns:
        On a match, a dict with the stringified ``'categorization'`` and
        ``'key'`` plus the matching ``'pattern'`` object itself.  When
        nothing matches, ``{'categorization': 'noise'}``.
    """
    for categorization, patterns in searchpatterns.items():
        for key, pattern in patterns.items():
            if checkSearchPattern(parsed_list, pattern):
                # Category and key are rendered to strings (keys may be
                # ints); the pattern is returned unchanged.
                return {
                    'categorization': f"{categorization}",
                    'key': f"{key}",
                    'pattern': pattern,
                }
    # No pattern in any category matched.
    return {'categorization': "noise"}
def checkSearchPattern(parsed_list, searchpattern):
@ -82,9 +74,6 @@ def className(rawstring):
allList = findrules(rawstring,['#','*','@',':'])
allList = ['rmt@','rmt*',':',"rmt#","rmt#"]
allList1 = ['rmt#',':',"rmt@"]
search_patterns = {
'tokensystem-C':{
1:['#']
@ -109,9 +98,11 @@ def className(rawstring):
}
}
patternmatch = checkSearchPattern(allList1, ['#',':','@','@'])
pattern_list1 = ['rmt@','rmt*',':',"rmt#","rmt#"]
pattern_list2 = ['rmt#',':',"rmt@"]
pattern_list3 = ['rmt#']
patternmatch = findFirstCategorization(pattern_list3, search_patterns)
print(f"Patternmatch is {patternmatch}")
rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test"
className(rawstring)