From 6ed3eaed0972e841aff3ff75ae49c62401556fb4 Mon Sep 17 00:00:00 2001 From: Vivek Teega Date: Wed, 5 Jan 2022 09:41:34 +0000 Subject: [PATCH] Added first categorization function --- test.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/test.py b/test.py index 7963e49..82b2271 100644 --- a/test.py +++ b/test.py @@ -46,21 +46,13 @@ def findrules(rawstring, special_characters): return wordList -def checkSearchPattern1(parsed_list, searchpatterns): +def findFirstCategorization(parsed_list, searchpatterns): for firstCategorization in searchpatterns.keys(): counter = 0 for key in searchpatterns[firstCategorization].keys(): - if len(parsed_list) != len(searchpatterns[firstCategorization][key]): - continue - else: - pdb.set_trace() - for idx,val in enumerate(parsed_list): - if not parsed_list[idx].endswith(searchpatterns[firstCategorization][key][idx]): - return False - return True - if counter >= 1: - return firstCategorization - return 'noise' + if checkSearchPattern(parsed_list, searchpatterns[firstCategorization][key]): + return {'categorization':f"{firstCategorization}",'key':f"{key}",'pattern':searchpatterns[firstCategorization][key]} + return {'categorization':"noise"} def checkSearchPattern(parsed_list, searchpattern): @@ -82,9 +74,6 @@ def className(rawstring): allList = findrules(rawstring,['#','*','@',':']) - allList = ['rmt@','rmt*',':',"rmt#","rmt#"] - allList1 = ['rmt#',':',"rmt@"] - search_patterns = { 'tokensystem-C':{ 1:['#'] @@ -109,9 +98,11 @@ def className(rawstring): } } - patternmatch = checkSearchPattern(allList1, ['#',':','@','@']) + pattern_list1 = ['rmt@','rmt*',':',"rmt#","rmt#"] + pattern_list2 = ['rmt#',':',"rmt@"] + pattern_list3 = ['rmt#'] + patternmatch = findFirstCategorization(pattern_list3, search_patterns) print(f"Patternmatch is {patternmatch}") - rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test" className(rawstring) \ No newline at end of file