Update latest code

2022-01-06 08:35:44 +00:00 · 2022-01-06 08:35:44 +00:00 · 17a9dc6984
commit 17a9dc6984
parent b71b8d41ce
2 changed files with 80 additions and 12 deletions
--- a/input_classifier.py
+++ b/input_classifier.py
@ -1,4 +1,5 @@
 import pdb
 import re
 """ 
 Find and make lists of #, *, @ words 
@ -57,6 +58,7 @@ for word in allList:
 rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test" 
 #className(rawstring) '''
 # Variable configurations
 search_patterns = {
    'tokensystem-C':{
        1:['#']
@ -104,20 +106,29 @@ conflict_matrix = {
    }
 }
 # Find some value or return as noise
 def apply_rule1(*argv):
    """Run a rule callable and map an explicit ``False`` result to ``None``.

    The first positional argument is the rule to call; every remaining
    positional argument is forwarded to it unchanged.

    Returns:
        ``None`` when the rule returns the ``False`` object itself,
        otherwise whatever the rule returned (falsy values such as ``0``
        or ``''`` are passed through untouched).

    Raises:
        IndexError: if called without any argument.
    """
    rule = argv[0]
    outcome = rule(*argv[1:])
    # Identity check on purpose: only the literal False counts as "noise".
    return None if outcome is False else outcome
 def extract_specialcharacter_words(rawstring, special_characters):
    wordList = []
-    for word in rawstring.strip().split(' '):
+    for word in rawstring.split(' '):
-        if word[-1] in special_characters and (len(word) != 1 or word==":"):
+        if (len(word) != 1 or word==":") and word[-1] in special_characters:
            wordList.append(word)
    return wordList
-def find_first_classification(parsed_list, search_patterns):
+def find_first_classification(parsed_word_list, search_patterns):
    for first_classification in search_patterns.keys():
        counter = 0
        for key in search_patterns[first_classification].keys():
-            if checkSearchPattern(parsed_list, search_patterns[first_classification][key]):
+            if checkSearchPattern(parsed_word_list, search_patterns[first_classification][key]):
-                return {'categorization':f"{first_classification}",'key':f"{key}",'pattern':search_patterns[first_classification][key]}
+                return {'categorization':f"{first_classification}",'key':f"{key}",'pattern':search_patterns[first_classification][key], 'wordlist':parsed_word_list}
    return {'categorization':"noise"}
@ -143,7 +154,6 @@ def sort_specialcharacter_wordlist(inputlist):
 def classify_rawstring(rawstring):
    """Classify ``rawstring`` from the special-character words it contains.

    The words selected by ``extract_specialcharacter_words`` for the
    characters ``@ * $ # :`` are printed (debug output) and then matched
    against the module-level ``search_patterns`` table.

    Returns:
        The dictionary produced by ``find_first_classification``.
    """
    marker_characters = ['@','*','$','#',':']
    marked_words = extract_specialcharacter_words(rawstring, marker_characters)
    print(marked_words)
    return find_first_classification(marked_words, search_patterns)
@ -158,6 +168,54 @@ def checkSearchPattern(parsed_list, searchpattern):
        return True
 def extractAmount_rule(text):
    """Extract the single numeric amount mentioned in ``text``.

    ``text`` is split on single spaces and every word is inspected. Three
    forms are recognised:

    * a bare number (``"500"``),
    * a number followed by a scale word (``"500 million"``),
    * a number fused with a scale word (``"500million"``).

    Supported scale words are ``thousand``, ``million``, ``billion`` and
    ``trillion``.

    Args:
        text: the sentence to scan.

    Returns:
        The amount as a float when exactly one amount is found; ``None``
        when the text holds no amount or more than one (ambiguous).

    Note:
        Words are parsed with ``float()``, which also accepts tokens such
        as ``"inf"`` and ``"nan"``.
    """
    base_units = {'thousand': 10 ** 3, 'million': 10 ** 6, 'billion': 10 ** 9, 'trillion': 10 ** 12}
    words = text.split(' ')
    matches = 0
    value = None
    for idx, word in enumerate(words):
        try:
            number = float(word)
        except ValueError:
            # Not a bare number: look for the fused "<number><unit>" form.
            for unit, multiplier in base_units.items():
                if not word.endswith(unit) or len(word) == len(unit):
                    continue
                try:
                    value = float(word[:-len(unit)]) * multiplier
                except ValueError:
                    # The prefix is not numeric, so this word is no amount.
                    continue
                matches = matches + 1
            continue
        # Guard the look-ahead: a number in the last position has no
        # following word and must still be counted as an amount.
        following = words[idx + 1] if idx + 1 < len(words) else None
        value = number * base_units.get(following, 1)
        matches = matches + 1
    return value if matches == 1 else None
 def selectCateogry(rawstring, wordlist, category1, category2):
 def text_preprocessing(text):
    """Normalise ``text`` for classification.

    Leading and trailing whitespace is dropped, every run of whitespace
    (spaces, tabs, new lines) becomes a single space, and the result is
    lower-cased.
    """
    # split() without arguments discards leading/trailing whitespace and
    # treats any whitespace run as one separator, so a single pass covers
    # the trimming, tab/new-line removal and space collapsing.
    tokens = text.split()
    return ' '.join(tokens).lower()
 text_list = [
    "create 500 million rmt#",
@ -180,5 +238,14 @@ text_list = [
    "Send 15 rupee# to swap-rupee-article@ its FLO address being FJXw6QGVVaZVvqpyF422Aj4FWQ6jm8p2dL$"
 ]
-for text in text_list:
+text_list1 = [
-    print(f"{classify_rawstring(text)} \n\n")
+    "create 5 million rmt#"
 ]
 # Driver loop: normalise each sample sentence, classify it, and for the
 # 'tokensystem-C' category try to extract the amount from the sentence.
 for text in text_list1:
    text = text_preprocessing(text)
    first_classification = classify_rawstring(text)
    if first_classification['categorization'] == 'tokensystem-C':
        # apply_rule1 returns None when the rule reports False.
        amount = apply_rule1(extractAmount_rule,text)
        # NOTE(review): apply_rule1 reads argv[0], so calling it without
        # arguments raises IndexError. This looks like an unfinished
        # placeholder; the rule meant to determine the operation still
        # needs to be passed in.
        operation = apply_rule1()
--- a/parser_function_definitions.py
+++ b/parser_function_definitions.py
@ -109,6 +109,9 @@ def truefalse_rule2(rawstring, permitted_list, denied_list):
        return False
 def selectCateogry(rawstring, wordlist, category1, category2):
 """
 CLASSIFY RULES 
@ -120,7 +123,6 @@ CLASSIFY RULES
 """
 """
 REJECT RULES 
@ -140,7 +142,6 @@ REJECT RULES
 def rejectrule9(rawtext, starword):
    """Placeholder for reject rule 9: not implemented, always returns None."""
    pass
 ''''''
 extractContractConditions(cleanstring, contracttype, blocktime=blockinfo['time'], marker=hashList[0][:-1])