Update latest code

2022-01-06 08:35:44 +00:00 · 2022-01-06 08:35:44 +00:00 · 17a9dc6984
commit 17a9dc6984
parent b71b8d41ce
2 changed files with 80 additions and 12 deletions
--- a/input_classifier.py
+++ b/input_classifier.py
@ -1,4 +1,5 @@
 import pdb
+import re

 """ 
 Find and make lists of #, *, @ words 
@ -57,6 +58,7 @@ for word in allList:
 rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test" 
 #className(rawstring) '''

+# Variable configurations
 search_patterns = {
    'tokensystem-C':{
        1:['#']
@ -104,20 +106,29 @@ conflict_matrix = {
    }
 }

+# Find some value or return as noise
+def apply_rule1(*argv):
+    """Call ``argv[0]`` with the remaining arguments and map ``False`` to ``None``.
+
+    The first positional argument is the rule to run; every following
+    argument is forwarded to it unchanged. Only a result that is exactly
+    ``False`` becomes ``None`` -- other falsy results such as ``0`` or ``''``
+    are returned as they are.
+    """
+    rule = argv[0]
+    outcome = rule(*argv[1:])
+    return None if outcome is False else outcome
+
+
 def extract_specialcharacter_words(rawstring, special_characters):
+    """Return the space-separated words of ``rawstring`` that end in a special character.
+
+    A word is kept when its last character is in ``special_characters`` and
+    it is either longer than one character or is exactly ``":"``. Empty
+    words (from an empty ``rawstring`` or from leading, trailing or repeated
+    spaces) are skipped instead of raising ``IndexError``.
+    """
    wordList = []
-    for word in rawstring.strip().split(' '):
-        if word[-1] in special_characters and (len(word) != 1 or word==":"):
+    for word in rawstring.split(' '):
+        # The emptiness check has to come first: '' has no last character.
+        if word and (len(word) != 1 or word==":") and word[-1] in special_characters:
            wordList.append(word)
    return wordList


-def find_first_classification(parsed_list, search_patterns):
+def find_first_classification(parsed_word_list, search_patterns):
+    """Return the first entry of ``search_patterns`` that ``parsed_word_list`` satisfies.
+
+    ``search_patterns`` maps a categorization name to a dict of
+    ``key -> pattern``. Categories and keys are tried in dict iteration
+    order and each pattern is tested with ``checkSearchPattern``. On the
+    first match a dict with ``categorization``, ``key`` (as a string),
+    ``pattern`` and ``wordlist`` is returned; when nothing matches the
+    result is ``{'categorization': "noise"}``.
+    """
    for first_classification in search_patterns.keys():
+        # NOTE(review): ``counter`` is assigned but never read in this function.
        counter = 0
        for key in search_patterns[first_classification].keys():
-            if checkSearchPattern(parsed_list, search_patterns[first_classification][key]):
-                return {'categorization':f"{first_classification}",'key':f"{key}",'pattern':search_patterns[first_classification][key]}
+            if checkSearchPattern(parsed_word_list, search_patterns[first_classification][key]):
+                return {'categorization':f"{first_classification}",'key':f"{key}",'pattern':search_patterns[first_classification][key], 'wordlist':parsed_word_list}
    return {'categorization':"noise"}


@ -143,7 +154,6 @@ def sort_specialcharacter_wordlist(inputlist):

 def classify_rawstring(rawstring):
+    """Classify ``rawstring`` against the module-level ``search_patterns``.
+
+    The words ending in one of ``@ * $ # :`` are collected with
+    ``extract_specialcharacter_words`` and handed to
+    ``find_first_classification``; its result dict is returned unchanged.
+    """
    specialcharacter_wordlist = extract_specialcharacter_words(rawstring,['@','*','$','#',':'])
-    print(specialcharacter_wordlist)
    first_classification = find_first_classification(specialcharacter_wordlist, search_patterns)
    return first_classification

@ -158,6 +168,54 @@ def checkSearchPattern(parsed_list, searchpattern):
        return True


+def extractAmount_rule(text):
+    """Extract the single numeric amount mentioned in ``text``.
+
+    ``text`` is split on single spaces. A word counts as an amount when it
+    either parses as a float (optionally followed by a unit word such as
+    ``million``, which multiplies it) or is a number fused with a unit
+    suffix, e.g. ``5million``.
+
+    Returns the amount as a float when exactly one amount is found, and
+    ``None`` when there is none or more than one.
+    """
+    base_units = {'thousand': 10 ** 3, 'million': 10 ** 6, 'billion': 10 ** 9, 'trillion': 10 ** 12}
+    textList = text.split(' ')
+    counter = 0
+    value = None
+    for idx, word in enumerate(textList):
+        try:
+            result = float(word)
+        except ValueError:
+            result = None
+
+        if result is not None:
+            # Look ahead for a unit word; a number that is the last word has
+            # no successor and must still be counted as a plain amount.
+            next_word = textList[idx + 1] if idx + 1 < len(textList) else None
+            if next_word in base_units:
+                value = result * base_units[next_word]
+            else:
+                value = result
+            counter = counter + 1
+            continue
+
+        # Not a bare number: accept "<number><unit>" written as one word.
+        for unit in base_units:
+            result = word.split(unit)
+            if len(result) == 2 and result[1] == '' and result[0] != '':
+                try:
+                    value = float(result[0]) * base_units[unit]
+                except ValueError:
+                    # The text before the unit is not numeric (e.g. "amillion").
+                    continue
+                counter = counter + 1
+
+    # More than one amount is ambiguous, so it is treated like no amount.
+    if counter == 1:
+        return value
+    return None
+
+
+def selectCateogry(rawstring, wordlist, category1, category2):
+    """Placeholder: not implemented yet, always returns ``None``.
+
+    Presumably meant to choose between ``category1`` and ``category2`` for
+    ``rawstring`` / ``wordlist`` -- TODO confirm and implement.
+    """
+    # A body is required here; a bare ``def`` line makes the module unparsable.
+    return None
+
+
+def text_preprocessing(text):
+    """Normalise ``text`` before classification.
+
+    Leading and trailing whitespace is removed, every run of whitespace
+    (spaces, tabs, newlines) is collapsed to a single space, and the result
+    is lower-cased.
+    """
+    # str.split() without arguments already splits on any whitespace run and
+    # drops empty fields, so separate strip / tab / newline / multi-space
+    # passes are not needed.
+    return ' '.join(text.split()).lower()
+
+
 text_list = [
    "create 500 million rmt#",

@ -180,5 +238,14 @@ text_list = [
    "Send 15 rupee# to swap-rupee-article@ its FLO address being FJXw6QGVVaZVvqpyF422Aj4FWQ6jm8p2dL$"
 ]

-for text in text_list:
-    print(f"{classify_rawstring(text)} \n\n")
+text_list1 = [
+    "create 5 million rmt#"
+]
+
+# Ad-hoc driver: normalise each sample, classify it, and for
+# 'tokensystem-C' inputs extract the amount.
+for text in text_list1:
+    text = text_preprocessing(text)
+    first_classification = classify_rawstring(text)
+    if first_classification['categorization'] == 'tokensystem-C':
+        amount = apply_rule1(extractAmount_rule,text)
+        # TODO: pass the operation-extraction rule once it exists. Calling
+        # apply_rule1() with no arguments raises IndexError on argv[0].
+        operation = None
+
--- a/parser_function_definitions.py
+++ b/parser_function_definitions.py
@ -106,8 +106,11 @@ def truefalse_rule2(rawstring, permitted_list, denied_list):
    if (foundPermitted in not None) and (foundDenied is None):
        return True
    else:
-        return False 
-    
+        return False
+
+
+def selectCateogry(rawstring, wordlist, category1, category2):
+    """Placeholder: not implemented yet, always returns ``None``.
+
+    Presumably meant to choose between ``category1`` and ``category2`` for
+    ``rawstring`` / ``wordlist`` -- TODO confirm and implement.
+    """
+    # A body is required here; a bare ``def`` line makes the module unparsable.
+    return None
+

 """
 CLASSIFY RULES 
@ -120,7 +123,6 @@ CLASSIFY RULES

 """

-
 """
 REJECT RULES 

@ -140,7 +142,6 @@ REJECT RULES
 def rejectrule9(rawtext, starword):
+    # Stub: no logic yet; ignores both arguments and returns None.
    pass
    
-''''''

+# Module-level call: passes the block time and the first hashList entry minus
+# its last character as the marker.
+# NOTE(review): cleanstring, contracttype, blockinfo and hashList are not
+# defined in the visible part of this file -- confirm this line is intended.
 extractContractConditions(cleanstring, contracttype, blocktime=blockinfo['time'], marker=hashList[0][:-1])