Update latest code

This commit is contained in:
Vivek Teega 2022-01-06 08:35:44 +00:00
parent b71b8d41ce
commit 17a9dc6984
2 changed files with 80 additions and 12 deletions

View File

@ -1,4 +1,5 @@
import pdb
import re
"""
Find and make lists of #, *, @ words
@ -57,6 +58,7 @@ for word in allList:
rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test"
#className(rawstring) '''
# Variable configurations
search_patterns = {
'tokensystem-C':{
1:['#']
@ -104,20 +106,29 @@ conflict_matrix = {
}
}
# Find some value or return as noise
def apply_rule1(*argv):
    """Run a rule callable and translate a literal ``False`` result to ``None``.

    The first positional argument is the rule to call; every remaining
    positional argument is forwarded to it unchanged.

    Returns:
        Whatever the rule returned, except that the ``False`` singleton is
        replaced by ``None``. Other falsy results (``0``, ``""``, ``[]``)
        are passed through untouched.
    """
    rule, rule_args = argv[0], argv[1:]
    outcome = rule(*rule_args)
    # Identity check on purpose: only the ``False`` object itself is mapped.
    return None if outcome is False else outcome
def extract_specialcharacter_words(rawstring, special_characters):
    """Collect the words of *rawstring* that end in a special character.

    The string is split on single spaces. A word is kept when its last
    character is in *special_characters* and it is either longer than one
    character or is exactly ``":"`` (so a lone ``#`` is dropped, a lone
    ``:`` is kept).

    Args:
        rawstring: Text to scan.
        special_characters: Container of the marker characters to look for.

    Returns:
        The matching words, in their original order (duplicates included).
    """
    return [
        word
        for word in rawstring.split(' ')
        # Empty tokens come from empty input or repeated/leading/trailing
        # spaces; indexing word[-1] on them would raise IndexError.
        if word
        and (len(word) != 1 or word == ":")
        and word[-1] in special_characters
    ]
def find_first_classification(parsed_word_list, search_patterns):
    """Return the first search pattern that accepts *parsed_word_list*.

    *search_patterns* is walked as a two-level mapping
    ``{categorization: {key: pattern}}`` in its own iteration order; each
    pattern is handed to ``checkSearchPattern`` together with the word list.

    Args:
        parsed_word_list: Word list to classify.
        search_patterns: Two-level mapping of candidate patterns.

    Returns:
        For the first pattern where ``checkSearchPattern`` returns a truthy
        value, a dict with the stringified ``'categorization'`` and
        ``'key'``, the matching ``'pattern'`` and the ``'wordlist'`` that
        was tested. ``{'categorization': "noise"}`` when nothing matches.
    """
    for categorization, patterns in search_patterns.items():
        for key, pattern in patterns.items():
            if checkSearchPattern(parsed_word_list, pattern):
                return {
                    'categorization': f"{categorization}",
                    'key': f"{key}",
                    'pattern': pattern,
                    'wordlist': parsed_word_list,
                }
    return {'categorization': "noise"}
@ -143,7 +154,6 @@ def sort_specialcharacter_wordlist(inputlist):
def classify_rawstring(rawstring):
    """Classify *rawstring* against the module-level ``search_patterns``.

    Extracts the words ending in one of ``@ * $ # :``, prints that word
    list, and returns the result of ``find_first_classification`` for it.
    """
    marker_characters = ['@', '*', '$', '#', ':']
    marked_words = extract_specialcharacter_words(rawstring, marker_characters)
    print(marked_words)
    return find_first_classification(marked_words, search_patterns)
@ -158,6 +168,54 @@ def checkSearchPattern(parsed_list, searchpattern):
return True
def extractAmount_rule(text):
    """Extract the single numeric amount mentioned in *text*.

    The text is split on single spaces and every word is examined:

    * a word that parses as a float is an amount; when the next word is a
      unit name (``thousand``, ``million``, ``billion``, ``trillion``) the
      number is multiplied by that unit;
    * otherwise a word made of a number glued to a unit name
      (e.g. ``"5million"``) is an amount.

    Args:
        text: Space-separated text to scan.

    Returns:
        The amount when exactly one was found, otherwise ``None`` (no
        amount, or more than one candidate).
    """
    import math

    base_units = {'thousand': 10 ** 3, 'million': 10 ** 6, 'billion': 10 ** 9, 'trillion': 10 ** 12}
    words = text.split(' ')
    amounts = []
    for idx, word in enumerate(words):
        try:
            number = float(word)
        except ValueError:
            amount = _amount_from_suffixed_word(word, base_units)
        else:
            # Bounds-check the lookahead: a number that is the last word is
            # still a valid amount and must not be discarded.
            following = words[idx + 1] if idx + 1 < len(words) else None
            if following in base_units:
                amount = number * base_units[following]
            else:
                amount = number
        # float() also accepts "inf"/"infinity"/"nan"; those are ordinary
        # words here, not amounts, so only finite values are counted.
        if amount is not None and math.isfinite(amount):
            amounts.append(amount)
    return amounts[0] if len(amounts) == 1 else None


def _amount_from_suffixed_word(word, base_units):
    """Parse a number glued to a trailing unit name; ``None`` if *word* is not one."""
    for unit, multiplier in base_units.items():
        if not word.endswith(unit):
            continue
        prefix = word[:-len(unit)]
        if not prefix:
            # The bare unit name carries no number of its own.
            return None
        try:
            return float(prefix) * multiplier
        except ValueError:
            return None
    return None
def selectCateogry(rawstring, wordlist, category1, category2):
def text_preprocessing(text):
    """Normalise *text* for classification.

    Leading/trailing whitespace is removed, every run of whitespace
    (spaces, tabs, newlines) is collapsed to a single space, and the result
    is lower-cased.

    Args:
        text: Raw input string.

    Returns:
        The normalised string.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty edge tokens, so one split/join replaces the separate strip,
    # tab/newline substitution and space-collapsing passes.
    return ' '.join(text.split()).lower()
text_list = [
"create 500 million rmt#",
@ -180,5 +238,14 @@ text_list = [
"Send 15 rupee# to swap-rupee-article@ its FLO address being FJXw6QGVVaZVvqpyF422Aj4FWQ6jm8p2dL$"
]
# Classify every sample in text_list and print the resulting dict.
for text in text_list:
print(f"{classify_rawstring(text)} \n\n")
# Second pass over a single sample: normalise the text, classify it and, for
# 'tokensystem-C' results, try to extract the amount via extractAmount_rule.
text_list1 = [
"create 5 million rmt#"
]
for text in text_list1:
text = text_preprocessing(text)
first_classification = classify_rawstring(text)
if first_classification['categorization'] == 'tokensystem-C':
amount = apply_rule1(extractAmount_rule,text)
# NOTE(review): apply_rule1 indexes argv[0], so this zero-argument call raises
# IndexError; the rule to apply (and its arguments) still has to be filled in.
operation = apply_rule1()

View File

@ -106,8 +106,11 @@ def truefalse_rule2(rawstring, permitted_list, denied_list):
if (foundPermitted in not None) and (foundDenied is None):
return True
else:
return False
return False
def selectCateogry(rawstring, wordlist, category1, category2):
"""
CLASSIFY RULES
@ -120,7 +123,6 @@ CLASSIFY RULES
"""
"""
REJECT RULES
@ -140,7 +142,6 @@ REJECT RULES
# Placeholder for reject rule 9: accepts rawtext and starword but contains no
# logic yet, so it always returns None.
def rejectrule9(rawtext, starword):
pass
''''''
extractContractConditions(cleanstring, contracttype, blocktime=blockinfo['time'], marker=hashList[0][:-1])