Update latest code
This commit is contained in:
parent
b71b8d41ce
commit
17a9dc6984
@ -1,4 +1,5 @@
|
||||
import pdb
|
||||
import re
|
||||
|
||||
"""
|
||||
Find and make lists of #, *, @ words
|
||||
@ -57,6 +58,7 @@ for word in allList:
|
||||
rawstring = "test rmt# rmt@ rmt* : rmt# rmt# test"
|
||||
#className(rawstring) '''
|
||||
|
||||
# Variable configurations
|
||||
search_patterns = {
|
||||
'tokensystem-C':{
|
||||
1:['#']
|
||||
@ -104,20 +106,29 @@ conflict_matrix = {
|
||||
}
|
||||
}
|
||||
|
||||
# Find some value or return as noise
|
||||
def apply_rule1(*argv):
    """Run a rule callable and normalise a ``False`` result to ``None``.

    The first positional argument is the rule to call; every remaining
    positional argument is forwarded to that rule unchanged.

    Returns:
        The rule's return value, except that the literal ``False`` is
        replaced by ``None``. Other falsy values (``0``, ``""``, ``[]``)
        are returned as they are.

    Raises:
        IndexError: if no arguments are given, because there is no rule
            to call.
    """
    result = argv[0](*argv[1:])
    # Identity check on purpose: only the literal False means "no value";
    # a legitimate 0 or empty container must survive.
    return None if result is False else result
|
||||
|
||||
|
||||
def extract_specialcharacter_words(rawstring, special_characters):
    """Return the words of *rawstring* that end in a special character.

    The text is split on single spaces. A word is kept when its last
    character is in *special_characters* and it is either longer than one
    character or is exactly ``":"``. Order and duplicates are preserved.

    Args:
        rawstring: The text to scan.
        special_characters: Container of single characters to look for at
            the end of each word.

    Returns:
        A list of the matching words, in the order they appear.
    """
    return [
        word
        for word in rawstring.split(' ')
        # Consecutive, leading or trailing spaces produce empty strings;
        # skip them so that word[-1] cannot raise IndexError.
        if word
        and (len(word) != 1 or word == ":")
        and word[-1] in special_characters
    ]
|
||||
|
||||
|
||||
def find_first_classification(parsed_word_list, search_patterns):
    """Return the first category/key whose pattern accepts the word list.

    Categories and their keys are visited in the dictionaries' iteration
    order, and each pattern is tested with ``checkSearchPattern``. The
    first pattern that passes decides the result.

    Args:
        parsed_word_list: The special-character words extracted from the
            raw text.
        search_patterns: Mapping of category name to a mapping of key to
            pattern.

    Returns:
        On a match, a dict with ``'categorization'`` and ``'key'`` (both
        rendered as strings), the matching ``'pattern'`` and the
        ``'wordlist'`` that was tested. When nothing matches,
        ``{'categorization': "noise"}``.
    """
    for first_classification, patterns in search_patterns.items():
        for key, pattern in patterns.items():
            if checkSearchPattern(parsed_word_list, pattern):
                return {
                    'categorization': f"{first_classification}",
                    'key': f"{key}",
                    'pattern': pattern,
                    'wordlist': parsed_word_list,
                }
    return {'categorization': "noise"}
|
||||
|
||||
|
||||
@ -143,7 +154,6 @@ def sort_specialcharacter_wordlist(inputlist):
|
||||
|
||||
def classify_rawstring(rawstring):
    """Classify *rawstring* by the special-character words it contains.

    Words ending in ``@``, ``*``, ``$``, ``#`` or ``:`` are extracted and
    matched against the module-level ``search_patterns``.

    Args:
        rawstring: The text to classify.

    Returns:
        The dict produced by ``find_first_classification``.
    """
    specialcharacter_wordlist = extract_specialcharacter_words(
        rawstring, ['@', '*', '$', '#', ':']
    )
    # The debugging print of the extracted word list was dropped so that
    # classifying a string no longer writes to stdout.
    return find_first_classification(specialcharacter_wordlist, search_patterns)
|
||||
|
||||
@ -158,6 +168,54 @@ def checkSearchPattern(parsed_list, searchpattern):
|
||||
return True
|
||||
|
||||
|
||||
def extractAmount_rule(text):
    """Extract the single numeric amount mentioned in *text*.

    The text is split on single spaces and each word is examined:

    * a word that parses as a float is an amount; if the next word is a
      unit name (``thousand``, ``million``, ``billion``, ``trillion``) the
      amount is multiplied by that unit;
    * a word made of a number fused to a unit name (``"5million"``) is an
      amount scaled by that unit.

    Unit names are matched exactly as written here (lowercase).

    Args:
        text: The text to scan.

    Returns:
        The amount as a float when exactly one amount is found; ``None``
        when there is no amount or more than one.
    """
    base_units = {'thousand': 10 ** 3, 'million': 10 ** 6, 'billion': 10 ** 9, 'trillion': 10 ** 12}
    words = text.split(' ')
    matches = 0
    value = None

    for idx, word in enumerate(words):
        try:
            number = float(word)
        except ValueError:
            # Not a plain number: look for a number fused to a unit name.
            for unit, multiplier in base_units.items():
                prefix = word[:-len(unit)]
                if not word.endswith(unit) or not prefix:
                    continue
                try:
                    value = float(prefix) * multiplier
                except ValueError:
                    continue
                matches += 1
            continue

        # Bounds-check the look-ahead so a number that is the last word
        # still counts as an amount instead of being silently discarded.
        next_word = words[idx + 1] if idx + 1 < len(words) else None
        if next_word in base_units:
            value = number * base_units[next_word]
        else:
            value = number
        matches += 1

    # More than one candidate is ambiguous, so it is reported as no amount.
    return value if matches == 1 else None
|
||||
|
||||
|
||||
def selectCateogry(rawstring, wordlist, category1, category2):
    """Placeholder: this function has a signature but no implementation yet.

    NOTE(review): judging by the parameter names it is presumably meant to
    pick one of two candidate categories for a raw string and its word
    list; confirm the intended behaviour before implementing.

    Raises:
        NotImplementedError: always, so an accidental call fails loudly
            instead of returning a meaningless value.
    """
    raise NotImplementedError("selectCateogry is not implemented yet")
|
||||
|
||||
|
||||
def text_preprocessing(text):
    """Normalise whitespace in *text* and convert it to lowercase.

    Leading and trailing whitespace is removed, and every internal run of
    whitespace (spaces, tabs, newlines) becomes a single space.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, lowercased string.
    """
    # str.split() with no separator splits on any whitespace run and drops
    # empty leading/trailing pieces, so one split/join covers the strip,
    # the tab/newline replacement and the space collapsing.
    return ' '.join(text.split()).lower()
|
||||
|
||||
|
||||
text_list = [
|
||||
"create 500 million rmt#",
|
||||
|
||||
@ -180,5 +238,14 @@ text_list = [
|
||||
"Send 15 rupee# to swap-rupee-article@ its FLO address being FJXw6QGVVaZVvqpyF422Aj4FWQ6jm8p2dL$"
|
||||
]
|
||||
|
||||
for text in text_list:
|
||||
print(f"{classify_rawstring(text)} \n\n")
|
||||
# Sample inputs for exercising the classification pipeline by hand.
text_list1 = [
    "create 5 million rmt#",
]

for text in text_list1:
    text = text_preprocessing(text)
    first_classification = classify_rawstring(text)
    if first_classification['categorization'] == 'tokensystem-C':
        amount = apply_rule1(extractAmount_rule, text)
        # TODO(review): this was `apply_rule1()` with no arguments, which
        # always raises IndexError because apply_rule1 needs at least the
        # rule callable. Replace this placeholder with the intended
        # operation-extraction rule once it exists.
        operation = None
|
||||
|
||||
|
||||
@ -106,8 +106,11 @@ def truefalse_rule2(rawstring, permitted_list, denied_list):
|
||||
if (foundPermitted in not None) and (foundDenied is None):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def selectCateogry(rawstring, wordlist, category1, category2):
    """Placeholder: this function has a signature but no implementation yet.

    NOTE(review): judging by the parameter names it is presumably meant to
    pick one of two candidate categories for a raw string and its word
    list; confirm the intended behaviour before implementing.

    Raises:
        NotImplementedError: always, so an accidental call fails loudly
            instead of returning a meaningless value.
    """
    raise NotImplementedError("selectCateogry is not implemented yet")
|
||||
|
||||
|
||||
"""
|
||||
CLASSIFY RULES
|
||||
@ -120,7 +123,6 @@ CLASSIFY RULES
|
||||
|
||||
"""
|
||||
|
||||
|
||||
"""
|
||||
REJECT RULES
|
||||
|
||||
@ -140,7 +142,6 @@ REJECT RULES
|
||||
def rejectrule9(rawtext, starword):
    """Placeholder for reject rule 9; not implemented yet.

    Both arguments are currently ignored.

    Returns:
        None, always.
    """
    pass
|
||||
|
||||
''''''
|
||||
|
||||
extractContractConditions(cleanstring, contracttype, blocktime=blockinfo['time'], marker=hashList[0][:-1])
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user