diff --git a/parsing.py b/parsing.py index 3b4ef6b..e9292fa 100644 --- a/parsing.py +++ b/parsing.py @@ -686,8 +686,14 @@ def checkSearchPattern(parsed_list, searchpattern): def extractAmount_rule_new(text): - base_units = {'thousand': 10 ** 3, 'k': 10 ** 3, 'million': 10 ** 6, 'm': 10 ** 6, 'billion': 10 ** 9, 'b': 10 ** 9, 'trillion': 10 ** 12, 'lakh':10 ** 5, 'crore':10 ** 7, 'quadrillion':10 ** 15} - amount_tuple = re.findall(r'(-?\d+(?:\.\d+)?)\s*(?=(?:[bmk]|thousand|million|billion|trillion|m|b|t|k|lakh|crore|quadrillion|\s))(billion|million|thousand|k|lakh|crore|quadrillion)?', text) + base_units = {'thousand': 10 ** 3, 'k': 10 ** 3, 'lakh':10 ** 5, 'crore':10 ** 7, 'million': 10 ** 6, 'm': 10 ** 6, 'billion': 10 ** 9, 'b': 10 ** 9, 'trillion': 10 ** 12, 'quadrillion':10 ** 15} + + # appending whitespace because the regex does not recognize a number at the start & end of string ie. "send rmt# 45" + text = f" {text} " + text = text.replace("'", "") + text = text.replace('"', '') + + amount_tuple = re.findall(r'(? 1 or len(amount_tuple) == 0: return False else: @@ -706,11 +712,11 @@ def extractAmount_rule_new1(text, split_word=None, split_direction=None): if split_direction=='post': text = text.split(split_word)[1] - # appending dummy because the regex does not recognize a number at the start of a string - # text = f"dummy {text}" + # appending whitespace because the regex does not recognize a number at the start & end of string ie. "send rmt# 45" + text = f" {text} " text = text.replace("'", "") text = text.replace('"', '') - amount_tuple = re.findall(r'(-?\d+(?:\.\d+)?)\s*(?=(?:[bmk]|thousand|million|billion|trillion|m|b|t|k|lakh|crore|quadrillion|\s))(billion|million|thousand|k|lakh|crore|quadrillion)?', text) + amount_tuple = re.findall(r'(? 1 or len(amount_tuple) == 0: return False else: @@ -973,6 +979,7 @@ def parse_flodata(text, blockinfo, net): first_classification = firstclassification_rawstring(processed_text) parsed_data = None + if first_classification['categorization'] == 'tokensystem-C': # Resolving conflict for 'tokensystem-C' tokenname = first_classification['wordlist'][0][:-1] @@ -1237,4 +1244,5 @@ def parse_flodata(text, blockinfo, net): return outputreturn('noise') return outputreturn('continuos-event-token-swap-incorporation', f"{contract_token}", f"{contract_name}", f"{contract_address}", f"{clean_text}", f"{contract_conditions['subtype']}", f"{contract_conditions['accepting_token']}", f"{contract_conditions['selling_token']}", f"{contract_conditions['priceType']}", f"{contract_conditions['price']}", stateF_mapping) - return outputreturn('noise') \ No newline at end of file + return outputreturn('noise') + diff --git a/test_parsing.py b/test_parsing.py index 4e6f788..e9519ea 100644 --- a/test_parsing.py +++ b/test_parsing.py @@ -109,6 +109,21 @@ class TestParsing(unittest.TestCase): result = parsing.parse_flodata(text, TestParsing.blockinfo_stub, 'testnet') expected_result = {'type': 'transfer', 'transferType': 'smartContract', 'flodata': 'transfer 6.20000 bioscope# to all-crowd-fund-7@', 'tokenIdentification': 'bioscope', 'tokenAmount': 6.2, 'contractName': 'all-crowd-fund-7'} self.assertEqual(result, expected_result) + + text = 'transfer 6.20000 bioscope# to all-crowd-fund-7@ 24' + result = parsing.parse_flodata(text, TestParsing.blockinfo_stub, 'testnet') + expected_result = {'type': 'noise'} + self.assertEqual(result, expected_result) + + text = 'transfer 6.20000 bioscope# to all-crowd-fund-7@ 24 ' + result = parsing.parse_flodata(text, TestParsing.blockinfo_stub, 'testnet') + expected_result = {'type': 'noise'} + self.assertEqual(result, expected_result) + + text = '6.20.000 transfer bioscope# to all-crowd-fund-7@ 24' + result = parsing.parse_flodata(text, TestParsing.blockinfo_stub, 'testnet') + expected_result = {'type': 'transfer', 'transferType': 'smartContract', 'flodata': '6.20.000 transfer bioscope# to all-crowd-fund-7@ 24', 'tokenIdentification': 'bioscope', 'tokenAmount': 24.0, 'contractName': 'all-crowd-fund-7'} + self.assertEqual(result, expected_result) def test_onetimeevent_externaltrigger_creation(self): # contractamount