# flodata-tester/parse_incorp.py

import re
import testcases

# Module-level placeholders, all initialised to None.
marker = operation = address = amount = None

def extractMarkers(text):
    """Find the single marker word (a word ending in '#') in *text*.

    The text is lower-cased and split on single spaces. A word counts as a
    marker when it ends with '#' and is longer than one character, so a bare
    '#' is ignored.

    Returns:
        The lower-cased marker word if exactly one is present, ``None`` if
        there is none, or the sentinel string ``'od'`` if more than one
        marker is found.
    """
    returnval = None
    for part in text.lower().split(' '):
        # endswith() is safe on the empty strings that split(' ') yields for
        # empty input or consecutive spaces; indexing part[-1] would raise
        # IndexError on those.
        if len(part) > 1 and part.endswith('#'):
            if returnval is not None:
                return 'od'
            returnval = part
    return returnval


def extractOperation(text, operationList):
    """Identify which operation keyword from *operationList* occurs in *text*.

    Matching is case-insensitive and substring-based (``str.count``). The
    occurrence counts of the keywords are accumulated in list order.

    Returns:
        The lower-cased keyword at which the running total of occurrences
        first equals one, ``None`` if no keyword occurs, or the sentinel
        string ``'od'`` as soon as the running total exceeds one.
    """
    haystack = text.lower()
    total_hits = 0
    found = None
    for candidate in (entry.lower() for entry in operationList):
        total_hits += haystack.count(candidate)
        if total_hits > 1:
            return 'od'
        if found is None and total_hits == 1:
            found = candidate
    return found


def extractAmount(text):
    """Extract the single numeric amount mentioned in *text*.

    The text is lower-cased and split into words on runs of characters that
    are neither word characters nor '.'. Keeping '.' inside words lets a
    decimal such as ``10.5`` stay one token instead of being split into two
    separate numbers. The substring ``'rmt'`` is removed from each word
    (so ``'10rmt'`` parses as ``10``) and trailing periods are stripped
    before the word is passed to ``float()``.

    Returns:
        The amount as a ``float`` if exactly one numeric word is present,
        ``None`` if there is none, or the sentinel string ``'od'`` if more
        than one numeric word is found.
    """
    returnval = None
    count = 0
    # Raw string avoids the invalid-escape warning a plain "\W" literal triggers.
    for word in re.split(r"[^\w.]+", text.lower()):
        # Drop an attached 'rmt' and any sentence-ending period ("2.5.").
        word = word.replace('rmt', '').rstrip('.')
        try:
            value = float(word)
        except ValueError:
            # Not a number; ignore this word.
            continue
        count += 1
        if count > 1:
            return 'od'
        returnval = value
    return returnval

def isIncorp(text):
    """Report whether *text* contains an incorporation keyword.

    Runs of spaces are collapsed, the text is lower-cased and split on
    single spaces; the result is ``True`` when any resulting word is exactly
    'incorporate', 'create' or 'start', otherwise ``False``.
    """
    keywords = ('incorporate', 'create', 'start')
    tokens = re.sub(' +', ' ', text).lower().split(' ')
    return any(keyword in tokens for keyword in keywords)

def extractIncMarker(text):
    """Return the first word in *text* that ends with '#'.

    Runs of spaces are collapsed and the text is split on single spaces.
    The text is not lower-cased here, so the word is returned as written.

    Returns:
        The first word ending in '#', or ``False`` if no word does.
    """
    for word in re.sub(' +', ' ', text).split(' '):
        # endswith() tolerates the empty words produced by empty input or a
        # leading/trailing space; indexing word[-1] would raise IndexError.
        if word.endswith('#'):
            return word
    return False

def extractInitTokens(text):
    """Return the first number in *text*, scaled by a following unit word.

    Runs of spaces are collapsed and the text is split on single spaces. The
    first word that ``float()`` accepts is taken as the value. If the word
    right after it is one of 'thousand', 'million', 'billion', 'trillion',
    'lakh' or 'crore', the value is multiplied by that unit.

    Returns:
        The value as a ``float``, or ``None`` if no word is numeric.
    """
    base_units = {
        'thousand': 10**3,
        'million': 10**6,
        'billion': 10**9,
        'trillion': 10**12,
        'lakh': 10**5,
        'crore': 10**7,
    }
    words = re.sub(' +', ' ', text).split(' ')
    for idx, word in enumerate(words):
        try:
            result = float(word)
        except ValueError:
            # Not a number; keep scanning.
            continue
        # The number may be the last word, so guard the look-ahead instead of
        # letting an IndexError silently discard the value.
        following = words[idx + 1] if idx + 1 < len(words) else None
        if following in base_units:
            return result * base_units[following]
        return result
    return None


# Combined entry point: classifies a flodata string and runs the matching extractors.
def parse_flodata(string):
    """Parse a flodata string into a dict describing a transfer or an incorporation.

    A leading ``'text:'`` prefix, if present, is removed and the remainder is
    lower-cased. ``isIncorp`` then decides which set of fields is extracted.

    Returns:
        For incorporation text, a dict with keys ``'type'``
        (``'incorporation'``), ``'flodata'``, ``'marker'`` and
        ``'initTokens'``. Otherwise a dict with keys ``'type'``
        (``'transfer'``), ``'flodata'``, ``'marker'``, ``'operation'`` and
        ``'amount'``. ``'flodata'`` holds the lower-cased text without the
        prefix.
    """
    prefix = 'text:'
    if string.startswith(prefix):
        # Slice the prefix off rather than splitting on it, so a later
        # 'text:' inside the message does not truncate the payload.
        string = string[len(prefix):]

    string = string.lower()

    if isIncorp(string):
        incMarker = extractIncMarker(string)
        initTokens = extractInitTokens(string)
        return {'type': 'incorporation', 'flodata': string,
                'marker': incMarker, 'initTokens': initTokens}

    operationList = ['send', 'transfer', 'give']
    marker = extractMarkers(string)
    operation = extractOperation(string, operationList)
    amount = extractAmount(string)
    return {'type': 'transfer', 'flodata': string, 'marker': marker,
            'operation': operation, 'amount': amount}