Source code for acres.util.acronym

"""
Utility functions related to acronyms.
"""
import logging
from collections import namedtuple

from acres import constants
from acres.util import text

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Immutable record with three positional fields, in this order:
# ``acronym``, ``left_context`` and ``right_context``.
# NOTE(review): presumably the contexts are the text found to the left and
# right of the acronym -- confirm against the callers.
Acronym = namedtuple('Acronym', 'acronym left_context right_context')


def is_acronym(str_probe: str, max_length: int = 7) -> bool:
    """
    Decide whether a string looks like an acronym.

    Every occurrence of ``constants.DIGIT_MARKER`` is first replaced by ``"0"``,
    which is neither upper nor lower case and therefore does not affect the
    letter counts. The resulting string is accepted when all of these hold:

    * it is at most ``max_length`` characters long,
    * it contains at least two upper-case characters,
    * it contains more upper-case than lower-case characters.

    XXX look for "authoritative" definitions for acronyms

    :param str_probe: The candidate string.
    :param max_length: Maximum allowed length, measured after the marker replacement.
    :return: ``True`` if the candidate satisfies the conditions above, ``False`` otherwise.
    """
    normalized = str_probe.replace(constants.DIGIT_MARKER, "0")

    # Overlong candidates are rejected outright; no counting needed.
    if len(normalized) > max_length:
        return False

    n_upper = sum(1 for ch in normalized if ch.isupper())
    n_lower = sum(1 for ch in normalized if ch.islower())

    return n_upper > 1 and n_upper > n_lower
def create_german_acronym(full: str) -> str:
    """
    Build an acronym from the initials of a multi-word expression.

    Hyphens and slashes are turned into spaces, the result is passed through
    ``text.clean_whitespaces`` and then split on whitespace. The first character
    of every word that is not in the stop list is upper-cased and appended to
    the result. The stop-list comparison is exact (case-sensitive).

    @todo Use is_stopword?

    :param full: A full form containing whitespaces.
    :return: The concatenated upper-cased initials; empty if no word qualifies.
    """
    # Function words (German and English) that do not contribute an initial.
    stopwords = frozenset((
        "and", "auf", "bei", "beim", "by", "der", "des", "die", "et", "for",
        "für", "gegen", "im", "in", "mit", "nach", "not", "of", "on", "than",
        "the", "to", "und", "vom", "von", "vor", "with", "zum", "zur",
    ))

    # Treat "-" and "/" as word separators.
    spaced = full.translate(str.maketrans("-/", "  "))
    normalized = text.clean_whitespaces(spaced)

    return "".join(
        token[0].upper()
        for token in normalized.split()
        if token not in stopwords
    )
def trim_plural(acronym: str) -> str:
    """
    Strip a trailing plural marker from an acronym.

    A final ``s``, ``S``, ``x`` or ``X`` is removed. Inputs of two characters
    or fewer are returned unchanged, since trimming them would leave a
    single-character (and invalid) acronym.

    @todo rewrite as regex

    :param acronym: The acronym, possibly in plural form.
    :return: The acronym without its final plural character, or the input
        itself when nothing is trimmed.
    """
    plural_endings = ("s", "S", "x", "X")

    if len(acronym) > 2 and acronym.endswith(plural_endings):
        return acronym[:-1]
    return acronym