"""
Rating submodule for full form checks.
"""
from acres.util import acronym as acro_util
from acres.util import functions
def _has_parenthesis(full: str) -> bool:
"""
Check whether a given string contains parenthesis.
:param full:
:return:
"""
if "(" in full or ")" in full:
return True
return False
def _is_full_too_short(full: str) -> bool:
"""
Check whether a given full form is too short.
@todo ignore whitespaces?
:param full:
:return:
"""
too_short = 5
if len(full) <= too_short:
return True
return False
def _starts_with_stopword(full: str) -> bool:
    """
    Check whether the full form starts with a stopword.

    The first word is everything before the first space character (the whole
    string when it contains no space); it is handed to ``functions.is_stopword``.

    :param full: The full form to check.
    :return: True if ``functions.is_stopword`` accepts the first word, False otherwise.
    """
    first_word, _, _ = full.partition(' ')
    return bool(functions.is_stopword(first_word))
def _has_capitals(full: str) -> bool:
"""
Check whether a string has at least one capital letter.
:param full:
:return:
"""
if full.islower():
return False
return True
def _contain_acronym(full: str) -> bool:
    """
    Check whether any word of the full form is itself an acronym.

    The string is split on whitespace and each resulting word is tested with
    ``acro_util.is_acronym``; checking stops at the first match.

    :param full: The full form to check.
    :return: True if at least one word is reported as an acronym, False otherwise.
    """
    return any(acro_util.is_acronym(token) for token in full.split())
def _compute_full_valid(full: str) -> int:
    """
    [For internal use only] Compute all checks on full forms.

    Every failing check sets its own bit in the result:

    * 8 - ``_has_capitals`` found no capital letter;
    * 16 - ``_contain_acronym`` found an acronym.

    Bits 1, 2 and 4 belong to checks that are currently disabled (see below).

    :param full: The full form to check.
    :return: An integer whose binary form indicates the failing tests (0 if none failed).
    """
    # Disabled checks, kept for reference together with their bit values:
    #   1 - _has_parenthesis(full)
    #       The new cleaning routines remove all parentheses already.
    #   2 - _is_full_too_short(full)
    #       If disabled, F1 fall 0.4% for word2vec
    #   4 - _starts_with_stopword(full)
    #       If disabled, baseline F1 fall 1%

    # XXX german-only
    # A valid expansion of a german acronym would require at least one noun,
    # which is capitalized.
    # If disabled, precision fall 2% for word2vec
    lacks_capitals = not _has_capitals(full)

    # If disabled, metrics fall 3% for fastType
    holds_acronym = _contain_acronym(full)

    failed_checks = 0
    if lacks_capitals:
        failed_checks |= 8
    if holds_acronym:
        failed_checks |= 16
    return failed_checks
def is_full_valid(full: str) -> bool:
    """
    Check whether the full form is valid.

    A full form is valid when ``_compute_full_valid`` reports no failing check.

    :param full: The full form to check.
    :return: True if ``_compute_full_valid(full)`` is 0, False otherwise.
    """
    return _compute_full_valid(full) == 0