#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import re

import unidecode

# One token per match. The first three alternatives capture "word" tokens:
#   [A-Z]+[a-z]*  - a run of capitals optionally followed by lowercase letters
#   [a-z]+        - a run of lowercase letters
#   \d+           - a run of digits
# The named group ``NoToken`` captures any run of other characters; those runs
# act as separators and are looked up by name in ``name_conversion``.
TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
DEFAULT_SEPARATOR = "_"


def name_conversion(text: str) -> str:
    """
    Convert a name to lower-case, underscore-separated form.

    Rules, applied in order:

    * the text is passed through ``unidecode.unidecode``;
    * the result is split into tokens with ``TOKEN_PATTERN``; every run of
      non-alphanumeric characters becomes an empty token;
    * when there are at least three tokens, empty tokens strictly between the
      first and the last one are dropped, so an inner separator run yields a
      single ``_`` while a leading or trailing run is kept as a leading or
      trailing ``_``;
    * if the first token consists only of digits, an empty token is put in
      front of it so the result starts with ``_``;
    * tokens are joined with ``DEFAULT_SEPARATOR`` and lower-cased.

    Example: '1MyName' -> '_1_my_name'

    :param text: the name to convert.
    :return: the converted name; it is empty when ``text`` contains no letters
        or digits after transliteration.
    """
    # Transliterate first so that the ASCII-only character classes in
    # TOKEN_PATTERN can classify the characters.
    text = unidecode.unidecode(text)

    # Word tokens are kept verbatim; separator runs are recorded as "".
    tokens = [
        match.group(0) if match.group("NoToken") is None else ""
        for match in TOKEN_PATTERN.finditer(text)
    ]

    if len(tokens) >= 3:
        # Keep the first and last token untouched (even when empty) and remove
        # only the inner empty ones; joining already inserts the separator.
        tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]

    if tokens and tokens[0].isdigit():
        # Prefix an empty token so the joined name starts with the separator
        # instead of a digit.
        tokens.insert(0, "")

    return DEFAULT_SEPARATOR.join(tokens).lower()


def safe_name_conversion(text: str) -> str:
    """
    Convert ``text`` with ``name_conversion`` and reject an empty result.

    :param text: the name to convert.
    :return: the non-empty converted name.
    :raises Exception: if the converted name is an empty string.
    """
    new = name_conversion(text)
    if not new:
        raise Exception(f"initial string '{text}' converted to empty")
    return new