1
0
mirror of synced 2026-01-01 00:02:54 -05:00
Files
airbyte/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/utils.py
Serhii Chvaliuk a431a52aaa Source Google Sheets: slugify column names (#23057)
Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
2023-02-23 18:10:46 +02:00

43 lines
959 B
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import re
import unidecode
# Tokenizer used by name_conversion. Each match is one of:
#   - an uppercase run followed by optional lowercase letters (e.g. "My", "ABCdef"),
#   - a lowercase run,
#   - a digit run,
#   - or a run of any other characters, captured in the named group "NoToken".
TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
# String placed between tokens when name_conversion joins them back together.
DEFAULT_SEPARATOR = "_"
def name_conversion(text):
    """
    Turn an arbitrary name into a lowercase, separator-joined identifier,
    for example: '1MyName' -> '_1_my_name'

    Rules applied, in order:
      * the text is passed through ``unidecode.unidecode``;
      * it is split into tokens with ``TOKEN_PATTERN``; every run of
        non-alphanumeric characters becomes an empty placeholder token;
      * placeholders between the first and last token are dropped, so an
        inner run of such characters yields exactly one separator, while a
        leading or trailing run is kept as a leading/trailing separator;
      * if the first token is all digits, an empty token is put in front of
        it so the result starts with the separator;
      * tokens are joined with ``DEFAULT_SEPARATOR`` and lowercased.

    Returns an empty string when the text yields no tokens, or only a single
    run of non-alphanumeric characters.
    """
    transliterated = unidecode.unidecode(text)

    # Non-alphanumeric runs (the "NoToken" group) are recorded as "" placeholders.
    parts = [
        "" if match.group("NoToken") is not None else match.group(0)
        for match in TOKEN_PATTERN.finditer(transliterated)
    ]

    # With three or more tokens, strip placeholders from the interior only;
    # the first and last tokens are kept whatever they are.
    if len(parts) >= 3:
        head, *interior, tail = parts
        parts = [head, *(part for part in interior if part), tail]

    # A leading all-digit token gets an empty token in front, which becomes
    # a leading separator after the join.
    if parts and parts[0].isdigit():
        parts = ["", *parts]

    return DEFAULT_SEPARATOR.join(parts).lower()
def safe_name_conversion(text):
    """
    Convert ``text`` with ``name_conversion`` and reject empty results.

    Returns the converted name when it is non-empty.
    Raises ``Exception`` when the conversion yields an empty string.
    """
    converted = name_conversion(text)
    if converted:
        return converted
    raise Exception(f"initial string '{text}' converted to empty")