Skip to main content

insert_space_in_code_switched_text

def insert_space_in_code_switched_text(text: str) -> str:
    """Lowercase *text* and put single spaces around each Chinese character.

    Every character in the CJK Unified Ideographs block (U+4E00-U+9FFF)
    becomes its own space-separated token, while runs of other characters
    (e.g. Latin words) stay together. All whitespace runs are collapsed to a
    single space and leading/trailing whitespace is removed.

    Args:
        text: Mixed-script (code-switched) input string.

    Returns:
        The lowercased, space-normalized string.
    """
    # Local import keeps the function self-contained; no module-level
    # ``import re`` is visible alongside this definition.
    import re

    lowered = text.lower()

    # Pad every ideograph on both sides. Adjacent ideographs end up with
    # doubled spaces, which the whitespace collapse below cleans up.
    padded = re.sub(r'([\u4e00-\u9fff])', r' \1 ', lowered)

    # ``\s+`` covers every Unicode whitespace character for str patterns,
    # so no further space replacement is needed after this step.
    return re.sub(r'\s+', ' ', padded).strip()

('Today 天 氣 很 好', '我 喜 歡 吃 Hambergur')