feat: split text supports multiple languages #593

pr675
Hufe921 2 years ago
parent bcf311ff19
commit 1cb07af75e

@ -107,19 +107,27 @@ export function getUUID(): string {
export function splitText(text: string): string[] { export function splitText(text: string): string[] {
const data: string[] = [] const data: string[] = []
const symbolMap = new Map<number, string>() if (Intl.Segmenter) {
for (const match of text.matchAll(UNICODE_SYMBOL_REG)) { const segmenter = new Intl.Segmenter()
symbolMap.set(match.index!, match[0]) const segments = segmenter.segment(text)
} for (const { segment } of segments) {
let t = 0 data.push(segment)
while (t < text.length) { }
const symbol = symbolMap.get(t) } else {
if (symbol) { const symbolMap = new Map<number, string>()
data.push(symbol) for (const match of text.matchAll(UNICODE_SYMBOL_REG)) {
t += symbol.length symbolMap.set(match.index!, match[0])
} else { }
data.push(text[t]) let t = 0
t++ while (t < text.length) {
const symbol = symbolMap.get(t)
if (symbol) {
data.push(symbol)
t += symbol.length
} else {
data.push(text[t])
t++
}
} }
} }
return data return data

Loading…
Cancel
Save