feat: splitText supports multiple languages #593

pr675
Hufe921 2 years ago
parent bcf311ff19
commit 1cb07af75e

@ -107,19 +107,27 @@ export function getUUID(): string {
export function splitText(text: string): string[] {
const data: string[] = []
const symbolMap = new Map<number, string>()
for (const match of text.matchAll(UNICODE_SYMBOL_REG)) {
symbolMap.set(match.index!, match[0])
}
let t = 0
while (t < text.length) {
const symbol = symbolMap.get(t)
if (symbol) {
data.push(symbol)
t += symbol.length
} else {
data.push(text[t])
t++
if (Intl.Segmenter) {
const segmenter = new Intl.Segmenter()
const segments = segmenter.segment(text)
for (const { segment } of segments) {
data.push(segment)
}
} else {
const symbolMap = new Map<number, string>()
for (const match of text.matchAll(UNICODE_SYMBOL_REG)) {
symbolMap.set(match.index!, match[0])
}
let t = 0
while (t < text.length) {
const symbol = symbolMap.get(t)
if (symbol) {
data.push(symbol)
t += symbol.length
} else {
data.push(text[t])
t++
}
}
}
return data

Loading…
Cancel
Save