Ignore some extra chars in no-combining search (#2929)

* Ignore some extra chars in no-combining search

* Construct new string

* Update rslib/src/text.rs (dae)
This commit is contained in:
Abdo 2024-01-05 07:22:52 +03:00 committed by GitHub
parent d5b57262fd
commit 646ba41cf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -380,22 +380,60 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
}
}
/// Compile-time map from individual characters to the plain-letter string
/// that replaces them in `without_combining`.
///
/// Each key is a single `char`; each value is a one- or two-letter ASCII
/// string (for example 'Æ' => "AE", 'ł' => "l").
///
/// NOTE(review): presumably these are characters that NFKD normalization
/// does not split into a base letter plus a combining mark, so stripping
/// combining marks alone would leave them untouched — confirm against the
/// Unicode decomposition data before adding or removing entries.
static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
'€' => "E",
'Æ' => "AE",
'Ð' => "D",
'Ø' => "O",
'Þ' => "TH",
'ß' => "s",
'æ' => "ae",
'ð' => "d",
'ø' => "o",
'þ' => "th",
'Đ' => "D",
'đ' => "d",
'Ħ' => "H",
'ħ' => "h",
'ı' => "i",
'ĸ' => "k",
'Ł' => "L",
'ł' => "l",
'Ŋ' => "N",
'ŋ' => "n",
'Œ' => "OE",
'œ' => "oe",
'Ŧ' => "T",
'ŧ' => "t",
'Ə' => "E",
'ǝ' => "e",
'ɑ' => "a",
};
/// Convert provided string to NFKD form and strip combining characters.
pub(crate) fn without_combining(s: &str) -> Cow<str> {
// if the string is already normalized
if matches!(is_nfkd_quick(s.chars()), IsNormalized::Yes) {
// and no combining characters found, return unchanged
if !s.chars().any(is_combining_mark) {
if !s
.chars()
.any(|c| is_combining_mark(c) || EXTRA_NO_COMBINING_REPLACEMENTS.contains_key(&c))
{
return s.into();
}
}
// we need to create a new string without the combining marks
s.chars()
.nfkd()
.filter(|c| !is_combining_mark(*c))
.collect::<String>()
.into()
let mut out = String::with_capacity(s.len());
for chr in s.chars().nfkd().filter(|c| !is_combining_mark(*c)) {
if let Some(repl) = EXTRA_NO_COMBINING_REPLACEMENTS.get(&chr) {
out.push_str(repl);
} else {
out.push(chr);
}
}
out.into()
}
/// Check if string contains an unescaped wildcard.