diff --git a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
index 3cd8d6856500ba50ca7e44317d110c41b0c33f55..5eee57aa200cc627f10104f59944712a3b71af1b 100644
--- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
+++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php
@@ -105,31 +105,62 @@ public function removeDiacritics($string) {
    * {@inheritdoc}
    */
   public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
     $result = '';
     $length = 0;
-    // Split into Unicode characters and transliterate each one.
-    foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
-      $code = self::ordUTF8($character);
-      if ($code == -1) {
-        $to_add = $unknown_character;
-      }
-      else {
-        $to_add = $this->replace($code, $langcode, $unknown_character);
-      }
-
-      // Check if this exceeds the maximum allowed length.
-      if (isset($max_length)) {
-        $length += strlen($to_add);
-        if ($length > $max_length) {
-          // There is no more space.
-          return $result;
-        }
-      }
-
-      $result .= $to_add;
-    }
+
+    // Process the string one space-delimited word at a time so that mixed case
+    // can be detected per word. Empty words (from leading, trailing or repeated
+    // spaces) and words such as '0' are kept, so the original spacing survives.
+    foreach (explode(' ', $string) as $index => $word) {
+      // Put back the space that explode() consumed; it counts towards the
+      // maximum length like any other character.
+      if ($index > 0) {
+        if (isset($max_length) && ++$length > $max_length) {
+          // There is no more space.
+          return $result;
+        }
+        $result .= ' ';
+      }
+
+      // A word is mixed case if it consists of both uppercase and lowercase
+      // letters. To accurately check this, remove any numbers and check that
+      // the remaining characters are not all uppercase and not all lowercase.
+      $alpha_string = preg_replace('/\\d/', '', $word);
+      $mixed_case = (mb_strlen($alpha_string) > 1 && mb_strtolower($alpha_string) !== $alpha_string && mb_strtoupper($alpha_string) !== $alpha_string);
+
+      // Split into Unicode characters and transliterate each one.
+      foreach (preg_split('//u', $word, 0, PREG_SPLIT_NO_EMPTY) as $character) {
+        $code = self::ordUTF8($character);
+        if ($code == -1) {
+          $to_add = $unknown_character;
+        }
+        else {
+          $to_add = $this->replace($code, $langcode, $unknown_character);
+
+          // An uppercase letter of a mixed case word that expands to several
+          // characters keeps only its first letter capitalised, for example
+          // 'Shtrikhkod' rather than 'SHtrikhkod'. Caseless symbols and the
+          // caller-supplied unknown character are left exactly as they are.
+          $from_uppercase = mb_strtolower($character) !== $character;
+          if ($mixed_case && $from_uppercase && $to_add !== $unknown_character && strlen($to_add) > 1 && mb_strtoupper($to_add) === $to_add) {
+            $to_add = ucfirst(strtolower($to_add));
+          }
+        }
+
+        // Check if this exceeds the maximum allowed length.
+        if (isset($max_length)) {
+          $length += strlen($to_add);
+          if ($length > $max_length) {
+            // There is no more space.
+            return $result;
+          }
+        }
+
+        $result .= $to_add;
+      }
+    }
 
     return $result;
   }
 
   /**
diff --git a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
index 924f3e93000b8799b7054da9574578f052cf0604..eb468c8b80309484bb28680dbaf9ae76f975074c 100644
--- a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
+++ b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php
@@ -146,6 +146,24 @@ public function providerTestPhpTransliteration() {
       ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'],
       // Max length.
       ['de', $two_byte, 'Ae Oe', '?', 5],
+      // Test strings with mixed case words where a single capital character
+      // results in multiple characters. The first character should remain
+      // capitalised but subsequent resulting characters should be lowercase.
+      // For example a result of the transliteration should be 'Shtrikhkod'
+      // not 'SHtrikhkod'. Numbers should not be used in determining whether a
+      // string is mixed case.
+      ['ru', 'Штрихкод', 'Shtrikhkod'],
+      ['bg', 'Щастие', 'Schastie'],
+      ['bg', 'Щ1', 'SCH1'],
+      ['bg', 'Щ1Щ', 'SCH1SCH'],
+      ['bg', 'Щ1щ', 'Sch1sch'],
+      ['bg', 'Щастие ЩЩЩ', 'Schastie SCHSCHSCH'],
+      ['bg', 'Щастие ЩЩЩ. Щастие! Щастие', 'Schastie SCHSCHSCH. Schastie! Schastie'],
+      // Spacing is preserved exactly, and words that PHP treats as empty,
+      // such as '0', are not dropped. Spaces count towards the max length.
+      ['en', '0 1', '0 1'],
+      ['en', ' a  b ', ' a  b '],
+      ['en', 'ab cd', 'ab ', '?', 3],
     ];
   }
 