diff --git a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php index 5eee57aa200cc627f10104f59944712a3b71af1b..3cd8d6856500ba50ca7e44317d110c41b0c33f55 100644 --- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php +++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php @@ -105,55 +105,31 @@ public function removeDiacritics($string) { * {@inheritdoc} */ public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) { - $results = []; + $result = ''; $length = 0; + // Split into Unicode characters and transliterate each one. + foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) { + $code = self::ordUTF8($character); + if ($code == -1) { + $to_add = $unknown_character; + } + else { + $to_add = $this->replace($code, $langcode, $unknown_character); + } - // Split on words to handle mixed case per word. - $words = explode(' ', $string); - foreach ($words as $key => $word) { - $results[$key] = ''; - - // String is mixed case if it consists of both uppercase and lowercase - // letters. To accurately check this, remove any numbers and check that - // remaining characters are not all uppercase and not all lowercase. - $alpha_string = preg_replace('/\\d/', '', $word); - $mixed_case = (strlen($alpha_string) > 1 && mb_strtolower($alpha_string) !== $alpha_string && mb_strtoupper($alpha_string) !== $alpha_string); - - // Split into Unicode characters and transliterate each one. - foreach (preg_split('//u', $word, 0, PREG_SPLIT_NO_EMPTY) as $character) { - $code = self::ordUTF8($character); - if ($code == -1) { - $to_add = $unknown_character; - } - else { - $to_add = $this->replace($code, $langcode, $unknown_character); - } - - // Check if this exceeds the maximum allowed length. - if (isset($max_length)) { - $length += strlen($to_add); - if ($length > $max_length) { - // There is no more space. - $results = array_filter($results); - return implode(' ', $results); - } - } - - // If this is a capitalised letter of a mixed case word, only capitalise - // the first letter and lowercase any subsequent letters. - if ($mixed_case && strlen($to_add) > 1 && mb_strtoupper($to_add) === $to_add) { - $to_add = ucfirst(strtolower($to_add)); + // Check if this exceeds the maximum allowed length. + if (isset($max_length)) { + $length += strlen($to_add); + if ($length > $max_length) { + // There is no more space. + return $result; } - - $results[$key] .= $to_add; } - // Add space to count for max length. - $length++; + $result .= $to_add; } - $results = array_filter($results); - return implode(' ', $results); + return $result; } /** diff --git a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php index eb468c8b80309484bb28680dbaf9ae76f975074c..924f3e93000b8799b7054da9574578f052cf0604 100644 --- a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php +++ b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php @@ -146,19 +146,6 @@ public function providerTestPhpTransliteration() { ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'], // Max length. ['de', $two_byte, 'Ae Oe', '?', 5], - // Test strings with mixed case words where a single capital character - // results in multiple characters. The first character should remain - // capitalised but subsequent resulting characters should be lowercase. - // For example a result of the transliteration should be 'Shtrikhkod' - // not 'SHtrikhkod'. Numbers should not be used in determining whether a - // string is mixed case. - ['ru', 'Штрихкод', 'Shtrikhkod'], - ['bg', 'Щастие', 'Schastie'], - ['bg', 'Щ1', 'SCH1'], - ['bg', 'Щ1Щ', 'SCH1SCH'], - ['bg', 'Щ1щ', 'Sch1sch'], - ['bg', 'Щастие ЩЩЩ', 'Schastie SCHSCHSCH'], - ['bg', 'Щастие ЩЩЩ. Щастие! Щастие', 'Schastie SCHSCHSCH. Schastie! Schastie'], ]; }