<?php

/**
 * @file
 * Helper functions to clean strings to make them URL safe and translatable.
 *
 * This was copied directly from pathauto and put here to be made available
 * to all, because more things than just pathauto want URL safe strings.
 *
 * To use, simply:
 * @code
 * ctools_include('cleanstring');
 * $output = ctools_cleanstring($string);
 * @endcode
 *
 * You can add a variety of settings as an array in the second argument,
 * including words to ignore, how to deal with punctuation, length
 * limits, and more. See the function itself for options.
 */

/**
 * Matches Unicode character classes.
 *
 * See the General_Category values of the Unicode Character Database for the
 * meaning of the two-letter class codes listed below.
 *
 * The index only contains the following character classes:
 *   Lu  Letter, Uppercase
 *   Ll  Letter, Lowercase
 *   Lt  Letter, Titlecase
 *   Lo  Letter, Other
 *   Nd  Number, Decimal Digit
 *   No  Number, Other
 *
 * Copied from search.module's PREG_CLASS_SEARCH_EXCLUDE.
 */

 * Clean up a string value provided by a module.
 * Resulting string contains only alphanumerics and separators.
 * @param $string
 *   A string to clean.
 * @param $settings
 *   An optional array of settings to use.
 *   - 'clean slash': If set, slashes will be cleaned. Defaults to TRUE,
 *     so you have to explicitly set this to FALSE to not clean the
 *     slashes.
 *   - 'ignore words': Set to an array of words that will be removed
 *     rather than made safe. Defaults to an empty array.
 *   - 'separator': Change spaces and untranslatable characters to
 *     this character. Defaults to '-'.
 *   - 'replacements': An array of direct replacements to be made that will
 *     be implemented via strtr(). Defaults to an empty array.
 *   - 'transliterate': If set, use the transliteration replacements. If set
 *     to an array, use these replacements instead of the defaults in CTools.
 *     Defaults to FALSE.
 *   - 'reduce ascii': If set to TRUE further reduce to ASCII96 only. Defaults
 *      to TRUE.
 *   - 'max length': If set to a number, reduce the resulting string to this
 *      maximum length. Defaults to no maximum length.
 *   - 'lower case': If set to TRUE, convert the result to lower case.
 *     Defaults to false.
 *   These settings will be passed through drupal_alter.
 * @return
 *   The cleaned string.
function ctools_cleanstring($string, $settings = array()) {
  $settings += array(
    'clean slash' => TRUE,
    'ignore words' => array(),
    'separator' => '-',
    'replacements' => array(),
    'transliterate' => FALSE,
    'reduce ascii' => TRUE,
    'max length' => FALSE,
    'lower case' => FALSE,

  // Allow modules to make other changes to the settings.
  if (isset($settings['clean id'])) {
    drupal_alter('ctools_cleanstring_' . $settings['clean id'], $settings);

  drupal_alter('ctools_cleanstring', $settings);

  $output = $string;

  // Do any replacements the user selected up front.
  if (!empty($settings['replacements'])) {
    $output = strtr($output, $settings['replacements']);

  // Remove slashes if instructed to do so.
  if ($settings['clean slash']) {
    $output = str_replace('/', '', $output);

  if (!empty($settings['transliterate']) && module_exists('transliteration')) {
    $output = transliteration_get($output);

  // Reduce to the subset of ASCII96 letters and numbers
  if ($settings['reduce ascii']) {
    $pattern = '/[^a-zA-Z0-9\/]+/ ';
    $output = preg_replace($pattern, $settings['separator'], $output);

  // Get rid of words that are on the ignore list
  if (!empty($settings['ignore words'])) {
    $ignore_re = '\b'. preg_replace('/,/', '\b|\b', $settings['ignore words']) .'\b';

    if (function_exists('mb_eregi_replace')) {
      $output = mb_eregi_replace($ignore_re, '', $output);
    else {
      $output = preg_replace("/$ignore_re/i", '', $output);

  // Always replace whitespace with the separator.
  $output = preg_replace('/\s+/', $settings['separator'], $output);

  // In preparation for pattern matching,
  // escape the separator if and only if it is not alphanumeric.
  if (isset($settings['separator'])) {
    if (preg_match('/^[^'. CTOOLS_PREG_CLASS_ALNUM .']+$/uD', $settings['separator'])) {
      $seppattern = $settings['separator'];
    else {
      $seppattern = '\\'. $settings['separator'];
    // Trim any leading or trailing separators (note the need to
    $output = preg_replace("/^$seppattern+|$seppattern+$/", '', $output);

    // Replace multiple separators with a single one
    $output = preg_replace("/$seppattern+/", $settings['separator'], $output);

  // Enforce the maximum component length
  if (!empty($settings['max length'])) {
    $output = ctools_cleanstring_truncate($output, $settings['max length'], $settings['separator']);

  if (!empty($settings['lower case'])) {
    $output = drupal_strtolower($output);
  return $output;

 * A friendly version of truncate_utf8.
 * @param $string
 *   The string to be truncated.
 * @param $length
 *   An integer for the maximum desired length.
 * @param $separator
 *   A string which contains the word boundary such as - or _.
 * @return
 *  The string truncated below the maxlength.
function ctools_cleanstring_truncate($string, $length, $separator) {
  if (drupal_strlen($string) > $length) {
    $string = drupal_substr($string, 0, $length + 1); // leave one more character
    if ($last_break = strrpos($string, $separator)) { // space exists AND is not on position 0
      $string = substr($string, 0, $last_break);
    else {
      $string = drupal_substr($string, 0, $length);
  return $string;