' . t("The filter module allows administrators to configure text input formats for use on your site. An input format defines the HTML tags, codes, and other input allowed in both content and comments, and is a key feature in guarding against potentially damaging input from malicious users. Two input formats included by default are Filtered HTML (which allows only an administrator-approved subset of HTML tags) and Full HTML (which allows the full set of HTML tags). Additional input formats may be created by an administrator.") . '

'; $output .= '

' . t('Each input format uses filters to manipulate text, and most input formats apply several different filters to text in a specific order. Each filter is designed for a specific purpose, and generally either adds, removes or transforms elements within user-entered text before it is displayed. A filter does not change the actual content of a post, but instead, modifies it temporarily before it is displayed. A filter may remove unapproved HTML tags, for instance, while another automatically adds HTML to make links referenced in text clickable.') . '

'; $output .= '

' . t('Users with access to more than one input format can use the Input format fieldset to choose between available input formats when creating or editing multi-line content. Administrators determine the input formats available to each user role, select a default input format, and control the order of formats listed in the Input format fieldset.') . '

'; $output .= '

' . t('For more information, see the online handbook entry for Filter module.', array('@filter' => 'http://drupal.org/handbook/modules/filter/')) . '

'; return $output; case 'admin/settings/filters': $output = '

' . t('Use the list below to review the input formats available to each user role, to select a default input format, and to control the order of formats listed in the Input format fieldset. (The Input format fieldset is displayed below textareas when users with access to more than one input format create multi-line content.) The input format selected as Default is available to all users and, unless another format is selected, is applied to all content. All input formats are available to users in roles with the "administer filters" permission.') . '

'; $output .= '

' . t('Since input formats, if available, are presented in the same order as the list below, it may be helpful to arrange the formats in descending order of your preference for their use. To change the order of an input format, grab a drag-and-drop handle under the Name column and drag to a new location in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the Save changes button at the bottom of the page.') . '

'; return $output; case 'admin/settings/filters/%': return '

' . t('Every filter performs one particular change on the user input, for example stripping out malicious HTML or making URLs clickable. Choose which filters you want to apply to text in this input format. If you notice some filters are causing conflicts in the output, you can rearrange them.', array('@rearrange' => url('admin/settings/filters/' . $arg[3] . '/order'))) . '

'; case 'admin/settings/filters/%/configure': return '

' . t('If you cannot find the settings for a certain filter, make sure you have enabled it on the edit tab first.', array('@url' => url('admin/settings/filters/' . $arg[3]))) . '

'; case 'admin/settings/filters/%/order': $output = '

' . t('Because of the flexible filtering system, you might encounter a situation where one filter prevents another from doing its job. For example: a word in an URL gets converted into a glossary term, before the URL can be converted to a clickable link. When this happens, rearrange the order of the filters.') . '

'; $output .= '

' . t("Filters are executed from top-to-bottom. To change the order of the filters, modify the values in the Weight column or grab a drag-and-drop handle under the Name column and drag filters to new locations in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the Save configuration button at the bottom of the page.") . '

';
      return $output;
  }
}

/**
 * Implementation of hook_theme().
 *
 * Declares four theme hooks: the two admin form hooks (one 'form' argument,
 * file filter.admin.inc), the tips list (file filter.pages.inc) and the
 * argument-less "more info" link.
 */
function filter_theme() {
  $hooks = array();

  // Both admin hooks share the same definition.
  foreach (array('filter_admin_overview', 'filter_admin_order') as $admin_hook) {
    $hooks[$admin_hook] = array(
      'arguments' => array('form' => NULL),
      'file' => 'filter.admin.inc',
    );
  }

  $hooks['filter_tips'] = array(
    'arguments' => array('tips' => NULL, 'long' => FALSE),
    'file' => 'filter.pages.inc',
  );
  $hooks['filter_tips_more_info'] = array(
    'arguments' => array(),
  );

  return $hooks;
}

/**
 * Implementation of hook_menu().
 *
 * Registers the input format administration pages under
 * admin/settings/filters (all guarded by the 'administer filters'
 * permission) and the publicly accessible filter/tips page.
 */
function filter_menu() {
  $items = array();

  // Path prefixes and the access arguments shared by the admin pages.
  $admin = 'admin/settings/filters';
  $per_format = $admin . '/%filter_format';
  $admin_only = array('administer filters');
  // Position of the %filter_format placeholder within the path.
  $format_arg = array(3);

  $items[$admin] = array(
    'title' => 'Input formats',
    'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_overview'),
    'access arguments' => $admin_only,
  );
  $items[$admin . '/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[$admin . '/add'] = array(
    'title' => 'Add input format',
    'page callback' => 'filter_admin_format_page',
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items[$admin . '/delete'] = array(
    'title' => 'Delete input format',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_delete'),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
  );
  $items['filter/tips'] = array(
    'title' => 'Compose tips',
    'page callback' => 'filter_tips_long',
    'access callback' => TRUE,
    'type' => MENU_SUGGESTED_ITEM,
  );
  $items[$per_format] = array(
    'type' => MENU_CALLBACK,
    'title callback' => 'filter_admin_format_title',
    'title arguments' => $format_arg,
    'page callback' => 'filter_admin_format_page',
    'page arguments' => $format_arg,
    'access arguments' => $admin_only,
  );
  $items[$per_format . '/edit'] = array(
    'title' => 'Edit',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
  );
  $items[$per_format . '/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'filter_admin_configure_page',
    'page arguments' => $format_arg,
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items[$per_format . '/order'] = array(
    'title' => 'Rearrange',
    'page callback' => 'filter_admin_order_page',
    'page arguments' => $format_arg,
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
  );

  return $items;
}

/**
 * Menu loader for the %filter_format placeholder.
 *
 * @param $arg
 *   The format id taken from the path.
 * @return
 *   Whatever filter_formats() returns for that id: the format object, or
 *   FALSE when the id is not among the formats available to the current user.
 */
function filter_format_load($arg) {
  $format = filter_formats($arg);
  return $format;
}

/**
 * Title callback: the page title is the name of the input format.
 */
function filter_admin_format_title($format) {
  $title = $format->name;
  return $title;
}

/**
 * Implementation of hook_perm().
 */
function filter_perm() {
  $title = t('Administer filters');
  $warning = t('Warning: Give to trusted roles only; this permission has security implications.');
  $description = t('Manage input formats and filters, and select which roles may use them. %warning', array('%warning' => $warning));

  $permissions = array();
  $permissions['administer filters'] = array(
    'title' => $title,
    'description' => $description,
  );
  return $permissions;
}

/**
 * Implementation of hook_cron().
 *
 * Expire outdated filter cache entries
 */
function filter_cron() {
  $bin = 'cache_filter';
  cache_clear_all(NULL, $bin);
}

/**
 * Implementation of hook_filter_tips().
 */
function filter_filter_tips($delta, $format, $long = FALSE) {
  global $base_url;
  switch ($delta) {
    case 0:
      if ($allowed_html = variable_get("allowed_html_$format", '
    1. ')) { switch ($long) { case 0: return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)); case 1: $output = '

      ' . t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) . '

      '; if (!variable_get("filter_html_help_$format", 1)) { return $output; } $output .= '

      ' . t('This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.') . '

      '; $output .= '

      ' . t('For more information see W3C\'s HTML Specifications or use your favorite search engine to find other sites that explain HTML.', array('@html-specifications' => 'http://www.w3.org/TR/html/')) . '

      '; $tips = array( 'a' => array( t('Anchors are used to make links to other pages.'), '' . variable_get('site_name', 'Drupal') . ''), 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with
      line break')), 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '

      ' . t('Paragraph one.') . '

      ' . t('Paragraph two.') . '

      '), 'strong' => array( t('Strong'), '' . t('Strong') . ''), 'em' => array( t('Emphasized'), '' . t('Emphasized') . ''), 'cite' => array( t('Cited'), '' . t('Cited') . ''), 'code' => array( t('Coded text used to show programming source code'), '' . t('Coded') . ''), 'b' => array( t('Bolded'), '' . t('Bolded') . ''), 'u' => array( t('Underlined'), '' . t('Underlined') . ''), 'i' => array( t('Italicized'), '' . t('Italicized') . ''), 'sup' => array( t('Superscripted'), t('Superscripted')), 'sub' => array( t('Subscripted'), t('Subscripted')), 'pre' => array( t('Preformatted'), '
      ' . t('Preformatted') . '
      '), 'abbr' => array( t('Abbreviation'), t('Abbrev.')), 'acronym' => array( t('Acronym'), t('TLA')), 'blockquote' => array( t('Block quoted'), '
      ' . t('Block quoted') . '
      '), 'q' => array( t('Quoted inline'), '' . t('Quoted inline') . ''), // Assumes and describes tr, td, th. 'table' => array( t('Table'), '
      ' . t('Table header') . '
      ' . t('Table cell') . '
      '), 'tr' => NULL, 'td' => NULL, 'th' => NULL, 'del' => array( t('Deleted'), '' . t('Deleted') . ''), 'ins' => array( t('Inserted'), '' . t('Inserted') . ''), // Assumes and describes li. 'ol' => array( t('Ordered list - use the <li> to begin each list item'), '
      1. ' . t('First item') . '
      2. ' . t('Second item') . '
      '), 'ul' => array( t('Unordered list - use the <li> to begin each list item'), '
      • ' . t('First item') . '
      • ' . t('Second item') . '
      '), 'li' => NULL, // Assumes and describes dt and dd. 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), '
      ' . t('First term') . '
      ' . t('First definition') . '
      ' . t('Second term') . '
      ' . t('Second definition') . '
      '), 'dt' => NULL, 'dd' => NULL, 'h1' => array( t('Heading'), '

      ' . t('Title') . '

      '), 'h2' => array( t('Heading'), '

      ' . t('Subtitle') . '

      '), 'h3' => array( t('Heading'), '

      ' . t('Subtitle three') . '

      '), 'h4' => array( t('Heading'), '

      ' . t('Subtitle four') . '

      '), 'h5' => array( t('Heading'), '
      ' . t('Subtitle five') . '
      '), 'h6' => array( t('Heading'), '
      ' . t('Subtitle six') . '
      ') ); $header = array(t('Tag Description'), t('You Type'), t('You Get')); preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out); foreach ($out[1] as $tag) { if (array_key_exists($tag, $tips)) { if ($tips[$tag]) { $rows[] = array( array('data' => $tips[$tag][0], 'class' => 'description'), array('data' => '' . check_plain($tips[$tag][1]) . '', 'class' => 'type'), array('data' => $tips[$tag][1], 'class' => 'get') ); } } else { $rows[] = array( array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3), ); } } $output .= theme('table', $header, $rows); $output .= '

      ' . t('Most unusual characters can be directly entered without any problems.') . '

      '; $output .= '

      ' . t('If you do encounter problems, try using HTML character entities. A common example looks like &amp; for an ampersand & character. For a full list of entities see HTML\'s entities page. Some of the available characters include:', array('@html-entities' => 'http://www.w3.org/TR/html4/sgml/entities.html')) . '

      ';
          $entities = array(
            array(t('Ampersand'), '&'),
            array(t('Greater than'), '>'),
            array(t('Less than'), '<'),
            array(t('Quotation mark'), '"'),
          );
          $header = array(t('Character Description'), t('You Type'), t('You Get'));
          unset($rows);
          foreach ($entities as $entity) {
            $rows[] = array(
              array('data' => $entity[0], 'class' => 'description'),
              array('data' => '' . check_plain($entity[1]) . '', 'class' => 'type'),
              array('data' => $entity[1], 'class' => 'get')
            );
          }
          $output .= theme('table', $header, $rows);
          return $output;
        }
      }
      break;

    case 1:
      switch ($long) {
        case 0:
          return t('Lines and paragraphs break automatically.');
        case 1:
          return t('Lines and paragraphs are automatically recognized. The <br /> line break, <p> paragraph and </p> close paragraph tags are inserted automatically. If paragraphs are not recognized simply add a couple blank lines.');
      }
      break;

    case 2:
      return t('Web page addresses and e-mail addresses turn into links automatically.');
      break;

    case 4:
      return t('No HTML tags allowed');
      break;
  }
}

/**
 * Retrieve a list of input formats.
 *
 * The list is loaded once per request and kept in a static variable. Users
 * with the 'administer filters' permission get every format; everyone else
 * gets the default format plus the formats whose 'roles' column names one of
 * their roles.
 *
 * @param $index
 *   Optional format id. When given, only that format is returned.
 * @return
 *   Without $index: an array of format objects keyed by format id, ordered by
 *   weight. With $index: the matching format object, or FALSE if that format
 *   is not in the list.
 */
function filter_formats($index = NULL) {
  global $user;
  static $formats;

  // Administrators can always use all input formats.
  $all = user_access('administer filters');

  if (!isset($formats)) {
    $formats = array();

    $query = db_select('filter_format', 'f');
    $query->addField('f', 'format', 'format');
    $query->addField('f', 'name', 'name');
    $query->addField('f', 'roles', 'roles');
    $query->addField('f', 'cache', 'cache');
    $query->addField('f', 'weight', 'weight');
    $query->orderBy('weight');

    // Build query for selecting the format(s) based on the user's roles.
    if (!$all) {
      $or = db_or()->condition('format', variable_get('filter_default_format', 1));
      foreach ($user->roles as $rid => $role) {
        // Match the role id only as a complete, comma-delimited entry. A bare
        // '%1%' would also match role ids 10, 11, 21, ... and hand out formats
        // to the wrong roles.
        // NOTE(review): assumes 'roles' is stored as ",rid,rid," with leading
        // and trailing commas, as the format admin form writes it - confirm.
        $or->condition('roles', '%,' . (int) $rid . ',%', 'LIKE');
      }
      $query->condition($or);
    }

    $formats = $query->execute()->fetchAllAssoc('format');
  }
  if (isset($index)) {
    return isset($formats[$index]) ? $formats[$index] : FALSE;
  }
  return $formats;
}

/**
 * Build a list of all filters.
 *
 * Asks every module implementing hook_filter() for its 'list' of filters.
 *
 * @return
 *   An array of objects with 'module', 'delta' and 'name' properties, keyed
 *   by "module/delta" and sorted by name.
 */
function filter_list_all() {
  $filters = array();

  foreach (module_implements('filter') as $module) {
    $function = $module . '_filter';
    $list = $function('list');
    // Ignore implementations that do not return a list.
    if (isset($list) && is_array($list)) {
      foreach ($list as $delta => $name) {
        $filters[$module . '/' . $delta] = (object)array('module' => $module, 'delta' => $delta, 'name' => $name);
      }
    }
  }

  uasort($filters, '_filter_list_cmp');

  return $filters;
}

/**
 * Helper function for sorting the filter list by filter name.
 */
function _filter_list_cmp($a, $b) {
  return strcmp($a->name, $b->name);
}

/**
 * Resolve a format id, including the default format.
 *
 * @param $format
 *   A format id, or FILTER_FORMAT_DEFAULT.
 * @return
 *   The 'filter_default_format' variable (1 if unset) when $format equals
 *   FILTER_FORMAT_DEFAULT, otherwise $format unchanged.
 */
function filter_resolve_format($format) {
  return $format == FILTER_FORMAT_DEFAULT ? variable_get('filter_default_format', 1) : $format;
}

/**
 * Check if text in a certain input format is allowed to be cached.
 *
 * The 'cache' column of the format is looked up once per request and format.
 */
function filter_format_allowcache($format) {
  static $cache = array();
  $format = filter_resolve_format($format);
  if (!isset($cache[$format])) {
    $cache[$format] = db_result(db_query('SELECT cache FROM {filter_format} WHERE format = %d', $format));
  }
  return $cache[$format];
}

/**
 * Retrieve a list of filters for a certain format.
 *
 * @param $format
 *   The format id.
 * @return
 *   An array of filter objects keyed by "module/delta", ordered by weight,
 *   module and delta. Rows whose module no longer lists the delta are
 *   dropped. The result is cached per format for the rest of the request.
 */
function filter_list_format($format) {
  static $filters = array();

  if (!isset($filters[$format])) {
    $filters[$format] = array();
    $result = db_query("SELECT * FROM {filter} WHERE format = %d ORDER BY weight, module, delta", $format);
    while ($filter = db_fetch_object($result)) {
      $list = module_invoke($filter->module, 'filter', 'list');
      // Only keep filters that their module still provides.
      if (isset($list) && is_array($list) && isset($list[$filter->delta])) {
        $filter->name = $list[$filter->delta];
        $filters[$format][$filter->module . '/' . $filter->delta] = $filter;
      }
    }
  }

  return $filters[$format];
}

/**
 * @name Filtering functions
 * @{
 * Modules which need to have content filtered can use these functions to
 * interact with the filter system.
 *
 * For more info, see the hook_filter() documentation.
*
 * Note: because filters can inject JavaScript or execute PHP code, security is
 * vital here. When a user supplies a $format, you should validate it with
 * filter_access($format) before accepting/using it. This is normally done in
 * the validation stage of the node system. You should for example never make a
 * preview of content in a disallowed format.
 */

/**
 * Run all the enabled filters on a piece of text.
 *
 * @param $text
 *    The text to be filtered.
 * @param $format
 *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT for
 *    the default format.
 * @param $langcode
 *    Optional: the language code of the text to be filtered, e.g. 'en' for
 *    English. This allows filters to be language aware so language specific
 *    text replacement can be implemented.
 * @param $check
 *    Whether to check the $format with filter_access() first. Defaults to TRUE.
 *    Note that this will check the permissions of the current user, so you
 *    should specify $check = FALSE when viewing other people's content. When
 *    showing content that is not (yet) stored in the database (eg. upon preview),
 *    set to TRUE so the user's permissions are checked.
 * @return
 *    The filtered text, or the translated string 'n/a' when $text is not set
 *    or the access check fails.
 */
function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $langcode = '', $check = TRUE) {
  // When $check = TRUE, do an access check on $format.
  if (isset($text) && (!$check || filter_access($format))) {
    $format = filter_resolve_format($format);

    // Check for a cached version of this piece of text.
    $cache_id = $format . ':' . $langcode . ':' . md5($text);
    if ($cached = cache_get($cache_id, 'cache_filter')) {
      return $cached->data;
    }

    // See if caching is allowed for this format.
    $cache = filter_format_allowcache($format);

    // Convert all Windows and Mac newlines to a single newline,
    // so filters only need to deal with one possibility.
    $text = str_replace(array("\r\n", "\r"), "\n", $text);

    // Get a complete list of filters, ordered properly.
    $filters = filter_list_format($format);

    // Give filters the chance to escape HTML-like data such as code or formulas.
    foreach ($filters as $filter) {
      $text = module_invoke($filter->module, 'filter', 'prepare', $filter->delta, $format, $text, $langcode, $cache_id);
    }

    // Perform filtering.
    foreach ($filters as $filter) {
      $text = module_invoke($filter->module, 'filter', 'process', $filter->delta, $format, $text, $langcode, $cache_id);
    }

    // Store in cache with a minimum expiration time of 1 day.
    if ($cache) {
      cache_set($cache_id, $text, 'cache_filter', REQUEST_TIME + (60 * 60 * 24));
    }
  }
  else {
    $text = t('n/a');
  }

  return $text;
}

/**
 * Generate a selector for choosing a format in a form.
 *
 * @ingroup forms
 * @see filter_form_validate()
 * @param $value
 *   The ID of the format that is currently selected.
 * @param $weight
 *   The weight of the input format.
 * @param $parents
 *   Required when defining multiple input formats on a single node or having a different parent than 'format'.
 * @return
 *   HTML for the form element.
 */
function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
  $value = filter_resolve_format($value);
  $formats = filter_formats();

  $extra = theme('filter_tips_more_info');

  if (count($formats) > 1) {
    $form = array(
      '#type' => 'fieldset',
      '#title' => t('Input format'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#weight' => $weight,
      '#element_validate' => array('filter_form_validate'),
    );
    // Multiple formats available: display radio buttons with tips.
    foreach ($formats as $format) {
      // Generate the parents as the autogenerator does, so we will have a
      // unique id for each radio button.
      $parents_for_id = array_merge($parents, array($format->format));
      $form[$format->format] = array(
        '#type' => 'radio',
        '#title' => $format->name,
        '#default_value' => $value,
        '#return_value' => $format->format,
        '#parents' => $parents,
        '#description' => theme('filter_tips', _filter_tips($format->format, FALSE)),
        '#id' => form_clean_id('edit-' . implode('-', $parents_for_id)),
      );
    }
  }
  else {
    // Only one format available: use a hidden form item and only show tips.
    $format = array_shift($formats);
    $form[$format->format] = array('#type' => 'value', '#value' => $format->format, '#parents' => $parents);
    $tips = _filter_tips(variable_get('filter_default_format', 1), FALSE);
    $form['format']['guidelines'] = array(
      '#title' => t('Formatting guidelines'),
      '#markup' => theme('filter_tips', $tips, FALSE),
    );
  }
  $form[] = array('#markup' => $extra);
  return $form;
}

/**
 * Element validator for the fieldset built by filter_form().
 *
 * Accepts the submission as soon as one child's submitted '#value' equals
 * that child's '#return_value'. Otherwise flags a form error and logs the
 * illegal choice.
 */
function filter_form_validate($form) {
  // Value of the last child inspected, for the log entry below. Tracked
  // separately so an element without children does not read an undefined
  // loop variable after the loop.
  $choice = NULL;
  foreach (element_children($form) as $key) {
    if ($form[$key]['#value'] == $form[$key]['#return_value']) {
      return;
    }
    $choice = $form[$key]['#value'];
  }
  form_error($form, t('An illegal choice has been detected. Please contact the site administrator.'));
  watchdog('form', 'Illegal choice %choice in %name element.', array('%choice' => $choice, '%name' => empty($form['#title']) ? $form['#parents'][0] : $form['#title']), WATCHDOG_ERROR);
}

/**
 * Returns TRUE if the user is allowed to access this format.
 *
 * Users with 'administer filters' may use every format and everyone may use
 * the default format; any other format must appear in filter_formats().
 */
function filter_access($format) {
  $format = filter_resolve_format($format);
  if (user_access('administer filters') || ($format == variable_get('filter_default_format', 1))) {
    return TRUE;
  }
  else {
    $formats = filter_formats();
    return isset($formats[$format]);
  }
}

/**
 * @} End of "Filtering functions".
 */

/**
 * Helper function for fetching filter tips.
*/
function _filter_tips($format, $long = FALSE) {
  if ($format == -1) {
    $formats = filter_formats();
  }
  else {
    // A format id with no matching row yields FALSE; skip it instead of
    // dereferencing FALSE as a format object in the loop below.
    $row = db_fetch_object(db_query("SELECT * FROM {filter_format} WHERE format = %d", $format));
    $formats = $row ? array($row) : array();
  }

  $tips = array();

  foreach ($formats as $format) {
    $filters = filter_list_format($format->format);

    $tips[$format->name] = array();
    foreach ($filters as $id => $filter) {
      // Only filters that actually return a tip are listed.
      if ($tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $format->format, $long)) {
        $tips[$format->name][] = array('tip' => $tip, 'id' => $id);
      }
    }
  }

  return $tips;
}

/**
 * Format a link to the more extensive filter tips.
 *
 * @ingroup themeable
 */
function theme_filter_tips_more_info() {
  return '

      ' . l(t('More information about formatting options'), 'filter/tips') . '

      ';
}

/**
 * @name Standard filters
 * @{
 * Filters implemented by the filter.module.
 */

/**
 * Implementation of hook_filter(). Contains a basic set of essential filters.
 * - HTML filter (delta 0):
 *     Validates user-supplied HTML, transforming it as necessary.
 * - Line break converter (delta 1):
 *     Converts newlines into paragraph and break tags.
 * - URL and e-mail address filter (delta 2):
 *     Turns web and e-mail addresses into clickable links.
 * - HTML corrector (delta 3):
 *     Corrects faulty and chopped off HTML.
 * - HTML escaper (delta 4):
 *     Escapes all HTML tags, so they are displayed instead of interpreted.
 *
 * @param $op
 *   One of 'list', 'description', 'process' or 'settings'. Any other
 *   operation (including 'prepare') returns $text unchanged.
 * @param $delta
 *   Which of the filters above is meant.
 * @param $format
 *   The input format id the filter runs in.
 * @param $text
 *   The text to process.
 */
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  switch ($op) {
    case 'list':
      return array(0 => t('Limit allowed HTML tags'), 1 => t('Convert line breaks'), 2 => t('Convert URLs into links'), 3 => t('Correct broken HTML'), 4 => t('Escape all HTML'));

    case 'description':
      switch ($delta) {
        case 0:
          return t('Allows you to restrict the HTML tags the user can use. It will also remove harmful content such as JavaScript events, JavaScript URLs and CSS styles from those tags that are not removed.');
        case 1:
          return t('Converts line breaks into HTML (i.e. <br> and <p>) tags.');
        case 2:
          return t('Turns web and e-mail addresses into clickable links.');
        case 3:
          return t('Corrects faulty and chopped off HTML in postings.');
        case 4:
          return t('Escapes all HTML tags, so they will be visible instead of being effective.');
        default:
          return;
      }

    case 'process':
      switch ($delta) {
        case 0:
          return _filter_html($text, $format);
        case 1:
          return _filter_autop($text);
        case 2:
          return _filter_url($text, $format);
        case 3:
          return _filter_htmlcorrector($text);
        case 4:
          return trim(check_plain($text));
        default:
          return $text;
      }

    case 'settings':
      // Only the HTML filter and the URL filter have settings forms.
      switch ($delta) {
        case 0:
          return _filter_html_settings($format);
        case 2:
          return _filter_url_settings($format);
        default:
          return;
      }

    default:
      return $text;
  }
}

/**
 * Settings for the HTML filter.
*/ function _filter_html_settings($format) { $form['filter_html'] = array( '#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, ); $form['filter_html']["allowed_html_$format"] = array( '#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '
      ', $chunk); $chunk = preg_replace('|

      \s*

      \n?|', '', $chunk); // under certain strange conditions it could create a P of entirely whitespace $chunk = preg_replace('!

      \s*(]*>)!', "$1", $chunk); $chunk = preg_replace('!(]*>)\s*

      !', "$1", $chunk); $chunk = preg_replace('|(?)\s*\n|', "
      \n", $chunk); // make line breaks $chunk = preg_replace('!(]*>)\s*
      !', "$1", $chunk); $chunk = preg_replace('!
      (\s*)!', '$1', $chunk);
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&$1', $chunk);
    }
    $output .= $chunk;
  }
  return $output;
}

/**
 * Very permissive XSS/HTML filter for admin-only use.
 *
 * Use only for fields where it is impractical to use the
 * whole filter system, but where some (mainly inline) mark-up
 * is desired (so check_plain() is not acceptable).
 *
 * Allows all tags that can be used inside an HTML body, save
 * for scripts and styles.
 */
function filter_xss_admin($string) {
  return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
}

/**
 * Filters XSS. Based on kses by Ulf Harnhammar, see
 * http://sourceforge.net/projects/kses
 *
 * For examples of various XSS attacks, see:
 * http://ha.ckers.org/xss.html
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers
 * - Makes sure all HTML entities are well-formed
 * - Makes sure all HTML tags and attributes are well-formed
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that can cause
 *   an XSS attack.
 * @param $allowed_tags
 *   An array of allowed tags.
 * @return
 *   The sanitized string, or an empty string when $string is not valid UTF-8.
 */
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!drupal_validate_utf8($string)) {
    return '';
  }
  // Store the input format
  _filter_xss_split($allowed_tags, TRUE);
  // Remove NULL characters (ignored by some browsers)
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities. Every ampersand is escaped first, so that only
  // the well-formed entities matched below are turned back into entities.
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist
  // Named entities
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
  // Decimal numeric entities
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);

  // The pattern uses the /x modifier: each "#" comment runs to the end of its
  // line, so the pattern has to stay split over several lines.
  return preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', '_filter_xss_split', $string);
}

/**
 * Processes an HTML tag.
 *
 * @param $m
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then the array has one element, the HTML tag to process.
 * @param $store
 *   Whether to store $m.
 * @return
 *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
 *   version of the HTML element.
 */
function _filter_xss_split($m, $store = FALSE) {
  static $allowed_html;

  if ($store) {
    // Flip the tag list so that membership is a simple isset() lookup.
    $allowed_html = array_flip($m);
    return;
  }

  $string = $m[1];

  if (substr($string, 0, 1) != '<') {
    // We matched a lone ">" character
    return '&gt;';
  }
  elseif (strlen($string) == 1) {
    // We matched a lone "<" character
    return '&lt;';
  }

  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
    // Seriously malformed
    return '';
  }

  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  if (!isset($allowed_html[strtolower($elem)])) {
    // Disallowed HTML element
    return '';
  }

  if ($slash != '') {
    // Closing tag of an allowed element: keep it, without any attributes.
    return "</$elem>";
  }

  // Is there a closing XHTML slash at the end of the attributes?
  $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
  $xhtml_slash = $count ? ' /' : '';

  // Clean up attributes
  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
  $attr2 = preg_replace('/[<>]/', '', $attr2);
  $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

  return "<$elem$attr2$xhtml_slash>";
}

/**
 * Processes a string of HTML attributes.
 *
 * Walks the attribute list with a small state machine: mode 0 expects an
 * attribute name, mode 1 an equals sign (or the end of a valueless
 * attribute), mode 2 an attribute value. "style" and "on*" attributes are
 * dropped; every value is passed through filter_xss_bad_protocol().
 *
 * @param $attr
 *   The attribute part of an HTML tag.
 * @return
 *   Cleaned up version of the HTML attributes, as an array with one entry per
 *   attribute.
 */
function _filter_xss_attributes($attr) {
  $attrarr = array();
  $mode = 0;
  $attrname = '';
  // Whether the attribute currently being parsed must be dropped.
  $skip = FALSE;

  while (strlen($attr) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance
        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
          $attrname = strtolower($match[1]);
          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
          $working = $mode = 1;
          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        }
        break;

      case 1:
        // Equals sign or valueless ("selected")
        if (preg_match('/^\s*=\s*/', $attr)) {
          $working = 1;
          $mode = 2;
          $attr = preg_replace('/^\s*=\s*/', '', $attr);
          break;
        }

        if (preg_match('/^\s+/', $attr)) {
          $working = 1;
          $mode = 0;
          if (!$skip) {
            $attrarr[] = $attrname;
          }
          $attr = preg_replace('/^\s+/', '', $attr);
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
          break;
        }

        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname='$thisval'";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
          break;
        }

        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
          $thisval = filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attrarr[] = "$attrname=\"$thisval\"";
          }
          $working = 1;
          $mode = 0;
          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        }
        break;
    }

    if ($working == 0) {
      // not well formed, remove and try again
      // (/x pattern: the "#" comments end at the line break, so the pattern
      // must remain on separate lines.)
      $attr = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attr);
      $mode = 0;
    }
  }

  // the attribute list ends with a valueless attribute like "selected"
  // (dropped like any other attribute when it is "style" or "on*").
  if ($mode == 1 && !$skip) {
    $attrarr[] = $attrname;
  }
  return $attrarr;
}

/**
 * Processes an HTML attribute value and ensures it does not contain an URL
 * with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with the attribute value.
 * @param $decode
 *   Whether to decode entities in the $string. Set to FALSE if the $string
 *   is in plain text, TRUE otherwise. Defaults to TRUE.
 * @return
 *   Cleaned up and HTML-escaped version of $string.
 */
function filter_xss_bad_protocol($string, $decode = TRUE) {
  static $allowed_protocols;
  if (!isset($allowed_protocols)) {
    // Flipped so that a protocol can be checked with isset().
    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', array('ftp', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'rtsp', 'sftp', 'ssh', 'telnet', 'webcal')));
  }

  // Get the plain text representation of the attribute value (i.e. its meaning).
  if ($decode) {
    $string = decode_entities($string);
  }

  // Iteratively remove any invalid protocol found.
  do {
    $before = $string;
    $colonpos = strpos($string, ':');
    if ($colonpos > 0) {
      // We found a colon, possibly a protocol. Verify.
      $protocol = substr($string, 0, $colonpos);
      // If a colon is preceded by a slash, question mark or hash, it cannot
      // possibly be part of the URL scheme. This must be a relative URL,
      // which inherits the (safe) protocol of the base document.
      if (preg_match('![/?#]!', $protocol)) {
        break;
      }
      // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be case-insensitive
      // Check if this is a disallowed protocol.
      if (!isset($allowed_protocols[strtolower($protocol)])) {
        $string = substr($string, $colonpos + 1);
      }
    }
  } while ($before != $string);

  return check_plain($string);
}

/**
 * @} End of "Standard filters".
 */