# a comment
      |                 # or
      <[^>]*(>|$)       # a string that starts with a <, up until the > or the end of the string
      |                 # or
      >                 # just a >
      )%x', $splitter, $string);
  }

  /**
   * Applies a very permissive XSS/HTML filter for admin-only use.
   *
   * Use only for fields where it is impractical to use the
   * whole filter system, but where some (mainly inline) mark-up
   * is desired (so \Drupal\Component\Utility\Html::escape() is
   * not acceptable).
   *
   * Allows all tags that can be used inside an HTML body, save
   * for scripts and styles.
   *
   * @param string $string
   *   The string to apply the filter to.
   *
   * @return string
   *   The filtered string.
   *
   * @ingroup sanitization
   *
   * @see \Drupal\Component\Utility\Xss::getAdminTagList()
   */
  public static function filterAdmin($string) {
    return static::filter($string, static::$adminTags);
  }

  /**
   * Processes an HTML tag.
   *
   * @param string $string
   *   The HTML tag to process.
   * @param array $html_tags
   *   An array where the keys are the allowed tags and the values are not
   *   used.
   * @param string $class
   *   The called class. This method is called from an anonymous function which
   *   breaks late static binding. See https://bugs.php.net/bug.php?id=66622 for
   *   more information.
   *
   * @return string
   *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
   *   version of the HTML element.
   */
  protected static function split($string, $html_tags, $class) {
    if (substr($string, 0, 1) !== '<') {
      // We matched a lone ">" character.
      return '>';
    }
    elseif (strlen($string) === 1) {
      // We matched a lone "<" character.
      return '<';
    }

    // Capture groups: 1 = closing slash, 2 = element name, 3 = attribute list,
    // 4 = a complete HTML comment.
    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9\-]+)\s*([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
      // Seriously malformed.
      return '';
    }

    $slash = trim($matches[1]);
    // Bind by reference: trailing groups that did not participate in the match
    // are absent from $matches, and a reference creates them as NULL instead
    // of raising an undefined-index notice.
    $elem = &$matches[2];
    $attrlist = &$matches[3];
    $comment = &$matches[4];

    if ($comment) {
      // Comments are looked up in the tag list under the pseudo-element "!--".
      $elem = '!--';
    }

    // When in whitelist mode, an element is disallowed when not listed.
    if ($class::needsRemoval($html_tags, $elem)) {
      return '';
    }

    if ($comment) {
      return $comment;
    }

    if ($slash !== '') {
      // Closing tag of an allowed element: emit it without any attributes.
      return "</$elem>";
    }

    // Is there a closing XHTML slash at the end of the attributes?
    $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
    $xhtml_slash = $count ? ' /' : '';

    // Clean up attributes.
    $attr2 = implode(' ', $class::attributes($attrlist));
    // Angle brackets must never survive inside the rebuilt tag.
    $attr2 = preg_replace('/[<>]/', '', $attr2);
    $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

    return "<$elem$attr2$xhtml_slash>";
  }

  /**
   * Processes a string of HTML attributes.
   *
   * The string is consumed from the left by a small state machine:
   * - mode 0 expects an attribute name,
   * - mode 1 expects an equals sign or whitespace (valueless attribute),
   * - mode 2 expects an attribute value.
   *
   * "style" attributes and attributes whose name starts with "on" are dropped.
   *
   * @param string $attributes
   *   The HTML attribute list to process.
   *
   * @return string[]
   *   The cleaned up attributes, one array item per attribute (either
   *   'name' or 'name="value"').
   */
  protected static function attributes($attributes) {
    $attributes_array = [];
    $mode = 0;
    $attribute_name = '';
    $skip = FALSE;
    $skip_protocol_filtering = FALSE;

    while (strlen($attributes) != 0) {
      // Was the last operation successful?
      $working = 0;

      switch ($mode) {
        case 0:
          // Attribute name, href for instance.
          if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
            $attribute_name = strtolower($match[1]);
            $skip = ($attribute_name === 'style' || substr($attribute_name, 0, 2) === 'on');

            // Values for attributes of type URI should be filtered for
            // potentially malicious protocols (for example, an href-attribute
            // starting with "javascript:"). However, for some non-URI
            // attributes performing this filtering causes valid and safe data
            // to be mangled. We prevent this by skipping protocol filtering on
            // such attributes.
            // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
            // @see http://www.w3.org/TR/html4/index/attributes.html
            $skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, [
              'title',
              'alt',
              'rel',
              'property',
            ], TRUE);

            $working = $mode = 1;
            $attributes = preg_replace('/^[-a-zA-Z][-a-zA-Z0-9]*/', '', $attributes);
          }
          break;

        case 1:
          // Equals sign or valueless ("selected").
          if (preg_match('/^\s*=\s*/', $attributes)) {
            $working = 1;
            $mode = 2;
            $attributes = preg_replace('/^\s*=\s*/', '', $attributes);
            break;
          }

          if (preg_match('/^\s+/', $attributes)) {
            $working = 1;
            $mode = 0;
            if (!$skip) {
              $attributes_array[] = $attribute_name;
            }
            $attributes = preg_replace('/^\s+/', '', $attributes);
          }
          break;

        case 2:
          // Attribute value, a URL after href= for instance.
          // Double-quoted value.
          if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace('/^"[^"]*"(\s+|$)/', '', $attributes);
            break;
          }

          // Single-quoted value.
          if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name='$thisval'";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace("/^'[^']*'(\s+|$)/", '', $attributes);
            break;
          }

          // Unquoted value; it is emitted wrapped in double quotes.
          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attributes);
          }
          break;
      }

      if ($working === 0) {
        // Not well formed; remove and try again.
        $attributes = preg_replace('/
          ^
          (
          "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
          |               # or
          \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
          |               # or
          \S              # - a non-whitespace character
          )*              # any number of the above three
          \s*             # any number of whitespaces
          /x', '', $attributes);
        $mode = 0;
      }
    }

    // The attribute list ends with a valueless attribute like "selected".
    if ($mode === 1 && !$skip) {
      $attributes_array[] = $attribute_name;
    }
    return $attributes_array;
  }

  /**
   * Whether this element needs to be removed altogether.
   *
   * @param array $html_tags
   *   The list of HTML tags, keyed by lowercase tag name.
   * @param string $elem
   *   The name of the HTML element.
   *
   * @return bool
   *   TRUE if this element needs to be removed.
   */
  protected static function needsRemoval($html_tags, $elem) {
    return !isset($html_tags[strtolower($elem)]);
  }

  /**
   * Gets the list of HTML tags allowed by Xss::filterAdmin().
   *
   * @return array
   *   The list of HTML tags allowed by filterAdmin().
   */
  public static function getAdminTagList() {
    return static::$adminTags;
  }

  /**
   * Gets the standard list of HTML tags allowed by Xss::filter().
   *
   * @return array
   *   The list of HTML tags allowed by Xss::filter().
   */
  public static function getHtmlTagList() {
    return static::$htmlTags;
  }

}