审查视图

simplewind/vendor/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php 9.0 KB
景龙 authored
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
<?php

/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes unrecognized
 * tokens. If a token is not recognized but a TagTransform is defined for
 * that element, the element will be transformed accordingly.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // remove script contents compatibility
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // removes tokens until it reaches a closing tag with its value
        $remove_until = false;

        // converts comments into text tokens when this is equal to a tag name
        $textify_comments = false;

        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }
            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                        info_tag_transform[$token->name]->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            if ($e) {
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $name
                                );
                            }
                            continue;
                        }
                        $token->armor['ValidateAttributes'] = true;
                    }

                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag, generate HTML representation and insert in
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                // textify comments in script tags when they are allowed
                if ($textify_comments !== false) {
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted || $check_comments) {
                    // always cleanup comments
                    $trailing_hyphen = false;
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        if (substr($token->data, -1) == '-') {
                            $trailing_hyphen = true;
                        }
                    }
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    while (strpos($token->data, '--') !== false) {
                        $found_double_hyphen = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }
                    if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
                        ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
                        // OK good
                        if ($e) {
                            if ($trailing_hyphen) {
                                $e->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($found_double_hyphen) {
                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    } else {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Text) {
            } else {
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }
        $context->destroy('CurrentToken');
        return $result;
    }
}

// vim: et sw=4 sts=4