作者 RuoYi

添加HTML过滤器,用于去除XSS漏洞隐患

@@ -58,7 +58,7 @@ public class EscapeUtil @@ -58,7 +58,7 @@ public class EscapeUtil
58 */ 58 */
59 public static String clean(String content) 59 public static String clean(String content)
60 { 60 {
61 - return content.replaceAll(RE_HTML_MARK, ""); 61 + return new HTMLFilter().filter(content);
62 } 62 }
63 63
64 /** 64 /**
  1 +package com.ruoyi.common.utils.html;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.Collections;
  5 +import java.util.HashMap;
  6 +import java.util.List;
  7 +import java.util.Map;
  8 +import java.util.concurrent.ConcurrentHashMap;
  9 +import java.util.concurrent.ConcurrentMap;
  10 +import java.util.regex.Matcher;
  11 +import java.util.regex.Pattern;
  12 +
  13 +/**
  14 + * HTML过滤器,用于去除XSS漏洞隐患。
  15 + *
  16 + * @author ruoyi
  17 + */
  18 +public final class HTMLFilter
  19 +{
  20 + /**
  21 + * regex flag union representing /si modifiers in php
  22 + **/
  23 + private static final int REGEX_FLAGS_SI = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
  24 + private static final Pattern P_COMMENTS = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL);
  25 + private static final Pattern P_COMMENT = Pattern.compile("^!--(.*)--$", REGEX_FLAGS_SI);
  26 + private static final Pattern P_TAGS = Pattern.compile("<(.*?)>", Pattern.DOTALL);
  27 + private static final Pattern P_END_TAG = Pattern.compile("^/([a-z0-9]+)", REGEX_FLAGS_SI);
  28 + private static final Pattern P_START_TAG = Pattern.compile("^([a-z0-9]+)(.*?)(/?)$", REGEX_FLAGS_SI);
  29 + private static final Pattern P_QUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)=([\"'])(.*?)\\2", REGEX_FLAGS_SI);
  30 + private static final Pattern P_UNQUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)(=)([^\"\\s']+)", REGEX_FLAGS_SI);
  31 + private static final Pattern P_PROTOCOL = Pattern.compile("^([^:]+):", REGEX_FLAGS_SI);
  32 + private static final Pattern P_ENTITY = Pattern.compile("&#(\\d+);?");
  33 + private static final Pattern P_ENTITY_UNICODE = Pattern.compile("&#x([0-9a-f]+);?");
  34 + private static final Pattern P_ENCODE = Pattern.compile("%([0-9a-f]{2});?");
  35 + private static final Pattern P_VALID_ENTITIES = Pattern.compile("&([^&;]*)(?=(;|&|$))");
  36 + private static final Pattern P_VALID_QUOTES = Pattern.compile("(>|^)([^<]+?)(<|$)", Pattern.DOTALL);
  37 + private static final Pattern P_END_ARROW = Pattern.compile("^>");
  38 + private static final Pattern P_BODY_TO_END = Pattern.compile("<([^>]*?)(?=<|$)");
  39 + private static final Pattern P_XML_CONTENT = Pattern.compile("(^|>)([^<]*?)(?=>)");
  40 + private static final Pattern P_STRAY_LEFT_ARROW = Pattern.compile("<([^>]*?)(?=<|$)");
  41 + private static final Pattern P_STRAY_RIGHT_ARROW = Pattern.compile("(^|>)([^<]*?)(?=>)");
  42 + private static final Pattern P_AMP = Pattern.compile("&");
  43 + private static final Pattern P_QUOTE = Pattern.compile("\"");
  44 + private static final Pattern P_LEFT_ARROW = Pattern.compile("<");
  45 + private static final Pattern P_RIGHT_ARROW = Pattern.compile(">");
  46 + private static final Pattern P_BOTH_ARROWS = Pattern.compile("<>");
  47 +
  48 + // @xxx could grow large... maybe use sesat's ReferenceMap
  49 + private static final ConcurrentMap<String, Pattern> P_REMOVE_PAIR_BLANKS = new ConcurrentHashMap<>();
  50 + private static final ConcurrentMap<String, Pattern> P_REMOVE_SELF_BLANKS = new ConcurrentHashMap<>();
  51 +
  52 + /**
  53 + * set of allowed html elements, along with allowed attributes for each element
  54 + **/
  55 + private final Map<String, List<String>> vAllowed;
  56 + /**
  57 + * counts of open tags for each (allowable) html element
  58 + **/
  59 + private final Map<String, Integer> vTagCounts = new HashMap<>();
  60 +
  61 + /**
  62 + * html elements which must always be self-closing (e.g. "<img />")
  63 + **/
  64 + private final String[] vSelfClosingTags;
  65 + /**
  66 + * html elements which must always have separate opening and closing tags (e.g. "<b></b>")
  67 + **/
  68 + private final String[] vNeedClosingTags;
  69 + /**
  70 + * set of disallowed html elements
  71 + **/
  72 + private final String[] vDisallowed;
  73 + /**
  74 + * attributes which should be checked for valid protocols
  75 + **/
  76 + private final String[] vProtocolAtts;
  77 + /**
  78 + * allowed protocols
  79 + **/
  80 + private final String[] vAllowedProtocols;
  81 + /**
  82 + * tags which should be removed if they contain no content (e.g. "<b></b>" or "<b />")
  83 + **/
  84 + private final String[] vRemoveBlanks;
  85 + /**
  86 + * entities allowed within html markup
  87 + **/
  88 + private final String[] vAllowedEntities;
  89 + /**
  90 + * flag determining whether comments are allowed in input String.
  91 + */
  92 + private final boolean stripComment;
  93 + private final boolean encodeQuotes;
  94 + /**
  95 + * flag determining whether to try to make tags when presented with "unbalanced" angle brackets (e.g. "<b text </b>"
  96 + * becomes "<b> text </b>"). If set to false, unbalanced angle brackets will be html escaped.
  97 + */
  98 + private final boolean alwaysMakeTags;
  99 +
  100 + /**
  101 + * Default constructor.
  102 + */
  103 + public HTMLFilter()
  104 + {
  105 + vAllowed = new HashMap<>();
  106 +
  107 + final ArrayList<String> a_atts = new ArrayList<>();
  108 + a_atts.add("href");
  109 + a_atts.add("target");
  110 + vAllowed.put("a", a_atts);
  111 +
  112 + final ArrayList<String> img_atts = new ArrayList<>();
  113 + img_atts.add("src");
  114 + img_atts.add("width");
  115 + img_atts.add("height");
  116 + img_atts.add("alt");
  117 + vAllowed.put("img", img_atts);
  118 +
  119 + final ArrayList<String> no_atts = new ArrayList<>();
  120 + vAllowed.put("b", no_atts);
  121 + vAllowed.put("strong", no_atts);
  122 + vAllowed.put("i", no_atts);
  123 + vAllowed.put("em", no_atts);
  124 +
  125 + vSelfClosingTags = new String[] { "img" };
  126 + vNeedClosingTags = new String[] { "a", "b", "strong", "i", "em" };
  127 + vDisallowed = new String[] {};
  128 + vAllowedProtocols = new String[] { "http", "mailto", "https" }; // no ftp.
  129 + vProtocolAtts = new String[] { "src", "href" };
  130 + vRemoveBlanks = new String[] { "a", "b", "strong", "i", "em" };
  131 + vAllowedEntities = new String[] { "amp", "gt", "lt", "quot" };
  132 + stripComment = true;
  133 + encodeQuotes = true;
  134 + alwaysMakeTags = true;
  135 + }
  136 +
  137 + /**
  138 + * Map-parameter configurable constructor.
  139 + *
  140 + * @param conf map containing configuration. keys match field names.
  141 + */
  142 + @SuppressWarnings("unchecked")
  143 + public HTMLFilter(final Map<String, Object> conf)
  144 + {
  145 +
  146 + assert conf.containsKey("vAllowed") : "configuration requires vAllowed";
  147 + assert conf.containsKey("vSelfClosingTags") : "configuration requires vSelfClosingTags";
  148 + assert conf.containsKey("vNeedClosingTags") : "configuration requires vNeedClosingTags";
  149 + assert conf.containsKey("vDisallowed") : "configuration requires vDisallowed";
  150 + assert conf.containsKey("vAllowedProtocols") : "configuration requires vAllowedProtocols";
  151 + assert conf.containsKey("vProtocolAtts") : "configuration requires vProtocolAtts";
  152 + assert conf.containsKey("vRemoveBlanks") : "configuration requires vRemoveBlanks";
  153 + assert conf.containsKey("vAllowedEntities") : "configuration requires vAllowedEntities";
  154 +
  155 + vAllowed = Collections.unmodifiableMap((HashMap<String, List<String>>) conf.get("vAllowed"));
  156 + vSelfClosingTags = (String[]) conf.get("vSelfClosingTags");
  157 + vNeedClosingTags = (String[]) conf.get("vNeedClosingTags");
  158 + vDisallowed = (String[]) conf.get("vDisallowed");
  159 + vAllowedProtocols = (String[]) conf.get("vAllowedProtocols");
  160 + vProtocolAtts = (String[]) conf.get("vProtocolAtts");
  161 + vRemoveBlanks = (String[]) conf.get("vRemoveBlanks");
  162 + vAllowedEntities = (String[]) conf.get("vAllowedEntities");
  163 + stripComment = conf.containsKey("stripComment") ? (Boolean) conf.get("stripComment") : true;
  164 + encodeQuotes = conf.containsKey("encodeQuotes") ? (Boolean) conf.get("encodeQuotes") : true;
  165 + alwaysMakeTags = conf.containsKey("alwaysMakeTags") ? (Boolean) conf.get("alwaysMakeTags") : true;
  166 + }
  167 +
  168 + private void reset()
  169 + {
  170 + vTagCounts.clear();
  171 + }
  172 +
  173 + // ---------------------------------------------------------------
  174 + // my versions of some PHP library functions
  175 + public static String chr(final int decimal)
  176 + {
  177 + return String.valueOf((char) decimal);
  178 + }
  179 +
  180 + public static String htmlSpecialChars(final String s)
  181 + {
  182 + String result = s;
  183 + result = regexReplace(P_AMP, "&amp;", result);
  184 + result = regexReplace(P_QUOTE, "&quot;", result);
  185 + result = regexReplace(P_LEFT_ARROW, "&lt;", result);
  186 + result = regexReplace(P_RIGHT_ARROW, "&gt;", result);
  187 + return result;
  188 + }
  189 +
  190 + // ---------------------------------------------------------------
  191 +
  192 + /**
  193 + * given a user submitted input String, filter out any invalid or restricted html.
  194 + *
  195 + * @param input text (i.e. submitted by a user) than may contain html
  196 + * @return "clean" version of input, with only valid, whitelisted html elements allowed
  197 + */
  198 + public String filter(final String input)
  199 + {
  200 + reset();
  201 + String s = input;
  202 +
  203 + s = escapeComments(s);
  204 +
  205 + s = balanceHTML(s);
  206 +
  207 + s = checkTags(s);
  208 +
  209 + s = processRemoveBlanks(s);
  210 +
  211 + s = validateEntities(s);
  212 +
  213 + return s;
  214 + }
  215 +
  216 + public boolean isAlwaysMakeTags()
  217 + {
  218 + return alwaysMakeTags;
  219 + }
  220 +
  221 + public boolean isStripComments()
  222 + {
  223 + return stripComment;
  224 + }
  225 +
  226 + private String escapeComments(final String s)
  227 + {
  228 + final Matcher m = P_COMMENTS.matcher(s);
  229 + final StringBuffer buf = new StringBuffer();
  230 + if (m.find())
  231 + {
  232 + final String match = m.group(1); // (.*?)
  233 + m.appendReplacement(buf, Matcher.quoteReplacement("<!--" + htmlSpecialChars(match) + "-->"));
  234 + }
  235 + m.appendTail(buf);
  236 +
  237 + return buf.toString();
  238 + }
  239 +
  240 + private String balanceHTML(String s)
  241 + {
  242 + if (alwaysMakeTags)
  243 + {
  244 + //
  245 + // try and form html
  246 + //
  247 + s = regexReplace(P_END_ARROW, "", s);
  248 + s = regexReplace(P_BODY_TO_END, "<$1>", s);
  249 + s = regexReplace(P_XML_CONTENT, "$1<$2", s);
  250 +
  251 + }
  252 + else
  253 + {
  254 + //
  255 + // escape stray brackets
  256 + //
  257 + s = regexReplace(P_STRAY_LEFT_ARROW, "&lt;$1", s);
  258 + s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2&gt;<", s);
  259 +
  260 + //
  261 + // the last regexp causes '<>' entities to appear
  262 + // (we need to do a lookahead assertion so that the last bracket can
  263 + // be used in the next pass of the regexp)
  264 + //
  265 + s = regexReplace(P_BOTH_ARROWS, "", s);
  266 + }
  267 +
  268 + return s;
  269 + }
  270 +
  271 + private String checkTags(String s)
  272 + {
  273 + Matcher m = P_TAGS.matcher(s);
  274 +
  275 + final StringBuffer buf = new StringBuffer();
  276 + while (m.find())
  277 + {
  278 + String replaceStr = m.group(1);
  279 + replaceStr = processTag(replaceStr);
  280 + m.appendReplacement(buf, Matcher.quoteReplacement(replaceStr));
  281 + }
  282 + m.appendTail(buf);
  283 +
  284 + // these get tallied in processTag
  285 + // (remember to reset before subsequent calls to filter method)
  286 + final StringBuilder sBuilder = new StringBuilder(buf.toString());
  287 + for (String key : vTagCounts.keySet())
  288 + {
  289 + for (int ii = 0; ii < vTagCounts.get(key); ii++)
  290 + {
  291 + sBuilder.append("</").append(key).append(">");
  292 + }
  293 + }
  294 + s = sBuilder.toString();
  295 +
  296 + return s;
  297 + }
  298 +
  299 + private String processRemoveBlanks(final String s)
  300 + {
  301 + String result = s;
  302 + for (String tag : vRemoveBlanks)
  303 + {
  304 + if (!P_REMOVE_PAIR_BLANKS.containsKey(tag))
  305 + {
  306 + P_REMOVE_PAIR_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?></" + tag + ">"));
  307 + }
  308 + result = regexReplace(P_REMOVE_PAIR_BLANKS.get(tag), "", result);
  309 + if (!P_REMOVE_SELF_BLANKS.containsKey(tag))
  310 + {
  311 + P_REMOVE_SELF_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?/>"));
  312 + }
  313 + result = regexReplace(P_REMOVE_SELF_BLANKS.get(tag), "", result);
  314 + }
  315 +
  316 + return result;
  317 + }
  318 +
  319 + private static String regexReplace(final Pattern regex_pattern, final String replacement, final String s)
  320 + {
  321 + Matcher m = regex_pattern.matcher(s);
  322 + return m.replaceAll(replacement);
  323 + }
  324 +
  325 + private String processTag(final String s)
  326 + {
  327 + // ending tags
  328 + Matcher m = P_END_TAG.matcher(s);
  329 + if (m.find())
  330 + {
  331 + final String name = m.group(1).toLowerCase();
  332 + if (allowed(name))
  333 + {
  334 + if (false == inArray(name, vSelfClosingTags))
  335 + {
  336 + if (vTagCounts.containsKey(name))
  337 + {
  338 + vTagCounts.put(name, vTagCounts.get(name) - 1);
  339 + return "</" + name + ">";
  340 + }
  341 + }
  342 + }
  343 + }
  344 +
  345 + // starting tags
  346 + m = P_START_TAG.matcher(s);
  347 + if (m.find())
  348 + {
  349 + final String name = m.group(1).toLowerCase();
  350 + final String body = m.group(2);
  351 + String ending = m.group(3);
  352 +
  353 + // debug( "in a starting tag, name='" + name + "'; body='" + body + "'; ending='" + ending + "'" );
  354 + if (allowed(name))
  355 + {
  356 + final StringBuilder params = new StringBuilder();
  357 +
  358 + final Matcher m2 = P_QUOTED_ATTRIBUTES.matcher(body);
  359 + final Matcher m3 = P_UNQUOTED_ATTRIBUTES.matcher(body);
  360 + final List<String> paramNames = new ArrayList<>();
  361 + final List<String> paramValues = new ArrayList<>();
  362 + while (m2.find())
  363 + {
  364 + paramNames.add(m2.group(1)); // ([a-z0-9]+)
  365 + paramValues.add(m2.group(3)); // (.*?)
  366 + }
  367 + while (m3.find())
  368 + {
  369 + paramNames.add(m3.group(1)); // ([a-z0-9]+)
  370 + paramValues.add(m3.group(3)); // ([^\"\\s']+)
  371 + }
  372 +
  373 + String paramName, paramValue;
  374 + for (int ii = 0; ii < paramNames.size(); ii++)
  375 + {
  376 + paramName = paramNames.get(ii).toLowerCase();
  377 + paramValue = paramValues.get(ii);
  378 +
  379 + // debug( "paramName='" + paramName + "'" );
  380 + // debug( "paramValue='" + paramValue + "'" );
  381 + // debug( "allowed? " + vAllowed.get( name ).contains( paramName ) );
  382 +
  383 + if (allowedAttribute(name, paramName))
  384 + {
  385 + if (inArray(paramName, vProtocolAtts))
  386 + {
  387 + paramValue = processParamProtocol(paramValue);
  388 + }
  389 + params.append(' ').append(paramName).append("=\"").append(paramValue).append("\"");
  390 + }
  391 + }
  392 +
  393 + if (inArray(name, vSelfClosingTags))
  394 + {
  395 + ending = " /";
  396 + }
  397 +
  398 + if (inArray(name, vNeedClosingTags))
  399 + {
  400 + ending = "";
  401 + }
  402 +
  403 + if (ending == null || ending.length() < 1)
  404 + {
  405 + if (vTagCounts.containsKey(name))
  406 + {
  407 + vTagCounts.put(name, vTagCounts.get(name) + 1);
  408 + }
  409 + else
  410 + {
  411 + vTagCounts.put(name, 1);
  412 + }
  413 + }
  414 + else
  415 + {
  416 + ending = " /";
  417 + }
  418 + return "<" + name + params + ending + ">";
  419 + }
  420 + else
  421 + {
  422 + return "";
  423 + }
  424 + }
  425 +
  426 + // comments
  427 + m = P_COMMENT.matcher(s);
  428 + if (!stripComment && m.find())
  429 + {
  430 + return "<" + m.group() + ">";
  431 + }
  432 +
  433 + return "";
  434 + }
  435 +
  436 + private String processParamProtocol(String s)
  437 + {
  438 + s = decodeEntities(s);
  439 + final Matcher m = P_PROTOCOL.matcher(s);
  440 + if (m.find())
  441 + {
  442 + final String protocol = m.group(1);
  443 + if (!inArray(protocol, vAllowedProtocols))
  444 + {
  445 + // bad protocol, turn into local anchor link instead
  446 + s = "#" + s.substring(protocol.length() + 1);
  447 + if (s.startsWith("#//"))
  448 + {
  449 + s = "#" + s.substring(3);
  450 + }
  451 + }
  452 + }
  453 +
  454 + return s;
  455 + }
  456 +
  457 + private String decodeEntities(String s)
  458 + {
  459 + StringBuffer buf = new StringBuffer();
  460 +
  461 + Matcher m = P_ENTITY.matcher(s);
  462 + while (m.find())
  463 + {
  464 + final String match = m.group(1);
  465 + final int decimal = Integer.decode(match).intValue();
  466 + m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal)));
  467 + }
  468 + m.appendTail(buf);
  469 + s = buf.toString();
  470 +
  471 + buf = new StringBuffer();
  472 + m = P_ENTITY_UNICODE.matcher(s);
  473 + while (m.find())
  474 + {
  475 + final String match = m.group(1);
  476 + final int decimal = Integer.valueOf(match, 16).intValue();
  477 + m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal)));
  478 + }
  479 + m.appendTail(buf);
  480 + s = buf.toString();
  481 +
  482 + buf = new StringBuffer();
  483 + m = P_ENCODE.matcher(s);
  484 + while (m.find())
  485 + {
  486 + final String match = m.group(1);
  487 + final int decimal = Integer.valueOf(match, 16).intValue();
  488 + m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal)));
  489 + }
  490 + m.appendTail(buf);
  491 + s = buf.toString();
  492 +
  493 + s = validateEntities(s);
  494 + return s;
  495 + }
  496 +
  497 + private String validateEntities(final String s)
  498 + {
  499 + StringBuffer buf = new StringBuffer();
  500 +
  501 + // validate entities throughout the string
  502 + Matcher m = P_VALID_ENTITIES.matcher(s);
  503 + while (m.find())
  504 + {
  505 + final String one = m.group(1); // ([^&;]*)
  506 + final String two = m.group(2); // (?=(;|&|$))
  507 + m.appendReplacement(buf, Matcher.quoteReplacement(checkEntity(one, two)));
  508 + }
  509 + m.appendTail(buf);
  510 +
  511 + return encodeQuotes(buf.toString());
  512 + }
  513 +
  514 + private String encodeQuotes(final String s)
  515 + {
  516 + if (encodeQuotes)
  517 + {
  518 + StringBuffer buf = new StringBuffer();
  519 + Matcher m = P_VALID_QUOTES.matcher(s);
  520 + while (m.find())
  521 + {
  522 + final String one = m.group(1); // (>|^)
  523 + final String two = m.group(2); // ([^<]+?)
  524 + final String three = m.group(3); // (<|$)
  525 + m.appendReplacement(buf, Matcher.quoteReplacement(one + regexReplace(P_QUOTE, "&quot;", two) + three));
  526 + }
  527 + m.appendTail(buf);
  528 + return buf.toString();
  529 + }
  530 + else
  531 + {
  532 + return s;
  533 + }
  534 + }
  535 +
  536 + private String checkEntity(final String preamble, final String term)
  537 + {
  538 +
  539 + return ";".equals(term) && isValidEntity(preamble) ? '&' + preamble : "&amp;" + preamble;
  540 + }
  541 +
  542 + private boolean isValidEntity(final String entity)
  543 + {
  544 + return inArray(entity, vAllowedEntities);
  545 + }
  546 +
  547 + private static boolean inArray(final String s, final String[] array)
  548 + {
  549 + for (String item : array)
  550 + {
  551 + if (item != null && item.equals(s))
  552 + {
  553 + return true;
  554 + }
  555 + }
  556 + return false;
  557 + }
  558 +
  559 + private boolean allowed(final String name)
  560 + {
  561 + return (vAllowed.isEmpty() || vAllowed.containsKey(name)) && !inArray(name, vDisallowed);
  562 + }
  563 +
  564 + private boolean allowedAttribute(final String name, final String paramName)
  565 + {
  566 + return allowed(name) && (vAllowed.isEmpty() || vAllowed.get(name).contains(paramName));
  567 + }
  568 +}