Login | Register
My pages Projects Community openCollabNet

Discussions > cvs > CVS update: MODIFIED: util ...

Project highlights: Architectural Overview

joist
Discussion topic

Back to topic list

CVS update: MODIFIED: util ...

Author ms
Full name zxcv
Date 2001-05-10 19:30:18 PDT
Message User: ms
  Date: 01/05/10 19:30:18

  Modified: java/org/joist/util TagValidator.java
  Log:
  Changed how the regex is constructed and how it matches valid HTML. Rather than stripping all valid tags and then checking for any leftover HTML, it checks for any invalid HTML right off the bat. Should be much faster unless ORO's implementation of negative-width assertions is totally bad.
  
  Revision Changes Path
  1.11 +15 -43 joist/java/org/joist/util/TagValidator.java
  
  http://joist.tigris.org/source/browse/joist/java/org/joist/util/TagValidator.java.diff?r1=1.10&r2=1.11
  
  (In the diff below, changes in quantity of whitespace are not shown.)
  
  Index: TagValidator.java
  ===================================================================
  RCS file: /usr/local/tigris/data/helm/cvs/repository/joist/java/org/joist/util/TagValidator.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -b -r1.10 -r1.11
  --- TagValidator.java 2001/05/10 02:05:50 1.10
  +++ TagValidator.java 2001/05/11 02:30:18 1.11
  @@ -67,7 +67,7 @@
     * @author David C. Pellegrini <A HREF="mailto:davidp@dataweb-systems.com">davidp@dataweb-systems.com</A>
     * @author Jon S. Stevens <A HREF="mailto:jon@latchkey.com">jon@latchkey.com</A>
     * @author Michael Salmon <a href="mailto:ms@collab.net">ms@collab.net</a>
  - * @version $Revision: 1.10 $
  + * @version $Revision: 1.11 $
     */
   public class TagValidator
   {
  @@ -93,9 +93,10 @@
       {"ALIGN","BACKGROUND​","BGCOLOR","CHAR","​CHAROFF","CITE","CLE​AR","COLOR",
        "COLS","FACE","GUTTE​R","HEIGHT","HREF","​SIZE","START","TYPE"​,"WIDTH",
        "COMPACT","NOSHADE","NOWRAP"};
  + final String anyAttribute = "[a-zA-Z]*";
   
  - final private String frontRegExp = "<\\s*/?\\s*(\\s*";
  - final private String middleRegExp = "\\s*)\\s*\\b\\s*(\\s*(\\s*";
  + final private String frontRegExp = "<\\s*/?\\s*(?!\\s*";
  + final private String middleRegExp = "\\s*).+\\s*\\b\\s*(\\s*(\\s*";
       final private String endRegExp = "\\s*)\\s*(\\s*=\\s*\"?[^<>\\s]+\"?\\s*)?\\s*)*\\s*>";
       
       // the tags to match with this object
  @@ -105,13 +106,6 @@
       final private String lt = "&lt;";
       final private String gt = "&gt;";
       
  - // For substitution ...
  - static final private PatternCompiler substCompiler = new Perl5Compiler();
  - static final private PatternMatcher substMatcher = new Perl5Matcher();
  - static private Pattern substPattern = null;
  - static final private String substitution = "";
  - static final private int limit = Util.SUBSTITUTE_ALL;
  -
       // For matching ...
       static final private PatternCompiler matchCompiler = new Perl5Compiler();
       static final private PatternMatcher matchMatcher = new Perl5Matcher();
  @@ -119,7 +113,6 @@
   
       static final private String matchRegExp = "</?[^<>\\s]*>";
   
  -
       /**
        * populate the validator with some tags to allow
        */
  @@ -162,20 +155,8 @@
               }
           }
           sb.append ( middleRegExp );
  + sb.append ( anyAttribute );
           firstTime = true;
  - for ( int i=0; i<attributes.length; i++ )
  - {
  - if (firstTime)
  - {
  - sb.append(attributes[i]);
  - firstTime = false;
  - }
  - else
  - {
  - sb.append(orDelim);
  - sb.append(attributes[i]);
  - }
  - }
           sb.append ( endRegExp );
           return sb.toString();
       }
  @@ -231,8 +212,7 @@
       /**
        * Checks the input string for HTML tags that are not allowed.
        * <p>
  - * First, strip out all occurrences of acceptable HTML tags. What remains
  - * is either clean, or contains tags that are not allowed.
  + * Use negative-width lookahead to search for valid tags
        *
        * @param inputString A variable of type String
        * @return a boolean indicating whether the HTML was OK
  @@ -240,31 +220,23 @@
       public boolean validate (String inputString)
       {
           // Bootstrap the first time through ...
  - if (substPattern == null)
  + if (matchPattern == null)
           {
  - synchronized(substCompiler)
  + synchronized(matchCompiler)
               {
  - if (substPattern == null) // if it's STILL null ;-)
  + if (matchPattern == null) // if it's STILL null ;-)
                   {
                       try
                       {
  - substPattern = substCompiler.compile(buildRegex(),
  - Perl5Compiler.CASE_INSENSITIVE_MASK);
  - matchPattern = matchCompiler.compile(matchRegExp,
  + matchPattern = matchCompiler.compile(buildRegex(),
                               Perl5Compiler.CASE_INSENSITIVE_MASK);
                       }
                       catch (MalformedPatternException ignored){};
                   }
               }
           }
  -
  - // First, strip out all occurrences of valid HTML tags.
  - String strippedCopy = Util.substitute(substMatcher, substPattern,
  - new StringSubstitution(substitution), inputString, limit);
   
  - // What remains is either clean, or contains tags that are not allowed.
  - PatternMatcherInput input = new PatternMatcherInput(strippedCopy);
  -
  + PatternMatcherInput input = new PatternMatcherInput(inputString);
           boolean clean = true;
           MatchResult result = null;
           if (matchMatcher.contains(input, matchPattern))
  @@ -343,7 +315,7 @@
        */
       public void addAllowTags(String[] newTags)
       {
  - substPattern = null;
  + matchPattern = null;
           // let npe's pass if given a null list
           if (allowTags == null && newTags != null)
           {
  @@ -357,7 +329,7 @@
   
       public void setAllowTags (String[] newTags)
       {
  - substPattern = null;
  + matchPattern = null;
           allowTags = null;
           addAllowTags(newTags);
       }
  @@ -376,7 +348,7 @@
        */
       public void resetAllowTags()
       {
  - substPattern = null;
  + matchPattern = null;
           allowTags = null;
       }
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@joist.tigris.org
For additional commands, e-mail: cvs-help at joist dot tigris dot org

« Previous message in topic | 1 of 1 | Next message in topic »

Messages

Show all messages in topic

CVS update: MODIFIED: util ... ms zxcv 2001-05-10 19:30:18 PDT
Messages per page: