Login | Register
My pages Projects Community openCollabNet

Discussions > cvs > CVS update: joist/java/org/joist/util TagValidator.java

Project highlights: Architectural Overview

joist
Discussion topic

Back to topic list

CVS update: joist/java/org/joist/util TagValidator.java

Author commitlogger at hocus dot collab dot net
Full name commitlogger at hocus dot collab dot net
Date 2000-04-17 12:33:11 PDT
Message User: davidp
  Date: 00/04/17 12:33:11

  Added: java/org/joist/util TagValidator.java
  Log:
  An updated and renamed version of HTMLValidator
  
  Revision Changes Path
  1.1 joist/java/org/joist/util/TagValidator.java
  
  Index: TagValidator.java
  ===================================================================
  package org.sourcexchange.util;
  
  import java.util.*;
  import java.sql.*;
  import com.oroinc.text.regex.*;
  
  /**
    * Checks that Content contains only tags from an accepted set.
    *
    * <p>This does not purport to validate syntactically or semantically correct
    * HTML. Its sole purpose is to detect the use of dangerous tags and
    * attributes that can be used to execute scripts and other nefarious misdeeds.
    *
    Allowed tags:<br>
    &lt;A&gt; &lt;ABBR&gt; &lt;ACRONYM&gt; &lt;AREA&gt; &lt;B&gt; &lt;BASE&gt; &lt;BASEFONT&gt; &lt;BIG&gt; &lt;BDO&gt; &lt;BLINK&gt; &lt;BLOCKQUOTE&gt; &lt;BR&gt; &lt;CAPTION&gt; &lt;CENTER&gt; &lt;CITE&gt; &lt;CODE&gt; &lt;COL&gt; &lt;COLGROUP&gt; &lt;DD&gt; &lt;DEL&gt; &lt;DFN&gt; &lt;DIV&gt; &lt;DL&gt; &lt;DT&gt; &lt;EM&gt; &lt;FIELDSET&gt; &lt;FONT&gt; &lt;H1&gt; &lt;H2&gt; &lt;H3&gt; &lt;H4&gt; &lt;H5&gt; &lt;H6&gt; &lt;HR&gt; &lt;I&gt; &lt;IMG&gt; &lt;INS&gt; &lt;KBD&gt; &lt;LI&gt; &lt;LABEL&gt; &lt;LEGEND&gt; &lt;LINK&gt; &lt;MAP&gt; &lt;MENU&gt; &lt;MULTICOL&gt; &lt;NOBR&gt; &lt;NOFRAMES&gt; &lt;NOSCRIPT&gt; &lt;OL&gt; &lt;OPTGROUP&gt; &lt;P&gt; &lt;PARAM&gt; &lt;PRE&gt; &lt;Q&gt; &lt;S&gt; &lt;SAMP&gt; &lt;SMALL&gt; &lt;SPACER&gt; &lt;SPAN&gt; &lt;STRIKE&gt; &lt;STRONG&gt; &lt;STYLE&gt; &lt;SUB&gt; &lt;SUP&gt; &lt;TBODY&gt; &lt;TD&gt; &lt;TFOOT&gt; &lt;TH&gt; &lt;THEAD&gt; &lt;TR&gt; &lt;TT&gt; &lt;TABLE&gt; &lt;U&gt; &lt;UL&gt; &lt;VAR&gt; &lt;WBR&gt;
    <p>
    Allowed attributes:<br>
    &lt;ALIGN&gt; &lt;BACKGROUND&gt; &lt;BGCOLOR&gt; &lt;CHAR&gt; &lt;CHAROFF&gt; &lt;CITE&gt; &lt;CLEAR&gt; &lt;COLOR&gt; &lt;COLS&gt; &lt;FACE&gt; &lt;GUTTER&gt; &lt;HEIGHT&gt; &lt;HREF&gt; &lt;SIZE&gt; &lt;START&gt; &lt;TYPE&gt; &lt;WIDTH&gt; &lt;COMPACT&gt; &lt;NOSHADE&gt; &lt;NOWRAP&gt;
    *
    * @author David C. Pellegrini <A HREF="mailto:davidp@​dataweb-systems.com"​>davidp@dataweb-s​ystems.com</A>​
    * @author Jon S. Stevens <A HREF="mailto:jon@lat​chkey.com">jon@la​tchkey.com</A>​
    * @version $Revision: 1.1 $
    */
  public class TagValidator
  {
      static final private String[] tags =
      {"A","ABBR","ACRONYM​","AREA","B","BASE",​"BASEFONT","BIG","BD​O","BLINK",
       "BLOCKQUOTE","BR","C​APTION","CENTER","CI​TE","CODE","COL","CO​LGROUP",
       "DD","DEL","DFN","DI​V","DL","DT","EM","F​IELDSET","FONT",
       "H1","H2","H3","H4",​"H5","H6","HR","I","​IMG","INS","KBD","LI​",
       "LABEL","LEGEND","LI​NK","MAP","MENU","MU​LTICOL","NOBR","NOFR​AMES","NOSCRIPT",
       "OL","OPTGROUP","P",​"PARAM","PRE","Q","S​","SAMP","SMALL","SP​ACER","SPAN","STRIKE​",
       "STRONG","STYLE","SU​B","SUP","TBODY","TD​","TFOOT","TH","THEA​D",
       "TR","TT","TABLE","U​","UL","VAR","WBR"};​
      
      static final private String[] attributes =
      {"ALIGN","BACKGROUND​","BGCOLOR","CHAR","​CHAROFF","CITE","CLE​AR","COLOR",
       "COLS","FACE","GUTTE​R","HEIGHT","HREF","​SIZE","START","TYPE"​,"WIDTH",
       "COMPACT","NOSHADE","NOWRAP"};
  
      static final private String frontRegExp = "<\\s*/?\\s*(\\s*";
      static final private String middleRegExp = "\\s*)\\s*\\b\\s*(\\s*(\\s*";
      static final private String endRegExp = "\\s*)\\s*(\\s​*=\\s*\"?[^<​>\\s]+\"?\\s*)​?\\s*)*\\s*>"​;
      static final private String orDelim = "|";
      static final private String lt = "&lt;";
      static final private String gt = "&gt;";
      
      // For substitution ...
      static final private PatternCompiler substCompiler = new Perl5Compiler();
      static final private PatternMatcher substMatcher = new Perl5Matcher();
      static private Pattern substPattern = null;
      static final private String substitution = "";
      static final private int limit = Util.SUBSTITUTE_ALL;
      static final private int nointerp = Util.INTERPOLATE_NONE;
  
      // For matching ...
      static final private PatternCompiler matchCompiler = new Perl5Compiler();
      static final private PatternMatcher matchMatcher = new Perl5Matcher();
      static private Pattern matchPattern = null;
      static final private String matchRegExp = "</?[^<>\\s]*>";
  
      /**
       * Tests the operation of the methods.
       * Takes one argument: the string to be validated
       */
      public static final void main(String args[])
      {
          String input = null;
          if (args.length == 0 )
              input = "this is a <script> test";
          else
              input = args[0];
  
              
          databaseTest();
          
  /* StringBuffer errorMsg = new StringBuffer();
          
          System.out.println("original: " + input);
          if (validate(input, errorMsg))
              System.out.println("A-OK!");
          else
              System.out.println("not allowed: " + errorMsg);
              */
      }
  
      public static final void databaseTest()
      {
          String username = "jon";
          String password = "";
  // String host = "womb.collab.net";
          String host = "yang.collab.net";
          String db = "sourcex";
          String DRIVER_MM = "org.gjt.mm.mysql.Driver";
          String url = "jdbc:mysql://" + host + "/" + db;
          Statement stmt = null;
          Connection conn = null;
          try
          {
              String DB_CONNECTION = url +"?user="+ username + "&password="+password;
              Class.forName( DRIVER_MM ).newInstance();
              conn = DriverManager.getConnection( DB_CONNECTION );
              
              String[] tables = {"Developer","Evalua​tion","Interest",
                  "Milestone","Milesto​ne","Milestone", "Milestone",
                  "Proposal","Proposal​","Proposal",
                  "RFP","RFP","RFPComm​ent","Skill","Sponso​r",
                      "Wish","Wish","Wish"​,"WishComment"};
  
              String[] columns = {"achievements","eva​luation","statement"​,"deliverables","ann​ouncement",
                  "response","review",​"statement","deliver​ables","milestones",​"description",
                  "milestones","commen​t","name","name","na​me","description","d​eliverables","commen​t"};
  
              for ( int i=0; i<tables.length; i++ )
              {
                  String sql = "select " + columns[i] + " from " + tables[i];
                  stmt = conn.createStatement();
                  ResultSet rs = stmt.executeQuery ( sql );
                  int rowID = 1;
                  while (rs.next())
                  {
                      StringBuffer errorMsg = new StringBuffer();
                      String input = rs.getString ( columns[i] );
                      if (input == null || input.length() == 0)
                      {
  // System.out.println ("Row is empty: " + rowID );
                      }
                      else if ( ! validate(input, errorMsg ) )
                      {
  // System.out.println ("Error in row: " + rowID );
                          System.out.println ("RowID: " + rowID + " Table: " + tables[i] + " Column: " + columns[i]);
                          System.out.println ("Error Message:\n" + errorMsg.toString() + "\n" );
                      }
                      rowID++;
                  }
              }
          }
          catch ( Exception e )
          {
              e.printStackTrace();
              System.out.println (e.toString());
          }
          finally
          {
              try { if (stmt != null) stmt.close(); } catch (Exception e){}
              try { if (conn != null) conn.close(); } catch (Exception e){}
          }
      }
      /**
          This method builds the regex for the pattern matching.
          We do this as a dynamic string because it makes it easier to
          add new items.
      */
      private static String buildRegex()
      {
          StringBuffer sb = new StringBuffer();
          sb.append ( frontRegExp );
          boolean firstTime = true;
          for ( int i=0; i<tags.length; i++ )
          {
              if (firstTime)
              {
                  sb.append(tags[i]);
                  firstTime = false;
              }
              else
              {
                  sb.append(orDelim);
                  sb.append(tags[i]);
              }
          }
          sb.append ( middleRegExp );
          firstTime = true;
          for ( int i=0; i<attributes.length; i++ )
          {
              if (firstTime)
              {
                  sb.append(attributes[i]);
                  firstTime = false;
              }
              else
              {
                  sb.append(orDelim);
                  sb.append(attributes[i]);
              }
          }
          sb.append ( endRegExp );
          return sb.toString();
      }
  
      /**
          This method builds an HTML representation of all of the allowed Tags
      */
      private static String buildError()
      {
          StringBuffer sb = new StringBuffer();
          boolean firstTime = true;
          sb.append ( "<b>Allowed tags:</b><br> " );
          for ( int i=0; i<tags.length; i++ )
          {
              if (firstTime)
              {
                  sb.append(lt);
                  sb.append(tags[i]);
                  sb.append(gt);
                  firstTime = false;
              }
              else
              {
                  sb.append(" ");
                  sb.append(lt);
                  sb.append(tags[i]);
                  sb.append(gt);
              }
          }
          firstTime = true;
          sb.append ( "<p><b>Allowed attributes:</b><br> " );
          for ( int i=0; i<attributes.length; i++ )
          {
              if (firstTime)
              {
                  sb.append(lt);
                  sb.append(attributes[i]);
                  sb.append(gt);
                  firstTime = false;
              }
              else
              {
                  sb.append(" ");
                  sb.append(lt);
                  sb.append(attributes[i]);
                  sb.append(gt);
              }
          }
          return sb.toString();
      }
      
      /**
       * Checks the input string for HTML tags that are not allowed.
       * <p>
       * First, strip out all occurrences of acceptable HTML tags. What remains
       * is either clean, or contains tags that are not allowed.
       *
       * @param input the string containing HTML
       * @param errorMsg the StringBuffer for any error message text
       * @return a boolean indicating whether the HTML was OK
       */
      public static boolean validate (String inputString, StringBuffer errorMsg)
      {
          // Bootstrap the first time through ...
          if (substPattern == null)
          {
              synchronized(substCompiler)
              {
                  if (substPattern == null) // if it's STILL null ;-)
                  {
                      try
                      {
                          substPattern = substCompiler.compil​e(buildRegex(),
                              Perl5Compiler.CASE_I​NSENSITIVE_MASK);
                          matchPattern = matchCompiler.compil​e(matchRegExp,
                              Perl5Compiler.CASE_I​NSENSITIVE_MASK);
                      }
                      catch (MalformedPatternException ignored){};
  
                  }
              }
          }
  
          // First, strip out all occurrences of valid HTML tags.
          String strippedCopy = Util.substitute(substMatcher, substPattern,
              substitution, inputString, limit, nointerp);
  
          // What remains is either clean, or contains tags that are not allowed.
          PatternMatcherInput input = new PatternMatcherInput(​strippedCopy);
  
          boolean clean = true;
          MatchResult result = null;
          while (matchMatcher.contains(input, matchPattern))
          {
              result = matchMatcher.getMatch();
              errorMsg.append(resu​lt.toString()).appen​d (" ");
              clean = false;
          }
          return clean;
      }
  }

« Previous message in topic | 1 of 1 | Next message in topic »

Messages

Show all messages in topic

CVS update: joist/java/org/joist/util TagValidator.java commitlogger at hocus dot collab dot net commitlogger at hocus dot collab dot net 2000-04-17 12:33:11 PDT
Messages per page: