NP_Validate.php

This plugin validates your items for X(HT)ML well-formedness. It checks all data submitted to your weblog, both comments and items. This is especially useful when you serve your website as XHTML. If a parse error occurs, the error message is returned so that you can edit your item or comment.

General Plugin info
Author: Tim Broddin
Current Version: 1.0
Download: -
Code: code
Demo: -
Forum Thread: here

Code

<?php
/**
 * Nucleus plugin that rejects items and comments whose markup does not pass
 * SafeHtmlChecker (well-formedness plus a whitelist of tags and attributes).
 *
 * When a checked field fails, the collected messages are rendered as an HTML
 * list and handed to doError().
 */
class NP_Validate extends NucleusPlugin {
	/**
	 * Checker used for the most recent validation, or null before the first one.
	 * A fresh instance is created per field because a SafeHtmlChecker wraps a
	 * single XML parser and accumulates errors, so it can only judge one
	 * '<all>...</all>' document.
	 */
	public $checker = null;

	public function getName() {
		return 'Validate HTML';
	}

	public function getAuthor() {
		return 'Tim Broddin';
	}

	public function getURL() {
		return 'http://www.fuckhedz.com';
	}

	public function getVersion() {
		return '1.0';
	}

	public function getDescription() {
		return 'This plugin checks items for HTML errors. The checking is done by SafeHtmlChecker by Simon Willison.';
	}

	/**
	 * Events this plugin subscribes to. Every name listed here has a matching
	 * event_<Name>() handler below.
	 */
	public function getEventList() {
		return array('PreAddItem', 'PreUpdateItem', 'PreAddComment');
	}

	/**
	 * Validates the body and the extended text of an item that is about to be added.
	 *
	 * @param array $data Event data. The fields 'body', 'more' and 'extended'
	 *                    are checked when present.
	 *                    NOTE(review): Nucleus appears to pass the extended text
	 *                    as 'more'; 'extended' is kept because the original code
	 *                    read that key - confirm against the Nucleus event docs.
	 */
	public function event_PreAddItem($data) {
		foreach (array('body', 'more', 'extended') as $field) {
			if (isset($data[$field]) && is_string($data[$field])) {
				$this->_validateMarkup($data[$field]);
			}
		}
	}

	/**
	 * Validates an item that is about to be updated; same rules as for a new item.
	 *
	 * @param array $data Event data, see event_PreAddItem().
	 */
	public function event_PreUpdateItem($data) {
		$this->event_PreAddItem($data);
	}

	/**
	 * Validates the body of a comment that is about to be added.
	 *
	 * @param array $data Event data.
	 *                    NOTE(review): assumes the comment text lives in
	 *                    $data['comment']['body'] - confirm against the Nucleus
	 *                    event docs. Nothing is checked when it is absent.
	 */
	public function event_PreAddComment($data) {
		if (isset($data['comment']['body']) && is_string($data['comment']['body'])) {
			$this->_validateMarkup($data['comment']['body']);
		}
	}

	/**
	 * Runs one piece of markup through a fresh SafeHtmlChecker and reports
	 * every problem found through doError().
	 *
	 * @param string $markup Markup fragment, without the '<all>' wrapper.
	 */
	private function _validateMarkup($markup) {
		$this->checker = new SafeHtmlChecker();
		$this->checker->check('<all>' . $markup . '</all>');

		if ($this->checker->isOK()) {
			return;
		}

		// The checker's messages already contain their own markup, so they
		// are inserted as-is.
		$message = '<ul>';
		foreach ($this->checker->getErrors() as $error) {
			$message .= '<li>' . $error . '</li>';
		}
		$message .= '</ul>';

		doError($message);
	}
}
 
 
/* SafeHtmlChecker - checks HTML against a subset of
   elements to ensure safety and XHTML validation.
 
   Simon Willison, 23rd Feb 2003
 
   Note: HTML sent to the checker must be wrapped in an '<all>' tag.
   HTML can be sent to the checker in chunks, with multiple calls to
   the check() method.
 
   Usage:
 
   $checker = new SafeHtmlChecker;
   $checker->check('<all>'.$html.'</all>');
   if ($checker->isOK()) {
       echo 'Everything is fine';
   } else {
       echo '<ul>';
       foreach ($checker->getErrors() as $error) {
           echo '<li>'.$error.'</li>';
       }
       echo '</ul>';
   }
 
*/
 
// Entity classes, adapted from XHTML 1.0 strict DTD.
// 'br' is part of the inline content model; without it the 'br' entry in
// SafeHtmlChecker::$tags could never be matched and every <br/> was rejected.
define('E_INLINE_CONTENTS', 'em img br strong dfn code q samp kbd var cite abbr acronym sub sup a #PCDATA');
define('E_BLOCK_CONTENTS', 'dl ul ol blockquote p');
define('E_FLOW_CONTENTS', E_BLOCK_CONTENTS.' '.E_INLINE_CONTENTS);

/**
 * Checks an XHTML fragment for well-formedness and against a whitelist of
 * tags, nesting rules and attributes.
 *
 * The fragment must be wrapped in an '<all>' element. One instance wraps one
 * XML parser and keeps accumulating errors, so it validates exactly one
 * document; that document may be fed in several chunks through check().
 */
class SafeHtmlChecker {
    // Maps each legal tag to the space-separated list of things it may contain.
    public $tags = array(
        'all' => E_FLOW_CONTENTS,
        'p' => E_INLINE_CONTENTS,
        'blockquote' => E_BLOCK_CONTENTS,
        // Lists
        'ul' => 'li',
        'ol' => 'li',
        'li' => E_FLOW_CONTENTS,
        'dl' => 'dt dd',
        'dt' => E_INLINE_CONTENTS,
        'dd' => E_FLOW_CONTENTS,
        // Inline elements
        'em' => E_INLINE_CONTENTS,
        'strong' => E_INLINE_CONTENTS,
        'dfn' => E_INLINE_CONTENTS,
        'code' => E_INLINE_CONTENTS,
        'q' => E_INLINE_CONTENTS,
        'samp' => E_INLINE_CONTENTS,
        'kbd' => E_INLINE_CONTENTS,
        'var' => E_INLINE_CONTENTS,
        'cite' => E_INLINE_CONTENTS,
        'abbr' => E_INLINE_CONTENTS,
        'acronym' => E_INLINE_CONTENTS,
        'sub' => E_INLINE_CONTENTS,
        'sup' => E_INLINE_CONTENTS,
        'a' => E_INLINE_CONTENTS,
        // Empty elements
        'img' => '',
        'br' => ''
    );
    // Maps a tag to the space-separated list of attributes it may carry.
    // Tags that are not listed here may not carry any attribute.
    public $tagattrs = array(
        'p' => 'id class style',
        // NOTE(review): 'div' is not a key of $tags, so it is still reported
        // as an illegal tag and this entry currently has no effect.
        'div' => 'id class style',
        'blockquote' => 'class style cite',
        'q' => 'style cite class',
        'a' => 'id href title style class',
        // NOTE(review): 'onclick' lets submitted markup carry inline
        // JavaScript; kept for backward compatibility, but consider removing
        // it if untrusted input is validated.
        'img' => 'id class style src alt onclick',
        'dfn' => 'class style title',
        'acronym' => 'class style title',
        'abbr' => 'class style title'
    );
    // Internal variables
    public $errors = array();
    public $parser;
    public $stack = array();

    /**
     * Creates the XML parser and wires its callbacks to this object.
     */
    public function __construct() {
        $this->parser = xml_parser_create();
        // Array callables bind the handlers to this instance without
        // xml_set_object() and without call-time pass-by-reference.
        xml_set_element_handler($this->parser, array($this, 'tag_open'), array($this, 'tag_close'));
        xml_set_character_data_handler($this->parser, array($this, 'cdata'));
        // Keep tag names as written instead of upper-casing them.
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
    }

    /**
     * Feeds a chunk of the document to the parser. Problems are collected and
     * can be read back with isOK() / getErrors().
     *
     * @param string $xhtml The next chunk of the '<all>'-wrapped document.
     */
    public function check($xhtml) {
        // Comment and CDATA openers are removed before parsing. Repeat until
        // nothing changes so that removing one opener cannot splice the
        // surrounding characters into a new one (e.g. '<!-<!---').
        do {
            $before = $xhtml;
            $xhtml = str_replace(array('<!--', '<![CDATA['), '', $xhtml);
        } while ($xhtml !== $before);

        if (!xml_parse($this->parser, $xhtml)) {
            $this->errors[] = 'XHTML is not well-formed';
        }
    }

    /**
     * Parser callback for an opening tag: checks the tag itself, its nesting
     * context and its attributes, then pushes it on the stack.
     *
     * @param mixed  $parser The XML parser (unused).
     * @param string $tag    Tag name.
     * @param array  $attrs  Attribute name => value.
     */
    public function tag_open($parser, $tag, $attrs) {
        if ($tag == 'all') {
            $this->stack[] = 'all';
            return;
        }
        $previous = $this->currentTag();
        // If the enclosing tag is missing or illegal, no point in running tests
        if ($previous === null || !array_key_exists($previous, $this->tags)) {
            $this->stack[] = $tag;
            return;
        }
        // Is tag a legal tag?
        if (!array_key_exists($tag, $this->tags)) {
            $this->errors[] = "Illegal tag: <code>$tag</code>";
            $this->stack[] = $tag;
            return;
        }
        // Is tag allowed in the current context?
        if (!in_array($tag, explode(' ', $this->tags[$previous]))) {
            if ($previous == 'all') {
                $this->errors[] = "Tag <code>$tag</code> must occur inside another tag";
            } else {
                $this->errors[] = "Tag <code>$tag</code> is not allowed within tag <code>$previous</code>";
            }
        }
        // Are tag attributes valid?
        $allowed = isset($this->tagattrs[$tag]) ? explode(' ', $this->tagattrs[$tag]) : array();
        foreach ($attrs as $attr => $value) {
            if (!in_array($attr, $allowed)) {
                $this->errors[] = "Tag <code>$tag</code> may not have attribute <code>$attr</code>";
            }
            // URL-carrying attributes (cite is used by blockquotes.js) must
            // not use the javascript: or data: protocols.
            if ($attr === 'href' || $attr === 'cite') {
                $protocol = $this->forbiddenProtocol($value);
                if ($protocol !== null) {
                    $this->errors[] = "<code>$attr</code> attributes may not contain the <code>{$protocol}:</code> protocol";
                }
            }
        }
        // Remember the tag, used for checking nesting context rules
        $this->stack[] = $tag;
    }

    /**
     * Parser callback for character data: reports non-blank text inside a
     * tag whose content model does not include #PCDATA.
     *
     * @param mixed  $parser The XML parser (unused).
     * @param string $cdata  The character data.
     */
    public function cdata($parser, $cdata) {
        $previous = $this->currentTag();
        // If the enclosing tag is missing or illegal, no point in running the test
        if ($previous === null || !array_key_exists($previous, $this->tags)) {
            return;
        }
        if (trim($cdata) != '' && !in_array('#PCDATA', explode(' ', $this->tags[$previous]))) {
            $this->errors[] = "Tag <code>$previous</code> may not contain raw character data";
        }
    }

    /**
     * Parser callback for a closing tag: moves back one up the stack.
     *
     * @param mixed  $parser The XML parser (unused).
     * @param string $tag    Tag name (unused).
     */
    public function tag_close($parser, $tag) {
        array_pop($this->stack);
    }

    /**
     * @return bool True when no error has been recorded so far.
     */
    public function isOK() {
        return count($this->errors) < 1;
    }

    /**
     * @return array The error messages recorded so far; they contain HTML markup.
     */
    public function getErrors() {
        return $this->errors;
    }

    /**
     * Returns the innermost open tag, or null when no tag is open.
     */
    private function currentTag() {
        $depth = count($this->stack);
        return $depth > 0 ? $this->stack[$depth - 1] : null;
    }

    /**
     * Returns 'javascript' or 'data' when the URL uses that protocol, null otherwise.
     *
     * Whitespace and control characters are stripped first so that they
     * cannot hide the scheme, and the ':' separator is required so that
     * relative URLs such as 'data/report.html' are not rejected.
     */
    private function forbiddenProtocol($value) {
        $compact = preg_replace('/[\x00-\x20]+/', '', (string) $value);
        if (is_string($compact) && preg_match('/^(javascript|data):/i', $compact, $match)) {
            return strtolower($match[1]);
        }
        return null;
    }
}
 
?>
validate.txt · Last modified: 2010/01/27 10:47 (external edit)
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki