<?php
/**
 * Ganon single file version - PHP5+ version
 * Generated on 20 Oct 2012
 *
 * @author Niels A.D.
 * @package Ganon
 * @link http://code.google.com/p/ganon/
 * @license http://dev.perl.org/licenses/artistic.html Artistic License
 */

//START ganon.php

/**
 * Parses an HTML string.
 *
 * @param string $str         HTML source to parse
 * @param bool   $return_root True to return the parser's root node, false to
 *                            return the HTML_Parser_HTML5 instance itself
 * @return mixed Root node or parser object, depending on $return_root
 */
function str_get_dom($str, $return_root = true) {
    $a = new HTML_Parser_HTML5($str);
    return (($return_root) ? $a->root : $a);
}

/**
 * Reads a file (or any stream file_get_contents() accepts) and parses it.
 *
 * @param string   $file             Path/URL handed to file_get_contents()
 * @param bool     $return_root      Forwarded to str_get_dom()
 * @param bool     $use_include_path Forwarded to file_get_contents()
 * @param resource $context          Stream context, forwarded to file_get_contents()
 * @return mixed False when the file could not be read, otherwise the result
 *               of str_get_dom()
 */
function file_get_dom($file, $return_root = true, $use_include_path = false, $context = null) {
    if (version_compare(PHP_VERSION, '5.0.0', '>=')) {
        $f = file_get_contents($file, $use_include_path, $context);
    } else {
        // Older runtimes: the context argument is dropped, so warn when one was given
        if ($context !== null) {
            trigger_error('Context parameter not supported in this PHP version');
        }
        $f = file_get_contents($file, $use_include_path);
    }

    return (($f === false) ? false : str_get_dom($f, $return_root));
}

/**
 * Runs an HTML_Formatter over a node tree.
 *
 * NOTE(review): HTML_Formatter is not defined in this part of the file;
 * its format() contract should be confirmed there.
 *
 * @param mixed $root    Node passed (by reference) to HTML_Formatter::format()
 * @param array $options Options passed to the HTML_Formatter constructor
 * @return mixed Whatever HTML_Formatter::format() returns
 */
function dom_format(&$root, $options = array()) {
    $formatter = new HTML_Formatter($options);
    return $formatter->format($root);
}

// Fallback for runtimes older than PHP 5.0: split a string into single characters
if (version_compare(PHP_VERSION, '5.0.0', '<')) {
    function str_split($string) {
        $res = array();
        $size = strlen($string);
        for ($i = 0; $i < $size; $i++) {
            $res[] = $string[$i];
        }
        return $res;
    }
}

// Fallback for runtimes older than PHP 5.2: build an array with the given keys, all set to $value
if (version_compare(PHP_VERSION, '5.2.0', '<')) {
    function array_fill_keys($keys, $value) {
        $res = array();
        foreach($keys as $k) {
            $res[$k] = $value;
        }
        return $res;
    }
}

//END ganon.php

//START gan_tokenizer.php
class Tokenizer_Base { const TOK_NULL = 0; const TOK_UNKNOWN = 1; const TOK_WHITESPACE = 2; const TOK_IDENTIFIER = 3; var $doc = ‘‘; var $size = 0; var $pos = 0; var $line_pos = array(0, 0); var $token = self::TOK_NULL; var $token_start = null; var $whitespace = " \t\n\r\0\x0B"; var $identifiers = ‘abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_‘; var $custom_char_map = array(); var $char_map = array(); var $errors = array(); function __construct($doc = ‘‘, $pos = 0) { $this->setWhitespace($this->whitespace); $this->setIdentifiers($this->identifiers); $this->setDoc($doc, $pos); } function setDoc($doc, $pos = 0) { $this->doc = $doc; 
$this->size = strlen($doc); $this->setPos($pos); } function getDoc() { return $this->doc; } function setPos($pos = 0) { $this->pos = $pos - 1; $this->line_pos = array(0, 0); $this->next(); } function getPos() { return $this->pos; } function getLinePos() { return array($this->line_pos[0], $this->pos - $this->line_pos[1]); } function getToken() { return $this->token; } function getTokenString($start_offset = 0, $end_offset = 0) { $token_start = ((is_int($this->token_start)) ? $this->token_start : $this->pos) + $start_offset; $len = $this->pos - $token_start + 1 + $end_offset; return (($len > 0) ? substr($this->doc, $token_start, $len) : ‘‘); } function setWhitespace($ws) { if (is_array($ws)) { $this->whitespace = array_fill_keys(array_values($ws), true); $this->buildCharMap(); } else { $this->setWhiteSpace(str_split($ws)); } } function getWhitespace($as_string = true) { $ws = array_keys($this->whitespace); return (($as_string) ? implode(‘‘, $ws) : $ws); } function setIdentifiers($ident) { if (is_array($ident)) { $this->identifiers = array_fill_keys(array_values($ident), true); $this->buildCharMap(); } else { $this->setIdentifiers(str_split($ident)); } } function getIdentifiers($as_string = true) { $ident = array_keys($this->identifiers); return (($as_string) ? 
implode(‘‘, $ident) : $ident); } function mapChar($char, $map) { $this->custom_char_map[$char] = $map; $this->buildCharMap(); } function unmapChar($char) { unset($this->custom_char_map[$char]); $this->buildCharMap(); } protected function buildCharMap() { $this->char_map = $this->custom_char_map; if (is_array($this->whitespace)) { foreach($this->whitespace as $w => $v) { $this->char_map[$w] = ‘parse_whitespace‘; } } if (is_array($this->identifiers)) { foreach($this->identifiers as $i => $v) { $this->char_map[$i] = ‘parse_identifier‘; } } } function addError($error) { $this->errors[] = htmlentities($error.‘ at ‘.($this->line_pos[0] + 1).‘, ‘.($this->pos - $this->line_pos[1] + 1).‘!‘); } protected function parse_linebreak() { if($this->doc[$this->pos] === "\r") { ++$this->line_pos[0]; if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === "\n")) { ++$this->pos; } $this->line_pos[1] = $this->pos; } elseif($this->doc[$this->pos] === "\n") { ++$this->line_pos[0]; $this->line_pos[1] = $this->pos; } } protected function parse_whitespace() { $this->token_start = $this->pos; while(++$this->pos < $this->size) { if (!isset($this->whitespace[$this->doc[$this->pos]])) { break; } else { $this->parse_linebreak(); } } --$this->pos; return self::TOK_WHITESPACE; } protected function parse_identifier() { $this->token_start = $this->pos; while((++$this->pos < $this->size) && isset($this->identifiers[$this->doc[$this->pos]])) {} --$this->pos; return self::TOK_IDENTIFIER; } function next() { $this->token_start = null; if (++$this->pos < $this->size) { if (isset($this->char_map[$this->doc[$this->pos]])) { if (is_string($this->char_map[$this->doc[$this->pos]])) { return ($this->token = $this->{$this->char_map[$this->doc[$this->pos]]}()); } else { return ($this->token = $this->char_map[$this->doc[$this->pos]]); } } else { return ($this->token = self::TOK_UNKNOWN); } } else { return ($this->token = self::TOK_NULL); } } function next_no_whitespace() { $this->token_start = 
null; while (++$this->pos < $this->size) { if (!isset($this->whitespace[$this->doc[$this->pos]])) { if (isset($this->char_map[$this->doc[$this->pos]])) { if (is_string($this->char_map[$this->doc[$this->pos]])) { return ($this->token = $this->{$this->char_map[$this->doc[$this->pos]]}()); } else { return ($this->token = $this->char_map[$this->doc[$this->pos]]); } } else { return ($this->token = self::TOK_UNKNOWN); } } else { $this->parse_linebreak(); } } return ($this->token = self::TOK_NULL); } function next_search($characters, $callback = true) { $this->token_start = $this->pos; if (!is_array($characters)) { $characters = array_fill_keys(str_split($characters), true); } while(++$this->pos < $this->size) { if (isset($characters[$this->doc[$this->pos]])) { if ($callback && isset($this->char_map[$this->doc[$this->pos]])) { if (is_string($this->char_map[$this->doc[$this->pos]])) { return ($this->token = $this->{$this->char_map[$this->doc[$this->pos]]}()); } else { return ($this->token = $this->char_map[$this->doc[$this->pos]]); } } else { return ($this->token = self::TOK_UNKNOWN); } } else { $this->parse_linebreak(); } } return ($this->token = self::TOK_NULL); } function next_pos($needle, $callback = true) { $this->token_start = $this->pos; if (($this->pos < $this->size) && (($p = stripos($this->doc, $needle, $this->pos + 1)) !== false)) { $len = $p - $this->pos - 1; if ($len > 0) { $str = substr($this->doc, $this->pos + 1, $len); if (($l = strrpos($str, "\n")) !== false) { ++$this->line_pos[0]; $this->line_pos[1] = $l + $this->pos + 1; $len -= $l; if ($len > 0) { $str = substr($str, 0, -$len); $this->line_pos[0] += substr_count($str, "\n"); } } } $this->pos = $p; if ($callback && isset($this->char_map[$this->doc[$this->pos]])) { if (is_string($this->char_map[$this->doc[$this->pos]])) { return ($this->token = $this->{$this->char_map[$this->doc[$this->pos]]}()); } else { return ($this->token = $this->char_map[$this->doc[$this->pos]]); } } else { return ($this->token = 
self::TOK_UNKNOWN); } } else { $this->pos = $this->size; return ($this->token = self::TOK_NULL); } } protected function expect($token, $do_next = true, $try_next = false, $next_on_match = 1) { if ($do_next) { if ($do_next === 1) { $this->next(); } else { $this->next_no_whitespace(); } } if (is_int($token)) { if (($this->token !== $token) && ((!$try_next) || ((($try_next === 1) && ($this->next() !== $token)) || (($try_next === true) && ($this->next_no_whitespace() !== $token))))) { $this->addError(‘Unexpected "‘.$this->getTokenString().‘"‘); return false; } } else { if (($this->doc[$this->pos] !== $token) && ((!$try_next) || (((($try_next === 1) && ($this->next() !== self::TOK_NULL)) || (($try_next === true) && ($this->next_no_whitespace() !== self::TOK_NULL))) && ($this->doc[$this->pos] !== $token)))) { $this->addError(‘Expected "‘.$token.‘", but found "‘.$this->getTokenString().‘"‘); return false; } } if ($next_on_match) { if ($next_on_match === 1) { $this->next(); } else { $this->next_no_whitespace(); } } return true; } } //END gan_tokenizer.php //START gan_parser_html.php class HTML_Parser_Base extends Tokenizer_Base { const TOK_TAG_OPEN = 100; const TOK_TAG_CLOSE = 101; const TOK_SLASH_FORWARD = 103; const TOK_SLASH_BACKWARD = 104; const TOK_STRING = 104; const TOK_EQUALS = 105; var $identifiers = ‘abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890:-_!?%‘; var $status = array(); var $custom_char_map = array( ‘<‘ => self::TOK_TAG_OPEN, ‘>‘ => self::TOK_TAG_CLOSE, "‘" => ‘parse_string‘, ‘"‘ => ‘parse_string‘, ‘/‘ => self::TOK_SLASH_FORWARD, ‘\\‘ => self::TOK_SLASH_BACKWARD, ‘=‘ => self::TOK_EQUALS ); function __construct($doc = ‘‘, $pos = 0) { parent::__construct($doc, $pos); $this->parse_all(); } var $tag_map = array( ‘!doctype‘ => ‘parse_doctype‘, ‘?‘ => ‘parse_php‘, ‘?php‘ => ‘parse_php‘, ‘%‘ => ‘parse_asp‘, ‘style‘ => ‘parse_style‘, ‘script‘ => ‘parse_script‘ ); protected function parse_string() { if ($this->next_pos($this->doc[$this->pos], false) 
!== self::TOK_UNKNOWN) { --$this->pos; } return self::TOK_STRING; } function parse_text() { $len = $this->pos - 1 - $this->status[‘last_pos‘]; $this->status[‘text‘] = (($len > 0) ? substr($this->doc, $this->status[‘last_pos‘] + 1, $len) : ‘‘); } function parse_comment() { $this->pos += 3; if ($this->next_pos(‘-->‘, false) !== self::TOK_UNKNOWN) { $this->status[‘comment‘] = $this->getTokenString(1, -1); --$this->pos; } else { $this->status[‘comment‘] = $this->getTokenString(1, -1); $this->pos += 2; } $this->status[‘last_pos‘] = $this->pos; return true; } function parse_doctype() { $start = $this->pos; if ($this->next_search(‘[>‘, false) === self::TOK_UNKNOWN) { if ($this->doc[$this->pos] === ‘[‘) { if (($this->next_pos(‘]‘, false) !== self::TOK_UNKNOWN) || ($this->next_pos(‘>‘, false) !== self::TOK_UNKNOWN)) { $this->addError(‘Invalid doctype‘); return false; } } $this->token_start = $start; $this->status[‘dtd‘] = $this->getTokenString(2, -1); $this->status[‘last_pos‘] = $this->pos; return true; } else { $this->addError(‘Invalid doctype‘); return false; } } function parse_cdata() { if ($this->next_pos(‘]]>‘, false) === self::TOK_UNKNOWN) { $this->status[‘cdata‘] = $this->getTokenString(9, -1); $this->status[‘last_pos‘] = $this->pos + 2; return true; } else { $this->addError(‘Invalid cdata tag‘); return false; } } function parse_php() { $start = $this->pos; if ($this->next_pos(‘?>‘, false) !== self::TOK_UNKNOWN) { $this->pos -= 2; } $len = $this->pos - 1 - $start; $this->status[‘text‘] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ‘‘); $this->status[‘last_pos‘] = ++$this->pos; return true; } function parse_asp() { $start = $this->pos; if ($this->next_pos(‘%>‘, false) !== self::TOK_UNKNOWN) { $this->pos -= 2; } $len = $this->pos - 1 - $start; $this->status[‘text‘] = (($len > 0) ? 
substr($this->doc, $start + 1, $len) : ‘‘); $this->status[‘last_pos‘] = ++$this->pos; return true; } function parse_style() { if ($this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE) && ($start = $this->pos) && ($this->next_pos(‘</style>‘, false) === self::TOK_UNKNOWN)) { $len = $this->pos - 1 - $start; $this->status[‘text‘] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ‘‘); $this->pos += 7; $this->status[‘last_pos‘] = $this->pos; return true; } else { $this->addError(‘No end for style tag found‘); return false; } } function parse_script() { if ($this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE) && ($start = $this->pos) && ($this->next_pos(‘</script>‘, false) === self::TOK_UNKNOWN)) { $len = $this->pos - 1 - $start; $this->status[‘text‘] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ‘‘); $this->pos += 8; $this->status[‘last_pos‘] = $this->pos; return true; } else { $this->addError(‘No end for script tag found‘); return false; } } function parse_conditional() { if ($this->status[‘closing_tag‘]) { $this->pos += 8; } else { $this->pos += (($this->status[‘comment‘]) ? 5 : 3); if ($this->next_pos(‘]‘, false) !== self::TOK_UNKNOWN) { $this->addError(‘"]" not found in conditional tag‘); return false; } $this->status[‘tag_condition‘] = $this->getTokenString(0, -1); } if ($this->next_no_whitespace() !== self::TOK_TAG_CLOSE) { $this->addError(‘No ">" tag found 2 for conditional tag‘); return false; } if ($this->status[‘comment‘]) { $this->status[‘last_pos‘] = $this->pos; if ($this->next_pos(‘-->‘, false) !== self::TOK_UNKNOWN) { $this->addError(‘No ending tag found for conditional tag‘); $this->pos = $this->size - 1; $len = $this->pos - 1 - $this->status[‘last_pos‘]; $this->status[‘text‘] = (($len > 0) ? substr($this->doc, $this->status[‘last_pos‘] + 1, $len) : ‘‘); } else { $len = $this->pos - 10 - $this->status[‘last_pos‘]; $this->status[‘text‘] = (($len > 0) ? 
substr($this->doc, $this->status[‘last_pos‘] + 1, $len) : ‘‘); $this->pos += 2; } } $this->status[‘last_pos‘] = $this->pos; return true; } function parse_attributes() { $this->status[‘attributes‘] = array(); while ($this->next_no_whitespace() === self::TOK_IDENTIFIER) { $attr = $this->getTokenString(); if (($attr === ‘?‘) || ($attr === ‘%‘)) { break; } if ($this->next_no_whitespace() === self::TOK_EQUALS) { if ($this->next_no_whitespace() === self::TOK_STRING) { $val = $this->getTokenString(1, -1); } else { if (!isset($stop)) { $stop = $this->whitespace; $stop[‘<‘] = true; $stop[‘>‘] = true; } while ((++$this->pos < $this->size) && (!isset($stop[$this->doc[$this->pos]]))) {} --$this->pos; $val = $this->getTokenString(); if (trim($val) === ‘‘) { $this->addError(‘Invalid attribute value‘); return false; } } } else { $val = $attr; $this->pos = (($this->token_start) ? $this->token_start : $this->pos) - 1; } $this->status[‘attributes‘][$attr] = $val; } return true; } function parse_tag_default() { if ($this->status[‘closing_tag‘]) { $this->status[‘attributes‘] = array(); $this->next_no_whitespace(); } else { if (!$this->parse_attributes()) { return false; } } if ($this->token !== self::TOK_TAG_CLOSE) { if ($this->token === self::TOK_SLASH_FORWARD) { $this->status[‘self_close‘] = true; $this->next(); } elseif ((($this->status[‘tag_name‘][0] === ‘?‘) && ($this->doc[$this->pos] === ‘?‘)) || (($this->status[‘tag_name‘][0] === ‘%‘) && ($this->doc[$this->pos] === ‘%‘))) { $this->status[‘self_close‘] = true; $this->pos++; if (isset($this->char_map[$this->doc[$this->pos]]) && (!is_string($this->char_map[$this->doc[$this->pos]]))) { $this->token = $this->char_map[$this->doc[$this->pos]]; } else { $this->token = self::TOK_UNKNOWN; } } } if ($this->token !== self::TOK_TAG_CLOSE) { $this->addError(‘Expected ">", but found "‘.$this->getTokenString().‘"‘); if ($this->next_pos(‘>‘, false) !== self::TOK_UNKNOWN) { $this->addError(‘No ">" tag found for "‘.$this->status[‘tag_name‘].‘" 
tag‘); return false; } } return true; } function parse_tag() { $start = $this->pos; $this->status[‘self_close‘] = false; $this->parse_text(); $next = (($this->pos + 1) < $this->size) ? $this->doc[$this->pos + 1] : ‘‘; if ($next === ‘!‘) { $this->status[‘closing_tag‘] = false; if (substr($this->doc, $this->pos + 2, 2) === ‘--‘) { $this->status[‘comment‘] = true; if (($this->doc[$this->pos + 4] === ‘[‘) && (strcasecmp(substr($this->doc, $this->pos + 5, 2), ‘if‘) === 0)) { return $this->parse_conditional(); } else { return $this->parse_comment(); } } else { $this->status[‘comment‘] = false; if ($this->doc[$this->pos + 2] === ‘[‘) { if (strcasecmp(substr($this->doc, $this->pos + 3, 2), ‘if‘) === 0) { return $this->parse_conditional(); } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), ‘endif‘) === 0) { $this->status[‘closing_tag‘] = true; return $this->parse_conditional(); } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), ‘cdata‘) === 0) { return $this->parse_cdata(); } } } } elseif ($next === ‘/‘) { $this->status[‘closing_tag‘] = true; ++$this->pos; } else { $this->status[‘closing_tag‘] = false; } if ($this->next() !== self::TOK_IDENTIFIER) { $this->addError(‘Tagname expected‘); $this->status[‘last_pos‘] = $start - 1; return true; } $tag = $this->getTokenString(); $this->status[‘tag_name‘] = $tag; $tag = strtolower($tag); if (isset($this->tag_map[$tag])) { $res = $this->{$this->tag_map[$tag]}(); } else { $res = $this->parse_tag_default(); } $this->status[‘last_pos‘] = $this->pos; return $res; } function parse_all() { $this->errors = array(); $this->status[‘last_pos‘] = -1; if (($this->token === self::TOK_TAG_OPEN) || ($this->next_pos(‘<‘, false) === self::TOK_UNKNOWN)) { do { if (!$this->parse_tag()) { return false; } } while ($this->next_pos(‘<‘) !== self::TOK_NULL); } $this->pos = $this->size; $this->parse_text(); return true; } } class HTML_Parser extends HTML_Parser_Base { var $root = ‘HTML_Node‘; var $hierarchy = array(); var $tags_selfclose = 
array( ‘area‘ => true, ‘base‘ => true, ‘basefont‘ => true, ‘br‘ => true, ‘col‘ => true, ‘command‘ => true, ‘embed‘ => true, ‘frame‘ => true, ‘hr‘ => true, ‘img‘ => true, ‘input‘ => true, ‘ins‘ => true, ‘keygen‘ => true, ‘link‘ => true, ‘meta‘ => true, ‘param‘ => true, ‘source‘ => true, ‘track‘ => true, ‘wbr‘ => true );

    /**
     * Creates the parser; the parent constructor chain parses $doc immediately.
     *
     * @param string $doc  HTML source
     * @param int    $pos  Offset in $doc to start at
     * @param mixed  $root Node to attach parsed content to; when null, a new
     *                     instance of the class named in $this->root is
     *                     created with tag "~root~" and no parent
     */
    function __construct($doc = '', $pos = 0, $root = null) {
        if ($root === null) {
            $root = new $this->root('~root~', null);
        }
        $this->root =& $root;

        parent::__construct($doc, $pos);
    }

    /**
     * Lets the parser be called like a function; shorthand for select().
     *
     * @param string $query Selector query
     * @return mixed Result of select($query)
     */
    function __invoke($query = '*') {
        return $this->select($query);
    }

    /**
     * @return string Inner text of the root node
     */
    function __toString() {
        return $this->root->getInnerText();
    }

    /**
     * Forwards a selector query to the root node.
     *
     * @param string $query      Selector query
     * @param mixed  $index      Forwarded unchanged to the root node's select()
     * @param bool   $recursive  Forwarded unchanged
     * @param bool   $check_self Forwarded unchanged
     * @return mixed Whatever the root node's select() returns
     */
    function select($query = '*', $index = false, $recursive = true, $check_self = false) {
        return $this->root->select($query, $index, $recursive, $check_self);
    }

    /**
     * Applies the tag currently described by $this->status to the node tree.
     *
     * $this->hierarchy is the stack of open elements; its last entry is the
     * node new children are attached to.
     *
     * @param bool|null $self_close True/false to force how the tag is treated;
     *                              null to look the tag name up in
     *                              $this->tags_selfclose
     */
    protected function parse_hierarchy($self_close = null) {
        if ($self_close === null) {
            $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[strtolower($this->status['tag_name'])]));
        }

        if ($self_close) {
            if ($this->status['closing_tag']) {
                // A closing tag for an element that was stored as self-closing:
                // find the last sibling with that tag and move everything that
                // followed it inside it.
                $c = $this->hierarchy[count($this->hierarchy) - 1]->children;
                $found = false;
                for ($count = count($c), $i = $count - 1; $i >= 0; $i--) {
                    if (strcasecmp($c[$i]->tag, $this->status['tag_name']) === 0) {
                        // $c is a by-value snapshot of the children array, so
                        // each following sibling has to be addressed by its own
                        // index; indexing $i + 1 every time would move only one.
                        for ($ii = $i + 1; $ii < $count; $ii++) {
                            $index = null; // must be a variable: passed by reference
                            $c[$ii]->changeParent($c[$i], $index);
                        }
                        $c[$i]->self_close = false;
                        $found = true;
                        break;
                    }
                }

                if (!$found) {
                    $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
                }
            } elseif ($this->status['tag_name'][0] === '?') {
                $index = null;
                $this->hierarchy[count($this->hierarchy) - 1]->addXML($this->status['tag_name'], '', $this->status['attributes'], $index);
            } elseif ($this->status['tag_name'][0] === '%') {
                $index = null;
                $this->hierarchy[count($this->hierarchy) - 1]->addASP($this->status['tag_name'], '', $this->status['attributes'], $index);
            } else {
                $index = null;
                $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
            }
        } elseif ($this->status['closing_tag']) {
            // Walk the open-element stack from the top looking for the tag
            $found = false;
            for ($count = count($this->hierarchy), $i = $count - 1; $i >= 0; $i--) {
                if (strcasecmp($this->hierarchy[$i]->tag, $this->status['tag_name']) === 0) {
                    // Pop the match and everything opened after it; every
                    // element popped before the match was left unclosed.
                    for ($ii = ($count - $i - 1); $ii >= 0; $ii--) {
                        $e = array_pop($this->hierarchy);
                        if ($ii > 0) {
                            $this->addError('Closing tag "'.$this->status['tag_name'].'" while "'.$e->tag.'" is not closed yet');
                        }
                    }
                    $found = true;
                    break;
                }
            }

            if (!$found) {
                $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
            }
        } else {
            // Opening tag: add the element and make it the current parent
            $index = null;
            $this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
        }
    }

    /**
     * Parses a CDATA section and adds it to the current parent.
     *
     * @return bool False when the base parser rejected the section
     */
    function parse_cdata() {
        if (!parent::parse_cdata()) {
            return false;
        }

        $index = null;
        $this->hierarchy[count($this->hierarchy) - 1]->addCDATA($this->status['cdata'], $index);
        return true;
    }

    /**
     * Parses a comment and adds it to the current parent.
     *
     * @return bool False when the base parser rejected the comment
     */
    function parse_comment() {
        if (!parent::parse_comment()) {
            return false;
        }

        $index = null;
        $this->hierarchy[count($this->hierarchy) - 1]->addComment($this->status['comment'], $index);
        return true;
    }

    /**
     * Parses a conditional tag.
     *
     * When $this->status['comment'] is set the conditional is added as a
     * child together with its text; otherwise an opening tag is pushed on the
     * hierarchy and a closing tag is resolved through parse_hierarchy(false).
     *
     * @return bool False when the base parser rejected the tag
     */
    function parse_conditional() {
        if (!parent::parse_conditional()) {
            return false;
        }

        if ($this->status['comment']) {
            $index = null;
            $e = $this->hierarchy[count($this->hierarchy) - 1]->addConditional($this->status['tag_condition'], true, $index);
            if ($this->status['text'] !== '') {
                $index = null;
                $e->addText($this->status['text'], $index);
            }
        } elseif ($this->status['closing_tag']) {
            $this->parse_hierarchy(false);
        } else {
            $index = null;
            $this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addConditional($this->status['tag_condition'], false, $index);
        }

        return true;
    }

    /**
     * Parses a doctype and adds it to the current parent.
     *
     * @return bool False when the base parser rejected the doctype
     */
    function parse_doctype() {
        if (!parent::parse_doctype()) {
            return false;
        }

        $index = null;
        $this->hierarchy[count($this->hierarchy) - 1]->addDoctype($this->status['dtd'], $index);
        return true;
    }

    /**
     * Parses a PHP block and adds it to the current parent as an XML node
     * tagged "php".
     *
     * @return bool False when the base parser rejected the block
     */
    function parse_php() {
        if (!parent::parse_php()) {
            return false;
        }

        // addXML($tag, $text, $attributes, &$offset): the attribute list has
        // to be given explicitly so $index lands in the offset slot.
        $index = null;
        $this->hierarchy[count($this->hierarchy) - 1]->addXML('php', $this->status['text'], array(), $index);
        return true;
    }

    /**
     * Parses an ASP block and adds it to the current parent.
     *
     * @return bool False when the base parser rejected the block
     */
    function parse_asp() {
        if (!parent::parse_asp()) {
            return false;
        }

        // addASP($tag, $text, $attributes, &$offset): same argument order as addXML
        $index = null;
        $this->hierarchy[count($this->hierarchy) - 1]->addASP('', $this->status['text'], array(), $index);
        return true;
    }

    /**
     * Parses a script element; its content is stored as one text child.
     *
     * @return bool False when the base parser rejected the element
     */
    function parse_script() {
        if (!parent::parse_script()) {
            return false;
        }

        $index = null;
        $e = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
        if ($this->status['text'] !== '') {
            $index = null;
            $e->addText($this->status['text'], $index);
        }
        return true;
    }

    /**
     * Parses a style element; its content is stored as one text child.
     *
     * @return bool False when the base parser rejected the element
     */
    function parse_style() {
        if (!parent::parse_style()) {
            return false;
        }

        $index = null;
        $e = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
        if ($this->status['text'] !== '') {
            $index = null;
            $e->addText($this->status['text'], $index);
        }
        return true;
    }

    /**
     * Parses any other tag and updates the hierarchy for it.
     *
     * @return bool False when the base parser rejected the tag
     */
    function parse_tag_default() {
        if (!parent::parse_tag_default()) {
            return false;
        }

        // An explicit self-close in the source wins; otherwise let
        // parse_hierarchy() decide from the tag name.
        $this->parse_hierarchy(($this->status['self_close']) ? true : null);
        return true;
    }

    /**
     * Collects the text since the previous tag and, when it is not empty,
     * adds it to the current parent.
     */
    function parse_text() {
        parent::parse_text();
        if ($this->status['text'] !== '') {
            $index = null;
            $this->hierarchy[count($this->hierarchy) - 1]->addText($this->status['text'], $index);
        }
    }

    function parse_all() { $this->hierarchy = array(&$this->root); return ((parent::parse_all()) ? 
$this->root : false); } }

/**
 * HTML parser that also honours optional closing tags.
 *
 * When an element opens while the current parent is one of the tags listed
 * for it in $tags_optional_close, that parent is treated as closed first.
 */
class HTML_Parser_HTML5 extends HTML_Parser {

    /**
     * Map of "tag being opened" => set of parent tags it closes.
     * The inner arrays are used as sets and looked up with isset(), so every
     * member has to be a KEY (value true).
     */
    var $tags_optional_close = array(
        'li'         => array('li' => true),
        'dt'         => array('dt' => true, 'dd' => true),
        'dd'         => array('dt' => true, 'dd' => true),
        'address'    => array('p' => true),
        'article'    => array('p' => true),
        'aside'      => array('p' => true),
        'blockquote' => array('p' => true),
        'dir'        => array('p' => true),
        'div'        => array('p' => true),
        'dl'         => array('p' => true),
        'fieldset'   => array('p' => true),
        'footer'     => array('p' => true),
        'form'       => array('p' => true),
        'h1'         => array('p' => true),
        'h2'         => array('p' => true),
        'h3'         => array('p' => true),
        'h4'         => array('p' => true),
        'h5'         => array('p' => true),
        'h6'         => array('p' => true),
        'header'     => array('p' => true),
        'hgroup'     => array('p' => true),
        'hr'         => array('p' => true),
        'menu'       => array('p' => true),
        'nav'        => array('p' => true),
        'ol'         => array('p' => true),
        'p'          => array('p' => true),
        'pre'        => array('p' => true),
        'section'    => array('p' => true),
        'table'      => array('p' => true),
        'ul'         => array('p' => true),
        'rt'         => array('rt' => true, 'rp' => true),
        'rp'         => array('rt' => true, 'rp' => true),
        'optgroup'   => array('optgroup' => true, 'option' => true),
        // was array('option'): that stores 'option' as a value under key 0,
        // which the isset() lookup below can never find
        'option'     => array('option' => true),
        // 'thead' was misspelled 'thread' in the next two entries
        'tbody'      => array('thead' => true, 'tbody' => true, 'tfoot' => true),
        'tfoot'      => array('thead' => true, 'tbody' => true),
        'tr'         => array('tr' => true),
        'td'         => array('td' => true, 'th' => true),
        'th'         => array('td' => true, 'th' => true),
        'body'       => array('head' => true)
    );

    /**
     * Closes the current parent first when the tag being opened implies it,
     * then defers to HTML_Parser::parse_hierarchy().
     *
     * @param bool|null $self_close True/false to force how the tag is treated;
     *                              null to look the tag name up in
     *                              $this->tags_selfclose
     * @return mixed Result of the parent implementation
     */
    protected function parse_hierarchy($self_close = null) {
        $tag_curr = strtolower($this->status['tag_name']);
        if ($self_close === null) {
            $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[$tag_curr]));
        }

        // Only a regular opening tag can implicitly close its parent
        if (!($self_close || $this->status['closing_tag'])) {
            $tag_prev = strtolower($this->hierarchy[count($this->hierarchy) - 1]->tag);
            if (isset($this->tags_optional_close[$tag_curr]) && isset($this->tags_optional_close[$tag_curr][$tag_prev])) {
                array_pop($this->hierarchy);
            }
        }

        return parent::parse_hierarchy($self_close);
    }
}

//END gan_parser_html.php

//START gan_node_html.php
class HTML_Node { const NODE_ELEMENT = 0; const NODE_TEXT = 1; const NODE_COMMENT = 2; const NODE_CONDITIONAL = 3; const NODE_CDATA = 4; const NODE_DOCTYPE = 5; const NODE_XML = 6; const NODE_ASP = 7; const NODE_TYPE = self::NODE_ELEMENT; var $selectClass = ‘HTML_Selector‘; var $parserClass = ‘HTML_Parser_HTML5‘; var $childClass = __CLASS__; var $childClass_Text = ‘HTML_Node_TEXT‘; var $childClass_Comment = ‘HTML_Node_COMMENT‘; var $childClass_Conditional = ‘HTML_Node_CONDITIONAL‘; var $childClass_CDATA = ‘HTML_Node_CDATA‘; var $childClass_Doctype = ‘HTML_Node_DOCTYPE‘; var $childClass_XML = ‘HTML_Node_XML‘; var $childClass_ASP = ‘HTML_Node_ASP‘; var $parent = null; var $attributes = array(); var $attributes_ns = null; var $children = array(); var $tag = ‘‘; var $tag_ns = null; var $self_close = false; var $self_close_str = ‘ /‘; var $attribute_shorttag = true; var $filter_map = array( ‘root‘ => ‘filter_root‘, ‘nth-child‘ => ‘filter_nchild‘, ‘eq‘ => ‘filter_nchild‘, ‘gt‘ => ‘filter_gt‘, ‘lt‘ => ‘filter_lt‘, ‘nth-last-child‘ => ‘filter_nlastchild‘, ‘nth-of-type‘ => ‘filter_ntype‘, ‘nth-last-of-type‘ => ‘filter_nlastype‘, ‘odd‘ => ‘filter_odd‘, ‘even‘ => ‘filter_even‘, ‘every‘ => ‘filter_every‘, ‘first-child‘ => ‘filter_first‘, ‘last-child‘ => ‘filter_last‘, ‘first-of-type‘ => ‘filter_firsttype‘, ‘last-of-type‘ => ‘filter_lasttype‘, ‘only-child‘ => ‘filter_onlychild‘, ‘only-of-type‘ => ‘filter_onlytype‘, ‘empty‘ => ‘filter_empty‘, ‘not-empty‘ => ‘filter_notempty‘, ‘has-text‘ => ‘filter_hastext‘, ‘no-text‘ => ‘filter_notext‘, ‘lang‘ => ‘filter_lang‘, ‘contains‘ => ‘filter_contains‘, ‘has‘ => ‘filter_has‘, 
‘not‘ => ‘filter_not‘, ‘element‘ => ‘filter_element‘, ‘text‘ => ‘filter_text‘, ‘comment‘ => ‘filter_comment‘ ); function __construct($tag, $parent) { $this->parent = $parent; if (is_string($tag)) { $this->tag = $tag; } else { $this->tag = $tag[‘tag_name‘]; $this->self_close = $tag[‘self_close‘]; $this->attributes = $tag[‘attributes‘]; } } function __destruct() { $this->delete(); } function __toString() { return (($this->tag === ‘~root~‘) ? $this->toString(true, true, 1) : $this->tag); } function __get($attribute) { return $this->getAttribute($attribute); } function __set($attribute, $value) { $this->setAttribute($attribute, $value); } function __isset($attribute) { return $this->hasAttribute($attribute); } function __unset($attribute) { return $this->deleteAttribute($attribute); } function __invoke($query = ‘*‘, $index = false, $recursive = true, $check_self = false) { return $this->select($query, $index, $recursive, $check_self); } function dumpLocation() { return (($this->parent) ? (($p = $this->parent->dumpLocation()) ? $p.‘ > ‘ : ‘‘).$this->tag.‘(‘.$this->typeIndex().‘)‘ : ‘‘); } protected function toString_attributes() { $s = ‘‘; foreach($this->attributes as $a => $v) { $s .= ‘ ‘.$a.(((!$this->attribute_shorttag) || ($this->attributes[$a] !== $a)) ? 
‘="‘.htmlspecialchars($this->attributes[$a], ENT_QUOTES, ‘‘, false).‘"‘ : ‘‘); } return $s; } protected function toString_content($attributes = true, $recursive = true, $content_only = false) { $s = ‘‘; foreach($this->children as $c) { $s .= $c->toString($attributes, $recursive, $content_only); } return $s; } function toString($attributes = true, $recursive = true, $content_only = false) { if ($content_only) { if (is_int($content_only)) { --$content_only; } return $this->toString_content($attributes, $recursive, $content_only); } $s = ‘<‘.$this->tag; if ($attributes) { $s .= $this->toString_attributes(); } if ($this->self_close) { $s .= $this->self_close_str.‘>‘; } else { $s .= ‘>‘; if($recursive) { $s .= $this->toString_content($attributes); } $s .= ‘</‘.$this->tag.‘>‘; } return $s; } function getOuterText() { return html_entity_decode($this->toString(), ENT_QUOTES); } function setOuterText($text, $parser = null) { if (trim($text)) { $index = $this->index(); if ($parser === null) { $parser = new $this->parserClass(); } $parser->setDoc($text); $parser->parse_all(); $parser->root->moveChildren($this->parent, $index); } $this->delete(); return (($parser && $parser->errors) ? $parser->errors : true); } function html() { return $this->toString(); } function getInnerText() { return html_entity_decode($this->toString(true, true, 1), ENT_QUOTES); } function setInnerText($text, $parser = null) { $this->clear(); if (trim($text)) { if ($parser === null) { $parser = new $this->parserClass(); } $parser->root =& $this; $parser->setDoc($text); $parser->parse_all(); } return (($parser && $parser->errors) ? 
$parser->errors : true); } function getPlainText() { return preg_replace(‘`\s+`‘, ‘ ‘, html_entity_decode($this->toString(true, true, true), ENT_QUOTES)); } function getPlainTextUTF8() { $txt = $this->getPlainText(); $enc = $this->getEncoding(); if ($enc !== false) { $txt = mb_convert_encoding($txt, "UTF-8", $enc); } return $txt; } function setPlainText($text) { $this->clear(); if (trim($text)) { $this->addText(htmlentities($text, ENT_QUOTES)); } } function delete() { if (($p = $this->parent) !== null) { $this->parent = null; $p->deleteChild($this); } else { $this->clear(); } } function detach($move_children_up = false) { if (($p = $this->parent) !== null) { $index = $this->index(); $this->parent = null; if ($move_children_up) { $this->moveChildren($p, $index); } $p->deleteChild($this, true); } } function clear() { foreach($this->children as $c) { $c->parent = null; $c->delete(); } $this->children = array(); } function getRoot() { $r = $this->parent; $n = ($r === null) ? null : $r->parent; while ($n !== null) { $r = $n; $n = $r->parent; } return $r; } function changeParent($to, &$index = null) { if ($this->parent !== null) { $this->parent->deleteChild($this, true); } $this->parent = $to; if ($index !== false) { $new_index = $this->index(); if (!(is_int($new_index) && ($new_index >= 0))) { $this->parent->addChild($this, $index); } } } function hasParent($tag = null, $recursive = false) { if ($this->parent !== null) { if ($tag === null) { return true; } elseif (is_string($tag)) { return (($this->parent->tag === $tag) || ($recursive && $this->parent->hasParent($tag))); } elseif (is_object($tag)) { return (($this->parent === $tag) || ($recursive && $this->parent->hasParent($tag))); } } return false; } function isParent($tag, $recursive = false) { return ($this->hasParent($tag, $recursive) === ($tag !== null)); } function isText() { return false; } function isComment() { return false; } function isTextOrComment() { return false; } function move($to, &$new_index = -1) { 
$this->changeParent($to, $new_index); } function moveChildren($to, &$new_index = -1, $start = 0, $end = -1) { if ($end < 0) { $end += count($this->children); } for ($i = $start; $i <= $end; $i++) { $this->children[$start]->changeParent($to, $new_index); } } function index($count_all = true) { if (!$this->parent) { return -1; } elseif ($count_all) { return $this->parent->findChild($this); } else{ $index = -1; foreach(array_keys($this->parent->children) as $k) { if (!$this->parent->children[$k]->isTextOrComment()) { ++$index; } if ($this->parent->children[$k] === $this) { return $index; } } return -1; } } function setIndex($index) { if ($this->parent) { if ($index > $this->index()) { --$index; } $this->delete(); $this->parent->addChild($this, $index); } } function typeIndex() { if (!$this->parent) { return -1; } else { $index = -1; foreach(array_keys($this->parent->children) as $k) { if (strcasecmp($this->tag, $this->parent->children[$k]->tag) === 0) { ++$index; } if ($this->parent->children[$k] === $this) { return $index; } } return -1; } } function indent() { return (($this->parent) ? 
$this->parent->indent() + 1 : -1); } function getSibling($offset = 1) { $index = $this->index() + $offset; if (($index >= 0) && ($index < $this->parent->childCount())) { return $this->parent->getChild($index); } else { return null; } } function getNextSibling($skip_text_comments = true) { $offset = 1; while (($n = $this->getSibling($offset)) !== null) { if ($skip_text_comments && ($n->tag[0] === ‘~‘)) { ++$offset; } else { break; } } return $n; } function getPreviousSibling($skip_text_comments = true) { $offset = -1; while (($n = $this->getSibling($offset)) !== null) { if ($skip_text_comments && ($n->tag[0] === ‘~‘)) { --$offset; } else { break; } } return $n; } function getNamespace() { if ($tag_ns === null) { $a = explode(‘:‘, $this->tag, 2); if (empty($a[1])) { $this->tag_ns = array(‘‘, $a[0]); } else { $this->tag_ns = array($a[0], $a[1]); } } return $this->tag_ns[0]; } function setNamespace($ns) { if ($this->getNamespace() !== $ns) { $this->tag_ns[0] = $ns; $this->tag = $ns.‘:‘.$this->tag_ns[1]; } } function getTag() { if ($tag_ns === null) { $this->getNamespace(); } return $this->tag_ns[1]; } function setTag($tag, $with_ns = false) { $with_ns = $with_ns || (strpos($tag, ‘:‘) !== false); if ($with_ns) { $this->tag = $tag; $this->tag_ns = null; } elseif ($this->getTag() !== $tag) { $this->tag_ns[1] = $tag; $this->tag = (($this->tag_ns[0]) ? 
$this->tag_ns[0].':' : '').$tag; // remainder of setTag(), begun on the previous physical line
        }
    }

    /**
     * Tries to read the document encoding from the root node.
     *
     * Looks, in order, at meta[charset], an "?xml" node with an encoding
     * attribute, and a meta content attribute containing "charset=".
     *
     * @return string|bool False when nothing was found
     */
    function getEncoding() {
        $root = $this->getRoot();
        if ($root !== null) {
            if ($enc = $root->select('meta[charset]', 0, true, true)) {
                return $enc->getAttribute("charset");
            } elseif ($enc = $root->select('"?xml"[encoding]', 0, true, true)) {
                return $enc->getAttribute("encoding");
            } elseif ($enc = $root->select('meta[content*="charset="]', 0, true, true)) {
                $content = $enc->getAttribute("content");
                // The selector compares case-insensitively, so "CHARSET=" also gets
                // here; a case-sensitive strpos() would yield false and cut at offset 8.
                $at = stripos($content, 'charset=');
                if ($at !== false) {
                    return substr($content, $at + 8);
                }
            }
        }
        return false;
    }

    /**
     * Number of children.
     *
     * @param bool $ignore_text_comments Leave out text and comment nodes
     * @return int
     */
    function childCount($ignore_text_comments = false) {
        if (!$ignore_text_comments) {
            return count($this->children);
        } else {
            $count = 0;
            foreach(array_keys($this->children) as $k) {
                if (!$this->children[$k]->isTextOrComment()) {
                    ++$count;
                }
            }
            return $count;
        }
    }

    /**
     * Index of a child node (identity comparison).
     *
     * @param object $child
     * @return int|bool False when $child is not a child of this node
     */
    function findChild($child) {
        return array_search($child, $this->children, true);
    }

    /**
     * Tells whether $child is a direct child of this node.
     *
     * @param object $child
     * @return bool
     */
    function hasChild($child) {
        // Was "(bool) findChild($child)": a call to an undefined global function,
        // and the cast would have reported the first child (index 0) as missing.
        return ($this->findChild($child) !== false);
    }

    // getChild(): accepts an index (negative counts from the end) or a node.
    // The last return statement is completed on the next physical line.
    function &getChild($child, $ignore_text_comments = false) {
        if (!is_int($child)) {
            $child = $this->findChild($child);
        } elseif ($child < 0) {
            $child += $this->childCount($ignore_text_comments);
        }
        if ($ignore_text_comments) {
            $count = 0;
            $last = null;
            foreach(array_keys($this->children) as $k) {
                if (!$this->children[$k]->isTextOrComment()) {
                    if ($count++ === $child) {
                        return $this->children[$k];
                    }
                    $last = $this->children[$k];
                }
            }
            return (($child > $count) ?
$last : null); // remainder of getChild(), begun on the previous physical line
        } else {
            return $this->children[$child];
        }
    }

    /**
     * Adds a child node.
     *
     * @param string|object $tag    Tag name (a node of $this->childClass is created)
     *                              or an existing node, which is re-parented if needed
     * @param int|null      $offset Insert position; null, -1 or a position past the
     *                              end appends. Incremented after an insert.
     * @return object The added node
     */
    function &addChild($tag, &$offset = null) {
        if (!is_object($tag)) {
            $tag = new $this->childClass($tag, $this);
        } elseif ($tag->parent !== $this) {
            $index = false;
            $tag->changeParent($this, $index);
        }

        if (is_int($offset) && ($offset < count($this->children)) && ($offset !== -1)) {
            if ($offset < 0) {
                $offset += count($this->children);
            }
            array_splice($this->children, $offset++, 0, array(&$tag));
        } else {
            $this->children[] =& $tag;
        }
        return $tag;
    }

    /** First child, see getChild(). */
    function &firstChild($ignore_text_comments = false) {
        return $this->getChild(0, $ignore_text_comments);
    }

    /** Last child, see getChild(). */
    function &lastChild($ignore_text_comments = false) {
        return $this->getChild(-1, $ignore_text_comments);
    }

    /** Same as addChild() with a mandatory position. */
    function &insertChild($tag, $index) {
        return $this->addChild($tag, $index);
    }

    /** Adds a node of class $this->childClass_Text. */
    function &addText($text, &$offset = null) {
        return $this->addChild(new $this->childClass_Text($this, $text), $offset);
    }

    /** Adds a node of class $this->childClass_Comment. */
    function &addComment($text, &$offset = null) {
        return $this->addChild(new $this->childClass_Comment($this, $text), $offset);
    }

    /** Adds a node of class $this->childClass_Conditional. */
    function &addConditional($condition, $hidden = true, &$offset = null) {
        return $this->addChild(new $this->childClass_Conditional($this, $condition, $hidden), $offset);
    }

    /** Adds a node of class $this->childClass_CDATA. */
    function &addCDATA($text, &$offset = null) {
        return $this->addChild(new $this->childClass_CDATA($this, $text), $offset);
    }

    /** Adds a node of class $this->childClass_Doctype. */
    function &addDoctype($dtd, &$offset = null) {
        return $this->addChild(new $this->childClass_Doctype($this, $dtd), $offset);
    }

    /** Adds a node of class $this->childClass_XML. */
    function &addXML($tag = 'xml', $text = '', $attributes = array(), &$offset = null) {
        return $this->addChild(new $this->childClass_XML($this, $tag, $text, $attributes), $offset);
    }

    /** Adds a node of class $this->childClass_ASP. */
    function &addASP($tag = '', $text = '', $attributes = array(), &$offset = null) {
        return $this->addChild(new $this->childClass_ASP($this, $tag, $text, $attributes), $offset);
    }

    // deleteChild(): $child is a node or an index (negative counts from the end).
    // The "if" below is completed on the next physical line.
    function deleteChild($child, $soft_delete = false) {
        if (is_object($child)) {
            $child = $this->findChild($child);
        } elseif ($child < 0) {
            $child += count($this->children);
        }
        if
(!$soft_delete) { // condition of the "if" that ends the previous physical line (deleteChild())
            $this->children[$child]->delete();
        }
        unset($this->children[$child]);

        // Rebuild the list so the indexes are contiguous again.
        $tmp = array();
        foreach(array_keys($this->children) as $k) {
            $tmp[] =& $this->children[$k];
        }
        $this->children = $tmp;
    }

    /**
     * Wraps this node in another node.
     *
     * @param string|object $node       Tag name or node to wrap with
     * @param int           $wrap_index Position of this node inside the wrapper
     * @param int|null      $node_index Position of the wrapper in the parent,
     *                                  null uses the current position of this node
     * @return object The wrapper
     */
    function wrap($node, $wrap_index = -1, $node_index = null) {
        if ($node_index === null) {
            $node_index = $this->index();
        }

        if (!is_object($node)) {
            $node = $this->parent->addChild($node, $node_index);
        } elseif ($node->parent !== $this->parent) {
            $node->changeParent($this->parent, $node_index);
        }

        $this->changeParent($node, $wrap_index);
        return $node;
    }

    /**
     * Wraps a run of this node's children in another node.
     *
     * @param string|object $node       Tag name or node to wrap with
     * @param int           $start      First child to wrap
     * @param int           $end        Last child to wrap, negative counts from the end
     * @param int           $wrap_index Position of the children inside the wrapper
     * @param int|null      $node_index Position of the wrapper, null places it after $end
     * @return object The wrapper
     */
    function wrapInner($node, $start = 0, $end = -1, $wrap_index = -1, $node_index = null) {
        if ($end < 0) {
            $end += count($this->children);
        }
        if ($node_index === null) {
            $node_index = $end + 1;
        }

        if (!is_object($node)) {
            $node = $this->addChild($node, $node_index);
        } elseif ($node->parent !== $this) {
            // Was changeParent($this->parent, ...): that made the wrapper a sibling
            // of this node, unlike the string branch above which adds it to $this.
            $node->changeParent($this, $node_index);
        }

        $this->moveChildren($node, $wrap_index, $start, $end);
        return $node;
    }

    /** Number of attributes. */
    function attributeCount() {
        return count($this->attributes);
    }

    // findAttribute(): looks an attribute up by index or by name.
    //   $compare 'total'     - full name, "ns:name"
    //   $compare 'name'      - name without namespace
    //   $compare 'namespace' - namespace only
    // Hits are arrays of array(namespace, name, full name, index).
    // The function is completed on the next physical line.
    protected function findAttribute($attr, $compare = 'total', $case_sensitive = false) {
        if (is_int($attr)) {
            if ($attr < 0) {
                $attr += count($this->attributes);
            }
            $keys = array_keys($this->attributes);
            return $this->findAttribute($keys[$attr], 'total', true);
        } else if ($compare === 'total') {
            $b = explode(':', $attr, 2);
            if ($case_sensitive) {
                $t =& $this->attributes;
            } else {
                $t = array_change_key_case($this->attributes);
                $attr = strtolower($attr);
            }

            if (isset($t[$attr])) {
                $index = 0;
                foreach($this->attributes as $a => $v) {
                    if (($v === $t[$attr]) && (strcasecmp($a, $attr) === 0)) {
                        $attr = $a;
                        $b = explode(':', $attr, 2);
                        break;
                    }
                    ++$index;
                }

                if (empty($b[1])) {
                    return array(array('', $b[0], $attr, $index));
                } else {
                    return array(array($b[0], $b[1], $attr, $index));
                }
            } else {
                return false;
            }
        } else {
            if ($this->attributes_ns === null) {
                // Start from an array so a node without attributes does not hand
                // null to array_change_key_case()/foreach further down.
                $this->attributes_ns = array();
                $index = 0;
                foreach($this->attributes as $a => $v) {
                    $b = explode(':', $a, 2);
                    if (empty($b[1])) {
$this->attributes_ns[$b[0]][] = array('', $b[0], $a, $index); // continues findAttribute() from the previous physical line
                    } else {
                        $this->attributes_ns[$b[1]][] = array($b[0], $b[1], $a, $index);
                    }
                    ++$index;
                }
            }

            if ($case_sensitive) {
                $t =& $this->attributes_ns;
            } else {
                $t = array_change_key_case($this->attributes_ns);
                $attr = strtolower($attr);
            }

            if ($compare === 'namespace') {
                $res = array();
                foreach($t as $ar) {
                    foreach($ar as $a) {
                        if ($a[0] === $attr) {
                            $res[] = $a;
                        }
                    }
                }
                return $res;
            } elseif ($compare === 'name') {
                return ((isset($t[$attr])) ? $t[$attr] : false);
            } else {
                trigger_error('Unknown comparison mode');
            }
        }
    }

    /**
     * Tells whether an attribute exists, see findAttribute() for the arguments.
     *
     * @return bool
     */
    function hasAttribute($attr, $compare = 'total', $case_sensitive = false) {
        return ((bool) $this->findAttribute($attr, $compare, $case_sensitive));
    }

    /**
     * Returns the namespace of an attribute.
     *
     * @return string|array|bool Namespace for a single hit, list of namespaces for
     *                           several hits, false when nothing matched
     */
    function getAttributeNS($attr, $compare = 'name', $case_sensitive = false) {
        $f = $this->findAttribute($attr, $compare, $case_sensitive);
        if (is_array($f) && $f) {
            if (count($f) === 1) {
                // Element 0 of a hit is the namespace itself. The old code used it
                // as a key into $this->attributes, unlike the multi-hit branch below.
                return $f[0][0];
            } else {
                $res = array();
                foreach($f as $a) {
                    $res[] = $a[0];
                }
                return $res;
            }
        } else {
            return false;
        }
    }

    /**
     * Moves the matching attributes to another namespace.
     *
     * @param string $namespace New namespace, '' removes it
     * @return bool False when nothing matched
     */
    function setAttributeNS($attr, $namespace, $compare = 'name', $case_sensitive = false) {
        $f = $this->findAttribute($attr, $compare, $case_sensitive);
        if (is_array($f) && $f) {
            if ($namespace) {
                $namespace .= ':';
            }
            foreach($f as $a) {
                $val = $this->attributes[$a[2]];
                unset($this->attributes[$a[2]]);
                $this->attributes[$namespace.$a[1]] = $val;
            }
            // Names changed, drop the lookup cache.
            $this->attributes_ns = null;
            return true;
        } else {
            return false;
        }
    }

    /**
     * Returns the value of an attribute.
     *
     * @return string|array|null Value for a single hit, list of values for several
     *                           hits, null when nothing matched
     */
    function getAttribute($attr, $compare = 'total', $case_sensitive = false) {
        $f = $this->findAttribute($attr, $compare, $case_sensitive);
        if (is_array($f) && $f){
            if (count($f) === 1) {
                return $this->attributes[$f[0][2]];
            } else {
                $res = array();
                foreach($f as $a) {
                    $res[] = $this->attributes[$a[2]];
                }
                return $res;
            }
        } else {
            return null;
        }
    }

    // setAttribute(): a null value deletes the attribute. The lookup that
    // follows is completed on the next physical line.
    function setAttribute($attr, $val, $compare = 'total', $case_sensitive = false) {
        if ($val === null) {
            return $this->deleteAttribute($attr, $compare, $case_sensitive);
        }
        $f =
$this->findAttribute($attr, $compare, $case_sensitive); // continues setAttribute() from the previous physical line
        if (is_array($f) && $f) {
            foreach($f as $a) {
                $this->attributes[$a[2]] = (string) $val;
            }
        } else {
            $this->attributes[$attr] = (string) $val;
        }
    }

    /** Sets an attribute, matching the full name case-sensitively. */
    function addAttribute($attr, $val) {
        $this->setAttribute($attr, $val, 'total', true);
    }

    /** Removes the matching attributes, see findAttribute() for the arguments. */
    function deleteAttribute($attr, $compare = 'total', $case_sensitive = false) {
        $f = $this->findAttribute($attr, $compare, $case_sensitive);
        if (is_array($f) && $f) {
            foreach($f as $a) {
                unset($this->attributes[$a[2]]);
                if ($this->attributes_ns !== null) {
                    unset($this->attributes_ns[$a[1]]);
                }
            }
        }
    }

    /**
     * Tells whether the class attribute contains $className as a word
     * (case-insensitive).
     *
     * @param string $className
     * @return bool
     */
    function hasClass($className) {
        return ($className && preg_match('`\b'.preg_quote($className).'\b`si', $class = $this->class));
    }

    /**
     * Appends classes that are not present yet.
     *
     * @param string|array $className
     */
    function addClass($className) {
        if (!is_array($className)) {
            $className = array($className);
        }
        $class = $this->class;
        foreach ($className as $c) {
            if (!(preg_match('`\b'.preg_quote($c).'\b`si', $class) > 0)) {
                $class .= ' '.$c;
            }
        }
        $this->class = $class;
    }

    /**
     * Removes classes; the class attribute is dropped when nothing is left.
     *
     * @param string|array $className
     */
    function removeClass($className) {
        if (!is_array($className)) {
            $className = array($className);
        }
        $class = (string) $this->class;
        foreach ($className as $c) {
            // Was reg_replace(), which does not exist.
            $class = preg_replace('`\b'.preg_quote($c).'\b`si', '', $class);
        }
        // Removing a word leaves its separator behind; without the trim a lone
        // space would count as a remaining class.
        $class = trim($class);
        if ($class !== '') {
            $this->class = $class;
        } else {
            unset($this->class);
        }
    }

    // getChildrenByCallback(): collects the children for which $callback returns
    // true. An integer $recursive is a depth limit. The function is completed on
    // the next physical line.
    function getChildrenByCallback($callback, $recursive = true, $check_self = false) {
        $count = $this->childCount();
        if ($check_self && $callback($this)) {
            $res = array($this);
        } else {
            $res = array();
        }

        if ($count > 0) {
            if (is_int($recursive)) {
                $recursive = (($recursive > 1) ?
$recursive - 1 : false); // continues getChildrenByCallback() from the previous physical line
            }

            for ($i = 0; $i < $count; $i++) {
                if ($callback($this->children[$i])) {
                    $res[] = $this->children[$i];
                }
                if ($recursive) {
                    $res = array_merge($res, $this->children[$i]->getChildrenByCallback($callback, $recursive));
                }
            }
        }
        return $res;
    }

    /**
     * Collects the children that satisfy $conditions, see match().
     *
     * @param array    $conditions
     * @param bool|int $recursive      True: whole subtree, integer: depth limit
     * @param bool     $check_self     Also test this node
     * @param array    $custom_filters Filter name => callback
     * @return array
     */
    function getChildrenByMatch($conditions, $recursive = true, $check_self = false, $custom_filters = array()) {
        $count = $this->childCount();
        if ($check_self && $this->match($conditions, true, $custom_filters)) {
            $res = array($this);
        } else {
            $res = array();
        }

        if ($count > 0) {
            if (is_int($recursive)) {
                $recursive = (($recursive > 1) ? $recursive - 1 : false);
            }

            for ($i = 0; $i < $count; $i++) {
                if ($this->children[$i]->match($conditions, true, $custom_filters)) {
                    $res[] = $this->children[$i];
                }
                if ($recursive) {
                    $res = array_merge($res, $this->children[$i]->getChildrenByMatch($conditions, $recursive, false, $custom_filters));
                }
            }
        }
        return $res;
    }

    /**
     * Tests this node against a list of tag conditions.
     *
     * Each entry is either "tag => bool" or an array with the keys tag, match,
     * operator ('or'/'and'), compare ('total'/'namespace'/'name') and
     * case_sensitive; missing keys get defaults.
     *
     * @param array $tags
     * @return bool
     */
    protected function match_tags($tags) {
        $res = false;

        foreach($tags as $tag => $match) {
            if (!is_array($match)) {
                $match = array(
                    'match' => $match,
                    'operator' => 'or',
                    'compare' => 'total',
                    'case_sensitive' => false
                );
            } else {
                if (is_int($tag)) {
                    $tag = $match['tag'];
                }
                if (!isset($match['match'])) {
                    $match['match'] = true;
                }
                if (!isset($match['operator'])) {
                    $match['operator'] = 'or';
                }
                if (!isset($match['compare'])) {
                    $match['compare'] = 'total';
                }
                if (!isset($match['case_sensitive'])) {
                    $match['case_sensitive'] = false;
                }
            }

            if (($match['operator'] === 'and') && (!$res)) {
                return false;
            } elseif (!($res && ($match['operator'] === 'or'))) {
                if ($match['compare'] === 'total') {
                    $a = $this->tag;
                } elseif ($match['compare'] === 'namespace') {
                    $a = $this->getNamespace();
                } elseif ($match['compare'] === 'name') {
                    $a = $this->getTag();
                }

                if ($match['case_sensitive']) {
                    $res = (($a === $tag) === $match['match']);
                } else {
                    $res = ((strcasecmp($a, $tag) === 0) === $match['match']);
                }
            }
        }

        return $res;
    }

    /**
     * Tests this node against a list of attribute conditions.
     *
     * Each entry is either "attribute => value" (equality) or an array with the
     * keys attribute, operator_value, value, match, operator_result
     * ('or'/'and'), compare and case_sensitive. A boolean value tests for the
     * presence/absence of the attribute.
     *
     * @param array $attributes
     * @return bool
     */
    protected function match_attributes($attributes) {
        $res = false;

        foreach($attributes as $attribute => $match) {
            if (!is_array($match)) {
                $match = array(
                    'operator_value' => 'equals',
                    'value' => $match,
                    'match' => true,
                    'operator_result' => 'or',
                    'compare' => 'total',
                    'case_sensitive' => false
                );
            } else {
                if (is_int($attribute)) {
                    $attribute = $match['attribute'];
                }
                if (!isset($match['match'])) {
                    $match['match'] = true;
                }
                if (!isset($match['operator_result'])) {
                    $match['operator_result'] = 'or';
                }
                if (!isset($match['compare'])) {
                    $match['compare'] = 'total';
                }
                if (!isset($match['case_sensitive'])) {
                    $match['case_sensitive'] = false;
                }
            }

            if (is_string($match['value']) && (!$match['case_sensitive'])) {
                $match['value'] = strtolower($match['value']);
            }

            if (($match['operator_result'] === 'and') && (!$res)) {
                return false;
            } elseif (!($res && ($match['operator_result'] === 'or'))) {
                $possibles = $this->findAttribute($attribute, $match['compare'], $match['case_sensitive']);
                $has = (is_array($possibles) && $possibles);

                $res = (($match['value'] === $has) || (($match['match'] === false) && ($has === $match['match'])));

                if ((!$res) && $has && is_string($match['value'])) {
                    foreach($possibles as $a) {
                        $val = $this->attributes[$a[2]];
                        if (is_string($val) && (!$match['case_sensitive'])) {
                            $val = strtolower($val);
                        }

                        // In every case: "break 1" leaves the switch and goes on with the
                        // next candidate, "break 2" also leaves the foreach.
                        switch($match['operator_value']) {
                            case '%=':
                            case 'contains_regex':
                                $res = ((preg_match('`'.$match['value'].'`s', $val) > 0) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '|=':
                            case 'contains_prefix':
                                // The delimiter is passed to preg_quote() so a backtick in the
                                // value cannot end the pattern.
                                $res = ((preg_match('`\b'.preg_quote($match['value'], '`').'[\-\s]?`s', $val) > 0) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '~=':
                            case 'contains_word':
                                $res = ((preg_match('`\b'.preg_quote($match['value'], '`').'\b`s', $val) > 0) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '*=':
                            case 'contains':
                                $res = ((strpos($val, $match['value']) !== false) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '$=':
                            case 'ends_with':
                                $res = ((substr($val, -strlen($match['value'])) === $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '^=':
                            case 'starts_with':
                                $res = ((substr($val, 0, strlen($match['value'])) === $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '!=':
                            case 'not_equal':
                                $res = (($val !== $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '=':
                            case 'equals':
                                $res = (($val === $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '>=':
                            case 'bigger_than':
                                $res = (($val >= $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            case '<=':
                            case 'smaller_than':
                                // Was ">=", a copy of the bigger_than branch.
                                $res = (($val <= $match['value']) === $match['match']);
                                if ($res) break 1; else break 2;

                            default:
                                trigger_error('Unknown operator "'.$match['operator_value'].'" to match attributes!');
                                return false;
                        }
                    }
                }
            }
        }

        return $res;
    }

    /**
     * Tests this node against a list of filters.
     *
     * Built-in filters are looked up in $this->filter_map, others in
     * $custom_filters (called as callback($node, $params)).
     *
     * @param array $conditions     List of array('filter' => name, 'params' => string)
     * @param array $custom_filters Filter name => callback
     * @return bool False as soon as one filter fails or is unknown
     */
    protected function match_filters($conditions, $custom_filters = array()) {
        foreach($conditions as $c) {
            $c['filter'] = strtolower($c['filter']);
            if (isset($this->filter_map[$c['filter']])) {
                if (!$this->{$this->filter_map[$c['filter']]}($c['params'])) {
                    return false;
                }
            } elseif (isset($custom_filters[$c['filter']])) {
                if (!call_user_func($custom_filters[$c['filter']], $this, $c['params'])) {
                    return false;
                }
            } else {
                trigger_error('Unknown filter "'.$c['filter'].'"!');
                return false;
            }
        }

        return true;
    }

    /**
     * Tests this node against conditions.
     *
     * $conditions is either one condition set (keys tags/attributes/filters) or
     * a list of such sets, of which one has to match.
     *
     * @param array $conditions
     * @param bool  $match          Expected outcome of the individual tests
     * @param array $custom_filters Filter name => callback
     * @return bool
     */
    function match($conditions, $match = true, $custom_filters = array()) {
        $t = isset($conditions['tags']);
        $a = isset($conditions['attributes']);
        $f = isset($conditions['filters']);

        if (!($t || $a || $f)) {
            if (is_array($conditions) && $conditions) {
                foreach($conditions as $c) {
                    // $custom_filters used to be dropped here, so custom filters
                    // were reported as unknown for every list of condition sets.
                    if ($this->match($c, $match, $custom_filters)) {
                        return true;
                    }
                }
            }

            return false;
        } else {
            if (($t && (!$this->match_tags($conditions['tags']))) === $match) {
                return false;
            }

            if (($a && (!$this->match_attributes($conditions['attributes']))) === $match) {
                return false;
            }

            if (($f && (!$this->match_filters($conditions['filters'], $custom_filters))) === $match) {
                return false;
            }

            return true;
        }
    }

    function // the name and body of this method follow on the next physical line
getChildrenByAttribute($attribute, $value, $mode = 'equals', $compare = 'total', $recursive = true) {
        // (The "function" keyword of this method ends the previous physical line.)
        // Collects the children whose attribute satisfies "$mode $value".
        // $mode is an operator name known to match_attributes(), optionally
        // followed by " not" to negate it.
        if ($this->childCount() < 1) {
            return array();
        }

        $mode = explode(' ', strtolower($mode));
        // Booleans, not the strings 'true'/'false': match_attributes() compares
        // its results with === against this value, which a string never equals.
        $match = ((isset($mode[1]) && ($mode[1] === 'not')) ? false : true);

        return $this->getChildrenByMatch(
            array(
                'attributes' => array(
                    $attribute => array(
                        'operator_value' => $mode[0],
                        'value' => $value,
                        'match' => $match,
                        'compare' => $compare
                    )
                )
            ),
            $recursive
        );
    }

    /**
     * Collects the children with a given tag.
     *
     * @param string   $tag       Tag name, optionally followed by " not"
     * @param string   $compare   'total', 'namespace' or 'name'
     * @param bool|int $recursive
     * @return array
     */
    function getChildrenByTag($tag, $compare = 'total', $recursive = true) {
        if ($this->childCount() < 1) {
            return array();
        }

        $tag = explode(' ', strtolower($tag));
        // Boolean for the same reason as in getChildrenByAttribute():
        // match_tags() compares with ===.
        $match = ((isset($tag[1]) && ($tag[1] === 'not')) ? false : true);

        return $this->getChildrenByMatch(
            array(
                'tags' => array(
                    $tag[0] => array(
                        'match' => $match,
                        'compare' => $compare
                    )
                )
            ),
            $recursive
        );
    }

    /** Children whose id attribute equals $id. */
    function getChildrenByID($id, $recursive = true) {
        return $this->getChildrenByAttribute('id', $id, 'equals', 'total', $recursive);
    }

    /**
     * Children whose class attribute equals $class.
     *
     * NOTE(review): 'equals' compares the whole attribute value; the selector
     * parser uses 'contains_word' for ".class" - confirm which is intended.
     */
    function getChildrenByClass($class, $recursive = true) {
        // Was passing the undefined variable $id.
        return $this->getChildrenByAttribute('class', $class, 'equals', 'total', $recursive);
    }

    /** Children whose name attribute equals $name. */
    function getChildrenByName($name, $recursive = true) {
        return $this->getChildrenByAttribute('name', $name, 'equals', 'total', $recursive);
    }

    // select(): runs a selector query (class $this->selectClass) from this node.
    //   $index true - return the node itself when there is exactly one result
    //   $index int  - return that result (negative counts from the end)
    // The last return statement is completed on the next physical line.
    function select($query = '*', $index = false, $recursive = true, $check_self = false) {
        $s = new $this->selectClass($this, $query, $check_self, $recursive);
        $res = $s->result;
        unset($s);
        if (is_array($res) && ($index === true) && (count($res) === 1)) {
            return $res[0];
        } elseif (is_int($index) && is_array($res)) {
            if ($index < 0) {
                $index += count($res);
            }
            return ($index < count($res)) ?
$res[$index] : null; // remainder of select(), begun on the previous physical line
        } else {
            return $res;
        }
    }

    /** True when the tag is "html" (case-insensitive). */
    protected function filter_root() {
        return (strtolower($this->tag) === 'html');
    }

    /** True when the node is the $n-th element child (zero-based, text/comments skipped). */
    protected function filter_nchild($n) {
        return ($this->index(false) === (int) $n);
    }

    /** True when the element index is greater than $n. */
    protected function filter_gt($n) {
        return ($this->index(false) > (int) $n);
    }

    /** True when the element index is smaller than $n. */
    protected function filter_lt($n) {
        return ($this->index(false) < (int) $n);
    }

    /** True when the node is the $n-th element child counted from the end. */
    protected function filter_nlastchild($n) {
        if ($this->parent === null) {
            return false;
        } else {
            return ($this->parent->childCount(true) - 1 - $this->index(false) === (int) $n);
        }
    }

    /** True when the node is the $n-th sibling with its tag. */
    protected function filter_ntype($n) {
        return ($this->typeIndex() === (int) $n);
    }

    /** True when the node is the $n-th sibling with its tag, counted from the end. */
    protected function filter_nlastype($n) {
        if ($this->parent === null) {
            return false;
        } else {
            return (count($this->parent->getChildrenByTag($this->tag, 'total', false)) - 1 - $this->typeIndex() === (int) $n);
        }
    }

    /** True when the zero-based element index is odd. */
    protected function filter_odd() {
        return (($this->index(false) & 1) === 1);
    }

    /** True when the zero-based element index is even. */
    protected function filter_even() {
        return (($this->index(false) & 1) === 0);
    }

    /** True when the element index is a multiple of $n. */
    protected function filter_every($n) {
        $n = (int) $n;
        // A missing or zero parameter would be a modulo by zero.
        if ($n === 0) {
            return false;
        }
        return (($this->index(false) % $n) === 0);
    }

    /** True when the node is the first element child. */
    protected function filter_first() {
        return ($this->index(false) === 0);
    }

    /** True when the node is the last element child. */
    protected function filter_last() {
        if ($this->parent === null) {
            return false;
        } else {
            return ($this->parent->childCount(true) - 1 === $this->index(false));
        }
    }

    /** True when the node is the first sibling with its tag. */
    protected function filter_firsttype() {
        return ($this->typeIndex() === 0);
    }

    /** True when the node is the last sibling with its tag. */
    protected function filter_lasttype() {
        if ($this->parent === null) {
            return false;
        } else {
            return (count($this->parent->getChildrenByTag($this->tag, 'total', false)) - 1 === $this->typeIndex());
        }
    }

    /** True when the parent has exactly one element child. */
    protected function filter_onlychild() {
        if ($this->parent === null) {
            return false;
        } else {
            return ($this->parent->childCount(true) === 1);
        }
    }

    /** True when the parent has exactly one child with this tag. */
    protected function filter_onlytype() {
        if ($this->parent === null) {
            return false;
        } else {
            return (count($this->parent->getChildrenByTag($this->tag, 'total', false)) === 1);
        }
    }

    // filter_empty(): true when the node has no children at all. The closing
    // brace is on the next physical line.
    protected function filter_empty() {
        return ($this->childCount() === 0);
} // closes filter_empty(), whose body is on the previous physical line

    /** True when the node has at least one child. */
    protected function filter_notempty() {
        return ($this->childCount() !== 0);
    }

    /** True when getPlainText() is not empty. */
    protected function filter_hastext() {
        return ($this->getPlainText() !== '');
    }

    /** True when getPlainText() is empty. */
    protected function filter_notext() {
        return ($this->getPlainText() === '');
    }

    /** True when the lang property equals $lang. */
    protected function filter_lang($lang) {
        return ($this->lang === $lang);
    }

    /** True when getPlainText() contains $text. */
    protected function filter_contains($text) {
        // An empty needle is contained in any text; older PHP versions raise a
        // warning and return false from strpos() for it.
        if ((string) $text === '') {
            return true;
        }
        return (strpos($this->getPlainText(), $text) !== false);
    }

    /** True when $selector matches something below this node. */
    protected function filter_has($selector) {
        $s = $this->select((string) $selector, false);
        return (is_array($s) && (count($s) > 0));
    }

    /** True when this node is not among the results of $selector. */
    protected function filter_not($selector) {
        $s = $this->select((string) $selector, false, true, true);
        return ((!is_array($s)) || (array_search($this, $s, true) === false));
    }

    /** Node type filters, overridden by the special node classes. */
    protected function filter_element() {
        return true;
    }

    protected function filter_text() {
        return false;
    }

    protected function filter_comment() {
        return false;
    }
}

/**
 * Text node; renders as its raw text.
 */
class HTML_NODE_TEXT extends HTML_Node {
    const NODE_TYPE = self::NODE_TEXT;
    var $tag = '~text~';
    var $text = '';

    function __construct($parent, $text = '') {
        $this->parent = $parent;
        $this->text = $text;
    }

    function isText() {return true;}
    function isTextOrComment() {return true;}
    protected function filter_element() {return false;}
    protected function filter_text() {return true;}
    function toString_attributes() {return '';}
    function toString_content($attributes = true, $recursive = true, $content_only = false) {return $this->text;}
    function toString($attributes = true, $recursive = true, $content_only = false) {return $this->text;}
}

// Comment node; the class body is completed on the next physical line.
class HTML_NODE_COMMENT extends HTML_Node {
    const NODE_TYPE = self::NODE_COMMENT;
    var $tag = '~comment~';
    var $text = '';

    function __construct($parent, $text = '') {
        $this->parent = $parent;
        $this->text = $text;
    }

    function isComment() {return true;}
    function isTextOrComment() {return true;}
    protected function filter_element() {return false;}
    protected function filter_comment() {return true;}
    function toString_attributes() {return '';}
    function toString_content($attributes =
true, $recursive = true, $content_only = false) {return $this->text;} // rest of HTML_NODE_COMMENT::toString_content(), begun on the previous physical line
    function toString($attributes = true, $recursive = true, $content_only = false) {return '<!--'.$this->text.'-->';}
}

/**
 * Conditional comment node, rendered as <!--[condition]> ... <![endif]-->
 * (without the dashes when not hidden).
 */
class HTML_NODE_CONDITIONAL extends HTML_Node {
    const NODE_TYPE = self::NODE_CONDITIONAL;
    var $tag = '~conditional~';
    var $condition = '';
    // Declared explicitly; the constructor and toString() use it but it was never
    // declared. NOTE(review): HTML_Node appears to route undeclared properties
    // elsewhere (see $this->class / $this->lang) - confirm nothing relied on that.
    var $hidden = true;

    function __construct($parent, $condition = '', $hidden = true) {
        $this->parent = $parent;
        $this->hidden = $hidden;
        $this->condition = $condition;
    }

    protected function filter_element() {return false;}
    function toString_attributes() {return '';}

    function toString($attributes = true, $recursive = true, $content_only = false) {
        if ($content_only) {
            if (is_int($content_only)) {
                --$content_only;
            }
            return $this->toString_content($attributes, $recursive, $content_only);
        }

        $s = '<!'.(($this->hidden) ? '--' : '').'['.$this->condition.']>';
        if($recursive) {
            $s .= $this->toString_content($attributes);
        }
        $s .= '<![endif]'.(($this->hidden) ? '--' : '').'>';
        return $s;
    }
}

/**
 * CDATA node, rendered as <![CDATA[text]]>.
 */
class HTML_NODE_CDATA extends HTML_Node {
    const NODE_TYPE = self::NODE_CDATA;
    var $tag = '~cdata~';
    var $text = '';

    function __construct($parent, $text = '') {
        $this->parent = $parent;
        $this->text = $text;
    }

    protected function filter_element() {return false;}
    function toString_attributes() {return '';}
    function toString_content($attributes = true, $recursive = true, $content_only = false) {return $this->text;}
    function toString($attributes = true, $recursive = true, $content_only = false) {return '<![CDATA['.$this->text.']]>';}
}

// Doctype node; the class body is completed on the next physical line.
class HTML_NODE_DOCTYPE extends HTML_Node {
    const NODE_TYPE = self::NODE_DOCTYPE;
    var $tag = '!DOCTYPE';
    var $dtd = '';

    function __construct($parent, $dtd = '') {
        $this->parent = $parent;
        $this->dtd = $dtd;
    }

    protected function filter_element() {return false;}
    function toString_attributes() {return '';}
    // Was returning $this->text, a property this class neither declares nor sets;
    // the content of a doctype node is its DTD string.
    function toString_content($attributes = true, $recursive = true, $content_only = false) {return $this->dtd;}
    function toString($attributes = true, $recursive =
true, $content_only = false) {return '<'.$this->tag.' '.$this->dtd.'>';} // rest of HTML_NODE_DOCTYPE::toString(), begun on the previous physical line
}

/**
 * Base class for embedded blocks such as <?xml ... ?> and <% ... %>.
 * The tag is stored with its leading marker character.
 */
class HTML_NODE_EMBEDDED extends HTML_Node {
    var $tag_char = '';
    var $text = '';

    function __construct($parent, $tag_char = '', $tag = '', $text = '', $attributes = array()) {
        $this->parent = $parent;
        $this->tag_char = $tag_char;
        // $tag may be empty (it is the default for ASP nodes), so test that
        // before reading its first character.
        if (($tag === '') || ($tag[0] !== $this->tag_char)) {
            $tag = $this->tag_char.$tag;
        }
        $this->tag = $tag;
        $this->text = $text;
        $this->attributes = $attributes;
        $this->self_close_str = $tag_char;
    }

    protected function filter_element() {return false;}

    function toString($attributes = true, $recursive = true, $content_only = false) {
        $s = '<'.$this->tag;
        if ($attributes) {
            $s .= $this->toString_attributes();
        }
        $s .= $this->text.$this->self_close_str.'>';
        return $s;
    }
}

/**
 * Embedded block with "?" as marker.
 */
class HTML_NODE_XML extends HTML_NODE_EMBEDDED {
    const NODE_TYPE = self::NODE_XML;

    function __construct($parent, $tag = 'xml', $text = '', $attributes = array()) {
        return parent::__construct($parent, '?', $tag, $text, $attributes);
    }
}

/**
 * Embedded block with "%" as marker.
 */
class HTML_NODE_ASP extends HTML_NODE_EMBEDDED {
    const NODE_TYPE = self::NODE_ASP;

    function __construct($parent, $tag = '', $text = '', $attributes = array()) {
        return parent::__construct($parent, '%', $tag, $text, $attributes);
    }
}

//END gan_node_html.php

//START gan_selector_html.php

// Tokenizer for the selector syntax. The class body continues on the next
// physical line.
class Tokenizer_CSSQuery extends Tokenizer_Base {
    const TOK_BRACKET_OPEN = 100;
    const TOK_BRACKET_CLOSE = 101;
    const TOK_BRACE_OPEN = 102;
    const TOK_BRACE_CLOSE = 103;
    const TOK_STRING = 104;
    const TOK_COLON = 105;
    const TOK_COMMA = 106;
    const TOK_NOT = 107;
    const TOK_ALL = 108;
    const TOK_PIPE = 109;
    const TOK_PLUS = 110;
    const TOK_SIBLING = 111;
    const TOK_CLASS = 112;
    const TOK_ID = 113;
    const TOK_CHILD = 114;
    const TOK_COMPARE_PREFIX = 115;
    const TOK_COMPARE_CONTAINS = 116;
    const TOK_COMPARE_CONTAINS_WORD = 117;
    const TOK_COMPARE_ENDS = 118;
    const TOK_COMPARE_EQUALS = 119;
    const TOK_COMPARE_NOT_EQUAL = 120;
    const TOK_COMPARE_BIGGER_THAN = 121;
    const TOK_COMPARE_SMALLER_THAN = 122;
    const TOK_COMPARE_REGEX = 123;
const TOK_COMPARE_STARTS = 124; // continues the constant list of Tokenizer_CSSQuery from the previous physical line

    // Characters that make up an identifier in a query.
    var $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-?';

    // Character => token constant, or name of the method that works out the token.
    var $custom_char_map = array(
        '.' => self::TOK_CLASS,
        '#' => self::TOK_ID,
        ',' => self::TOK_COMMA,
        '>' => 'parse_gt',

        '+' => self::TOK_PLUS,
        '~' => 'parse_sibling',

        '|' => 'parse_pipe',
        '*' => 'parse_star',
        '$' => 'parse_compare',
        '=' => self::TOK_COMPARE_EQUALS,
        '!' => 'parse_not',
        '%' => 'parse_compare',
        '^' => 'parse_compare',
        '<' => 'parse_compare',

        '"' => 'parse_string',
        "'" => 'parse_string',
        '(' => self::TOK_BRACE_OPEN,
        ')' => self::TOK_BRACE_CLOSE,
        '[' => self::TOK_BRACKET_OPEN,
        ']' => self::TOK_BRACKET_CLOSE,
        ':' => self::TOK_COLON
    );

    /** ">=" is TOK_COMPARE_BIGGER_THAN, a lone ">" is TOK_CHILD. */
    protected function parse_gt() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_BIGGER_THAN);
        } else {
            return ($this->token = self::TOK_CHILD);
        }
    }

    /** "~=" is TOK_COMPARE_CONTAINS_WORD, a lone "~" is TOK_SIBLING. */
    protected function parse_sibling() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_CONTAINS_WORD);
        } else {
            return ($this->token = self::TOK_SIBLING);
        }
    }

    /** "|=" is TOK_COMPARE_PREFIX, a lone "|" is TOK_PIPE. */
    protected function parse_pipe() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_PREFIX);
        } else {
            return ($this->token = self::TOK_PIPE);
        }
    }

    /** "*=" is TOK_COMPARE_CONTAINS, a lone "*" is TOK_ALL. */
    protected function parse_star() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_CONTAINS);
        } else {
            return ($this->token = self::TOK_ALL);
        }
    }

    /** "!=" is TOK_COMPARE_NOT_EQUAL, a lone "!" is TOK_NOT. */
    protected function parse_not() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_NOT_EQUAL);
        } else {
            return ($this->token = self::TOK_NOT);
        }
    }

    // parse_compare(): handles "$=", "%=", "^=" and "<="; returns false when the
    // character is not followed by "=". The switch body is on the next
    // physical line.
    protected function parse_compare() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            switch($this->doc[$this->pos++]) {
case '$': // continues the switch of parse_compare() from the previous physical line
                    return ($this->token = self::TOK_COMPARE_ENDS);
                case '%':
                    return ($this->token = self::TOK_COMPARE_REGEX);
                case '^':
                    return ($this->token = self::TOK_COMPARE_STARTS);
                case '<':
                    return ($this->token = self::TOK_COMPARE_SMALLER_THAN);
            }
        }
        return false;
    }

    /**
     * Reads a quoted string; the quote character is the one at the current
     * position. A character found by the search that is not the quote (i.e. a
     * backslash) is skipped together with the character after it. Without a
     * closing quote the string runs to the end of the query.
     */
    protected function parse_string() {
        $char = $this->doc[$this->pos];

        while (true) {
            if ($this->next_search($char.'\\', false) !== self::TOK_NULL) {
                if($this->doc[$this->pos] === $char) {
                    break;
                } else {
                    ++$this->pos;
                }
            } else {
                $this->pos = $this->size - 1;
                break;
            }
        }

        return ($this->token = self::TOK_STRING);
    }
}

// Runs selector queries against a node tree. The class body continues on the
// following physical lines.
class HTML_Selector {
    // Tokenizer class name; replaced by the tokenizer instance in the constructor.
    var $parser = 'Tokenizer_CSSQuery';
    var $root = null;
    var $query = '';
    var $result = array();
    var $search_root = false;
    var $search_recursive = true;
    var $custom_filter_map = array();

    function __construct($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {
        if ($parser === null) {
            $parser = new $this->parser();
        }
        $this->parser = $parser;
        $this->root =& $root;

        $this->search_root = $search_root;
        $this->search_recursive = $search_recursive;

        $this->select($query);
    }

    function __toString() {
        return $this->query;
    }

    function __invoke($query = '*') {
        return $this->select($query);
    }

    // select(): parses and runs $query. The return statement is completed on
    // the next physical line.
    function select($query = '*') {
        $this->parser->setDoc($query);
        $this->query = $query;
        return (($this->parse()) ?
$this->result : false); } protected function error($error) { $error = htmlentities(str_replace( array(‘%tok%‘, ‘%pos%‘), array($this->parser->getTokenString(), (int) $this->parser->getPos()), $error )); trigger_error($error); } protected function parse_getIdentifier($do_error = true) { $p =& $this->parser; $tok = $p->token; if ($tok === Tokenizer_CSSQuery::TOK_IDENTIFIER) { return $p->getTokenString(); } elseif($tok === Tokenizer_CSSQuery::TOK_STRING) { return str_replace(array(‘\\\‘‘, ‘\\"‘, ‘\\\\‘), array(‘\‘‘, ‘"‘, ‘\\‘), $p->getTokenString(1, -1)); } elseif ($do_error) { $this->error(‘Expected identifier at %pos%!‘); } return false; } protected function parse_conditions() { $p =& $this->parser; $tok = $p->token; if ($tok === Tokenizer_CSSQuery::TOK_NULL) { $this->error(‘Invalid search pattern(1): Empty string!‘); return false; } $conditions_all = array(); while ($tok !== Tokenizer_CSSQuery::TOK_NULL) { $conditions = array(‘tags‘ => array(), ‘attributes‘ => array()); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $tok = $p->next(); if (($tok === Tokenizer_CSSQuery::TOK_PIPE) && ($tok = $p->next()) && ($tok !== Tokenizer_CSSQuery::TOK_ALL)) { if (($tag = $this->parse_getIdentifier()) === false) { return false; } $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘compare‘ => ‘name‘ ); $tok = $p->next_no_whitespace(); } else { $conditions[‘tags‘][‘‘] = array( ‘tag‘ => ‘‘, ‘match‘ => false ); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $tok = $p->next_no_whitespace(); } } } elseif ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $conditions[‘tags‘][] = array( ‘tag‘ => ‘‘, ‘compare‘ => ‘namespace‘, ); } elseif (($tag = $this->parse_getIdentifier()) !== false) { $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘compare‘ => ‘total‘, ); } else { return false; } $tok = $p->next_no_whitespace(); } elseif ($tok === Tokenizer_CSSQuery::TOK_BRACE_OPEN) { $tok = $p->next_no_whitespace(); $last_mode = ‘or‘; while (true) { $match = 
true; $compare = ‘total‘; if ($tok === Tokenizer_CSSQuery::TOK_NOT) { $match = false; $tok = $p->next_no_whitespace(); } if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $this->next(); $compare = ‘name‘; if (($tag = $this->parse_getIdentifier()) === false) { return false; } } } elseif ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $tag = ‘‘; $compare = ‘namespace‘; } elseif (($tag = $this->parse_getIdentifier()) === false) { return false; } $tok = $p->next_no_whitespace(); } else { if (($tag = $this->parse_getIdentifier()) === false) { return false; } $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $compare = ‘namespace‘; } elseif (($tag_name = $this->parse_getIdentifier()) !== false) { $tag = $tag.‘:‘.$tag_name; } else { return false; } $tok = $p->next_no_whitespace(); } } if ($tok === Tokenizer_CSSQuery::TOK_WHITESPACE) { $tok = $p->next_no_whitespace(); } $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘match‘ => $match, ‘operator‘ => $last_mode, ‘compare‘ => $compare ); switch($tok) { case Tokenizer_CSSQuery::TOK_COMMA: $tok = $p->next_no_whitespace(); $last_mode = ‘or‘; continue 2; case Tokenizer_CSSQuery::TOK_PLUS: $tok = $p->next_no_whitespace(); $last_mode = ‘and‘; continue 2; case Tokenizer_CSSQuery::TOK_BRACE_CLOSE: $tok = $p->next(); break 2; default: $this->error(‘Expected closing brace or comma at pos %pos%!‘); return false; } } } elseif (($tag = $this->parse_getIdentifier(false)) !== false) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘compare‘ => ‘namespace‘ ); } elseif (($tag_name = $this->parse_getIdentifier()) !== false) { $tag = $tag.‘:‘.$tag_name; $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘match‘ => true ); } else { return 
false; } $tok = $p->next(); } else { $conditions[‘tags‘][] = array( ‘tag‘ => $tag, ‘match‘ => true ); } } else { unset($conditions[‘tags‘]); } $last_mode = ‘or‘; if ($tok === Tokenizer_CSSQuery::TOK_CLASS) { $p->next(); if (($class = $this->parse_getIdentifier()) === false) { return false; } $conditions[‘attributes‘][] = array( ‘attribute‘ => ‘class‘, ‘operator_value‘ => ‘contains_word‘, ‘value‘ => $class, ‘operator_result‘ => $last_mode ); $last_mode = ‘and‘; $tok = $p->next(); } if ($tok === Tokenizer_CSSQuery::TOK_ID) { $p->next(); if (($id = $this->parse_getIdentifier()) === false) { return false; } $conditions[‘attributes‘][] = array( ‘attribute‘ => ‘id‘, ‘operator_value‘ => ‘equals‘, ‘value‘ => $id, ‘operator_result‘ => $last_mode ); $last_mode = ‘and‘; $tok = $p->next(); } if ($tok === Tokenizer_CSSQuery::TOK_BRACKET_OPEN) { $tok = $p->next_no_whitespace(); while (true) { $match = true; $compare = ‘total‘; if ($tok === Tokenizer_CSSQuery::TOK_NOT) { $match = false; $tok = $p->next_no_whitespace(); } if ($tok === Tokenizer_CSSQuery::TOK_ALL) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if (($attribute = $this->parse_getIdentifier()) === false) { return false; } $compare = ‘name‘; $tok = $p->next(); } else { $this->error(‘Expected pipe at pos %pos%!‘); return false; } } elseif ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if (($tag = $this->parse_getIdentifier()) === false) { return false; } $tok = $p->next_no_whitespace(); } elseif (($attribute = $this->parse_getIdentifier()) !== false) { $tok = $p->next(); if ($tok === Tokenizer_CSSQuery::TOK_PIPE) { $tok = $p->next(); if (($attribute_name = $this->parse_getIdentifier()) !== false) { $attribute = $attribute.‘:‘.$attribute_name; } else { return false; } $tok = $p->next(); } } else { return false; } if ($tok === Tokenizer_CSSQuery::TOK_WHITESPACE) { $tok = $p->next_no_whitespace(); } $operator_value = ‘‘; $val = ‘‘; switch($tok) { case 
Tokenizer_CSSQuery::TOK_COMPARE_PREFIX: case Tokenizer_CSSQuery::TOK_COMPARE_CONTAINS: case Tokenizer_CSSQuery::TOK_COMPARE_CONTAINS_WORD: case Tokenizer_CSSQuery::TOK_COMPARE_ENDS: case Tokenizer_CSSQuery::TOK_COMPARE_EQUALS: case Tokenizer_CSSQuery::TOK_COMPARE_NOT_EQUAL: case Tokenizer_CSSQuery::TOK_COMPARE_REGEX: case Tokenizer_CSSQuery::TOK_COMPARE_STARTS: case Tokenizer_CSSQuery::TOK_COMPARE_BIGGER_THAN: case Tokenizer_CSSQuery::TOK_COMPARE_SMALLER_THAN: $operator_value = $p->getTokenString(($tok === Tokenizer_CSSQuery::TOK_COMPARE_EQUALS) ? 0 : -1); $p->next_no_whitespace(); if (($val = $this->parse_getIdentifier()) === false) { return false; } $tok = $p->next_no_whitespace(); break; } if ($operator_value && $val) { $conditions[‘attributes‘][] = array( ‘attribute‘ => $attribute, ‘operator_value‘ => $operator_value, ‘value‘ => $val, ‘match‘ => $match, ‘operator_result‘ => $last_mode, ‘compare‘ => $compare ); } else { $conditions[‘attributes‘][] = array( ‘attribute‘ => $attribute, ‘value‘ => $match, ‘operator_result‘ => $last_mode, ‘compare‘ => $compare ); } switch($tok) { case Tokenizer_CSSQuery::TOK_COMMA: $tok = $p->next_no_whitespace(); $last_mode = ‘or‘; continue 2; case Tokenizer_CSSQuery::TOK_PLUS: $tok = $p->next_no_whitespace(); $last_mode = ‘and‘; continue 2; case Tokenizer_CSSQuery::TOK_BRACKET_CLOSE: $tok = $p->next(); break 2; default: $this->error(‘Expected closing bracket or comma at pos %pos%!‘); return false; } } } if (count($conditions[‘attributes‘]) < 1) { unset($conditions[‘attributes‘]); } while($tok === Tokenizer_CSSQuery::TOK_COLON) { if (count($conditions) < 1) { $conditions[‘tags‘] = array(array( ‘tag‘ => ‘‘, ‘match‘ => false )); } $tok = $p->next(); if (($filter = $this->parse_getIdentifier()) === false) { return false; } if (($tok = $p->next()) === Tokenizer_CSSQuery::TOK_BRACE_OPEN) { $start = $p->pos; $count = 1; while ((($tok = $p->next()) !== Tokenizer_CSSQuery::TOK_NULL) && !(($tok === Tokenizer_CSSQuery::TOK_BRACE_CLOSE) && 
(--$count === 0))) {
                        // Nested opening brace: one more closing brace is needed before the parameter ends.
                        if ($tok === Tokenizer_CSSQuery::TOK_BRACE_OPEN) {
                            ++$count;
                        }
                    }

                    if ($tok !== Tokenizer_CSSQuery::TOK_BRACE_CLOSE) {
                        $this->error('Expected closing brace at pos %pos%!');
                        return false;
                    }

                    // The parameter is the raw text strictly between the opening brace and the current position.
                    $len = $p->pos - 1 - $start;
                    $params = (($len > 0) ? substr($p->doc, $start + 1, $len) : '');
                    $tok = $p->next();
                } else {
                    $params = '';
                }

                $conditions['filters'][] = array('filter' => $filter, 'params' => $params);
            }

            if (count($conditions) < 1) {
                $this->error('Invalid search pattern(2): No conditions found!');
                return false;
            }
            $conditions_all[] = $conditions;

            if ($tok === Tokenizer_CSSQuery::TOK_WHITESPACE) {
                $tok = $p->next_no_whitespace();
            }

            // A comma starts another condition group; anything else ends this condition list.
            if ($tok === Tokenizer_CSSQuery::TOK_COMMA) {
                $tok = $p->next_no_whitespace();
                continue;
            } else {
                break;
            }
        }

        return $conditions_all;
    }

    /**
     * Evaluates a set of conditions against the current root node and stores the matches.
     *
     * @param array $conditions Conditions as produced by parse_conditions()
     * @param bool|int $recursive Forwarded unchanged to getChildrenByMatch()
     * @param bool $check_root Forwarded unchanged to getChildrenByMatch()
     * @return mixed The value returned by getChildrenByMatch(), which is also stored in $this->result
     */
    protected function parse_callback($conditions, $recursive = true, $check_root = false) {
        return ($this->result = $this->root->getChildrenByMatch(
            $conditions,
            $recursive,
            $check_root,
            $this->custom_filter_map
        ));
    }

    /**
     * Parses one condition group from the query and evaluates it against the root node.
     *
     * @param bool|int $recursive Forwarded to parse_callback()
     * @return bool False when the conditions could not be parsed, true otherwise
     */
    protected function parse_single($recursive = true) {
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        $this->parse_callback($c, $recursive, $this->search_root);
        return true;
    }

    /**
     * Replaces the current result with the next siblings of the current result nodes
     * that match the next condition group of the query.
     *
     * @return bool False when the conditions could not be parsed, true otherwise
     */
    protected function parse_adjacent() {
        $tmp = $this->result;
        // The result is reset before parsing, so it stays empty if parsing fails.
        $this->result = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        foreach($tmp as $t) {
            if (($sibling = $t->getNextSibling()) !== false) {
                if ($sibling->match($c, true, $this->custom_filter_map)) {
                    $this->result[] = $sibling;
                }
            }
        }

        return true;
    }

    /**
     * Evaluates the next condition group relative to every node in the current result
     * and replaces the result with the de-duplicated matches.
     *
     * Note: $this->root is overwritten for each node visited and is not restored here.
     *
     * @param bool $parent Search from each node's parent instead of the node itself
     * @param bool|int $recursive Forwarded to parse_callback()
     * @return bool False when the conditions could not be parsed, true otherwise
     */
    protected function parse_result($parent = false, $recursive = true) {
        $tmp = $this->result;
        $tmp_res = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        foreach(array_keys($tmp) as $t) {
            $this->root = (($parent) ?
$tmp[$t]->parent : $tmp[$t]);
            $this->parse_callback($c, $recursive);

            // Identity comparison (strict in_array) keeps each matched node only once.
            foreach(array_keys($this->result) as $r) {
                if (!in_array($this->result[$r], $tmp_res, true)) {
                    $tmp_res[] = $this->result[$r];
                }
            }
        }

        $this->result = $tmp_res;
        return true;
    }

    /**
     * Runs the complete query held by the tokenizer.
     *
     * The first condition group is evaluated against the root node. After that the
     * current token decides how the next group refines the result, until the
     * tokenizer reports TOK_NULL or the result becomes empty.
     *
     * @return bool False on a parse error, true otherwise
     */
    protected function parse() {
        $p =& $this->parser;
        $p->setPos(0);
        $this->result = array();

        if (!$this->parse_single()) {
            return false;
        }

        while (count($this->result) > 0) {
            switch($p->token) {
                case Tokenizer_CSSQuery::TOK_CHILD:
                    // Search one level below each current result node.
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_result(false, 1)) {
                        return false;
                    }
                    break;

                case Tokenizer_CSSQuery::TOK_SIBLING:
                    // Search one level below the parent of each current result node.
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_result(true, 1)) {
                        return false;
                    }
                    break;

                case Tokenizer_CSSQuery::TOK_PLUS:
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_adjacent()) {
                        return false;
                    }
                    break;

                case Tokenizer_CSSQuery::TOK_ALL:
                case Tokenizer_CSSQuery::TOK_IDENTIFIER:
                case Tokenizer_CSSQuery::TOK_STRING:
                case Tokenizer_CSSQuery::TOK_BRACE_OPEN:
                case Tokenizer_CSSQuery::TOK_BRACKET_OPEN:
                case Tokenizer_CSSQuery::TOK_ID:
                case Tokenizer_CSSQuery::TOK_CLASS:
                case Tokenizer_CSSQuery::TOK_COLON:
                    // No explicit combinator: the next group starts right away (default arguments).
                    if (!$this->parse_result()) {
                        return false;
                    }
                    break;

                case Tokenizer_CSSQuery::TOK_NULL:
                    // End of the query string.
                    break 2;

                default:
                    $this->error('Invalid search pattern(3): No result modifier found!');
                    return false;
            }
        }

        return true;
    }
}
//END gan_selector_html.php

//START gan_formatter.php

/**
 * Indents every line of a text except the first one.
 *
 * Each "\n" in $text is followed by $indent copies of $indent_string.
 *
 * @param string $text Text to indent
 * @param int $indent Number of times $indent_string is repeated
 * @param string $indent_string String used for one indentation level
 * @return string The indented text, or $text unchanged when $indent or $indent_string is empty
 */
function indent_text($text, $indent, $indent_string = ' ') {
    if ($indent && $indent_string) {
        return str_replace("\n", "\n".str_repeat($indent_string, $indent), $text);
    } else {
        return $text;
    }
}

/**
 * Re-indents a DOM tree and optionally minifies its HTML and inline JavaScript.
 */
class HTML_Formatter {

    /**
     * Formatting flags per (lower case) tag name, as used by format_html():
     *  - new_line: insert a line break plus indentation before the element
     *  - as_block: treat the element as a block when deciding on line breaks around its children and next sibling
     *  - format_inside: whether the children of the element are reformatted
     * @var array
     */
    var $block_elements = array(
        'p' =>          array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h1' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h2' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h3' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h4' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h5' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h6' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'form' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'fieldset' =>   array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'legend' =>     array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dl' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dt' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dd' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ol' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ul' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'li' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),

        'table' =>      array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'tr' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'dir' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'menu' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'address' =>    array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'blockquote' => array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'center' =>     array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'del' =>        array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'hr' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ins' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'noscript' =>   array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'pre' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => false),

        'script' =>     array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'style' =>      array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'html' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'head' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'body' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'title' =>      array('new_line' => true,  'as_block' => false, 'format_inside' => false)
    );

    /**
     * Characters treated as whitespace by format_html(). The value is true for
     * characters that already are a line break: indentation is appended after those,
     * whereas the other characters are replaced by a line break plus indentation.
     * @var array
     */
    var $whitespace = array(
        ' ' => false,
        "\t" => false,
        "\x0B" => false,
        "\0" => false,
        "\n" => true,
        "\r" => true
    );

    /**
     * String used for one level of indentation.
     * @var string
     */
    var $indent_string = ' ';

    /**
     * String used as line break.
     * @var string
     */
    var $linebreak_string = "\n";

    /**
     * Formatter options; values passed to the constructor override these defaults.
     *  - img_alt: value for a missing "alt" attribute on img elements, null to leave it alone
     *  - self_close_str: copied to each node's self_close_str unless null
     *  - attribute_shorttag: copied to each node's attribute_shorttag unless null
     *  - sort_attributes: false for no sorting, 'reverse' for krsort(), any other true value for ksort()
     *  - attributes_case: case passed to array_change_key_case() for attribute names
     *  - minify_script: run minify_javascript() from format()
     * @var array
     */
    var $options = array(
        'img_alt' => '',
        'self_close_str' => null,
        'attribute_shorttag' => false,
        'sort_attributes' => false,
        'attributes_case' => CASE_LOWER,
        'minify_script' => true
    );

    /**
     * Messages of the errors collected during the last format() call.
     * @var array
     */
    var $errors = array();

    /**
     * @param array $options Overrides for the defaults in {@link $options}
     */
    function __construct($options = array()) {
        $this->options = array_merge($this->options, $options);
    }

    /**
     * Allows the formatter object to be called like a function; same as format().
     *
     * @param mixed $node Node to format
     * @return bool
     */
    function __invoke(&$node) {
        return $this->format($node);
    }

    /**
     * Collapses whitespace in text nodes and optionally removes comments.
     *
     * @param mixed $root Node to minify
     * @param bool $strip_comments Delete comment nodes, merging text nodes that become adjacent
     * @param bool|int $recursive Forwarded to select()
     * @return void
     */
    static function minify_html(&$root, $strip_comments = true, $recursive = true) {
        if ($strip_comments) {
            foreach($root->select(':comment', false, $recursive, true) as $c) {
                // Siblings must be fetched before the comment is deleted.
                $prev = $c->getSibling(-1);
                $next = $c->getSibling(1);
                $c->delete();

                // Two text nodes that were only separated by the comment are merged into one.
                if ($prev && $next && ($prev->isText()) && ($next->isText())) {
                    $prev->text .= $next->text;
                    $next->delete();
                }
            }
        }

        // Text directly inside pre, xmp, style, script and "?php" nodes is left untouched.
        foreach($root->select('(!pre + !xmp + !style + !script + !"?php" + !"~text~" + !"~comment~"):not-empty > "~text~"', false, $recursive, true) as $c) {
            $c->text = preg_replace('`\s+`', ' ', $c->text);
        }
    }

    /**
     * Minifies the text of all non-empty script elements using JSMinPlus.
     *
     * @param mixed $root Node to search for script elements
     * @param string $indent_string String used to indent the minified script
     * @param bool $wrap_comment Wrap the minified script in "<!--" and "//-->"
     * @param bool|int $recursive Forwarded to select()
     * @return bool|array True on success, otherwise a list of array(Exception, location) pairs
     */
    static function minify_javascript(&$root, $indent_string = ' ', $wrap_comment = true, $recursive = true) {
        // Relative path: resolved through the include path / working directory.
        include_once('third party/jsminplus.php');

        $errors = array();
        foreach($root->select('script:not-empty > "~text~"', false, $recursive, true) as $c) {
            try {
                $text = $c->text;

                // Peel off leading "<!--" / "<![CDATA[" and trailing "-->" / "]]>" wrappers.
                while ($text) {
                    $text = trim($text);

                    // NOTE(review): the matched markers are 4 and 9 characters long, but 5 and 10
                    // characters are removed, so the character after the marker is dropped
                    // unchecked - confirm this is intended.
                    if (substr($text, 0, 4) === '<!--') {
                        $text = substr($text, 5);
                        continue;
                    } elseif (strtolower(substr($text, 0, 9)) === '<![cdata[') {
                        $text = substr($text, 10);
                        continue;
                    }

                    if (($end = substr($text, -3)) && (($end === '-->') || ($end === ']]>'))) {
                        $text = substr($text, 0, -3);
                        continue;
                    }

                    break;
                }

                if (trim($text)) {
                    $text = JSMinPlus::minify($text);
                    if ($wrap_comment) {
                        $text = "<!--\n".$text."\n//-->";
                    }
                    if ($indent_string && ($wrap_comment || (strpos($text, "\n") !== false))) {
                        $text = indent_text("\n".$text, $c->indent(), $indent_string);
                    }
                }
                $c->text = $text;
            } catch (Exception $e) {
                // Collect the error and continue with the next script.
                $errors[] = array($e, $c->parent->dumpLocation());
            }
        }

        return (($errors) ? $errors : true);
    }

    /**
     * Normalizes a node (tag/attribute case, attribute order, img alt) and inserts
     * line breaks and indentation around its children.
     *
     * @param mixed $root Node to format
     * @param bool|int|null $recursive Null (top level call) minifies the HTML first and
     *  recurses fully; an integer limits the depth; a false value formats only this node's children
     * @return bool Always true
     */
    function format_html(&$root, $recursive = null) {
        if ($recursive === null) {
            $recursive = true;
            self::minify_html($root);
        } elseif (is_int($recursive)) {
            // One level is used up by this call.
            $recursive = (($recursive > 1) ? $recursive - 1 : false);
        }

        $root_tag = strtolower($root->tag);
        $in_block = isset($this->block_elements[$root_tag]) && $this->block_elements[$root_tag]['as_block'];
        $child_count = count($root->children);

        // NOTE(review): CASE_LOWER is 0 in PHP, so with the default option this truthiness
        // test is false and attribute names are not changed - confirm whether that is intended.
        if (isset($this->options['attributes_case']) && $this->options['attributes_case']) {
            $root->attributes = array_change_key_case($root->attributes, $this->options['attributes_case']);
            $root->attributes_ns = null;
        }

        if (isset($this->options['sort_attributes']) && $this->options['sort_attributes']) {
            if ($this->options['sort_attributes'] === 'reverse') {
                krsort($root->attributes);
            } else {
                ksort($root->attributes);
            }
        }

        if ($root->select(':element', true, false, true)) {
            $root->setTag(strtolower($root->tag), true);
            if (($this->options['img_alt'] !== null) && ($root_tag === 'img') && (!isset($root->alt))) {
                $root->alt = $this->options['img_alt'];
            }
        }
        if ($this->options['self_close_str'] !== null) {
            $root->self_close_str = $this->options['self_close_str'];
        }
        if ($this->options['attribute_shorttag'] !== null) {
            $root->attribute_shorttag = $this->options['attribute_shorttag'];
        }

        $prev = null;
        $n_tag = '';
        $as_block = false;
        $prev_asblock = false;
        for($i = 0; $i < $child_count; $i++) {
            $n =& $root->children[$i];
            $indent = $n->indent();

            if (!$n->isText()) {
                $n_tag = strtolower($n->tag);
                $new_line = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['new_line'];
                $as_block = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['as_block'];
                // Tags without an entry in block_elements are formatted inside by default.
                $format_inside = ((!isset($this->block_elements[$n_tag])) || $this->block_elements[$n_tag]['format_inside']);

                // Whitespace in front of the element.
                if ($prev && ($prev->isText()) && $prev->text && ($char = $prev->text[strlen($prev->text) - 1]) && isset($this->whitespace[$char])) {
                    if ($this->whitespace[$char]) {
                        // Already ends with a line break: only add the indentation.
                        $prev->text .= str_repeat($this->indent_string, $indent);
                    } else {
                        // Replace the trailing whitespace character with line break plus indentation.
                        $prev->text = substr_replace($prev->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
                    }
                } elseif (($new_line || $prev_asblock || ($in_block && ($i === 0)))){
                    if ($prev && ($prev->isText())) {
                        $prev->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
                    } else {
                        // A text node is inserted at index $i, so the child list grows by one.
                        $root->addText($this->linebreak_string.str_repeat($this->indent_string, $indent), $i);
                        ++$child_count;
                    }
                }

                // Whitespace in front of the closing tag.
                if ($format_inside && count($n->children)) {
                    $last = $n->children[count($n->children) - 1];
                    $last_tag = ($last) ? strtolower($last->tag) : '';
                    $last_asblock = ($last_tag && isset($this->block_elements[$last_tag]) && $this->block_elements[$last_tag]['as_block']);

                    if (($n->childCount(true) > 0) || (trim($n->getPlainText()))) {
                        if ($last && ($last->isText()) && $last->text && ($char = $last->text[strlen($last->text) - 1]) && isset($this->whitespace[$char])) {
                            if ($as_block || ($last->index() > 0) || isset($this->whitespace[$last->text[0]])) {
                                if ($this->whitespace[$char]) {
                                    $last->text .= str_repeat($this->indent_string, $indent);
                                } else {
                                    $last->text = substr_replace($last->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
                                }
                            }
                        } elseif (($as_block || $last_asblock || ($in_block && ($i === 0))) && $last) {
                            if ($last && ($last->isText())) {
                                $last->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
                            } else {
                                $n->addText($this->linebreak_string.str_repeat($this->indent_string, $indent));
                            }
                        }
                    } elseif (!trim($n->getInnerText())) {
                        // Only whitespace inside: empty the element.
                        $n->clear();
                    }

                    if ($recursive) {
                        $this->format_html($n, $recursive);
                    }
                }

            } elseif (trim($n->text) && ((($i - 1 < $child_count) && ($char = $n->text[0]) && isset($this->whitespace[$char])) || ($in_block && ($i === 0)))) {
                // Non-empty text node: normalize its leading whitespace.
                if (isset($this->whitespace[$char])) {
                    if ($this->whitespace[$char]) {
                        $n->text = str_repeat($this->indent_string, $indent).$n->text;
                    } else {
                        $n->text = substr_replace($n->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), 0, 1);
                    }
                } else {
                    $n->text = $this->linebreak_string.str_repeat($this->indent_string, $indent).$n->text;
                }
            }

            $prev = $n;
            $prev_asblock = $as_block;
        }

        return true;
    }

    /**
     * Formats a node: optionally minifies inline scripts, then reformats the HTML.
     *
     * Script minification errors are stored as strings in {@link $errors}.
     *
     * @param mixed $node Node to format
     * @return bool Result of format_html()
     */
    function format(&$node) {
        $this->errors = array();

        if ($this->options['minify_script']) {
            $a = self::minify_javascript($node, $this->indent_string, true, true);
            if (is_array($a)) {
                foreach($a as $error) {
                    $this->errors[] = $error[0]->getMessage().' >>> '.$error[1];
                }
            }
        }

        return $this->format_html($node);
    }
}
//END gan_formatter.php
?>
原文:http://www.cnblogs.com/swocn/p/6731308.html