/*
 * HTML Parser By Noah Sloan (http://noahsloan.com)
 * Supports parsing HTML in incomplete fragments.
 *
 * Modified from:
 * HTML Parser By John Resig (http://ejohn.org/files/htmlparser.js)
 * Original code by Erik Arvidsson, Mozilla Public License
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 *
 * // Use like so:
 * var parser = HTMLParser({
 *     start: function(tag, attrs, unary) {},
 *     end: function(tag) {},
 *     chars: function(text) {},
 *     comment: function(text) {}
 * });
 *
 * parser.parseMore(html);
 * parser.parseMore(html);
 * parser.end();
 *
 * You can write directly to an element with elementWrite.toElement:
 * var writer = elementWrite.toElement(element);
 * writer.write(html);
 * writer.writeln(html);
 * writer.close(); // important
 *
 * toElement takes an optional 2nd argument: listeners.
 *
 * listeners is just like the handlers object passed to HTMLParser, but
 * with a few additions:
 * 1. If the handler function returns false, then no action is taken (the
 *    handler is assumed to have handled the event).
 * 2. The last argument is a state object. The stack property is the current
 *    element stack. last() is the current parent element. push() and pop()
 *    will push and pop the current parent element, which affects where
 *    content is created.
 * 3. There is a close() listener that will be called when the writer is
 *    closed.
 *
 * See the listeners qunit test for an example.
 */
(function(define){

	// NOTE(review): this file had been collapsed onto a handful of physical
	// lines and had its markup-like literals stripped (the "<!--" / "-->" /
	// "</" string tests, the comment/CDATA regexes, the end-tag and start-tag
	// branches, text handling, parseStartTag and the head of parseEndTag).
	// Those parts were restored from the upstream John Resig parser cited in
	// the header and adapted to the incremental parseMore()/end() API.
	// Please diff against a pristine copy of this file before relying on the
	// exact behaviour of the restored sections.

	// Regular Expressions for parsing tags and attributes
	var startTag = /^<([\-A-Za-z0-9_]+)((?:\s+[\w\-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
		endTag = /^<\/([\-A-Za-z0-9_]+)[^>]*>/,
		attr = /([\-A-Za-z0-9_]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;

	// Empty Elements - HTML 4.01
	var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed");

	// Block Elements - HTML 4.01
	var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul");

	// Inline Elements - HTML 4.01
	var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");

	// Elements that you can, intentionally, leave open
	// (and which close themselves)
	var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr");

	// Attributes that have their values filled in disabled="disabled"
	var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");

	// Special Elements (can contain anything)
	var special = makeMap("script,style");

	/**
	 * Creates an incremental HTML parser.
	 *
	 * @param {Object} handler Optional callbacks: start(tag, attrs, unary),
	 *     end(tag), chars(text) and comment(text). Missing callbacks are
	 *     skipped.
	 * @returns {{parseMore: Function, end: Function}} parseMore(html) feeds
	 *     another fragment (and returns the parser); end(html) optionally
	 *     feeds a last fragment and then closes every element still open.
	 */
	function HTMLParser( handler ) {
		var index, chars, match,
			stack = [],                 // names of the currently open tags
			last = '',                  // input not consumed yet, kept between parseMore() calls
			waitingForSpecialBody;      // true while a script/style body has no end tag yet

		stack.last = function(){
			return this[ this.length - 1 ];
		};

		// Replace callback for the body of a special element: unwraps
		// comment / CDATA wrappers, reports the body as text and removes the
		// whole match (body + end tag) from the input.
		function processSpecial(all, text){
			text = text.replace(/<!--(.*?)-->/g, "$1")
				.replace(/<!\[CDATA\[(.*?)]]>/g, "$1");

			if ( handler.chars )
				handler.chars( text );

			return "";
		}

		/**
		 * Parses another fragment of HTML. Input that ends in the middle of
		 * a tag, comment or script/style body is buffered and parsed again
		 * together with the next fragment.
		 *
		 * @param {string} html The next fragment.
		 * @returns {Object} The parser itself.
		 */
		function parseMore(html) {
			html = last + html;
			last = html;
			if(module.debug.parse) console.log('PARSE parseMore',html);

			while ( html ) {
				chars = true;

				// Make sure we're not in a script or style element
				if ( waitingForSpecialBody || (stack.last() && special[ stack.last() ]) ) {
					waitingForSpecialBody = false;
					var specialBody = new RegExp("([\\s\\S]*?)<\/" + stack.last() + "[^>]*>","i");
					if(!html.match(specialBody)) {
						// The end tag has not arrived yet: keep everything
						// buffered in `last` until more input is supplied.
						waitingForSpecialBody = true;
						return self;
					}
					html = html.replace(specialBody, processSpecial);

					parseEndTag( "", stack.last() );

				} else {

					// Comment
					if ( html.indexOf("<!--") == 0 ) {
						index = html.indexOf("-->");

						if ( index >= 0 ) {
							if(module.debug.parse) console.log('PARSE comment',match);
							if ( handler.comment )
								handler.comment( html.substring( 4, index ) );
							html = html.substring( index + 3 );
							chars = false;
						}

					// end tag
					} else if ( html.indexOf("</") == 0 ) {
						match = html.match( endTag );

						if ( match ) {
							html = html.substring( match[0].length );
							match[0].replace( endTag, parseEndTag );
							chars = false;
						}

					// start tag
					} else if ( html.indexOf("<") == 0 ) {
						match = html.match( startTag );

						if ( match ) {
							html = html.substring( match[0].length );
							match[0].replace( startTag, parseStartTag );
							chars = false;
						}
					}

					if ( chars ) {
						index = html.indexOf("<");

						// The input starts with "<" but no complete comment or
						// tag could be read: wait for the rest of it instead
						// of reporting empty text. `last` still holds the
						// unparsed input at this point.
						if ( index === 0 )
							return self;

						var text = index < 0 ? html : html.substring( 0, index );
						html = index < 0 ? "" : html.substring( index );

						if ( handler.chars )
							handler.chars( text );
					}
				}

				// Remember what is left so the next call continues from here.
				last = html;
			}

			return self;
		}

		// Replace callback for startTag: closes elements that the new tag
		// implicitly ends, records the tag on the stack (unless it is unary)
		// and reports it together with its parsed attributes.
		function parseStartTag( tag, tagName, rest, unary ) {
			if ( block[ tagName ] ) {
				while ( stack.last() && inline[ stack.last() ] ) {
					parseEndTag( "", stack.last() );
				}
			}

			if ( closeSelf[ tagName ] && stack.last() == tagName ) {
				parseEndTag( "", tagName );
			}

			unary = empty[ tagName ] || !!unary;

			if ( !unary )
				stack.push( tagName );

			if ( handler.start ) {
				var attrs = [];

				rest.replace(attr, function(match, name) {
					// Groups 2-4 are the double-quoted, single-quoted and
					// unquoted value; boolean attributes get their own name.
					var value = arguments[2] ? arguments[2] :
						arguments[3] ? arguments[3] :
						arguments[4] ? arguments[4] :
						fillAttrs[name] ? name : "";

					attrs.push({
						name: name,
						value: value,
						escaped: value.replace(/(^|[^\\])"/g, '$1\\\"')
					});
				});

				handler.start( tagName, attrs, unary );
			}
		}

		// Closes `tagName` and everything opened after it. Called without a
		// tag name it closes every element that is still open.
		function parseEndTag( tag, tagName ) {
			// If no tag name is provided, clean shop
			if ( !tagName )
				var pos = 0;

			// Find the closest opened tag of the same type
			else
				for ( var pos = stack.length - 1; pos >= 0; pos-- )
					if ( stack[ pos ] == tagName )
						break;

			if ( pos >= 0 ) {
				// Close all the open elements, up the stack
				for ( var i = stack.length - 1; i >= pos; i-- )
					if ( handler.end )
						handler.end( stack[ i ] );

				if(module.debug.parse) console.log('PARSE parseEndTag pop',stack.slice(pos));
				// Remove the open elements from the stack
				stack.length = pos;
			}
		}

		/**
		 * Finishes parsing.
		 *
		 * @param {string} [html] Optional final fragment.
		 */
		function end(html) {
			if(html) {
				parseMore(html);
			}
			// Clean up any remaining tags
			parseEndTag();
		}

		var self = {
			parseMore: parseMore,
			end: end
		};
		return self;
	}

	/**
	 * Creates a writer that parses HTML fragments and builds the resulting
	 * nodes inside `element`.
	 *
	 * @param {Object} element The node that receives the content; its owner
	 *     document is used to create elements and text nodes.
	 * @param {Object} [listeners] Optional start/end/chars/comment/close
	 *     listeners. Each one receives the handler arguments followed by a
	 *     state object ({stack: ...}); returning false suppresses the default
	 *     action. The created writer is also stored on listeners.writer.
	 * @returns {Object} The writer: handle(), write(), writeln(), close().
	 */
	function toElement( element, listeners ) {
		listeners = listeners || {};
		var curParentNode = element;

		var elems = [curParentNode],
			doc = element.ownerDocument ||
				element.getOwnerDocument && element.getOwnerDocument();

		elems.last = function() {
			return this[ this.length - 1 ];
		};
		// pop() and push() are overridden so that the current parent node
		// always follows the top of the element stack.
		elems.pop = function() {
			var el = Array.prototype.pop.call(this);
			curParentNode = this.last();
			return el;
		};
		elems.push = function(el) {
			Array.prototype.push.call(this,el);
			curParentNode = el;
		};

		// Calls the listener registered for `event` (if any) with the
		// remaining arguments plus the state object and returns its result.
		function listen(event) {
			if(listeners[event]) {
				return listeners[event].apply(listeners,slice(arguments,1).concat([state()]));
			}
		}

		function state() {
			return {
				stack: elems
			};
		}

		var handlers, parser = HTMLParser(handlers = {
			start: function( tagName, attrs, unary ) {
				if(listen('start',tagName,attrs,unary) === false) return;
				if(module.debug.write) console.log('WRITE start',tagName,attrs,unary);
				var elem = doc.createElement( tagName );

				for ( var i = 0; i < attrs.length; i++ ) {
					var attribute = attrs[i];
					elem.setAttribute( attribute.name, attribute.value );
				}

				if ( curParentNode && curParentNode.appendChild ) {
					curParentNode.appendChild( elem );
					if(module.debug.write) console.log('WRITE start append',elem,'to',curParentNode);
				}

				// Only elements that can have content become the new parent.
				if ( !unary ) {
					if(module.debug.write) console.log('WRITE push',elem,elems.slice(0));
					elems.push( elem );
				}
				if(module.debug.write) console.log('WRITE start done - parent:',curParentNode);
			},
			end: function( tag ) {
				if(listen('end',tag) === false) return;
				elems.pop();
				if(module.debug.write) console.log('WRITE end',tag,'new parent:',curParentNode,'elems',elems);
			},
			chars: function( text ) {
				if(listen('chars',text) === false) return;
				if(module.debug.write) console.log('WRITE chars',text,'el:',element);
				curParentNode.appendChild( doc.createTextNode( text ) );
			},
			comment: function( text ) {
				if(listen('comment',text) === false) return;
				// create comment node
			},
			close: function() {
				if(listen('close') === false) return;
				if(module.debug.write) console.log('WRITE close el:',element);
			},
			_handle: function(event) {
				listen.apply(this,arguments);
			}
		});

		return listeners.writer = {
			/**
			 * Hook for calling custom listener methods.
			 *
			 * e.g., defining pause & resume methods.
			 */
			handle: function(event,args) {
				if(handlers[event]) {
					handlers[event].apply(handlers,args || []);
				} else {
					handlers._handle.apply(handlers,arguments);
				}
			},
			write: function(html) {
				parser.parseMore(html);
				return this;
			},
			writeln: function(html) {
				this.write(html+'\n');
				return this;
			},
			close: function(html) {
				parser.end(html);
				handlers.close();
				return this;
			}
		};
	}

	/**
	 * Turns a comma separated list into a lookup map (name -> true).
	 * The map has no prototype, so names such as "constructor" or "toString"
	 * are not reported as members by accident.
	 */
	function makeMap(str){
		var obj = Object.create(null), items = str.split(",");
		for ( var i = 0; i < items.length; i++ )
			obj[ items[i] ] = true;
		return obj;
	}

	// Array.prototype.slice for array-likes such as `arguments`.
	function slice(args,index) {
		return Array.prototype.slice.call(args,index);
	}

	var module = {
		toElement: toElement,
		HTMLParser: HTMLParser,
		debug: {
			parse: false,
			write: false
		}
	};
	define(module);

})(typeof define == 'function' ? define : function(ew) {
	if (typeof exports === 'object') {
		module.exports = ew;
	} else {
		// `this` is undefined in strict mode and in ES modules, so look the
		// global object up explicitly and only fall back to `this` last.
		var root = typeof globalThis !== 'undefined' ? globalThis :
			typeof self !== 'undefined' ? self :
			typeof window !== 'undefined' ? window :
			this;
		root.elementWrite = ew;
	}
});