/*
Copyright (c) 2003-2012, CKSource - Frederico Knabben. All rights reserved.
For licensing, see LICENSE.html or http://ckeditor.com/license
*/

/**
 * Creates a {@link CKEDITOR.htmlParser} class instance.
 * @class Provides an "event like" system to parse strings of HTML data.
 * @example
 * var parser = new CKEDITOR.htmlParser();
 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 *     {
 *         alert( tagName );
 *     };
 * parser.parse( '<p>Some <b>text</b>.</p>' );
 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		// Matches, in order of precedence:
		//   - a closing tag (group 1: tag name),
		//   - a comment (group 2: comment text),
		//   - an opening tag (group 3: tag name, group 4: raw attributes
		//     part, which also holds the trailing "/" of self-closing tags).
		// The tag name class excludes "/" so that "<br/>" gives the name
		// "br" (with "/" landing in group 4) instead of the name "br/".
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\/\\s>]+)\\s*((?:(?:"[^"]*")|(?:\'[^\']*\')|[^"\'>])*)\\/?>))', 'g' )
	};
};

(function()
{
	// Matches one attribute: group 1 is the name; the value is in group 2
	// (double quoted), group 3 (single quoted) or group 4 (unquoted).
	var attribsRegex	= /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
		// Attributes that, when found without a value, get their own name as value.
		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	// Builds the attributes object (lowercased name => value) out of the raw
	// attributes part of an opening tag.
	function parseAttributes( attribsPart )
	{
		var attribs = {},
			attribMatch;

		if ( !attribsPart )
			return attribs;

		// The regex is shared and global, so always scan from the start.
		attribsRegex.lastIndex = 0;

		while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
		{
			var attName = attribMatch[ 1 ].toLowerCase(),
				attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

			attribs[ attName ] = ( !attValue && emptyAttribs[ attName ] ) ? attName : attValue;
		}

		return attribs;
	}

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name (lowercased)
		 *		and its value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 * 		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "<!-- Example --><b>Hello</b>" );
		 */
		onTagOpen	: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased and has no trailing white-space.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "<!-- Example --><b>Hello</b>" );
		 */
		onTagClose	: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *     {
		 *         alert( text );  // e.g. "Hello"
		 *     };
		 * parser.parse( "<!-- Example --><b>Hello</b>" );
		 */
		onText		: function() {},

		/**
		 * Function to be fired when a CDATA section is found, that is, the raw
		 * content following the opener of a tag listed in CKEDITOR.dtd.$cdata,
		 * up to its closer (or up to the end of the input if there is none).
		 * This function should be overridden when using this class.
		 * @param {String} cdata The CDATA found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onCDATA = function( cdata )
		 *     {
		 *         alert( cdata );  // e.g. "var hello;"
		 *     };
		 * parser.parse( "<script>var hello;</script>" );
		 */
		onCDATA		: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *     {
		 *         alert( comment );  // e.g. " Example "
		 *     };
		 * parser.parse( "<!-- Example --><b>Hello</b>" );
		 */
		onComment	: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "<!-- Example --><b>Hello</b>" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0,
				cdata;	// The collected data inside a CDATA section.

			while ( true )
			{
				// The regex is global and lives in the instance, so its
				// "lastIndex" outlives this call. Always resume from our own
				// bookmark: this protects against a stale index left by a
				// previous parse() aborted by a throwing handler, and against
				// handlers calling parse() again on this same instance.
				partsRegex.lastIndex = nextIndex;

				if ( !( parts = partsRegex.exec( html ) ) )
					break;

				// Deliver the text found between the previous token and this one.
				var tagIndex = parts.index;
				if ( tagIndex > nextIndex )
				{
					var text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match for opening/closing tags and comments.
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				 */

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					// "</b >" is a valid closer for "b", so drop the trailing
					// white-space captured by the regex.
					tagName = tagName.toLowerCase().replace( /\s+$/, '' );

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
					{
						// Send the CDATA data.
						this.onCDATA( cdata.join('') );
						cdata = null;
					}

					if ( !cdata )
					{
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata )
				{
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					tagName = tagName.toLowerCase();

					// There are some tag names that can break things, so let's
					// simply ignore them when parsing. (#5224)
					if ( /="/.test( tagName ) )
						continue;

					var attribsPart = parts[ 4 ],
						// The closing "/" of self-closing tags is captured
						// as the last character of the attributes part.
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					this.onTagOpen( tagName, parseAttributes( attribsPart ), selfClosing );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment
				if ( ( tagName = parts[ 2 ] ) )
					this.onComment( tagName );
			}

			var hasTail = html.length > nextIndex;

			if ( cdata )
			{
				// The input ended inside a CDATA section that was never
				// closed: deliver everything collected so far (plus the
				// remaining input) rather than silently dropping it.
				if ( hasTail )
					cdata.push( html.substring( nextIndex, html.length ) );

				this.onCDATA( cdata.join('') );
			}
			else if ( hasTail )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
})();