1 /*
  2 Copyright (c) 2003-2012, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5 
  6 /**
  7  * A lightweight representation of an HTML DOM structure.
  8  * @constructor
  9  * @example
 10  */
 11 CKEDITOR.htmlParser.fragment = function()
 12 {
 13 	/**
 14 	 * The nodes contained in the root of this fragment.
 15 	 * @type Array
 16 	 * @example
 17 	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
 18 	 * alert( fragment.children.length );  "2"
 19 	 */
 20 	this.children = [];
 21 
 22 	/**
 23 	 * Get the fragment parent. Should always be null.
 24 	 * @type Object
 25 	 * @default null
 26 	 * @example
 27 	 */
 28 	this.parent = null;
 29 
 30 	/** @private */
 31 	this._ =
 32 	{
 33 		isBlockLike : true,
 34 		hasInlineStarted : false
 35 	};
 36 };
 37 
 38 (function()
 39 {
	// Structural containers (table and the list types) merged with the DTD
	// entries of those containers. The internal structure of these blocks is
	// respected during parser fixing.
	var nonBreakingBlocks = CKEDITOR.tools.extend(
			{ table:1,ul:1,ol:1,dl:1 },
			CKEDITOR.dtd.table,
			CKEDITOR.dtd.ul,
			CKEDITOR.dtd.ol,
			CKEDITOR.dtd.dl );

	// IE < 8 doesn't output the close tag on definition list items, so those
	// tags are treated as optionally closed there. (#6975)
	var optionalCloseTags = {};
	if ( CKEDITOR.env.ie && CKEDITOR.env.version < 8 )
		optionalCloseTags = { dd : 1, dt :1 };

	// The two list containers whose items are <li>.
	var listBlocks = { ol:1, ul:1 };

	// DTD used when the current node is the fragment itself (it has no name):
	// built from the html, body and head DTD entries plus <html>, <style> and
	// <script>, so that intermediate structures such as an orphan <li> are
	// not accepted directly.
	var rootDtd = CKEDITOR.tools.extend(
			{},
			{ html: 1 },
			CKEDITOR.dtd.html,
			CKEDITOR.dtd.body,
			CKEDITOR.dtd.head,
			{ style:1,script:1 } );
 51 
	/*
	 * Tells whether an element is of a kind that should be dropped when it
	 * ends up empty.
	 *
	 * @param {Object} node Element to test; its "name" and "attributes" are read.
	 * @returns A truthy value (the href of a link, or the element's entry in
	 * CKEDITOR.dtd.$removeEmpty) when the element is removable, a falsy value otherwise.
	 */
	function isRemoveEmpty( node )
	{
		// A link with an href is removable when empty; an <a> without href
		// (an anchor) is not decided here. (#7894)
		var linkTarget = node.name == 'a' && node.attributes.href;
		if ( linkTarget )
			return linkTarget;

		return CKEDITOR.dtd.$removeEmpty[ node.name ];
	}
 58 
	/**
	 * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
	 * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
	 * @param {String|Boolean} [fixForBody=false] Name of the element (e.g. 'p') that is
	 *		opened to wrap content which cannot sit directly in the fragment root or
	 *		in body. Pass a falsy value to disable the wrapping.
	 * @param {CKEDITOR.htmlParser.element} [contextNode] Parse the HTML as the content of
	 *		this element. When given, it is filled and returned instead of a new fragment.
	 * @returns {CKEDITOR.htmlParser.fragment} The fragment created (or the context node supplied).
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children[0].name );  "b"
	 * alert( fragment.children[1].value );  " Text"
	 */
 70 	CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody, contextNode )
 71 	{
 72 		var parser = new CKEDITOR.htmlParser(),
 73 			fragment = contextNode || new CKEDITOR.htmlParser.fragment(),
 74 			pendingInline = [],
 75 			pendingBRs = [],
 76 			currentNode = fragment,
 77 		    // Indicate we're inside a <textarea> element, spaces should be touched differently.
 78 			inTextarea = false,
 79 		    // Indicate we're inside a <pre> element, spaces should be touched differently.
 80 			inPre = false;
 81 
		/*
		 * Walks the "pendingInline" queue and re-opens (as clones) the entries
		 * that fit at the current position. Each re-opened clone becomes the
		 * new "currentNode", so later content lands inside it.
		 *
		 * @param {String} [newTagName] Name of the element about to be opened.
		 * When given, a pending element is re-opened only if its DTD accepts that
		 * name, or if either of the two names has no DTD entry.
		 */
		function checkPending( newTagName )
		{
			// Set once the queued <br>s have been flushed during this call.
			var pendingBRsSent;

			if ( pendingInline.length > 0 )
			{
				// The length is re-read on every pass: entries are spliced out below.
				for ( var i = 0 ; i < pendingInline.length ; i++ )
				{
					var pendingElement = pendingInline[ i ],
						pendingName = pendingElement.name,
						pendingDtd = CKEDITOR.dtd[ pendingName ],
						// Falsy when the current node has no name (the fragment) or no DTD entry.
						currentDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

					// The pending element must be acceptable inside the current node, and
					// (when a new tag is coming) must itself be able to hold that tag.
					if ( ( !currentDtd || currentDtd[ pendingName ] ) && ( !newTagName || !pendingDtd || pendingDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
					{
						// Queued <br>s go out before the first re-opened element, once only.
						if ( !pendingBRsSent )
						{
							sendPendingBRs();
							pendingBRsSent = 1;
						}

						// Get a clone for the pending element.
						pendingElement = pendingElement.clone();

						// Add it to the current node and make it the current,
						// so the new element will be added inside of it.
						pendingElement.parent = currentNode;
						currentNode = pendingElement;

						// Remove the pending element (back the index by one
						// to properly process the next entry).
						pendingInline.splice( i, 1 );
						i--;
					}
					else
					{
						// Some elements of the same type cannot be nested; flatten them,
						// e.g. <a href="#">foo<a href="#">bar</a></a>. (#7894)
						// The current node is closed (current moves to its parent) and the
						// same pending entry is examined again on the next pass.
						if ( pendingName == currentNode.name )
							addElement( currentNode, currentNode.parent, 1 ), i--;
					}
				}
			}
		}
126 
127 		function sendPendingBRs()
128 		{
129 			while ( pendingBRs.length )
130 				addElement( pendingBRs.shift(), currentNode );
131 		}
132 
		/*
		 * Besides simply appending the specified element to the target, this function
		 * also takes care of other chores: forcing a block in body (when "fixForBody"
		 * is set), trimming spaces at the block end boundary, and clearing the
		 * "inPre" / "inTextarea" flags when such an element is added.
		 *
		 * An element that has already been added (its "previous" property is no
		 * longer undefined) is ignored.
		 *
		 * @param {Element} element The element to be added as the last child of the target.
		 * @param {Element} [target] The parent element to receive the new node. Defaults to
		 * "currentNode", then to the fragment.
		 * @param {Boolean} [moveCurrent=false] Decides where "currentNode" points afterwards,
		 * unless a return point is recorded on the element (that one always wins and is then
		 * cleared): when true, current moves onto the target node; otherwise current is
		 * restored to what it was when the function was entered.
		 */
		function addElement( element, target, moveCurrent )
		{
			// Ignore any element that has already been added.
			if ( element.previous !== undefined )
				return;

			target = target || currentNode || fragment;

			// Current element might be mangled by fix body below,
			// save it for restore later.
			var savedCurrent = currentNode;

			// If the target is the fragment (it has no "type") or <body>, and this
			// element can't go directly inside body (if fixForBody).
			if ( fixForBody && ( !target.type || target.name == 'body' ) )
			{
				// The name recorded in "data-cke-real-element-type", when present,
				// takes precedence over the element's own name.
				var elementName, realElementName;
				if ( element.attributes
					 && ( realElementName =
						  element.attributes[ 'data-cke-real-element-type' ] ) )
					elementName = realElementName;
				else
					elementName =  element.name;

				// Nodes without a name, names listed in CKEDITOR.dtd.$body, <body>
				// itself and elements flagged as orphan are left unwrapped.
				if ( elementName && !( elementName in CKEDITOR.dtd.$body || elementName == 'body' || element.isOrphan ) )
				{
					// Open the "fixForBody" element (e.g. a <p>) in the target.
					currentNode = target;
					parser.onTagOpen( fixForBody, {} );

					// The new target now is the element just opened; parsing
					// resumes inside it once this element is added.
					element.returnPoint = target = currentNode;
				}
			}

			// Rtrim empty spaces on block end boundary. (#3585)
			if ( element._.isBlockLike
				 && element.name != 'pre' && element.name != 'textarea' )
			{

				var length = element.children.length,
					lastChild = element.children[ length - 1 ],
					text;
				if ( lastChild && lastChild.type == CKEDITOR.NODE_TEXT )
				{
					// A trailing text node that is whitespace only is dropped
					// altogether; otherwise only its right side is trimmed.
					if ( !( text = CKEDITOR.tools.rtrim( lastChild.value ) ) )
						element.children.length = length -1;
					else
						lastChild.value = text;
				}
			}

			target.add( element );

			// Adding a <pre> or <textarea> means it has been closed.
			if ( element.name == 'pre' )
				inPre = false;

			if ( element.name == 'textarea' )
				inTextarea = false;


			// Decide where parsing continues (see "moveCurrent" above).
			if ( element.returnPoint )
			{
				currentNode = element.returnPoint;
				delete element.returnPoint;
			}
			else
				currentNode = moveCurrent ? target : savedCurrent;
		}
212 
		/*
		 * Parser callback for an opening tag. Creates the element, then moves
		 * "currentNode" around (closing, re-parenting or wrapping as needed)
		 * until a node that can hold the element is found.
		 *
		 * @param {String} tagName Name of the element being opened.
		 * @param {Object} attributes Attributes handed to the element constructor.
		 * @param {Boolean} [selfClosing] Whether the tag was identified as self-closing.
		 * @param {Boolean} [optionalClose] Marks the element as one that may be closed
		 * implicitly when an incoming element doesn't fit into it.
		 */
		parser.onTagOpen = function( tagName, attributes, selfClosing, optionalClose )
		{
			var element = new CKEDITOR.htmlParser.element( tagName, attributes );

			// "isEmpty" will be always "false" for unknown elements, so we
			// must force it if the parser has identified it as a selfClosing tag.
			if ( element.isUnknown && selfClosing )
				element.isEmpty = true;

			// Check for optional closed elements, including browser quirks and manually opened blocks.
			element.isOptionalClose = tagName in optionalCloseTags || optionalClose;

			// This is a tag to be removed if empty, so do not add it immediately:
			// it waits in "pendingInline" until checkPending() re-opens it.
			if ( isRemoveEmpty( element ) )
			{
				pendingInline.push( element );
				return;
			}
			else if ( tagName == 'pre' )
				inPre = true;
			// Inside <pre> a <br> is stored as a line-break character instead.
			else if ( tagName == 'br' && inPre )
			{
				currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
				return;
			}
			else if ( tagName == 'textarea' )
				inTextarea = true;

			// Any other <br> is queued; sendPendingBRs() adds it later.
			if ( tagName == 'br' )
			{
				pendingBRs.push( element );
				return;
			}

			// Loop until "currentNode" accepts the element (or we give up and
			// flag the element as orphan).
			while( 1 )
			{
				var currentName = currentNode.name;

				// DTD of the current node: its own entry, a div/span fallback for names
				// without an entry, or the root DTD when the node has no name (the fragment).
				var currentDtd = currentName ? ( CKEDITOR.dtd[ currentName ]
						|| ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span ) )
						: rootDtd;

				// If the element cannot be child of the current element.
				if ( !element.isUnknown && !currentNode.isUnknown && !currentDtd[ tagName ] )
				{
					// Current node doesn't have a close tag, time for a close
					// as this element isn't fit in. (#7497)
					if ( currentNode.isOptionalClose )
						parser.onTagClose( currentName );
					// Fixing malformed nested lists by moving it into a previous list item. (#3828)
					else if ( tagName in listBlocks
						&& currentName in listBlocks )
					{
						var children = currentNode.children,
							lastChild = children[ children.length - 1 ];

						// Establish the list item if it's not existed.
						if ( !( lastChild && lastChild.name == 'li' ) )
							addElement( ( lastChild = new CKEDITOR.htmlParser.element( 'li' ) ), currentNode );

						// Remember the list as the place to return to (first one
						// recorded wins), then continue inside the list item.
						!element.returnPoint && ( element.returnPoint = currentNode );
						currentNode = lastChild;
					}
					// Establish new list root for orphan list items: <ul> for <li>,
					// <dl> for the other names in CKEDITOR.dtd.$listItem. The new root
					// is opened with "optionalClose" set.
					else if ( tagName in CKEDITOR.dtd.$listItem && currentName != tagName )
						parser.onTagOpen( tagName == 'li' ? 'ul' : 'dl', {}, 0, 1 );
					// We're inside a structural block like table and list, AND the incoming element
					// is not of the same type (e.g. <td>td1<td>td2</td>), we simply add this new one before it,
					// and most importantly, return back to here once this element is added,
					// e.g. <table><tr><td>td1</td><p>p1</p><td>td2</td></tr></table>
					else if ( currentName in nonBreakingBlocks && currentName != tagName )
					{
						!element.returnPoint && ( element.returnPoint = currentNode );
						currentNode = currentNode.parent;
					}
					else
					{
						// The current element is an inline element, which
						// need to be continued even after the close, so put
						// it in the pending list.
						if ( currentName in CKEDITOR.dtd.$inline )
							pendingInline.unshift( currentNode );

						// The most common case where we just need to close the
						// current one and append the new one to the parent.
						if ( currentNode.parent )
							addElement( currentNode, currentNode.parent, 1 );
						// We've tried our best to fix the embarrassment here, while
						// this element still doesn't find its parent, mark it as
						// orphan and show our tolerance to it.
						else
						{
							element.isOrphan = 1;
							break;
						}
					}
				}
				else
					break;
			}

			// Re-open pending inline elements that fit here and flush queued <br>s
			// before the new element takes its place.
			checkPending( tagName );
			sendPendingBRs();

			element.parent = currentNode;

			// Empty elements are attached right away; others become the current
			// node and are attached when they get closed.
			if ( element.isEmpty )
				addElement( element );
			else
				currentNode = element;
		};
324 
		/*
		 * Parser callback for a closing tag. Either cancels a matching pending
		 * inline element, or closes the nearest open ancestor with that name
		 * together with everything still open inside of it. A closing tag that
		 * matches no open element leaves the tree untouched.
		 *
		 * @param {String} tagName Name of the element being closed.
		 */
		parser.onTagClose = function( tagName )
		{
			// Check if there is any pending tag to be closed (most recent first).
			for ( var i = pendingInline.length - 1 ; i >= 0 ; i-- )
			{
				// If found, just remove it from the list.
				if ( tagName == pendingInline[ i ].name )
				{
					pendingInline.splice( i, 1 );
					return;
				}
			}

			var pendingAdd = [],
				newPendingInline = [],
				candidate = currentNode;

			// Climb from the current node looking for the element being closed.
			while ( candidate != fragment && candidate.name != tagName )
			{
				// If this is an inline element, add it to the pending list, if we're
				// really closing one of the parents element later, they will continue
				// after it.
				if ( !candidate._.isBlockLike )
					newPendingInline.unshift( candidate );

				// This node should be added to its parent at this point. But,
				// it should happen only if the closing tag is really closing
				// one of the nodes. So, for now, we just cache it.
				pendingAdd.push( candidate );

				// Make sure return point is properly restored.
				candidate = candidate.returnPoint || candidate.parent;
			}

			// Reaching the fragment means no open element matched the closing tag.
			if ( candidate != fragment )
			{
				// Add all elements that have been found in the above loop.
				for ( i = 0 ; i < pendingAdd.length ; i++ )
				{
					var node = pendingAdd[ i ];
					addElement( node, node.parent );
				}

				currentNode = candidate;

				// Queued <br>s are flushed into a block-like element before it closes.
				if ( candidate._.isBlockLike )
					sendPendingBRs();

				addElement( candidate, candidate.parent );

				// The parent should start receiving new nodes now, except if
				// addElement changed the currentNode.
				if ( candidate == currentNode )
					currentNode = currentNode.parent;

				// Inline elements that were still open continue after the closed one.
				pendingInline = pendingInline.concat( newPendingInline );
			}

			// Once </body> has been seen, body fixing is switched off for the
			// rest of the parsing.
			if ( tagName == 'body' )
				fixForBody = false;
		};
386 
387 		parser.onText = function( text )
388 		{
389 			// Trim empty spaces at beginning of text contents except <pre> and <textarea>.
390 			if ( ( !currentNode._.hasInlineStarted || pendingBRs.length ) && !inPre && !inTextarea )
391 			{
392 				text = CKEDITOR.tools.ltrim( text );
393 
394 				if ( text.length === 0 )
395 					return;
396 			}
397 
398 			var currentName = currentNode.name,
399 			currentDtd = currentName ? ( CKEDITOR.dtd[ currentName ]
400 							|| ( currentNode._.isBlockLike ?
401 								 CKEDITOR.dtd.div : CKEDITOR.dtd.span ) ) : rootDtd;
402 
403 			// Fix orphan text in list/table. (#8540) (#8870)
404 			if ( !inTextarea &&
405 				 !currentDtd [ '#' ] &&
406 				 currentName in nonBreakingBlocks )
407 			{
408 				parser.onTagOpen( currentName in listBlocks ? 'li' :
409 								  currentName == 'dl' ? 'dd' :
410 								  currentName == 'table' ? 'tr' :
411 								  currentName == 'tr' ? 'td' : '' );
412 				parser.onText( text );
413 				return;
414 			}
415 
416 			sendPendingBRs();
417 			checkPending();
418 
419 			if ( fixForBody
420 				 && ( !currentNode.type || currentNode.name == 'body' )
421 				 && CKEDITOR.tools.trim( text ) )
422 			{
423 				this.onTagOpen( fixForBody, {}, 0, 1 );
424 			}
425 
426 			// Shrinking consequential spaces into one single for all elements
427 			// text contents.
428 			if ( !inPre && !inTextarea )
429 				text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );
430 
431 			currentNode.add( new CKEDITOR.htmlParser.text( text ) );
432 		};
433 
434 		parser.onCDATA = function( cdata )
435 		{
436 			currentNode.add( new CKEDITOR.htmlParser.cdata( cdata ) );
437 		};
438 
439 		parser.onComment = function( comment )
440 		{
441 			sendPendingBRs();
442 			checkPending();
443 			currentNode.add( new CKEDITOR.htmlParser.comment( comment ) );
444 		};
445 
446 		// Parse it.
447 		parser.parse( fragmentHtml );
448 
449 		// Send all pending BRs except one, which we consider a unwanted bogus. (#5293)
450 		sendPendingBRs( !CKEDITOR.env.ie && 1 );
451 
452 		// Close all pending nodes, make sure return point is properly restored.
453 		while ( currentNode != fragment )
454 			addElement( currentNode, currentNode.parent, 1 );
455 
456 		return fragment;
457 	};
458 
459 	CKEDITOR.htmlParser.fragment.prototype =
460 	{
461 		/**
462 		 * Adds a node to this fragment.
463 		 * @param {Object} node The node to be added. It can be any of of the
464 		 *		following types: {@link CKEDITOR.htmlParser.element},
465 		 *		{@link CKEDITOR.htmlParser.text} and
466 		 *		{@link CKEDITOR.htmlParser.comment}.
467 		 *	@param {Number} [index] From where the insertion happens.
468 		 * @example
469 		 */
470 		add : function( node, index )
471 		{
472 			isNaN( index ) && ( index = this.children.length );
473 
474 			var previous = index > 0 ? this.children[ index - 1 ] : null;
475 			if ( previous )
476 			{
477 				// If the block to be appended is following text, trim spaces at
478 				// the right of it.
479 				if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
480 				{
481 					previous.value = CKEDITOR.tools.rtrim( previous.value );
482 
483 					// If we have completely cleared the previous node.
484 					if ( previous.value.length === 0 )
485 					{
486 						// Remove it from the list and add the node again.
487 						this.children.pop();
488 						this.add( node );
489 						return;
490 					}
491 				}
492 
493 				previous.next = node;
494 			}
495 
496 			node.previous = previous;
497 			node.parent = this;
498 
499 			this.children.splice( index, 0, node );
500 
501 			this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
502 		},
503 
504 		/**
505 		 * Writes the fragment HTML to a CKEDITOR.htmlWriter.
506 		 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
507 		 * @example
508 		 * var writer = new CKEDITOR.htmlWriter();
509 		 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<P><B>Example' );
510 		 * fragment.writeHtml( writer )
511 		 * alert( writer.getHtml() );  "<p><b>Example</b></p>"
512 		 */
513 		writeHtml : function( writer, filter )
514 		{
515 			var isChildrenFiltered;
516 			this.filterChildren = function()
517 			{
518 				var writer = new CKEDITOR.htmlParser.basicWriter();
519 				this.writeChildrenHtml.call( this, writer, filter, true );
520 				var html = writer.getHtml();
521 				this.children = new CKEDITOR.htmlParser.fragment.fromHtml( html ).children;
522 				isChildrenFiltered = 1;
523 			};
524 
525 			// Filtering the root fragment before anything else.
526 			!this.name && filter && filter.onFragment( this );
527 
528 			this.writeChildrenHtml( writer, isChildrenFiltered ? null : filter );
529 		},
530 
531 		writeChildrenHtml : function( writer, filter )
532 		{
533 			for ( var i = 0 ; i < this.children.length ; i++ )
534 				this.children[i].writeHtml( writer, filter );
535 		}
536 	};
537 })();
538