CKSource

CleanWordKeepsStructure

This setting controls the behavior of the Paste from Word dialog. Its default value is false, in which case the clean-up routine changes the pasted content so that it looks like it did in Word. If it is set to true, the routine prefers to keep the HTML structure of the data instead of modifying it to preserve the Word look. Example:

// Prefer keeping the pasted document's HTML structure over mimicking Word's look.
FCKConfig.CleanWordKeepsStructure = true ;

Enabling this setting keeps a properly structured document as it was created in Word, including its anchors; you can then use CSS to make it look like it did in Word.

Additionally, you can specify your own clean-up function by adding it to the FCK object: define an FCK.CustomCleanWord function that accepts three parameters and returns the cleaned-up string. Here is an example using the default function that is called in the Paste dialog (you should add your own function with, for example, a plugin):

// This function will be called from the PasteFromWord dialog (fck_paste.html).
// Input:
//   oNode         - a DOM node whose innerHTML contains the raw paste from the clipboard.
//   bIgnoreFont   - boolean set in the dialog; when true, FONT face attributes and
//                   FONT-FAMILY style declarations are removed.
//   bRemoveStyles - boolean set in the dialog; when true, every style="..." attribute
//                   is removed.
// Output: the cleaned HTML string.
// Also reads FCKConfig.CleanWordKeepsStructure: when true, headings are kept as plain
// <hN> elements; otherwise headings and paragraphs are rewritten as <div> based markup
// and empty elements are dropped.
function CleanWord( oNode, bIgnoreFont, bRemoveStyles )
{
	var html = oNode.innerHTML ;

	// Empty <o:p> elements disappear; non-empty ones collapse to a non-breaking space.
	html = html.replace(/<o:p>\s*<\/o:p>/g, '') ;
	html = html.replace(/<o:p>.*?<\/o:p>/g, '&nbsp;') ;

	// Remove mso-xxx styles.
	html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ;

	// Remove margin styles.
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ;
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;

	html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ;
	html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

	// The following declarations are only removed when they are the last one
	// in the attribute value (the pattern must end at the closing quote).
	html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

	html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ;
	html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ;

	// Remove FONT face attributes.
	if ( bIgnoreFont )
	{
		html = html.replace( /\s*face="[^"]*"/gi, '' ) ;
		html = html.replace( /\s*face=[^ >]*/gi, '' ) ;

		html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ;
	}

	// Remove Class attributes
	html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3") ;

	// Remove styles.
	if ( bRemoveStyles )
		html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

	// Remove empty styles.
	html = html.replace( /\s*style="\s*"/gi, '' ) ;

	// A SPAN that only wraps a non-breaking space is replaced by that space.
	// innerHTML serializes non-breaking spaces as the "&nbsp;" entity.
	html = html.replace( /<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;' ) ;

	// Remove empty SPANs.
	html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

	// Remove Lang attributes
	html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3") ;

	// Unwrap SPAN and FONT elements left without any attribute.
	html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ;

	html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ;

	// Remove XML elements and declarations
	html = html.replace(/<\\?\?xml[^>]*>/gi, '' ) ;

	// Remove Tags with XML namespace declarations: <o:p><\/o:p>
	html = html.replace(/<\/?\w+:[^>]*>/gi, '' ) ;

	// Remove comments [SF BUG-1481861].
	html = html.replace(/<\!--.*?-->/g, '' ) ;

	html = html.replace( /<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;' ) ;

	// Remove empty headings.
	html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

	// Remove "display:none" tags.
	html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none(.*?)<\/\1>/ig, '' ) ;

	// Remove language tags
	html = html.replace( /<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, "<$1$3") ;

	// Remove onmouseover and onmouseout events (from MS Word comments effect)
	html = html.replace( /<(\w[^>]*) onmouseover="([^\"]*)"([^>]*)/gi, "<$1$3") ;
	html = html.replace( /<(\w[^>]*) onmouseout="([^\"]*)"([^>]*)/gi, "<$1$3") ;

	if ( FCKConfig.CleanWordKeepsStructure )
	{
		// The original <Hn> tag send from Word is something like this: <Hn style="margin-top:0px;margin-bottom:0px">
		html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ;

		// Word likes to insert extra <font> tags, when using MSIE. (Weird).
		html = html.replace( /<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>/gi, '<$1>$2<\/$1>' );
		html = html.replace( /<(H\d)><EM>(.*?)<\/EM><\/\1>/gi, '<$1>$2<\/$1>' );
	}
	else
	{
		// Every heading opens <div><b><font> so that it pairs with the
		// </font></b></div> emitted for the closing tag below.
		html = html.replace( /<H1([^>]*)>/gi, '<div$1><b><font size="6">' ) ;
		html = html.replace( /<H2([^>]*)>/gi, '<div$1><b><font size="5">' ) ;
		html = html.replace( /<H3([^>]*)>/gi, '<div$1><b><font size="4">' ) ;
		html = html.replace( /<H4([^>]*)>/gi, '<div$1><b><font size="3">' ) ;
		html = html.replace( /<H5([^>]*)>/gi, '<div$1><b><font size="2">' ) ;
		html = html.replace( /<H6([^>]*)>/gi, '<div$1><b><font size="1">' ) ;

		html = html.replace( /<\/H\d>/gi, '<\/font><\/b><\/div>' ) ;

		// Transform <P> to <DIV>
		var re = new RegExp( '(<P)([^>]*>.*?)(<\/P>)', 'gi' ) ;	// Different because of a IE 5.0 error
		html = html.replace( re, '<div$2<\/div>' ) ;

		// Remove empty tags (three times, just to be sure).
		// This also removes any empty anchor
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
	}

	return html ;
}

CKSource

Login / Create Account

Page Tools

CleanWordKeepsStructure

CleanWordKeepsStructure