CleanWordKeepsStructure
This setting controls the behavior of the Paste from Word dialog. Its default value is false, in which case the clean-up routine modifies the pasted content so that it looks the way it did in Word. If it is set to true, the routine instead prefers to keep the HTML structure of the data rather than altering it to preserve the Word appearance. Example:
FCKConfig.CleanWordKeepsStructure = true ;
Enabling this setting keeps the document properly structured, as it was created in Word, and also preserves anchors; you can then use CSS to make it look like it did in Word.
Additionally, you can supply your own clean-up function by adding it to the FCK object:
Define an FCK.CustomCleanWord function that accepts three parameters and returns the cleaned-up string. Here is an example based on the default function that is called from the Paste dialog (you should add your own function through, for example, a plugin):
// Default clean-up routine called from the PasteFromWord dialog (fck_paste.html).
//
// Input:
//   oNode         - a DOM node that contains the raw paste from the clipboard;
//                   only its innerHTML property is read.
//   bIgnoreFont   - boolean set in the dialog; when true, FONT face attributes
//                   and FONT-FAMILY style declarations are removed.
//   bRemoveStyles - boolean set in the dialog; when true, style="..." attributes
//                   are removed entirely.
// Output: the cleaned string.
//
// Also reads the global FCKConfig.CleanWordKeepsStructure:
//   true  - headings are kept as plain <hN> elements, <P> elements are left
//           alone and empty elements (e.g. anchors) are not stripped;
//   false - headings become <div><b><font size="..."> blocks, <P> becomes
//           <div> and empty elements are stripped.
//
// The clean-up is purely regex based and is not an HTML sanitizer: the only
// event handler attributes it strips are onmouseover and onmouseout.
function CleanWord( oNode, bIgnoreFont, bRemoveStyles )
{
	var html = oNode.innerHTML ;

	// Empty <o:p> elements vanish; non-empty ones collapse to a single &nbsp;.
	html = html.replace( /<o:p>\s*<\/o:p>/g, '' ) ;
	html = html.replace( /<o:p>.*?<\/o:p>/g, '&nbsp;' ) ;

	// Remove mso-xxx styles.
	html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ;

	// Remove margin styles.
	// Each declaration is handled twice: once when followed by ';' and once
	// when it is the last declaration before the closing quote.
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ;
	html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, '"' ) ;

	html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ;
	html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, '"' ) ;

	html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, '"' ) ;

	html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, '"' ) ;

	html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, '"' ) ;

	html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ;
	html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ;

	// Remove FONT face attributes.
	if ( bIgnoreFont )
	{
		html = html.replace( /\s*face="[^"]*"/gi, '' ) ;
		html = html.replace( /\s*face=[^ >]*/gi, '' ) ;

		html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ;
	}

	// Remove Class attributes
	html = html.replace( /<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, '<$1$3' ) ;

	// Remove styles.
	if ( bRemoveStyles )
		html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, '<$1$3' ) ;

	// Remove empty styles (possibly left behind by the removals above).
	html = html.replace( /\s*style="\s*"/gi, '' ) ;

	// A SPAN holding only a non-breaking space is reduced to that space;
	// a completely empty SPAN is dropped.
	html = html.replace( /<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;' ) ;

	html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

	// Remove Lang attributes
	html = html.replace( /<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, '<$1$3' ) ;

	// Unwrap SPAN and FONT elements that no longer carry any attribute.
	html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ;

	html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ;

	// Remove XML elements and declarations
	html = html.replace( /<\\?\?xml[^>]*>/gi, '' ) ;

	// Remove Tags with XML namespace declarations: <o:p><\/o:p>
	html = html.replace( /<\/?\w+:[^>]*>/gi, '' ) ;

	// Remove comments [SF BUG-1481861].
	html = html.replace( /<\!--.*?-->/g, '' ) ;

	html = html.replace( /<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;' ) ;

	html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

	// Remove "display:none" tags.
	html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none(.*?)<\/\1>/ig, '' ) ;

	// Remove language tags
	html = html.replace( /<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, '<$1$3' ) ;

	// Remove onmouseover and onmouseout events (from MS Word comments effect)
	html = html.replace( /<(\w[^>]*) onmouseover="([^\"]*)"([^>]*)/gi, '<$1$3' ) ;
	html = html.replace( /<(\w[^>]*) onmouseout="([^\"]*)"([^>]*)/gi, '<$1$3' ) ;

	if ( FCKConfig.CleanWordKeepsStructure )
	{
		// The original <Hn> tag sent from Word is something like this:
		// <Hn style="margin-top:0px;margin-bottom:0px">
		// All attributes are dropped and the tag name is lowercased.
		html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ;

		// Word likes to insert extra <font> tags, when using MSIE. (Weird).
		// A <FONT> or <EM> that wraps the whole heading content is unwrapped.
		html = html.replace( /<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>/gi, '<$1>$2<\/$1>' ) ;
		html = html.replace( /<(H\d)><EM>(.*?)<\/EM><\/\1>/gi, '<$1>$2<\/$1>' ) ;
	}
	else
	{
		// Headings become bold <div> blocks. Every opener must emit both <b>
		// and <font> so that it balances the shared closing replacement below.
		// NOTE(review): sizes 6..1 follow the stock FCKeditor routine - confirm
		// against the editor version you are targeting.
		html = html.replace( /<H1([^>]*)>/gi, '<div$1><b><font size="6">' ) ;
		html = html.replace( /<H2([^>]*)>/gi, '<div$1><b><font size="5">' ) ;
		html = html.replace( /<H3([^>]*)>/gi, '<div$1><b><font size="4">' ) ;
		html = html.replace( /<H4([^>]*)>/gi, '<div$1><b><font size="3">' ) ;
		html = html.replace( /<H5([^>]*)>/gi, '<div$1><b><font size="2">' ) ;
		html = html.replace( /<H6([^>]*)>/gi, '<div$1><b><font size="1">' ) ;

		html = html.replace( /<\/H\d>/gi, '<\/font><\/b><\/div>' ) ;

		// Transform <P> to <DIV>
		var re = new RegExp( '(<P)([^>]*>.*?)(<\/P>)', 'gi' ) ;	// Different because of a IE 5.0 error
		html = html.replace( re, '<div$2<\/div>' ) ;

		// Remove empty tags (three times, just to be sure).
		// This also removes any empty anchor
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
		html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
	}

	return html ;
}