<script type="text/javascript">
/**
 * Cleans up HTML produced by Microsoft Word by running a fixed sequence of
 * regular-expression replacements over the markup.
 *
 * Removed: <o:p> elements, mso-* and a few other inline style declarations,
 * font face / font-family settings, class, style, lang, language,
 * onmouseover and onmouseout attributes, empty or attribute-less <span> and
 * <font> wrappers, XML declarations, namespace-prefixed tags, comments and
 * empty headings. Heading open tags are rewritten to a bare lowercase <hN>.
 *
 * The order of the replacements matters: later patterns rely on attributes
 * having already been stripped by earlier ones.
 *
 * @param {string} html Markup to clean; null/undefined is treated as ''.
 * @return {string} The cleaned markup.
 */
function CleanWord( html )
{
  // Declared locally so the function does not leak implicit globals.
  var bIgnoreFont = true ;
  var bRemoveStyles = true ;

  // Tolerate missing or non-string input instead of throwing on .replace().
  html = ( html == null ) ? '' : String( html ) ;

  // Drop empty <o:p> elements, collapse non-empty ones to a single space.
  // NOTE(review): the literal space used here (and in the two "space only"
  // patterns further down) may originally have been the &nbsp; entity - confirm.
  html = html.replace( /<o:p>\s*<\/o:p>/g, '' ) ;
  html = html.replace( /<o:p>.*?<\/o:p>/g, ' ' ) ;

  // Remove mso-xxx styles (name and value are both one-or-more characters).
  html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ;

  // Remove margin styles.
  html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ;
  html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;

  html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ;
  html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

  // These three only match when the declaration is the last one before the
  // closing quote of the attribute value.
  html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;
  html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;
  html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

  html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ;
  html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ;

  // Remove FONT face attributes.
  if ( bIgnoreFont )
  {
    html = html.replace( /\s*face="[^"]*"/gi, '' ) ;
    html = html.replace( /\s*face=[^ >]*/gi, '' ) ;

    html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ;
  }

  // Remove Class attributes.
  html = html.replace( /<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3" ) ;

  // Remove styles.
  if ( bRemoveStyles )
    html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

  // Remove empty styles.
  html = html.replace( /\s*style="\s*"/gi, '' ) ;

  // Spans holding only a space become a space; completely empty spans vanish.
  html = html.replace( /<SPAN\s*[^>]*>\s* \s*<\/SPAN>/gi, ' ' ) ;
  html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

  // Remove Lang attributes.
  html = html.replace( /<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3" ) ;

  // Unwrap <span> and <font> elements that are left without any attribute.
  html = html.replace( /<SPAN\s*>(.*?)<\/SPAN>/gi, '$1' ) ;
  html = html.replace( /<FONT\s*>(.*?)<\/FONT>/gi, '$1' ) ;

  // Remove XML elements and declarations ("<?xml ...>", optionally written
  // with an escaping backslash as "<\?xml ...>").
  html = html.replace( /<\\?\?xml[^>]*>/gi, '' ) ;

  // Remove tags with XML namespace prefixes, e.g. <o:p> or </st1:city>.
  html = html.replace( /<\/?\w+:[^>]*>/gi, '' ) ;

  // Remove comments [SF BUG-1481861]. [\s\S] is used instead of "." so that
  // comments spanning several lines are matched as well.
  html = html.replace( /<\!--[\s\S]*?-->/g, '' ) ;

  html = html.replace( /<(U|I|STRIKE)> <\/\1>/g, ' ' ) ;

  // Remove empty headings.
  html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

  // Remove "display:none" tags.
  html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none(.*?)<\/\1>/ig, '' ) ;

  // Remove language attributes.
  html = html.replace( /<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, "<$1$3" ) ;

  // Remove onmouseover and onmouseout events (from MS Word comments effect).
  html = html.replace( /<(\w[^>]*) onmouseover="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;
  html = html.replace( /<(\w[^>]*) onmouseout="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

  // The original <Hn> tag sent from Word is something like this:
  // <Hn style="margin-top:0px;margin-bottom:0px">. Only the opening tag is
  // rewritten; closing tags are left as they are.
  html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ;

  // Word likes to insert extra <font> tags when using MSIE (weird).
  html = html.replace( /<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>/gi, '<$1>$2<\/$1>' ) ;
  html = html.replace( /<(H\d)><EM>(.*?)<\/EM><\/\1>/gi, '<$1>$2<\/$1>' ) ;

  // Final comment sweep; non-greedy so text between two comments is kept.
  html = html.replace( /<\!--[\s\S]*?-->/g, '' ) ;

  return html ;
}
</script>
|