1 /*
  2 Copyright (c) 2003-2012, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5 
  6 (function()
  7 {
  8 	// Base HTML entities.
  9 	var htmlbase = 'nbsp,gt,lt,amp';
 10 
 11 	var entities =
 12 		// Latin-1 Entities
 13 		'quot,iexcl,cent,pound,curren,yen,brvbar,sect,uml,copy,ordf,laquo,' +
 14 		'not,shy,reg,macr,deg,plusmn,sup2,sup3,acute,micro,para,middot,' +
 15 		'cedil,sup1,ordm,raquo,frac14,frac12,frac34,iquest,times,divide,' +
 16 
 17 		// Symbols
 18 		'fnof,bull,hellip,prime,Prime,oline,frasl,weierp,image,real,trade,' +
 19 		'alefsym,larr,uarr,rarr,darr,harr,crarr,lArr,uArr,rArr,dArr,hArr,' +
 20 		'forall,part,exist,empty,nabla,isin,notin,ni,prod,sum,minus,lowast,' +
 21 		'radic,prop,infin,ang,and,or,cap,cup,int,there4,sim,cong,asymp,ne,' +
 22 		'equiv,le,ge,sub,sup,nsub,sube,supe,oplus,otimes,perp,sdot,lceil,' +
 23 		'rceil,lfloor,rfloor,lang,rang,loz,spades,clubs,hearts,diams,' +
 24 
 25 		// Other Special Characters
 26 		'circ,tilde,ensp,emsp,thinsp,zwnj,zwj,lrm,rlm,ndash,mdash,lsquo,' +
 27 		'rsquo,sbquo,ldquo,rdquo,bdquo,dagger,Dagger,permil,lsaquo,rsaquo,' +
 28 		'euro';
 29 
 30 	// Latin Letters Entities
 31 	var latin =
 32 		'Agrave,Aacute,Acirc,Atilde,Auml,Aring,AElig,Ccedil,Egrave,Eacute,' +
 33 		'Ecirc,Euml,Igrave,Iacute,Icirc,Iuml,ETH,Ntilde,Ograve,Oacute,Ocirc,' +
 34 		'Otilde,Ouml,Oslash,Ugrave,Uacute,Ucirc,Uuml,Yacute,THORN,szlig,' +
 35 		'agrave,aacute,acirc,atilde,auml,aring,aelig,ccedil,egrave,eacute,' +
 36 		'ecirc,euml,igrave,iacute,icirc,iuml,eth,ntilde,ograve,oacute,ocirc,' +
 37 		'otilde,ouml,oslash,ugrave,uacute,ucirc,uuml,yacute,thorn,yuml,' +
 38 		'OElig,oelig,Scaron,scaron,Yuml';
 39 
 40 	// Greek Letters Entities.
 41 	var greek =
 42 		'Alpha,Beta,Gamma,Delta,Epsilon,Zeta,Eta,Theta,Iota,Kappa,Lambda,Mu,' +
 43 		'Nu,Xi,Omicron,Pi,Rho,Sigma,Tau,Upsilon,Phi,Chi,Psi,Omega,alpha,' +
 44 		'beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda,mu,nu,xi,' +
 45 		'omicron,pi,rho,sigmaf,sigma,tau,upsilon,phi,chi,psi,omega,thetasym,' +
 46 		'upsih,piv';
 47 
	/**
	 * Creates a mapping table between characters and their entity form from a
	 * list of entity names.
	 *
	 * In the default (forward) direction the table maps each character to its
	 * "&name;" string. In reverse mode only the special entities handled by
	 * this function (nbsp, shy, gt, lt, amp, apos, quot) are mapped, from their
	 * "&name;" string back to the character.
	 *
	 * The returned object also carries a "regex" property holding the pattern
	 * source for the keys: the characters concatenated (ready to be wrapped in
	 * a character class) in forward mode, or the entity strings joined by "|"
	 * in reverse mode.
	 *
	 * @param entities {String} Comma separated list of entity names, without
	 * the "&" prefix and the ";" suffix (numeric forms such as "#39" are
	 * passed through to the browser as they are).
	 * @param reverse {Boolean} Whether to create a reverse map from the entity string form to an actual character.
	 * @returns {Object} The mapping table, including the "regex" property.
	 */
	function buildTable( entities, reverse )
	{
		var table = {},
			regex = [];

		// Entities that the browser's DOM does not transform to the final
		// character automatically.
		var specialTable =
			{
				nbsp	: '\u00A0',		// IE | FF
				shy		: '\u00AD',		// IE
				gt		: '\u003E',		// IE | FF |   --   | Opera
				lt		: '\u003C',		// IE | FF | Safari | Opera
				amp 	: '\u0026',		// ALL
				apos 	: '\u0027',		// IE
				quot 	: '\u0022'		// IE
			};

		// Register the special entities directly and strip them (together with
		// their separator) from the list that is handed to the DOM below.
		entities = entities.replace( /\b(nbsp|shy|gt|lt|amp|apos|quot)(?:,|$)/g, function( match, entity )
			{
				var org = reverse ? '&' + entity + ';' : specialTable[ entity ],
					result = reverse ? specialTable[ entity ] : '&' + entity + ';';

				table[ org ] = result;
				regex.push( org );
				return '';
			});

		// When the last name in the list was a special entity, its removal
		// leaves a dangling comma. Without this, split() would produce an
		// empty name, "&;" would be written to the DOM and bogus entries
		// (e.g. "&" -> "&undefined;") would end up in the table.
		entities = entities.replace( /,$/, '' );

		if ( !reverse && entities )
		{
			// Transforms the entities string into an array.
			entities = entities.split( ',' );

			// Put all entities inside a DOM element, transforming them to their
			// final chars.
			var div = document.createElement( 'div' ),
				chars;
			div.innerHTML = '&' + entities.join( ';&' ) + ';';
			chars = div.innerHTML;
			div = null;

			// Add all chars to the table. The character at position i is paired
			// with the name at position i, so this relies on every name being
			// read back as exactly one character.
			for ( var i = 0 ; i < chars.length ; i++ )
			{
				var charAt = chars.charAt( i );
				table[ charAt ] = '&' + entities[ i ] + ';';

				// The forward pattern is used inside a character class, so the
				// few characters that are special there must be escaped.
				regex.push( charAt.replace( /[\\\]\^\-]/, '\\$&' ) );
			}
		}

		table.regex = regex.join( reverse ? '|' : '' );

		return table;
	}
106 
107 	CKEDITOR.plugins.add( 'entities',
108 	{
109 		afterInit : function( editor )
110 		{
111 			var config = editor.config;
112 
113 			var dataProcessor = editor.dataProcessor,
114 				htmlFilter = dataProcessor && dataProcessor.htmlFilter;
115 
116 			if ( htmlFilter )
117 			{
118 				// Mandatory HTML base entities.
119 				var selectedEntities = [];
120 
121 				if ( config.basicEntities !== false )
122 					selectedEntities.push( htmlbase );
123 
124 				if ( config.entities )
125 				{
126 					if ( selectedEntities.length )
127 						selectedEntities.push( entities );
128 
129 					if ( config.entities_latin )
130 						selectedEntities.push( latin );
131 
132 					if ( config.entities_greek )
133 						selectedEntities.push( greek );
134 
135 					if ( config.entities_additional )
136 						selectedEntities.push( config.entities_additional );
137 				}
138 
139 				var entitiesTable = buildTable( selectedEntities.join( ',' ) );
140 
141 				// Create the Regex used to find entities in the text, leave it matches nothing if entities are empty.
142 				var entitiesRegex = entitiesTable.regex ? '[' + entitiesTable.regex + ']' : 'a^';
143 				delete entitiesTable.regex;
144 
145 				if ( config.entities && config.entities_processNumerical )
146 					entitiesRegex = '[^ -~]|' + entitiesRegex ;
147 
148 				entitiesRegex = new RegExp( entitiesRegex, 'g' );
149 
150 				function getEntity( character )
151 				{
152 					return config.entities_processNumerical == 'force' || !entitiesTable[ character ] ?
153 						   '&#' + character.charCodeAt(0) + ';'
154 							: entitiesTable[ character ];
155 				}
156 
157 				// Decode entities that the browsers has transformed
158 				// at first place.
159 				var baseEntitiesTable = buildTable( [ htmlbase, 'shy' ].join( ',' ) , true ),
160 					baseEntitiesRegex = new RegExp( baseEntitiesTable.regex, 'g' );
161 
162 				function getChar( character )
163 				{
164 					return baseEntitiesTable[ character ];
165 				}
166 
167 				htmlFilter.addRules(
168 					{
169 						text : function( text )
170 						{
171 							return text.replace( baseEntitiesRegex, getChar )
172 									.replace( entitiesRegex, getEntity );
173 						}
174 					});
175 			}
176 		}
177 	});
178 })();
179 
/**
 * Whether to escape basic HTML entities in the document, including:
 * <ul>
 * <li><code>nbsp</code></li>
 * <li><code>gt</code></li>
 * <li><code>lt</code></li>
 * <li><code>amp</code></li>
 * </ul>
 * <strong>Note:</strong> This setting should not be changed unless the editor
 * outputs a non-HTML data format, like BBCode.
 * @name CKEDITOR.config.basicEntities
 * @type Boolean
 * @default <code>true</code>
 * @example
 * config.basicEntities = false;
 */
CKEDITOR.config.basicEntities = true;
195 
/**
 * Whether to use HTML entities in the output.
 * When disabled, the <code>entities_latin</code>, <code>entities_greek</code>,
 * <code>entities_additional</code> and <code>entities_processNumerical</code>
 * settings are not taken into account.
 * @name CKEDITOR.config.entities
 * @type Boolean
 * @default <code>true</code>
 * @example
 * config.entities = false;
 */
CKEDITOR.config.entities = true;
205 
/**
 * Whether to convert some Latin characters (Latin alphabet No. 1, ISO 8859-1)
 * to HTML entities. The list of entities can be found in the
 * <a href="http://www.w3.org/TR/html4/sgml/entities.html#h-24.2.1">W3C HTML 4.01 Specification, section 24.2.1</a>.
 * This setting is only taken into account when <code>config.entities</code>
 * is enabled.
 * @name CKEDITOR.config.entities_latin
 * @type Boolean
 * @default <code>true</code>
 * @example
 * config.entities_latin = false;
 */
CKEDITOR.config.entities_latin = true;
217 
/**
 * Whether to convert some symbols, mathematical symbols, and Greek letters to
 * HTML entities. This may be more relevant for users typing text written in Greek.
 * The list of entities can be found in the
 * <a href="http://www.w3.org/TR/html4/sgml/entities.html#h-24.3.1">W3C HTML 4.01 Specification, section 24.3.1</a>.
 * This setting is only taken into account when <code>config.entities</code>
 * is enabled.
 * NOTE(review): in this plugin the flag gates only the Greek letter list; the
 * symbol entities are part of the general list - confirm the wording above.
 * @name CKEDITOR.config.entities_greek
 * @type Boolean
 * @default <code>true</code>
 * @example
 * config.entities_greek = false;
 */
CKEDITOR.config.entities_greek = true;
230 
/**
 * Whether to convert all remaining characters not included in the ASCII
 * character table to their decimal numeric HTML entity representation.
 * When set to <code>'force'</code>, all converted characters are output in
 * this numeric format, including those that have a named entity.
 * For example, the phrase "This is Chinese: 汉语." is output
 * as "This is Chinese: &#27721;&#35821;."
 * @name CKEDITOR.config.entities_processNumerical
 * @type Boolean|String
 * @default <code>false</code>
 * @example
 * config.entities_processNumerical = true;
 * config.entities_processNumerical = 'force';		// Converts from "&nbsp;" into "&#160;";
 */
244 
/**
 * A comma separated list of additional entities to be used. Entity names
 * or numbers must be used in a form that excludes the "&" prefix and the ";" ending.
 * This setting is only taken into account when <code>config.entities</code>
 * is enabled.
 * @name CKEDITOR.config.entities_additional
 * @default <code>'#39'</code> (the single quote (') character)
 * @type String
 * @example
 * config.entities_additional = '#1049';		// Adds Cyrillic capital letter Short I (Й).
 */
CKEDITOR.config.entities_additional = '#39';
255