define("dijit/_editor/html", [
	"dojo/_base/array", // array.forEach
	"dojo/_base/lang", // lang.setObject
	"dojo/sniff" // has("ie")
], function(array, lang, has){
10 // HTML serialization utility functions used by editor
// Module object that collects the HTML serialization helpers defined below.
// It has to be created locally: the define() callback only receives
// array, lang and has, so nothing else provides a binding named exports.
var exports = {};

// Register the same object under the dotted name dijit._editor.html.
lang.setObject("dijit._editor.html", exports);
var escape = exports.escapeXml = function(/*String*/ str, /*Boolean?*/ noSingleQuotes){
	// summary:
	//		Adds escape sequences for special characters in XML: `&<>"'`.
	//		Optionally skips escapes for single quotes.
	// str: String
	//		Text to escape.
	// noSingleQuotes: Boolean?
	//		When truthy, single quotes are left untouched.
	// returns: String
	//		The escaped text.

	// The ampersand must be handled first, otherwise the ampersands
	// introduced by the other replacements would be escaped a second time.
	str = str.replace(/&/gm, "&amp;")
		.replace(/</gm, "&lt;")
		.replace(/>/gm, "&gt;")
		.replace(/"/gm, "&quot;");
	if(!noSingleQuotes){
		// Numeric reference rather than the named apos entity, which HTML 4 does not define.
		str = str.replace(/'/gm, "&#39;");
	}
	return str; // String
};
27 exports.getNodeHtml = function(/*DomNode*/ node){
29 // Return string representing HTML for node and it's children
31 exports.getNodeHtmlHelper(node, output);
32 return output.join("");
35 exports.getNodeHtmlHelper = function(/*DomNode*/ node, /*String[]*/ output){
// node: DOM node to serialize; the switch below dispatches on node.nodeType.
// output: array that receives the serialized fragments via push().
// NOTE(review): attr, i, n, v, j, val and match are used below without a visible
// declaration in this excerpt, and several branch keywords appear to be missing - verify against the full file.
37 // Pushes strings into output[] that together represent the HTML for node and its children
38 switch(node.nodeType){
39 case 1: // element node
40 var lName = node.nodeName.toLowerCase();
41 if(!lName || lName.charAt(0) == "/"){
42 // IE does some strange things with malformed HTML input, like
43 // treating a close tag </span> without an open tag <span>, as
44 // a new tag with tagName of /span. Corrupts output HTML, remove
45 // them. Other browsers don't prefix tags that way, so will
49 output.push('<', lName);
51 // store the list of attributes and sort it to have the
52 // attributes appear in the dictionary order
53 var attrarray = [], attrhash = {};
55 if(has("dom
-attributes
-explicit
") || has("dom
-attributes
-specified
-flag
")){
56 // IE8+ and all other browsers.
58 while((attr = node.attributes[i++])){
59 // ignore all attributes starting with _dj which are
60 // internal temporary attributes used by the editor
62 if(n.substr(0,3) !== '_dj' &&
63 (!has("dom
-attributes
-specified
-flag
") || attr.specified) &&
64 !(n in attrhash)){ // workaround repeated attributes bug in IE8 (LinkDialog test)
66 if(n == 'src' || n == 'href'){
67 if(node.getAttribute('_djrealurl')){
68 v = node.getAttribute('_djrealurl');
71 if(has("ie
") === 8 && n === "style
"){
72 v = v.replace("HEIGHT
:", "height
:").replace("WIDTH
:", "width
:");
74 attrarray.push([n,v]);
// Read the serialized tag from outerHTML: input and img nodes are used directly,
// any other node is read through a shallow clone (cloneNode(false)).
80 var clone = /^input$|^img$/i.test(node.nodeName) ? node : node.cloneNode(false);
81 var s = clone.outerHTML;
82 // Split up and manage the attrs via regexp
83 // similar to prettyPrint attr logic.
84 var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi
85 var attrSplit = s.match(rgxp_attrsMatch);
// Keep only the text before the first > so the width and height lookups below search that prefix only.
86 s = s.substr(0, s.indexOf('>'));
87 array.forEach(attrSplit, function(attr){
89 var idx = attr.indexOf("=");
91 var key = attr.substring(0,idx);
92 if(key.substr(0,3) != '_dj
'){
93 if(key == 'src
' || key == 'href
'){
94 if(node.getAttribute('_djrealurl
')){
95 attrarray.push([key,node.getAttribute('_djrealurl
')]);
102 val = node.style.cssText.toLowerCase();
105 val = node.className;
109 // This somehow gets lost on IE for IMG tags and the like
110 // and we have to find it in outerHTML, known IE oddity.
111 match=/width=(\S+)/i.exec(s);
119 // This somehow gets lost on IE for IMG tags and the like
120 // and we have to find it in outerHTML, known IE oddity.
121 match=/height=(\S+)/i.exec(s);
128 val = node.getAttribute(key);
131 attrarray.push([key, val.toString()]);
// Order the [name, value] pairs by attribute name.
138 attrarray.sort(function(a,b){
139 return a[0] < b[0] ? -1 : (a[0] == b[0] ? 0 : 1);
// Emit each pair as an HTML attribute; string values go through escape() with single quotes left unescaped.
142 while((attr = attrarray[j++])){
143 output.push(' ', attr[0], '="',
144 (typeof attr[1] === "string
" ? escape(attr[1], true) : attr[1]), '"');
155 // These should all be singly closed
159 // Browsers handle script tags differently in how you get content,
160 // but innerHTML always seems to work, so insert its content that way
161 // Yes, it's bad to allow script tags
in the editor code
, but some people
162 // seem to want to do it, so we need to at least return them right.
163 // other plugins/filters can strip them.
164 output
.push('>', node
.innerHTML
, '</', lName
, '>');
168 if(node
.hasChildNodes()){
169 exports
.getChildrenHtmlHelper(node
, output
);
171 output
.push('</', lName
, '>');
// Push the node value through escape(), leaving single quotes as they are.
177 output
.push(escape(node
.nodeValue
, true));
// Same escaping, with the result wrapped in HTML comment delimiters.
181 output
.push('<!--', escape(node
.nodeValue
, true), '-->');
// Emit a placeholder HTML comment that records the nodeType and nodeName.
184 output
.push("<!-- Element not recognized - Type: ", node
.nodeType
, " Name: ", node
.nodeName
, "-->");
188 exports
.getChildrenHtml = function(/*DomNode*/ node
){
190 // Returns the html content of a DomNode's children
192 exports
.getChildrenHtmlHelper(node
, output
);
193 return output
.join("");
196 exports
.getChildrenHtmlHelper = function(/*DomNode*/ dom
, /*String[]*/ output
){
198 // Pushes the HTML content of a DomNode's children into output[]
201 var nodes
= dom
["childNodes"] || dom
;
204 // If we have an actual node, we can check parent relationships, which matters on IE
205 // because IE sometimes builds invalid DOMs. If there is no parent, we can't check
206 // and should just process the list and hope for the best.
207 var checkParent
= !has("ie") || nodes
!== dom
;
210 while((node
= nodes
[i
++])){
211 // IE is broken. DOMs are supposed to be a tree, but in the case of malformed HTML, IE generates a graph,
212 // meaning one node ends up with multiple references (multiple parents). This is totally wrong and invalid, but
213 // such is what it is. We have to keep track of and check for this, because otherwise the source output HTML will have dups.
214 // No other browser generates a graph. Leave it to IE to break a fundamental DOM rule. So, we check the parent if we can;
215 // if we can't, there is nothing more we can do other than walk it.
216 if(!checkParent
|| node
.parentNode
== dom
){
217 exports
.getNodeHtmlHelper(node
, output
);