2021-08-20 13:33:39 +02:00
var conventions = require ( "./conventions" ) ;
2022-02-04 19:19:19 +01:00
var dom = require ( './dom' )
2021-08-20 13:33:39 +02:00
var entities = require ( './entities' ) ;
2022-02-04 19:19:19 +01:00
var sax = require ( './sax' ) ;
var DOMImplementation = dom . DOMImplementation ;
2021-08-20 13:33:39 +02:00
var NAMESPACE = conventions . NAMESPACE ;
2022-02-04 19:19:19 +01:00
var ParseError = sax . ParseError ;
var XMLReader = sax . XMLReader ;
/**
 * Normalizes line endings according to https://www.w3.org/TR/xml11/#sec-line-ends:
 *
 * > XML parsed entities are often stored in computer files which,
 * > for editing convenience, are organized into lines.
 * > These lines are typically separated by some combination
 * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
 * >
 * > To simplify the tasks of applications, the XML processor must behave
 * > as if it normalized all line breaks in external parsed entities (including the document entity)
 * > on input, before parsing, by translating all of the following to a single #xA character:
 * >
 * > 1. the two-character sequence #xD #xA
 * > 2. the two-character sequence #xD #x85
 * > 3. the single character #x85
 * > 4. the single character #x2028
 * > 5. any #xD character that is not immediately followed by #xA or #x85.
 *
 * @param {string} input
 * @returns {string}
 */
function normalizeLineEndings(input) {
	// Pass 1: collapse the two-character sequences (CR LF and CR NEL) first,
	// so that pass 2 cannot turn one of them into two line feeds.
	var withoutPairs = input.replace(/\r[\n\u0085]/g, '\n');
	// Pass 2: every remaining lone CR, NEL (U+0085) or LINE SEPARATOR (U+2028).
	var normalized = withoutPairs.replace(/[\r\u0085\u2028]/g, '\n');
	return normalized;
}
/**
 * @typedef Locator
 * @property {number} [columnNumber]
 * @property {number} [lineNumber]
 */
/**
 * @typedef DOMParserOptions
 * @property {DOMHandler} [domBuilder]
 * @property {Function} [errorHandler]
 * @property {(string) => string} [normalizeLineEndings] Used to replace line endings before parsing;
 * 						defaults to `normalizeLineEndings`.
 * @property {Locator} [locator]
 * @property {Record<string, string>} [xmlns]
 *
 * @see normalizeLineEndings
 */
/**
 * The DOMParser interface provides the ability to parse XML or HTML source code
 * from a string into a DOM `Document`.
 *
 * _xmldom differs from the spec in that it allows an `options` parameter
 * to override the default behavior._
 *
 * @param {DOMParserOptions} [options]
 * @constructor
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
 * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 */
2020-02-22 14:45:20 +01:00
function DOMParser(options) {
	// When no options are supplied, fall back to a configuration that only
	// carries a fresh, empty locator object.
	var effectiveOptions = options ? options : { locator: {} };
	this.options = effectiveOptions;
}
/**
 * Parses `source` and returns the document produced by the DOM builder.
 *
 * The mime type only decides between HTML and XML mode: HTML mode uses the
 * HTML entity map and binds the default namespace to the XHTML namespace.
 *
 * If `source` is empty or not a string, `'invalid doc source'` is reported via
 * the error handler's `error` method and whatever the builder currently holds
 * in `doc` (possibly `undefined`) is returned.
 *
 * @param {string} source the markup to parse
 * @param {string} [mimeType] e.g. `text/xml` or `text/html`
 * @returns {Document | undefined} `domBuilder.doc`
 */
DOMParser.prototype.parseFromString = function (source, mimeType) {
	var options = this.options;
	// Named `reader` so that it does not shadow the module-level `sax` import.
	var reader = new XMLReader();
	var domBuilder = options.domBuilder || new DOMHandler(); // contentHandler and LexicalHandler
	var errorHandler = options.errorHandler;
	var locator = options.locator;
	var isHTML = /\/x?html?$/.test(mimeType); // matches e.g. `text/html`
	var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES;

	// Work on a copy of the configured namespace map: the defaults added below
	// must not leak into `options.xmlns`, otherwise parsing HTML once would
	// pin the XHTML default namespace for every later parse with this parser.
	var defaultNSMap = {};
	var configuredNSMap = options.xmlns;
	if (configuredNSMap) {
		for (var prefix in configuredNSMap) {
			if (Object.prototype.hasOwnProperty.call(configuredNSMap, prefix)) {
				defaultNSMap[prefix] = configuredNSMap[prefix];
			}
		}
	}

	if (locator) {
		domBuilder.setDocumentLocator(locator);
	}

	reader.errorHandler = buildErrorHandler(errorHandler, domBuilder, locator);
	reader.domBuilder = domBuilder;
	if (isHTML) {
		defaultNSMap[''] = NAMESPACE.HTML;
	}
	defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;

	var normalize = options.normalizeLineEndings || normalizeLineEndings;
	if (source && typeof source === 'string') {
		reader.parse(normalize(source), defaultNSMap, entityMap);
	} else {
		reader.errorHandler.error('invalid doc source');
	}
	return domBuilder.doc;
};
/**
 * Builds the `{warning, error, fatalError}` object that is attached to the SAX reader.
 *
 * - Without `errorImpl`, a `domBuilder` that is a `DOMHandler` is returned as is;
 *   any other `domBuilder` is used as the implementation.
 * - An implementation may be an object with `warning`/`error`/`fatalError` methods
 *   (invoked with the implementation as `this`), a callback `(level, msg)`
 *   (when its declared arity is 2) or a callback `(msg)`.
 * - Levels without an implementation become no-ops.
 *
 * Every message is prefixed with `[xmldom <level>]\t` and suffixed with the
 * position rendered by `_locator`.
 *
 * @param {Function | Object} [errorImpl]
 * @param {Object} domBuilder
 * @param {Locator} [locator]
 * @returns {Object} an object providing `warning`, `error` and `fatalError`
 */
function buildErrorHandler(errorImpl, domBuilder, locator) {
	if (!errorImpl) {
		if (domBuilder instanceof DOMHandler) {
			return domBuilder;
		}
		errorImpl = domBuilder;
	}
	var errorHandler = {};
	var isCallback = errorImpl instanceof Function;
	locator = locator || {};

	function build(key) {
		var method = errorImpl[key];
		var report;
		if (method) {
			// Keep the receiver: handler objects (e.g. a custom domBuilder) usually
			// rely on `this` inside their `warning`/`error`/`fatalError` methods.
			report = function (msg) {
				method.call(errorImpl, msg);
			};
		} else if (isCallback) {
			report =
				errorImpl.length === 2
					? function (msg) {
							errorImpl(key, msg);
						}
					: errorImpl;
		}
		errorHandler[key] = report
			? function (msg) {
					report('[xmldom ' + key + ']\t' + msg + _locator(locator));
				}
			: function () {};
	}

	build('warning');
	build('error');
	build('fatalError');
	return errorHandler;
}
//console.log('#\n\n\n\n\n\n\n####')
/**
 * +ContentHandler+ErrorHandler
 * +LexicalHandler+EntityResolver2
 * -DeclHandler-DTDHandler
 *
 * DefaultHandler: EntityResolver, DTDHandler, ContentHandler, ErrorHandler
 * DefaultHandler2: DefaultHandler, LexicalHandler, DeclHandler, EntityResolver2
 * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html
 */
function DOMHandler() {
	// Flag consulted by `characters()` to decide between a CDATA section and a
	// text node; `startCDATA()` / `endCDATA()` switch it on and off.
	var insideCDATASection = false;
	this.cdata = insideCDATASection;
}
/**
 * Copies the current line and column of `locator` onto `node`.
 *
 * @param {Locator} locator source of `lineNumber` / `columnNumber`
 * @param {Object} node receives `lineNumber` / `columnNumber`
 */
function position(locator, node) {
	var line = locator.lineNumber;
	node.lineNumber = line;
	var column = locator.columnNumber;
	node.columnNumber = column;
}
/**
 * SAX callbacks that build the DOM tree. The document under construction is
 * kept in `this.doc`, the element currently open in `this.currentElement`.
 * When a locator was set, every created node is stamped with the locator's
 * current line and column.
 *
 * @see org.xml.sax.ContentHandler#startDocument
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
 */
DOMHandler.prototype = {
	startDocument: function () {
		this.doc = new DOMImplementation().createDocument(null, null, null);
		if (this.locator) {
			this.doc.documentURI = this.locator.systemId;
		}
	},
	startElement: function (namespaceURI, localName, qName, attrs) {
		var doc = this.doc;
		var el = doc.createElementNS(namespaceURI, qName || localName);
		var len = attrs.length;
		appendElement(this, el);
		this.currentElement = el;

		this.locator && position(this.locator, el);
		for (var i = 0; i < len; i++) {
			// Attribute-specific names: do not overwrite the element's own
			// `namespaceURI` / `qName` parameters.
			var attrNamespaceURI = attrs.getURI(i);
			var value = attrs.getValue(i);
			var attrQName = attrs.getQName(i);
			var attr = doc.createAttributeNS(attrNamespaceURI, attrQName);
			this.locator && position(attrs.getLocator(i), attr);
			attr.value = attr.nodeValue = value;
			el.setAttributeNode(attr);
		}
	},
	endElement: function (namespaceURI, localName, qName) {
		// Closing an element makes its parent the current insertion point again.
		this.currentElement = this.currentElement.parentNode;
	},
	startPrefixMapping: function (prefix, uri) {},
	endPrefixMapping: function (prefix) {},
	processingInstruction: function (target, data) {
		var ins = this.doc.createProcessingInstruction(target, data);
		this.locator && position(this.locator, ins);
		appendElement(this, ins);
	},
	ignorableWhitespace: function (ch, start, length) {},
	characters: function (chars, start, length) {
		chars = _toString.apply(this, arguments);
		if (chars) {
			var charNode = this.cdata
				? this.doc.createCDATASection(chars)
				: this.doc.createTextNode(chars);
			if (this.currentElement) {
				this.currentElement.appendChild(charNode);
			} else if (/^\s*$/.test(chars)) {
				// Outside of any element only whitespace is attached to the document;
				// any other character data is dropped.
				this.doc.appendChild(charNode);
			}
			this.locator && position(this.locator, charNode);
		}
	},
	skippedEntity: function (name) {},
	endDocument: function () {
		this.doc.normalize();
	},
	setDocumentLocator: function (locator) {
		this.locator = locator;
		if (locator) {
			locator.lineNumber = 0;
		}
	},
	//LexicalHandler
	comment: function (chars, start, length) {
		chars = _toString.apply(this, arguments);
		var comm = this.doc.createComment(chars);
		this.locator && position(this.locator, comm);
		appendElement(this, comm);
	},

	startCDATA: function () {
		//used in characters() methods
		this.cdata = true;
	},
	endCDATA: function () {
		this.cdata = false;
	},

	startDTD: function (name, publicId, systemId) {
		var impl = this.doc.implementation;
		if (impl && impl.createDocumentType) {
			var dt = impl.createDocumentType(name, publicId, systemId);
			this.locator && position(this.locator, dt);
			appendElement(this, dt);
			this.doc.doctype = dt;
		}
	},
	/**
	 * @see org.xml.sax.ErrorHandler
	 * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
	 */
	warning: function (error) {
		console.warn('[xmldom warning]\t' + error, _locator(this.locator));
	},
	error: function (error) {
		console.error('[xmldom error]\t' + error, _locator(this.locator));
	},
	fatalError: function (error) {
		throw new ParseError(error, this.locator);
	},
};
/**
 * Renders a locator as the position suffix used in error messages.
 *
 * @param {Locator} [l]
 * @returns {string | undefined} `undefined` when no locator is given
 */
function _locator(l) {
	if (!l) {
		return;
	}
	var source = l.systemId || '';
	var coordinates = '[line:' + l.lineNumber + ',col:' + l.columnNumber + ']';
	return '\n@' + source + '#' + coordinates;
}
/**
 * Extracts `length` characters starting at `start` from `chars`.
 *
 * @param {string | Object} chars a JS string, or another character container
 * @param {number} start
 * @param {number} length
 * @returns {string | Object}
 */
function _toString(chars, start, length) {
	if (typeof chars === 'string') {
		return chars.substr(start, length);
	}
	// Java SAX connected with xmldom on Rhino (what about: "? && !(chars instanceof String)")
	var needsJavaSlice = chars.length >= start + length || start;
	if (!needsJavaSlice) {
		return chars;
	}
	return new java.lang.String(chars, start, length) + '';
}
/*
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
 * used method of org.xml.sax.ext.LexicalHandler:
 *  #comment(chars, start, length)
 *  #startCDATA()
 *  #endCDATA()
 *  #startDTD(name, publicId, systemId)
 *
 *
 * IGNORED method of org.xml.sax.ext.LexicalHandler:
 *  #endDTD()
 *  #startEntity(name)
 *  #endEntity(name)
 *
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
 * IGNORED method of org.xml.sax.ext.DeclHandler
 * 	#attributeDecl(eName, aName, type, mode, value)
 *  #elementDecl(name, model)
 *  #externalEntityDecl(name, publicId, systemId)
 *  #internalEntityDecl(name, value)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
 * IGNORED method of org.xml.sax.EntityResolver2
 *  #resolveEntity(String name, String publicId, String baseURI, String systemId)
 *  #resolveEntity(publicId, systemId)
 *  #getExternalSubset(name, baseURI)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
 * IGNORED method of org.xml.sax.DTDHandler
 *  #notationDecl(name, publicId, systemId) {};
 *  #unparsedEntityDecl(name, publicId, systemId, notationName) {};
 */
// Install a do-nothing implementation (returning `null`) on `DOMHandler.prototype`
// for every SAX callback that this handler ignores.
"endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl"
	.split(',')
	.forEach(function (methodName) {
		DOMHandler.prototype[methodName] = function () {
			return null;
		};
	});
/* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */
/**
 * Appends `node` to the handler's currently open element or, when no element
 * is open, directly to the handler's document.
 */
function appendElement(handler, node) {
	var parent = handler.currentElement ? handler.currentElement : handler.doc;
	parent.appendChild(node);
} // appendChild and setAttributeNS are performance critical
2021-08-20 13:33:39 +02:00
exports . _ _DOMHandler = DOMHandler ;
2022-02-04 19:19:19 +01:00
exports . normalizeLineEndings = normalizeLineEndings ;
exports . DOMParser = DOMParser ;