Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/sax.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ var ENTITY_REG = /&#?\w+;?/g;
function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
var isHTML = isHTMLMimeType(domBuilder.mimeType);
if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
errorHandler.warning('Unicode replacement character detected, source encoding issues?');
}

function fixedFromCharCode(code) {
Expand Down
26 changes: 13 additions & 13 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"np": "8.0.4",
"prettier": "3.3.3",
"rxjs": "7.8.1",
"xmltest": "2.0.1",
"xmltest": "2.0.2",
"yauzl": "3.2.0"
},
"bugs": {
Expand Down
40 changes: 40 additions & 0 deletions test/errors/__snapshots__/reported-levels.test.js.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,45 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`Encoding_ReplacementCharacter with mimeType text/html should be reported 1`] = `
[
[
"warning",
"Unicode replacement character detected, source encoding issues?",
{
"columnNumber": 6,
"lineNumber": 1,
},
],
]
`;

exports[`Encoding_ReplacementCharacter with mimeType text/html should escalate Error thrown in onError to ParseError 1`] = `
[
"warning: Unicode replacement character detected, source encoding issues?
at warning (lib/sax.js:#0)",
]
`;

exports[`Encoding_ReplacementCharacter with mimeType text/xml should be reported 1`] = `
[
[
"warning",
"Unicode replacement character detected, source encoding issues?",
{
"columnNumber": 6,
"lineNumber": 1,
},
],
]
`;

exports[`Encoding_ReplacementCharacter with mimeType text/xml should escalate Error thrown in onError to ParseError 1`] = `
[
"warning: Unicode replacement character detected, source encoding issues?
at warning (lib/sax.js:#0)",
]
`;

exports[`SYNTAX_AttributeMissingEndingQuote with mimeType text/html should be reported 1`] = `
[
[
Expand Down
16 changes: 13 additions & 3 deletions test/errors/reported.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,20 @@ const skippedInHtml = true;
* to call methods on `errorHandler`.
*/
const REPORTED = {
/**
* There are well-formed documents containing the unicode replacement character,
* e.g. https://en.wikipedia.org/wiki/Mojibake
* see https://github.com/xmldom/xmldom/issues/790#issuecomment-2493975063
* However, reading a file in a different encoding than the one it was written with
* also leads to these characters being present.
* That is why this is reported once at the beginning,
* before parsing any content.
* Use `onWarningStopParsing` to prevent parsing documents containing these characters.
*/
Encoding_ReplacementCharacter: {
source: '\ufffd',
level: 'fatalError',
match: (msg) => /Unicode replacement character/.test(msg),
source: '<doc>\ufffd</doc>',
level: 'warning',
match: (msg) => /unicode replacement character/i.test(msg),
},
/**
* Well-formedness constraint: Element Type Match
Expand Down
48 changes: 45 additions & 3 deletions test/xmltest/__snapshots__/not-wf.test.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1176,11 +1176,53 @@ exports[`xmltest/not-wellformed standalone should match 167.xml with snapshot: r
}
`;

exports[`xmltest/not-wellformed standalone should match 168.xml with snapshot: caught 1`] = `"Unicode replacement character detected, source encoding issues?"`;
exports[`xmltest/not-wellformed standalone should match 168.xml with snapshot: reported 1`] = `
{
"actual": "<doc>���</doc>",
"errors": [
[
"warning",
"Unicode replacement character detected, source encoding issues?",
{
"columnNumber": 6,
"lineNumber": 1,
},
],
],
}
`;

exports[`xmltest/not-wellformed standalone should match 169.xml with snapshot: caught 1`] = `"Unicode replacement character detected, source encoding issues?"`;
exports[`xmltest/not-wellformed standalone should match 169.xml with snapshot: reported 1`] = `
{
"actual": "<doc>���</doc>",
"errors": [
[
"warning",
"Unicode replacement character detected, source encoding issues?",
{
"columnNumber": 6,
"lineNumber": 1,
},
],
],
}
`;

exports[`xmltest/not-wellformed standalone should match 170.xml with snapshot: caught 1`] = `"Unicode replacement character detected, source encoding issues?"`;
exports[`xmltest/not-wellformed standalone should match 170.xml with snapshot: reported 1`] = `
{
"actual": "<doc>����</doc>",
"errors": [
[
"warning",
"Unicode replacement character detected, source encoding issues?",
{
"columnNumber": 6,
"lineNumber": 1,
},
],
],
}
`;

exports[`xmltest/not-wellformed standalone should match 171.xml with snapshot: caught 1`] = `"comment is not well-formed at position 0"`;

Expand Down
57 changes: 48 additions & 9 deletions test/xmltest/__snapshots__/valid.test.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -535,24 +535,63 @@ exports[`xmltest/valid standalone should match 048.xml with snapshot 1`] = `
}
`;

exports[`xmltest/valid standalone should match 049.xml with snapshot: caught 1`] = `
exports[`xmltest/valid standalone should match 049.xml with snapshot 1`] = `
{
"error": [ParseError: Unicode replacement character detected, source encoding issues?],
"expected": "<doc>£</doc>",
"actual": "<!DOCTYPE doc [
<!ELEMENT doc (#PCDATA)>
]>
<doc>£</doc>",
"errors": [
[
"error",
"Unexpected content outside root element: ''",
{
"columnNumber": 6,
"lineNumber": 4,
},
],
],
"expected": "搼捯숾㲣搯捯",
}
`;

exports[`xmltest/valid standalone should match 050.xml with snapshot: caught 1`] = `
exports[`xmltest/valid standalone should match 050.xml with snapshot 1`] = `
{
"error": [ParseError: Unicode replacement character detected, source encoding issues?],
"expected": "<doc>เจมส์</doc>",
"actual": "<!DOCTYPE doc [
<!ELEMENT doc (#PCDATA)>
]>
<doc>เจมส์</doc>",
"errors": [
[
"error",
"Unexpected content outside root element: ''",
{
"columnNumber": 6,
"lineNumber": 4,
},
],
],
"expected": "搼捯肹룠ꆸ룠貹⼼潤㹣",
}
`;

exports[`xmltest/valid standalone should match 051.xml with snapshot: caught 1`] = `
exports[`xmltest/valid standalone should match 051.xml with snapshot 1`] = `
{
"error": [ParseError: Unicode replacement character detected, source encoding issues?],
"expected": "<เจมส์></เจมส์>",
"actual": "<!DOCTYPE เจมส์ [
<!ELEMENT เจมส์ (#PCDATA)>
]>
<เจมส์/>",
"errors": [
[
"error",
"Unexpected content outside root element: ''",
{
"columnNumber": 1,
"lineNumber": 4,
},
],
],
"expected": "肹룠ꆸ룠貹㰾肹룠ꆸ룠貹",
}
`;

Expand Down