46std::unique_ptr<XmlElement> parseXML (
const File& file)
48 return XmlDocument (file).getDocumentElement();
51std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const String& textToParse, StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
56std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const File& file, StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
71namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (juce_wchar
c)
noexcept
76 ||
c ==
'_' ||
c ==
'-' ||
c ==
':' ||
c ==
'.';
79 static bool isIdentifierChar (juce_wchar c)
noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
83 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
101 static String::CharPointerType findEndOfToken (String::CharPointerType p)
noexcept
103 while (isIdentifierChar (*p))
112 if (originalText.
isEmpty() && inputSource !=
nullptr)
114 std::unique_ptr<InputStream>
in (inputSource->createInputStream());
121 #if JUCE_STRING_UTF_TYPE == 8
125 auto* text =
static_cast<const char*
> (data.
getData());
170String XmlDocument::getFileContents (
const String& filename)
const
172 if (inputSource !=
nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
177 return in->readEntireStreamAsString();
183juce_wchar XmlDocument::readNextChar() noexcept
185 auto c = input.getAndAdvance();
196std::unique_ptr<XmlElement> XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
197 bool onlyReadOuterDocumentElement)
200 errorOccurred =
false;
202 needToLoadDTD =
true;
204 if (textToParse.isEmpty())
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
228bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
236 if (headerEnd.isEmpty())
240 auto encoding = String (input, headerEnd)
241 .fromFirstOccurrenceOf (
"encoding",
false,
true)
242 .fromFirstOccurrenceOf (
"=",
false,
false)
243 .fromFirstOccurrenceOf (
"\"",
false,
false)
244 .upToFirstOccurrenceOf (
"\"",
false,
false)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
264bool XmlDocument::parseDTD()
269 auto dtdStart = input;
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText = String (dtdStart, input - 1).
trim();
290void XmlDocument::skipNextWhiteSpace()
294 input = input.findEndOfWhitespace();
309 auto closeComment = input.indexOf (CharPointer_ASCII (
"-->"));
311 if (closeComment < 0)
317 input += closeComment + 3;
324 auto closeBracket = input.indexOf (CharPointer_ASCII (
"?>"));
326 if (closeBracket < 0)
332 input += closeBracket + 2;
341void XmlDocument::readQuotedString (String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character == quote)
368 result.appendCharPointer (start, input);
373 if (character ==
'&')
375 result.appendCharPointer (start, input);
381 setLastError (
"unmatched quotes",
false);
392XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
394 XmlElement* node =
nullptr;
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError (
"tag name missing",
false);
418 node =
new XmlElement (input, endOfToken);
420 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
440 if (alsoParseSubElements)
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
464 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.append (newAtt);
472 setLastError (
"expected '=' after attribute '"
473 + String (attNameStart, attNameEnd) +
"'",
false);
481 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
491void XmlDocument::readChildElements (XmlElement& parent)
493 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
513 auto closeTag = input.indexOf ((juce_wchar)
'>');
516 input += closeTag + 1;
524 auto inputStart = input;
532 setLastError (
"unterminated CDATA section",
false);
537 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
551 childAppender.append (n);
558 input = preWhitespaceInput;
559 MemoryOutputStream textElementContent;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
571 auto closeComment = input.indexOf (CharPointer_ASCII (
"-->"));
573 if (closeComment < 0)
575 setLastError (
"unterminated comment",
false);
580 input += closeComment + 3;
589 setLastError (
"unmatched tags",
false);
599 if (entity.startsWithChar (
'<') && entity [1] != 0)
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
604 input = entity.getCharPointer();
607 while (
auto* n = readNextElement (
true))
608 childAppender.append (n);
611 outOfData = oldOutOfData;
615 textElementContent << entity;
616 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
643 textElementContent.appendUTF8Char (nextChar);
649 if (contentShouldBeUsed)
655void XmlDocument::readEntity (String& result)
660 if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"amp;"), 4) == 0)
665 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"quot;"), 5) == 0)
670 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"apos;"), 5) == 0)
675 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"lt;"), 3) == 0)
680 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"gt;"), 3) == 0)
685 else if (*input ==
'#')
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError (
"illegal escape sequence",
true);
705 charCode = (charCode << 4) | hexValue;
711 else if (input[0] >=
'0' && input[0] <=
'9')
715 while (input[0] !=
';')
719 setLastError (
"illegal escape sequence",
true);
723 charCode = charCode * 10 + ((int) input[0] -
'0');
731 setLastError (
"illegal escape sequence",
true);
736 result << (juce_wchar) charCode;
740 auto entityNameStart = input;
741 auto closingSemiColon = input.indexOf ((juce_wchar)
';');
743 if (closingSemiColon < 0)
750 input += closingSemiColon + 1;
751 result += expandExternalEntity (String (entityNameStart, (
size_t) closingSemiColon));
756String XmlDocument::expandEntity (
const String& ent)
768 if (char1 ==
'x' || char1 ==
'X')
771 if (char1 >=
'0' && char1 <=
'9')
774 setLastError (
"illegal escape sequence",
false);
778 return expandExternalEntity (ent);
781String XmlDocument::expandExternalEntity (
const String& entity)
790 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
791 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
793 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
795 tokenisedDTD.
clear();
796 tokenisedDTD.
addTokens (getFileContents (fn),
true);
800 tokenisedDTD.
clear();
807 if (closeBracket > openBracket)
809 closeBracket),
true);
813 for (
int i = tokenisedDTD.
size(); --i >= 0;)
815 if (tokenisedDTD[i].startsWithChar (
'%')
816 && tokenisedDTD[i].endsWithChar (
';'))
818 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
820 newToks.addTokens (parsed,
true);
824 for (
int j = newToks.size(); --j >= 0;)
825 tokenisedDTD.
insert (i, newToks[j]);
830 needToLoadDTD =
false;
833 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
835 if (tokenisedDTD[i] == entity)
837 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
839 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
842 auto ampersand = ent.indexOfChar (
'&');
844 while (ampersand >= 0)
846 auto semiColon = ent.
indexOf (i + 1,
";");
850 setLastError (
"entity without terminating semi-colon",
false);
854 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
856 ent = ent.substring (0, ampersand)
858 + ent.substring (semiColon + 1);
860 ampersand = ent.indexOfChar (semiColon + 1,
'&');
868 setLastError (
"unknown entity",
true);
872String XmlDocument::getParameterEntity (
const String& entity)
874 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
876 if (tokenisedDTD[i] == entity
877 && tokenisedDTD [i - 1] ==
"%"
878 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
880 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
882 if (ent.equalsIgnoreCase (
"system"))
883 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
885 return ent.trim().unquoted();
int indexOf(ParameterType elementToLookFor) const
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
static int getHexDigitValue(juce_wchar digit) noexcept
static bool isWhitespace(char character) noexcept
static bool isLetterOrDigit(char character) noexcept
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
const void * getData() const noexcept
size_t getDataSize() const noexcept
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
virtual bool writeByte(char byte)
void insert(int index, String stringToAdd)
int size() const noexcept
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
CharPointerType getCharPointer() const noexcept
int indexOfChar(juce_wchar characterToLookFor) const noexcept
bool isEmpty() const noexcept
int lastIndexOfChar(juce_wchar character) const noexcept
String trimCharactersAtEnd(StringRef charactersToTrim) const
static String charToString(juce_wchar character)
String substring(int startIndex, int endIndex) const
bool isNotEmpty() const noexcept
const String & getLastParseError() const noexcept
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
XmlDocument(const String &documentText)
static std::unique_ptr< XmlElement > parse(const File &file)
void setInputSource(InputSource *newSource) noexcept
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
static XmlElement * createTextElement(const String &text)