From 6168dbe21f5f83b906e562ea0ab232d499b275a6 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Wed, 15 Jan 2020 14:56:04 -0500 Subject: Add java htmlparser sources that match the original 52-level state https://hg.mozilla.org/projects/htmlparser/ Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d --- parser/html/java/htmlparser/HtmlParser-compile | 3 + .../java/htmlparser/HtmlParser-compile-detailed | 3 + .../htmlparser/HtmlParser-compile-detailed.launch | 24 + .../html/java/htmlparser/HtmlParser-compile.launch | 22 + parser/html/java/htmlparser/HtmlParser-linux | 3 + parser/html/java/htmlparser/HtmlParser-shell | 3 + parser/html/java/htmlparser/HtmlParser.launch | 23 + parser/html/java/htmlparser/LICENSE.txt | 96 + parser/html/java/htmlparser/README.txt | 5 + parser/html/java/htmlparser/doc/README | 15 + .../htmlparser/doc/named-character-references.html | 4 + parser/html/java/htmlparser/doc/tokenization.txt | 1147 ++++ .../html/java/htmlparser/doc/tree-construction.txt | 2201 ++++++ .../html/java/htmlparser/generate-encoding-data.py | 745 +++ .../nu/validator/htmlparser/HtmlParser.gwt.xml | 12 + .../htmlparser/gwt/BrowserTreeBuilder.java | 477 ++ .../nu/validator/htmlparser/gwt/HtmlParser.java | 265 + .../validator/htmlparser/gwt/HtmlParserModule.java | 87 + .../validator/htmlparser/gwt/ParseEndListener.java | 46 + .../nu/validator/htmlparser/public/HtmlParser.html | 225 + .../htmlparser/public/LICENSE.Live-DOM-viewer.txt | 25 + .../nu/validator/htmlparser/public/blank.html | 2 + .../htmlparser/mozilla-export-scripts/README.txt | 25 + .../mozilla-export-scripts/export-all.sh | 24 + .../mozilla-export-scripts/export-java-srcs.sh | 25 + .../mozilla-export-scripts/export-translator.sh | 24 + .../mozilla-export-scripts/make-translator-jar.sh | 63 + .../java/htmlparser/mozilla-export-scripts/util.sh | 23 + parser/html/java/htmlparser/pom.xml | 240 + parser/html/java/htmlparser/ruby-gcj/DomUtils.java | 36 + parser/html/java/htmlparser/ruby-gcj/README | 65 + 
parser/html/java/htmlparser/ruby-gcj/Rakefile | 77 + parser/html/java/htmlparser/ruby-gcj/extconf.rb | 45 + .../java/htmlparser/ruby-gcj/test/domencoding.rb | 5 + parser/html/java/htmlparser/ruby-gcj/test/fonts.rb | 11 + .../html/java/htmlparser/ruby-gcj/test/google.html | 10 + .../html/java/htmlparser/ruby-gcj/test/greek.xml | 2 + parser/html/java/htmlparser/ruby-gcj/validator.cpp | 210 + .../htmlparser/src/nu/validator/encoding/Big5.java | 59 + .../src/nu/validator/encoding/Big5Data.java | 185 + .../src/nu/validator/encoding/Big5Decoder.java | 184 + .../src/nu/validator/encoding/Big5Encoder.java | 185 + .../src/nu/validator/encoding/Decoder.java | 80 + .../src/nu/validator/encoding/Encoder.java | 95 + .../src/nu/validator/encoding/Encoding.java | 886 +++ .../src/nu/validator/encoding/EucJp.java | 57 + .../src/nu/validator/encoding/EucKr.java | 64 + .../encoding/FallibleSingleByteDecoder.java | 61 + .../src/nu/validator/encoding/Gb18030.java | 55 + .../htmlparser/src/nu/validator/encoding/Gbk.java | 63 + .../src/nu/validator/encoding/Ibm866.java | 184 + .../encoding/InfallibleSingleByteDecoder.java | 57 + .../src/nu/validator/encoding/Iso10.java | 187 + .../src/nu/validator/encoding/Iso13.java | 183 + .../src/nu/validator/encoding/Iso14.java | 183 + .../src/nu/validator/encoding/Iso15.java | 186 + .../src/nu/validator/encoding/Iso16.java | 181 + .../htmlparser/src/nu/validator/encoding/Iso2.java | 189 + .../src/nu/validator/encoding/Iso2022Jp.java | 56 + .../htmlparser/src/nu/validator/encoding/Iso3.java | 189 + .../htmlparser/src/nu/validator/encoding/Iso4.java | 189 + .../htmlparser/src/nu/validator/encoding/Iso5.java | 188 + .../htmlparser/src/nu/validator/encoding/Iso6.java | 194 + .../htmlparser/src/nu/validator/encoding/Iso7.java | 192 + .../htmlparser/src/nu/validator/encoding/Iso8.java | 191 + .../src/nu/validator/encoding/Iso8I.java | 183 + .../src/nu/validator/encoding/Koi8R.java | 185 + .../src/nu/validator/encoding/Koi8U.java | 182 + 
.../src/nu/validator/encoding/MacCyrillic.java | 182 + .../src/nu/validator/encoding/Macintosh.java | 184 + .../src/nu/validator/encoding/Replacement.java | 59 + .../nu/validator/encoding/ReplacementDecoder.java | 75 + .../src/nu/validator/encoding/ShiftJis.java | 62 + .../src/nu/validator/encoding/UserDefined.java | 55 + .../nu/validator/encoding/UserDefinedDecoder.java | 56 + .../src/nu/validator/encoding/Utf16Be.java | 55 + .../src/nu/validator/encoding/Utf16Le.java | 56 + .../htmlparser/src/nu/validator/encoding/Utf8.java | 57 + .../src/nu/validator/encoding/Windows1250.java | 183 + .../src/nu/validator/encoding/Windows1251.java | 183 + .../src/nu/validator/encoding/Windows1252.java | 197 + .../src/nu/validator/encoding/Windows1253.java | 183 + .../src/nu/validator/encoding/Windows1254.java | 192 + .../src/nu/validator/encoding/Windows1255.java | 183 + .../src/nu/validator/encoding/Windows1256.java | 183 + .../src/nu/validator/encoding/Windows1257.java | 183 + .../src/nu/validator/encoding/Windows1258.java | 183 + .../src/nu/validator/encoding/Windows874.java | 186 + .../nu/validator/htmlparser/annotation/Auto.java | 27 + .../htmlparser/annotation/CharacterName.java | 27 + .../nu/validator/htmlparser/annotation/Const.java | 34 + .../nu/validator/htmlparser/annotation/IdType.java | 34 + .../nu/validator/htmlparser/annotation/Inline.java | 33 + .../validator/htmlparser/annotation/Literal.java | 34 + .../nu/validator/htmlparser/annotation/Local.java | 34 + .../validator/htmlparser/annotation/NoLength.java | 34 + .../nu/validator/htmlparser/annotation/NsUri.java | 33 + .../nu/validator/htmlparser/annotation/Prefix.java | 33 + .../nu/validator/htmlparser/annotation/QName.java | 33 + .../validator/htmlparser/annotation/Virtual.java | 33 + .../validator/htmlparser/annotation/package.html | 30 + .../validator/htmlparser/common/ByteReadable.java | 44 + .../htmlparser/common/CharacterHandler.java | 59 + .../htmlparser/common/DoctypeExpectation.java | 65 + 
.../validator/htmlparser/common/DocumentMode.java | 47 + .../htmlparser/common/DocumentModeHandler.java | 46 + .../common/EncodingDeclarationHandler.java | 58 + .../nu/validator/htmlparser/common/Heuristics.java | 52 + .../nu/validator/htmlparser/common/Interner.java | 35 + .../validator/htmlparser/common/TokenHandler.java | 183 + .../htmlparser/common/TransitionHandler.java | 53 + .../htmlparser/common/XmlViolationPolicy.java | 48 + .../nu/validator/htmlparser/common/package.html | 29 + .../validator/htmlparser/dom/DOMTreeBuilder.java | 357 + .../src/nu/validator/htmlparser/dom/Dom2Sax.java | 259 + .../htmlparser/dom/HtmlDocumentBuilder.java | 736 ++ .../src/nu/validator/htmlparser/dom/package.html | 29 + .../validator/htmlparser/extra/ChardetSniffer.java | 84 + .../htmlparser/extra/IcuDetectorSniffer.java | 77 + .../htmlparser/extra/NormalizationChecker.java | 268 + .../validator/htmlparser/impl/AttributeName.java | 2473 +++++++ .../htmlparser/impl/CoalescingTreeBuilder.java | 90 + .../nu/validator/htmlparser/impl/ElementName.java | 1609 +++++ .../htmlparser/impl/ErrorReportingTokenizer.java | 772 +++ .../htmlparser/impl/HotSpotWorkaround.txt | 55 + .../validator/htmlparser/impl/HtmlAttributes.java | 618 ++ .../nu/validator/htmlparser/impl/LocatorImpl.java | 60 + .../nu/validator/htmlparser/impl/MetaScanner.java | 854 +++ .../src/nu/validator/htmlparser/impl/NCName.java | 495 ++ .../validator/htmlparser/impl/NamedCharacters.java | 944 +++ .../htmlparser/impl/NamedCharactersAccel.java | 311 + .../nu/validator/htmlparser/impl/Portability.java | 150 + .../validator/htmlparser/impl/PushedLocation.java | 136 + .../nu/validator/htmlparser/impl/StackNode.java | 295 + .../validator/htmlparser/impl/StateSnapshot.java | 204 + .../htmlparser/impl/TaintableLocatorImpl.java | 43 + .../nu/validator/htmlparser/impl/Tokenizer.java | 7067 ++++++++++++++++++++ .../nu/validator/htmlparser/impl/TreeBuilder.java | 6558 ++++++++++++++++++ .../htmlparser/impl/TreeBuilderState.java | 
129 + .../nu/validator/htmlparser/impl/UTF16Buffer.java | 151 + .../src/nu/validator/htmlparser/impl/package.html | 30 + .../src/nu/validator/htmlparser/io/BomSniffer.java | 79 + .../src/nu/validator/htmlparser/io/Confidence.java | 27 + .../src/nu/validator/htmlparser/io/Driver.java | 597 ++ .../src/nu/validator/htmlparser/io/Encoding.java | 395 ++ .../htmlparser/io/HtmlInputStreamReader.java | 512 ++ .../nu/validator/htmlparser/io/MetaSniffer.java | 199 + .../htmlparser/rewindable/Rewindable.java | 42 + .../rewindable/RewindableInputStream.java | 235 + .../nu/validator/htmlparser/sax/HtmlParser.java | 1097 +++ .../validator/htmlparser/sax/HtmlSerializer.java | 269 + .../htmlparser/sax/InfosetCoercingHtmlParser.java | 47 + .../htmlparser/sax/NameCheckingXmlSerializer.java | 51 + .../nu/validator/htmlparser/sax/SAXStreamer.java | 196 + .../validator/htmlparser/sax/SAXTreeBuilder.java | 210 + .../nu/validator/htmlparser/sax/XmlSerializer.java | 737 ++ .../src/nu/validator/htmlparser/sax/package.html | 29 + .../nu/validator/htmlparser/xom/FormPointer.java | 49 + .../validator/htmlparser/xom/FormPtrElement.java | 87 + .../nu/validator/htmlparser/xom/HtmlBuilder.java | 773 +++ .../nu/validator/htmlparser/xom/ModalDocument.java | 75 + .../src/nu/validator/htmlparser/xom/Mode.java | 48 + .../htmlparser/xom/SimpleNodeFactory.java | 102 + .../validator/htmlparser/xom/XOMTreeBuilder.java | 351 + .../src/nu/validator/htmlparser/xom/package.html | 29 + .../htmlparser/src/nu/validator/saxtree/CDATA.java | 70 + .../src/nu/validator/saxtree/CharBufferNode.java | 62 + .../src/nu/validator/saxtree/Characters.java | 65 + .../src/nu/validator/saxtree/Comment.java | 66 + .../htmlparser/src/nu/validator/saxtree/DTD.java | 118 + .../src/nu/validator/saxtree/Document.java | 70 + .../src/nu/validator/saxtree/DocumentFragment.java | 58 + .../src/nu/validator/saxtree/Element.java | 172 + .../src/nu/validator/saxtree/Entity.java | 86 + .../nu/validator/saxtree/IgnorableWhitespace.java | 65 + 
.../src/nu/validator/saxtree/LocatorImpl.java | 104 + .../htmlparser/src/nu/validator/saxtree/Node.java | 307 + .../src/nu/validator/saxtree/NodeType.java | 76 + .../nu/validator/saxtree/NullLexicalHandler.java | 85 + .../src/nu/validator/saxtree/ParentNode.java | 208 + .../src/nu/validator/saxtree/PrefixMapping.java | 65 + .../validator/saxtree/ProcessingInstruction.java | 94 + .../src/nu/validator/saxtree/SkippedEntity.java | 77 + .../src/nu/validator/saxtree/TreeBuilder.java | 250 + .../src/nu/validator/saxtree/TreeParser.java | 301 + .../src/nu/validator/saxtree/package.html | 46 + .../translatable/java/io/IOException.java | 42 + .../translatable/org/xml/sax/Attributes.java | 257 + .../translatable/org/xml/sax/ErrorHandler.java | 139 + .../translatable/org/xml/sax/Locator.java | 136 + .../translatable/org/xml/sax/SAXException.java | 153 + .../org/xml/sax/SAXParseException.java | 269 + .../translatable/org/xml/sax/package.html | 297 + .../nu/validator/encoding/test/Big5Tester.java | 96 + .../nu/validator/encoding/test/EncodingTester.java | 491 ++ .../htmlparser/test/DecoderLoopTester.java | 115 + .../nu/validator/htmlparser/test/DomIdTester.java | 49 + .../nu/validator/htmlparser/test/DomTest.java | 40 + .../validator/htmlparser/test/EncodingTester.java | 123 + .../htmlparser/test/JSONArrayTokenHandler.java | 185 + .../htmlparser/test/ListErrorHandler.java | 66 + .../htmlparser/test/SystemErrErrorHandler.java | 201 + .../nu/validator/htmlparser/test/TokenPrinter.java | 210 + .../validator/htmlparser/test/TokenizerTester.java | 211 + .../htmlparser/test/TreeDumpContentHandler.java | 239 + .../nu/validator/htmlparser/test/TreePrinter.java | 50 + .../nu/validator/htmlparser/test/TreeTester.java | 246 + .../htmlparser/test/UntilHashInputStream.java | 97 + .../htmlparser/test/XmlSerializerTester.java | 63 + .../nu/validator/htmlparser/test/XomTest.java | 33 + .../nu/validator/htmlparser/test/package.html | 29 + .../nu/validator/htmlparser/tools/HTML2HTML.java | 87 + 
.../nu/validator/htmlparser/tools/HTML2XML.java | 86 + .../nu/validator/htmlparser/tools/XML2HTML.java | 89 + .../nu/validator/htmlparser/tools/XML2XML.java | 89 + .../nu/validator/htmlparser/tools/XSLT4HTML5.java | 237 + .../validator/htmlparser/tools/XSLT4HTML5XOM.java | 162 + .../validator/htmlparser/tools/XmlnsDropper.java | 169 + .../nu/validator/htmlparser/tools/package.html | 29 + .../nu/validator/saxtree/test/PassThruPrinter.java | 67 + .../nu/validator/saxtree/test/package.html | 29 + .../cpptranslate/AnnotationHelperVisitor.java | 139 + .../cpptranslate/CppOnlyInputStream.java | 70 + .../htmlparser/cpptranslate/CppTypes.java | 445 ++ .../htmlparser/cpptranslate/CppVisitor.java | 2421 +++++++ .../htmlparser/cpptranslate/GkAtomParser.java | 70 + .../htmlparser/cpptranslate/HVisitor.java | 306 + .../htmlparser/cpptranslate/LabelVisitor.java | 84 + .../htmlparser/cpptranslate/LicenseExtractor.java | 75 + .../htmlparser/cpptranslate/LocalSymbolTable.java | 89 + .../nu/validator/htmlparser/cpptranslate/Main.java | 148 + .../htmlparser/cpptranslate/NoCppInputStream.java | 86 + .../cpptranslate/StringLiteralParser.java | 70 + .../htmlparser/cpptranslate/StringPair.java | 73 + .../htmlparser/cpptranslate/SymbolTable.java | 80 + .../cpptranslate/SymbolTableVisitor.java | 71 + .../htmlparser/cpptranslate/TranslatorUtils.java | 81 + .../nu/validator/htmlparser/cpptranslate/Type.java | 99 + .../generator/ApplyHotSpotWorkaround.java | 104 + .../generator/GenerateNamedCharacters.java | 182 + .../generator/GenerateNamedCharactersCpp.java | 580 ++ .../DuplicatingFallThroughRemover.java | 79 + .../htmlparser/rusttranslate/JavaVisitor.java | 1349 ++++ .../rusttranslate/LoopBreakAnalyzerVisitor.java | 183 + .../validator/htmlparser/rusttranslate/Main.java | 144 + .../rusttranslate/ModeFallThroughRemover.java | 106 + .../htmlparser/rusttranslate/RustVisitor.java | 1586 +++++ .../rusttranslate/SwitchBreakAnalyzerVisitor.java | 191 + 248 files changed, 62553 insertions(+) 
create mode 100644 parser/html/java/htmlparser/HtmlParser-compile create mode 100644 parser/html/java/htmlparser/HtmlParser-compile-detailed create mode 100644 parser/html/java/htmlparser/HtmlParser-compile-detailed.launch create mode 100644 parser/html/java/htmlparser/HtmlParser-compile.launch create mode 100644 parser/html/java/htmlparser/HtmlParser-linux create mode 100644 parser/html/java/htmlparser/HtmlParser-shell create mode 100644 parser/html/java/htmlparser/HtmlParser.launch create mode 100644 parser/html/java/htmlparser/LICENSE.txt create mode 100644 parser/html/java/htmlparser/README.txt create mode 100644 parser/html/java/htmlparser/doc/README create mode 100644 parser/html/java/htmlparser/doc/named-character-references.html create mode 100644 parser/html/java/htmlparser/doc/tokenization.txt create mode 100644 parser/html/java/htmlparser/doc/tree-construction.txt create mode 100644 parser/html/java/htmlparser/generate-encoding-data.py create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt create mode 100644 parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html create mode 100644 parser/html/java/htmlparser/mozilla-export-scripts/README.txt create mode 100644 parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh create mode 100644 
parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh create mode 100644 parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh create mode 100644 parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh create mode 100644 parser/html/java/htmlparser/mozilla-export-scripts/util.sh create mode 100644 parser/html/java/htmlparser/pom.xml create mode 100644 parser/html/java/htmlparser/ruby-gcj/DomUtils.java create mode 100644 parser/html/java/htmlparser/ruby-gcj/README create mode 100644 parser/html/java/htmlparser/ruby-gcj/Rakefile create mode 100644 parser/html/java/htmlparser/ruby-gcj/extconf.rb create mode 100644 parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb create mode 100644 parser/html/java/htmlparser/ruby-gcj/test/fonts.rb create mode 100644 parser/html/java/htmlparser/ruby-gcj/test/google.html create mode 100644 parser/html/java/htmlparser/ruby-gcj/test/greek.xml create mode 100644 parser/html/java/htmlparser/ruby-gcj/validator.cpp create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Encoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html 
create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java create mode 100644 
parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java create mode 100644 parser/html/java/htmlparser/src/nu/validator/saxtree/package.html create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java create mode 100644 parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html create mode 100644 
parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java create mode 100644 
parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/package.html create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java create mode 100644 parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java create mode 100644 
parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java create mode 100644 
parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java create mode 100644 parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java (limited to 'parser') diff --git a/parser/html/java/htmlparser/HtmlParser-compile b/parser/html/java/htmlparser/HtmlParser-compile new file mode 100644 index 0000000000..3e867827fc --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-compile @@ -0,0 +1,3 @@ +#!/bin/sh +APPDIR=`dirname $0`; +java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/parser/html/java/htmlparser/HtmlParser-compile-detailed b/parser/html/java/htmlparser/HtmlParser-compile-detailed new file mode 100644 index 0000000000..a4102d642c --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-compile-detailed @@ -0,0 +1,3 @@ +#!/bin/sh +APPDIR=`dirname $0`; +java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -style DETAILED -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch b/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch new file mode 100644 index 0000000000..0347fd6cf2 --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-compile-detailed.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/parser/html/java/htmlparser/HtmlParser-compile.launch b/parser/html/java/htmlparser/HtmlParser-compile.launch new file mode 100644 index 0000000000..54e7bc337c --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-compile.launch @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/parser/html/java/htmlparser/HtmlParser-linux b/parser/html/java/htmlparser/HtmlParser-linux new file mode 100644 index 0000000000..0a9e9deffd --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-linux @@ -0,0 +1,3 @@ +#!/bin/sh +APPDIR=`dirname $0`; +java -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/home/hsivonen/gwt-linux-1.5.1/gwt-user.jar:/home/hsivonen/gwt-linux-1.5.1/gwt-dev-linux.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/parser/html/java/htmlparser/HtmlParser-shell b/parser/html/java/htmlparser/HtmlParser-shell new file mode 100644 index 0000000000..ffcf2e2972 --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser-shell @@ -0,0 +1,3 @@ +#!/bin/sh +APPDIR=`dirname $0`; +java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/parser/html/java/htmlparser/HtmlParser.launch b/parser/html/java/htmlparser/HtmlParser.launch new file mode 100644 index 0000000000..9335abf60e --- /dev/null +++ b/parser/html/java/htmlparser/HtmlParser.launch @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/parser/html/java/htmlparser/LICENSE.txt b/parser/html/java/htmlparser/LICENSE.txt new file mode 100644 index 0000000000..4bfe5d331f --- /dev/null +++ b/parser/html/java/htmlparser/LICENSE.txt @@ -0,0 +1,96 @@ +This is for the HTML parser as a whole except the rewindable input stream, +the named character classes and the Live DOM Viewer. +For the copyright notices for individual files, please see individual files. 
+ +/* + * Copyright (c) 2005, 2006, 2007 Henri Sivonen + * Copyright (c) 2007-2012 Mozilla Foundation + * Portions of comments Copyright 2004-2007 Apple Computer, Inc., Mozilla + * Foundation, and Opera Software ASA. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +The following license is for the WHATWG spec from which the named character +data was extracted. + +/* + * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera + * Software ASA. + * + * You are granted a license to use, reproduce and create derivative works of + * this document. + */ + +The following license is for the rewindable input stream. + +/* + * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Thai Open Source Software Center Ltd nor + * the names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +The following license applies to the Live DOM Viewer: + +Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/parser/html/java/htmlparser/README.txt b/parser/html/java/htmlparser/README.txt new file mode 100644 index 0000000000..713b404e83 --- /dev/null +++ b/parser/html/java/htmlparser/README.txt @@ -0,0 +1,5 @@ +An HTML5 parser. + +Please see http://about.validator.nu/htmlparser/ + +-- Henri Sivonen (hsivonen@iki.fi). diff --git a/parser/html/java/htmlparser/doc/README b/parser/html/java/htmlparser/doc/README new file mode 100644 index 0000000000..e0132a41e4 --- /dev/null +++ b/parser/html/java/htmlparser/doc/README @@ -0,0 +1,15 @@ +tokenization.txt represents the state of the spec implemented in Tokenizer.java. 
+ +To get a diffable version corresponding to the current spec: +lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html > current.txt + +tree-construction.txt represents the state of the spec implemented in TreeBuilder.java. + +To get a diffable version corresponding to the current spec: +lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html > current.txt + + +The text of the files in this directory comes from the WHATWG HTML 5 spec +which carries the following notice: +© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera Software ASA. +You are granted a license to use, reproduce and create derivative works of this document. diff --git a/parser/html/java/htmlparser/doc/named-character-references.html b/parser/html/java/htmlparser/doc/named-character-references.html new file mode 100644 index 0000000000..5f05a991fc --- /dev/null +++ b/parser/html/java/htmlparser/doc/named-character-references.html @@ -0,0 +1,4 @@ + + +
Name Character(s) Glyph
AElig; U+000C6 Æ
AMP; U+00026 &
Aacute; U+000C1 Á
Abreve; U+00102 Ă
Acirc; U+000C2 Â
Acy; U+00410 А
Afr; U+1D504 𝔄
Agrave; U+000C0 À
Alpha; U+00391 Α
Amacr; U+00100 Ā
And; U+02A53
Aogon; U+00104 Ą
Aopf; U+1D538 𝔸
ApplyFunction; U+02061
Aring; U+000C5 Å
Ascr; U+1D49C 𝒜
Assign; U+02254
Atilde; U+000C3 Ã
Auml; U+000C4 Ä
Backslash; U+02216
Barv; U+02AE7
Barwed; U+02306
Bcy; U+00411 Б
Because; U+02235
Bernoullis; U+0212C
Beta; U+00392 Β
Bfr; U+1D505 𝔅
Bopf; U+1D539 𝔹
Breve; U+002D8 ˘
Bscr; U+0212C
Bumpeq; U+0224E
CHcy; U+00427 Ч
COPY; U+000A9 ©
Cacute; U+00106 Ć
Cap; U+022D2
CapitalDifferentialD; U+02145
Cayleys; U+0212D
Ccaron; U+0010C Č
Ccedil; U+000C7 Ç
Ccirc; U+00108 Ĉ
Cconint; U+02230
Cdot; U+0010A Ċ
Cedilla; U+000B8 ¸
CenterDot; U+000B7 ·
Cfr; U+0212D
Chi; U+003A7 Χ
CircleDot; U+02299
CircleMinus; U+02296
CirclePlus; U+02295
CircleTimes; U+02297
ClockwiseContourIntegral; U+02232
CloseCurlyDoubleQuote; U+0201D
CloseCurlyQuote; U+02019
Colon; U+02237
Colone; U+02A74
Congruent; U+02261
Conint; U+0222F
ContourIntegral; U+0222E
Copf; U+02102
Coproduct; U+02210
CounterClockwiseContourIntegral; U+02233
Cross; U+02A2F
Cscr; U+1D49E 𝒞
Cup; U+022D3
CupCap; U+0224D
DD; U+02145
DDotrahd; U+02911
DJcy; U+00402 Ђ
DScy; U+00405 Ѕ
DZcy; U+0040F Џ
Dagger; U+02021
Darr; U+021A1
Dashv; U+02AE4
Dcaron; U+0010E Ď
Dcy; U+00414 Д
Del; U+02207
Delta; U+00394 Δ
Dfr; U+1D507 𝔇
DiacriticalAcute; U+000B4 ´
DiacriticalDot; U+002D9 ˙
DiacriticalDoubleAcute; U+002DD ˝
DiacriticalGrave; U+00060 `
DiacriticalTilde; U+002DC ˜
Diamond; U+022C4
DifferentialD; U+02146
Dopf; U+1D53B 𝔻
Dot; U+000A8 ¨
DotDot; U+020DC ◌⃜
DotEqual; U+02250
DoubleContourIntegral; U+0222F
DoubleDot; U+000A8 ¨
DoubleDownArrow; U+021D3
DoubleLeftArrow; U+021D0
DoubleLeftRightArrow; U+021D4
DoubleLeftTee; U+02AE4
DoubleLongLeftArrow; U+027F8
DoubleLongLeftRightArrow; U+027FA
DoubleLongRightArrow; U+027F9
DoubleRightArrow; U+021D2
DoubleRightTee; U+022A8
DoubleUpArrow; U+021D1
DoubleUpDownArrow; U+021D5
DoubleVerticalBar; U+02225
DownArrow; U+02193
DownArrowBar; U+02913
DownArrowUpArrow; U+021F5
DownBreve; U+00311 ◌̑
DownLeftRightVector; U+02950
DownLeftTeeVector; U+0295E
DownLeftVector; U+021BD
DownLeftVectorBar; U+02956
DownRightTeeVector; U+0295F
DownRightVector; U+021C1
DownRightVectorBar; U+02957
DownTee; U+022A4
DownTeeArrow; U+021A7
Downarrow; U+021D3
Dscr; U+1D49F 𝒟
Dstrok; U+00110 Đ
ENG; U+0014A Ŋ
ETH; U+000D0 Ð
Eacute; U+000C9 É
Ecaron; U+0011A Ě
Ecirc; U+000CA Ê
Ecy; U+0042D Э
Edot; U+00116 Ė
Efr; U+1D508 𝔈
Egrave; U+000C8 È
Element; U+02208
Emacr; U+00112 Ē
EmptySmallSquare; U+025FB
EmptyVerySmallSquare; U+025AB
Eogon; U+00118 Ę
Eopf; U+1D53C 𝔼
Epsilon; U+00395 Ε
Equal; U+02A75
EqualTilde; U+02242
Equilibrium; U+021CC
Escr; U+02130
Esim; U+02A73
Eta; U+00397 Η
Euml; U+000CB Ë
Exists; U+02203
ExponentialE; U+02147
Fcy; U+00424 Ф
Ffr; U+1D509 𝔉
FilledSmallSquare; U+025FC
FilledVerySmallSquare; U+025AA
Fopf; U+1D53D 𝔽
ForAll; U+02200
Fouriertrf; U+02131
Fscr; U+02131
GJcy; U+00403 Ѓ
GT; U+0003E >
Gamma; U+00393 Γ
Gammad; U+003DC Ϝ
Gbreve; U+0011E Ğ
Gcedil; U+00122 Ģ
Gcirc; U+0011C Ĝ
Gcy; U+00413 Г
Gdot; U+00120 Ġ
Gfr; U+1D50A 𝔊
Gg; U+022D9
Gopf; U+1D53E 𝔾
GreaterEqual; U+02265
GreaterEqualLess; U+022DB
GreaterFullEqual; U+02267
GreaterGreater; U+02AA2
GreaterLess; U+02277
GreaterSlantEqual; U+02A7E
GreaterTilde; U+02273
Gscr; U+1D4A2 𝒢
Gt; U+0226B
HARDcy; U+0042A Ъ
Hacek; U+002C7 ˇ
Hat; U+0005E ^
Hcirc; U+00124 Ĥ
Hfr; U+0210C
HilbertSpace; U+0210B
Hopf; U+0210D
HorizontalLine; U+02500
Hscr; U+0210B
Hstrok; U+00126 Ħ
HumpDownHump; U+0224E
HumpEqual; U+0224F
IEcy; U+00415 Е
IJlig; U+00132 IJ
IOcy; U+00401 Ё
Iacute; U+000CD Í
Icirc; U+000CE Î
Icy; U+00418 И
Idot; U+00130 İ
Ifr; U+02111
Igrave; U+000CC Ì
Im; U+02111
Imacr; U+0012A Ī
ImaginaryI; U+02148
Implies; U+021D2
Int; U+0222C
Integral; U+0222B
Intersection; U+022C2
InvisibleComma; U+02063
InvisibleTimes; U+02062
Iogon; U+0012E Į
Iopf; U+1D540 𝕀
Iota; U+00399 Ι
Iscr; U+02110
Itilde; U+00128 Ĩ
Iukcy; U+00406 І
Iuml; U+000CF Ï
Jcirc; U+00134 Ĵ
Jcy; U+00419 Й
Jfr; U+1D50D 𝔍
Jopf; U+1D541 𝕁
Jscr; U+1D4A5 𝒥
Jsercy; U+00408 Ј
Jukcy; U+00404 Є
KHcy; U+00425 Х
KJcy; U+0040C Ќ
Kappa; U+0039A Κ
Kcedil; U+00136 Ķ
Kcy; U+0041A К
Kfr; U+1D50E 𝔎
Kopf; U+1D542 𝕂
Kscr; U+1D4A6 𝒦
LJcy; U+00409 Љ
LT; U+0003C <
Lacute; U+00139 Ĺ
Lambda; U+0039B Λ
Lang; U+027EA
Laplacetrf; U+02112
Larr; U+0219E
Lcaron; U+0013D Ľ
Lcedil; U+0013B Ļ
Lcy; U+0041B Л
LeftAngleBracket; U+027E8
LeftArrow; U+02190
LeftArrowBar; U+021E4
LeftArrowRightArrow; U+021C6
LeftCeiling; U+02308
LeftDoubleBracket; U+027E6
LeftDownTeeVector; U+02961
LeftDownVector; U+021C3
LeftDownVectorBar; U+02959
LeftFloor; U+0230A
LeftRightArrow; U+02194
LeftRightVector; U+0294E
LeftTee; U+022A3
LeftTeeArrow; U+021A4
LeftTeeVector; U+0295A
LeftTriangle; U+022B2
LeftTriangleBar; U+029CF
LeftTriangleEqual; U+022B4
LeftUpDownVector; U+02951
LeftUpTeeVector; U+02960
LeftUpVector; U+021BF
LeftUpVectorBar; U+02958
LeftVector; U+021BC
LeftVectorBar; U+02952
Leftarrow; U+021D0
Leftrightarrow; U+021D4
LessEqualGreater; U+022DA
LessFullEqual; U+02266
LessGreater; U+02276
LessLess; U+02AA1
LessSlantEqual; U+02A7D
LessTilde; U+02272
Lfr; U+1D50F 𝔏
Ll; U+022D8
Lleftarrow; U+021DA
Lmidot; U+0013F Ŀ
LongLeftArrow; U+027F5
LongLeftRightArrow; U+027F7
LongRightArrow; U+027F6
Longleftarrow; U+027F8
Longleftrightarrow; U+027FA
Longrightarrow; U+027F9
Lopf; U+1D543 𝕃
LowerLeftArrow; U+02199
LowerRightArrow; U+02198
Lscr; U+02112
Lsh; U+021B0
Lstrok; U+00141 Ł
Lt; U+0226A
Map; U+02905
Mcy; U+0041C М
MediumSpace; U+0205F
Mellintrf; U+02133
Mfr; U+1D510 𝔐
MinusPlus; U+02213
Mopf; U+1D544 𝕄
Mscr; U+02133
Mu; U+0039C Μ
NJcy; U+0040A Њ
Nacute; U+00143 Ń
Ncaron; U+00147 Ň
Ncedil; U+00145 Ņ
Ncy; U+0041D Н
NegativeMediumSpace; U+0200B
NegativeThickSpace; U+0200B
NegativeThinSpace; U+0200B
NegativeVeryThinSpace; U+0200B
NestedGreaterGreater; U+0226B
NestedLessLess; U+0226A
NewLine; U+0000A
Nfr; U+1D511 𝔑
NoBreak; U+02060
NonBreakingSpace; U+000A0  
Nopf; U+02115
Not; U+02AEC
NotCongruent; U+02262
NotCupCap; U+0226D
NotDoubleVerticalBar; U+02226
NotElement; U+02209
NotEqual; U+02260
NotEqualTilde; U+02242 U+00338 ≂̸
NotExists; U+02204
NotGreater; U+0226F
NotGreaterEqual; U+02271
NotGreaterFullEqual; U+02267 U+00338 ≧̸
NotGreaterGreater; U+0226B U+00338 ≫̸
NotGreaterLess; U+02279
NotGreaterSlantEqual; U+02A7E U+00338 ⩾̸
NotGreaterTilde; U+02275
NotHumpDownHump; U+0224E U+00338 ≎̸
NotHumpEqual; U+0224F U+00338 ≏̸
NotLeftTriangle; U+022EA
NotLeftTriangleBar; U+029CF U+00338 ⧏̸
NotLeftTriangleEqual; U+022EC
NotLess; U+0226E
NotLessEqual; U+02270
NotLessGreater; U+02278
NotLessLess; U+0226A U+00338 ≪̸
NotLessSlantEqual; U+02A7D U+00338 ⩽̸
NotLessTilde; U+02274
NotNestedGreaterGreater; U+02AA2 U+00338 ⪢̸
NotNestedLessLess; U+02AA1 U+00338 ⪡̸
NotPrecedes; U+02280
NotPrecedesEqual; U+02AAF U+00338 ⪯̸
NotPrecedesSlantEqual; U+022E0
NotReverseElement; U+0220C
NotRightTriangle; U+022EB
NotRightTriangleBar; U+029D0 U+00338 ⧐̸
NotRightTriangleEqual; U+022ED
NotSquareSubset; U+0228F U+00338 ⊏̸
NotSquareSubsetEqual; U+022E2
NotSquareSuperset; U+02290 U+00338 ⊐̸
NotSquareSupersetEqual; U+022E3
NotSubset; U+02282 U+020D2 ⊂⃒
NotSubsetEqual; U+02288
NotSucceeds; U+02281
NotSucceedsEqual; U+02AB0 U+00338 ⪰̸
NotSucceedsSlantEqual; U+022E1
NotSucceedsTilde; U+0227F U+00338 ≿̸
NotSuperset; U+02283 U+020D2 ⊃⃒
NotSupersetEqual; U+02289
NotTilde; U+02241
NotTildeEqual; U+02244
NotTildeFullEqual; U+02247
NotTildeTilde; U+02249
NotVerticalBar; U+02224
Nscr; U+1D4A9 𝒩
Ntilde; U+000D1 Ñ
Nu; U+0039D Ν
OElig; U+00152 Œ
Oacute; U+000D3 Ó
Ocirc; U+000D4 Ô
Ocy; U+0041E О
Odblac; U+00150 Ő
Ofr; U+1D512 𝔒
Ograve; U+000D2 Ò
Omacr; U+0014C Ō
Omega; U+003A9 Ω
Omicron; U+0039F Ο
Oopf; U+1D546 𝕆
OpenCurlyDoubleQuote; U+0201C
OpenCurlyQuote; U+02018
Or; U+02A54
Oscr; U+1D4AA 𝒪
Oslash; U+000D8 Ø
Otilde; U+000D5 Õ
Otimes; U+02A37
Ouml; U+000D6 Ö
OverBar; U+0203E
OverBrace; U+023DE
OverBracket; U+023B4
OverParenthesis; U+023DC
PartialD; U+02202
Pcy; U+0041F П
Pfr; U+1D513 𝔓
Phi; U+003A6 Φ
Pi; U+003A0 Π
PlusMinus; U+000B1 ±
Poincareplane; U+0210C
Popf; U+02119
Pr; U+02ABB
Precedes; U+0227A
PrecedesEqual; U+02AAF
PrecedesSlantEqual; U+0227C
PrecedesTilde; U+0227E
Prime; U+02033
Product; U+0220F
Proportion; U+02237
Proportional; U+0221D
Pscr; U+1D4AB 𝒫
Psi; U+003A8 Ψ
QUOT; U+00022 "
Qfr; U+1D514 𝔔
Qopf; U+0211A
Qscr; U+1D4AC 𝒬
RBarr; U+02910
REG; U+000AE ®
Racute; U+00154 Ŕ
Rang; U+027EB
Rarr; U+021A0
Rarrtl; U+02916
Rcaron; U+00158 Ř
Rcedil; U+00156 Ŗ
Rcy; U+00420 Р
Re; U+0211C
ReverseElement; U+0220B
ReverseEquilibrium; U+021CB
ReverseUpEquilibrium; U+0296F
Rfr; U+0211C
Rho; U+003A1 Ρ
RightAngleBracket; U+027E9
RightArrow; U+02192
RightArrowBar; U+021E5
RightArrowLeftArrow; U+021C4
RightCeiling; U+02309
RightDoubleBracket; U+027E7
RightDownTeeVector; U+0295D
RightDownVector; U+021C2
RightDownVectorBar; U+02955
RightFloor; U+0230B
RightTee; U+022A2
RightTeeArrow; U+021A6
RightTeeVector; U+0295B
RightTriangle; U+022B3
RightTriangleBar; U+029D0
RightTriangleEqual; U+022B5
RightUpDownVector; U+0294F
RightUpTeeVector; U+0295C
RightUpVector; U+021BE
RightUpVectorBar; U+02954
RightVector; U+021C0
RightVectorBar; U+02953
Rightarrow; U+021D2
Ropf; U+0211D
RoundImplies; U+02970
Rrightarrow; U+021DB
Rscr; U+0211B
Rsh; U+021B1
RuleDelayed; U+029F4
SHCHcy; U+00429 Щ
SHcy; U+00428 Ш
SOFTcy; U+0042C Ь
Sacute; U+0015A Ś
Sc; U+02ABC
Scaron; U+00160 Š
Scedil; U+0015E Ş
Scirc; U+0015C Ŝ
Scy; U+00421 С
Sfr; U+1D516 𝔖
ShortDownArrow; U+02193
ShortLeftArrow; U+02190
ShortRightArrow; U+02192
ShortUpArrow; U+02191
Sigma; U+003A3 Σ
SmallCircle; U+02218
Sopf; U+1D54A 𝕊
Sqrt; U+0221A
Square; U+025A1
SquareIntersection; U+02293
SquareSubset; U+0228F
SquareSubsetEqual; U+02291
SquareSuperset; U+02290
SquareSupersetEqual; U+02292
SquareUnion; U+02294
Sscr; U+1D4AE 𝒮
Star; U+022C6
Sub; U+022D0
Subset; U+022D0
SubsetEqual; U+02286
Succeeds; U+0227B
SucceedsEqual; U+02AB0
SucceedsSlantEqual; U+0227D
SucceedsTilde; U+0227F
SuchThat; U+0220B
Sum; U+02211
Sup; U+022D1
Superset; U+02283
SupersetEqual; U+02287
Supset; U+022D1
THORN; U+000DE Þ
TRADE; U+02122
TSHcy; U+0040B Ћ
TScy; U+00426 Ц
Tab; U+00009
Tau; U+003A4 Τ
Tcaron; U+00164 Ť
Tcedil; U+00162 Ţ
Tcy; U+00422 Т
Tfr; U+1D517 𝔗
Therefore; U+02234
Theta; U+00398 Θ
ThickSpace; U+0205F U+0200A   
ThinSpace; U+02009
Tilde; U+0223C
TildeEqual; U+02243
TildeFullEqual; U+02245
TildeTilde; U+02248
Topf; U+1D54B 𝕋
TripleDot; U+020DB ◌⃛
Tscr; U+1D4AF 𝒯
Tstrok; U+00166 Ŧ
Uacute; U+000DA Ú
Uarr; U+0219F
Uarrocir; U+02949
Ubrcy; U+0040E Ў
Ubreve; U+0016C Ŭ
Ucirc; U+000DB Û
Ucy; U+00423 У
Udblac; U+00170 Ű
Ufr; U+1D518 𝔘
Ugrave; U+000D9 Ù
Umacr; U+0016A Ū
UnderBar; U+0005F _
UnderBrace; U+023DF
UnderBracket; U+023B5
UnderParenthesis; U+023DD
Union; U+022C3
UnionPlus; U+0228E
Uogon; U+00172 Ų
Uopf; U+1D54C 𝕌
UpArrow; U+02191
UpArrowBar; U+02912
UpArrowDownArrow; U+021C5
UpDownArrow; U+02195
UpEquilibrium; U+0296E
UpTee; U+022A5
UpTeeArrow; U+021A5
Uparrow; U+021D1
Updownarrow; U+021D5
UpperLeftArrow; U+02196
UpperRightArrow; U+02197
Upsi; U+003D2 ϒ
Upsilon; U+003A5 Υ
Uring; U+0016E Ů
Uscr; U+1D4B0 𝒰
Utilde; U+00168 Ũ
Uuml; U+000DC Ü
VDash; U+022AB
Vbar; U+02AEB
Vcy; U+00412 В
Vdash; U+022A9
Vdashl; U+02AE6
Vee; U+022C1
Verbar; U+02016
Vert; U+02016
VerticalBar; U+02223
VerticalLine; U+0007C |
VerticalSeparator; U+02758
VerticalTilde; U+02240
VeryThinSpace; U+0200A
Vfr; U+1D519 𝔙
Vopf; U+1D54D 𝕍
Vscr; U+1D4B1 𝒱
Vvdash; U+022AA
Wcirc; U+00174 Ŵ
Wedge; U+022C0
Wfr; U+1D51A 𝔚
Wopf; U+1D54E 𝕎
Wscr; U+1D4B2 𝒲
Xfr; U+1D51B 𝔛
Xi; U+0039E Ξ
Xopf; U+1D54F 𝕏
Xscr; U+1D4B3 𝒳
YAcy; U+0042F Я
YIcy; U+00407 Ї
YUcy; U+0042E Ю
Yacute; U+000DD Ý
Ycirc; U+00176 Ŷ
Ycy; U+0042B Ы
Yfr; U+1D51C 𝔜
Yopf; U+1D550 𝕐
Yscr; U+1D4B4 𝒴
Yuml; U+00178 Ÿ
ZHcy; U+00416 Ж
Zacute; U+00179 Ź
Zcaron; U+0017D Ž
Zcy; U+00417 З
Zdot; U+0017B Ż
ZeroWidthSpace; U+0200B
Zeta; U+00396 Ζ
Zfr; U+02128
Zopf; U+02124
Zscr; U+1D4B5 𝒵
aacute; U+000E1 á
abreve; U+00103 ă
ac; U+0223E
acE; U+0223E U+00333 ∾̳
acd; U+0223F
acirc; U+000E2 â
acute; U+000B4 ´
acy; U+00430 а
aelig; U+000E6 æ
af; U+02061
afr; U+1D51E 𝔞
agrave; U+000E0 à
alefsym; U+02135
aleph; U+02135
alpha; U+003B1 α
amacr; U+00101 ā
amalg; U+02A3F ⨿
amp; U+00026 &
and; U+02227
andand; U+02A55
andd; U+02A5C
andslope; U+02A58
andv; U+02A5A
ang; U+02220
ange; U+029A4
angle; U+02220
angmsd; U+02221
angmsdaa; U+029A8
angmsdab; U+029A9
angmsdac; U+029AA
angmsdad; U+029AB
angmsdae; U+029AC
angmsdaf; U+029AD
angmsdag; U+029AE
angmsdah; U+029AF
angrt; U+0221F
angrtvb; U+022BE
angrtvbd; U+0299D
angsph; U+02222
angst; U+000C5 Å
angzarr; U+0237C
aogon; U+00105 ą
aopf; U+1D552 𝕒
ap; U+02248
apE; U+02A70
apacir; U+02A6F
ape; U+0224A
apid; U+0224B
apos; U+00027 '
approx; U+02248
approxeq; U+0224A
aring; U+000E5 å
ascr; U+1D4B6 𝒶
ast; U+0002A *
asymp; U+02248
asympeq; U+0224D
atilde; U+000E3 ã
auml; U+000E4 ä
awconint; U+02233
awint; U+02A11
bNot; U+02AED
backcong; U+0224C
backepsilon; U+003F6 ϶
backprime; U+02035
backsim; U+0223D
backsimeq; U+022CD
barvee; U+022BD
barwed; U+02305
barwedge; U+02305
bbrk; U+023B5
bbrktbrk; U+023B6
bcong; U+0224C
bcy; U+00431 б
bdquo; U+0201E
becaus; U+02235
because; U+02235
bemptyv; U+029B0
bepsi; U+003F6 ϶
bernou; U+0212C
beta; U+003B2 β
beth; U+02136
between; U+0226C
bfr; U+1D51F 𝔟
bigcap; U+022C2
bigcirc; U+025EF
bigcup; U+022C3
bigodot; U+02A00
bigoplus; U+02A01
bigotimes; U+02A02
bigsqcup; U+02A06
bigstar; U+02605
bigtriangledown; U+025BD
bigtriangleup; U+025B3
biguplus; U+02A04
bigvee; U+022C1
bigwedge; U+022C0
bkarow; U+0290D
blacklozenge; U+029EB
blacksquare; U+025AA
blacktriangle; U+025B4
blacktriangledown; U+025BE
blacktriangleleft; U+025C2
blacktriangleright; U+025B8
blank; U+02423
blk12; U+02592
blk14; U+02591
blk34; U+02593
block; U+02588
bne; U+0003D U+020E5 =⃥
bnequiv; U+02261 U+020E5 ≡⃥
bnot; U+02310
bopf; U+1D553 𝕓
bot; U+022A5
bottom; U+022A5
bowtie; U+022C8
boxDL; U+02557
boxDR; U+02554
boxDl; U+02556
boxDr; U+02553
boxH; U+02550
boxHD; U+02566
boxHU; U+02569
boxHd; U+02564
boxHu; U+02567
boxUL; U+0255D
boxUR; U+0255A
boxUl; U+0255C
boxUr; U+02559
boxV; U+02551
boxVH; U+0256C
boxVL; U+02563
boxVR; U+02560
boxVh; U+0256B
boxVl; U+02562
boxVr; U+0255F
boxbox; U+029C9
boxdL; U+02555
boxdR; U+02552
boxdl; U+02510
boxdr; U+0250C
boxh; U+02500
boxhD; U+02565
boxhU; U+02568
boxhd; U+0252C
boxhu; U+02534
boxminus; U+0229F
boxplus; U+0229E
boxtimes; U+022A0
boxuL; U+0255B
boxuR; U+02558
boxul; U+02518
boxur; U+02514
boxv; U+02502
boxvH; U+0256A
boxvL; U+02561
boxvR; U+0255E
boxvh; U+0253C
boxvl; U+02524
boxvr; U+0251C
bprime; U+02035
breve; U+002D8 ˘
brvbar; U+000A6 ¦
bscr; U+1D4B7 𝒷
bsemi; U+0204F
bsim; U+0223D
bsime; U+022CD
bsol; U+0005C \
bsolb; U+029C5
bsolhsub; U+027C8
bull; U+02022
bullet; U+02022
bump; U+0224E
bumpE; U+02AAE
bumpe; U+0224F
bumpeq; U+0224F
cacute; U+00107 ć
cap; U+02229
capand; U+02A44
capbrcup; U+02A49
capcap; U+02A4B
capcup; U+02A47
capdot; U+02A40
caps; U+02229 U+0FE00 ∩︀
caret; U+02041
caron; U+002C7 ˇ
ccaps; U+02A4D
ccaron; U+0010D č
ccedil; U+000E7 ç
ccirc; U+00109 ĉ
ccups; U+02A4C
ccupssm; U+02A50
cdot; U+0010B ċ
cedil; U+000B8 ¸
cemptyv; U+029B2
cent; U+000A2 ¢
centerdot; U+000B7 ·
cfr; U+1D520 𝔠
chcy; U+00447 ч
check; U+02713
checkmark; U+02713
chi; U+003C7 χ
cir; U+025CB
cirE; U+029C3
circ; U+002C6 ˆ
circeq; U+02257
circlearrowleft; U+021BA
circlearrowright; U+021BB
circledR; U+000AE ®
circledS; U+024C8
circledast; U+0229B
circledcirc; U+0229A
circleddash; U+0229D
cire; U+02257
cirfnint; U+02A10
cirmid; U+02AEF
cirscir; U+029C2
clubs; U+02663
clubsuit; U+02663
colon; U+0003A :
colone; U+02254
coloneq; U+02254
comma; U+0002C ,
commat; U+00040 @
comp; U+02201
compfn; U+02218
complement; U+02201
complexes; U+02102
cong; U+02245
congdot; U+02A6D
conint; U+0222E
copf; U+1D554 𝕔
coprod; U+02210
copy; U+000A9 ©
copysr; U+02117
crarr; U+021B5
cross; U+02717
cscr; U+1D4B8 𝒸
csub; U+02ACF
csube; U+02AD1
csup; U+02AD0
csupe; U+02AD2
ctdot; U+022EF
cudarrl; U+02938
cudarrr; U+02935
cuepr; U+022DE
cuesc; U+022DF
cularr; U+021B6
cularrp; U+0293D
cup; U+0222A
cupbrcap; U+02A48
cupcap; U+02A46
cupcup; U+02A4A
cupdot; U+0228D
cupor; U+02A45
cups; U+0222A U+0FE00 ∪︀
curarr; U+021B7
curarrm; U+0293C
curlyeqprec; U+022DE
curlyeqsucc; U+022DF
curlyvee; U+022CE
curlywedge; U+022CF
curren; U+000A4 ¤
curvearrowleft; U+021B6
curvearrowright; U+021B7
cuvee; U+022CE
cuwed; U+022CF
cwconint; U+02232
cwint; U+02231
cylcty; U+0232D
dArr; U+021D3
dHar; U+02965
dagger; U+02020
daleth; U+02138
darr; U+02193
dash; U+02010
dashv; U+022A3
dbkarow; U+0290F
dblac; U+002DD ˝
dcaron; U+0010F ď
dcy; U+00434 д
dd; U+02146
ddagger; U+02021
ddarr; U+021CA
ddotseq; U+02A77
deg; U+000B0 °
delta; U+003B4 δ
demptyv; U+029B1
dfisht; U+0297F ⥿
dfr; U+1D521 𝔡
dharl; U+021C3
dharr; U+021C2
diam; U+022C4
diamond; U+022C4
diamondsuit; U+02666
diams; U+02666
die; U+000A8 ¨
digamma; U+003DD ϝ
disin; U+022F2
div; U+000F7 ÷
divide; U+000F7 ÷
divideontimes; U+022C7
divonx; U+022C7
djcy; U+00452 ђ
dlcorn; U+0231E
dlcrop; U+0230D
dollar; U+00024 $
dopf; U+1D555 𝕕
dot; U+002D9 ˙
doteq; U+02250
doteqdot; U+02251
dotminus; U+02238
dotplus; U+02214
dotsquare; U+022A1
doublebarwedge; U+02306
downarrow; U+02193
downdownarrows; U+021CA
downharpoonleft; U+021C3
downharpoonright; U+021C2
drbkarow; U+02910
drcorn; U+0231F
drcrop; U+0230C
dscr; U+1D4B9 𝒹
dscy; U+00455 ѕ
dsol; U+029F6
dstrok; U+00111 đ
dtdot; U+022F1
dtri; U+025BF
dtrif; U+025BE
duarr; U+021F5
duhar; U+0296F
dwangle; U+029A6
dzcy; U+0045F џ
dzigrarr; U+027FF
eDDot; U+02A77
eDot; U+02251
eacute; U+000E9 é
easter; U+02A6E
ecaron; U+0011B ě
ecir; U+02256
ecirc; U+000EA ê
ecolon; U+02255
ecy; U+0044D э
edot; U+00117 ė
ee; U+02147
efDot; U+02252
efr; U+1D522 𝔢
eg; U+02A9A
egrave; U+000E8 è
egs; U+02A96
egsdot; U+02A98
el; U+02A99
elinters; U+023E7
ell; U+02113
els; U+02A95
elsdot; U+02A97
emacr; U+00113 ē
empty; U+02205
emptyset; U+02205
emptyv; U+02205
emsp; U+02003
emsp13; U+02004
emsp14; U+02005
eng; U+0014B ŋ
ensp; U+02002
eogon; U+00119 ę
eopf; U+1D556 𝕖
epar; U+022D5
eparsl; U+029E3
eplus; U+02A71
epsi; U+003B5 ε
epsilon; U+003B5 ε
epsiv; U+003F5 ϵ
eqcirc; U+02256
eqcolon; U+02255
eqsim; U+02242
eqslantgtr; U+02A96
eqslantless; U+02A95
equals; U+0003D =
equest; U+0225F
equiv; U+02261
equivDD; U+02A78
eqvparsl; U+029E5
erDot; U+02253
erarr; U+02971
escr; U+0212F
esdot; U+02250
esim; U+02242
eta; U+003B7 η
eth; U+000F0 ð
euml; U+000EB ë
euro; U+020AC
excl; U+00021 !
exist; U+02203
expectation; U+02130
exponentiale; U+02147
fallingdotseq; U+02252
fcy; U+00444 ф
female; U+02640
ffilig; U+0FB03
fflig; U+0FB00
ffllig; U+0FB04
ffr; U+1D523 𝔣
filig; U+0FB01
fjlig; U+00066 U+0006A fj
flat; U+0266D
fllig; U+0FB02
fltns; U+025B1
fnof; U+00192 ƒ
fopf; U+1D557 𝕗
forall; U+02200
fork; U+022D4
forkv; U+02AD9
fpartint; U+02A0D
frac12; U+000BD ½
frac13; U+02153
frac14; U+000BC ¼
frac15; U+02155
frac16; U+02159
frac18; U+0215B
frac23; U+02154
frac25; U+02156
frac34; U+000BE ¾
frac35; U+02157
frac38; U+0215C
frac45; U+02158
frac56; U+0215A
frac58; U+0215D
frac78; U+0215E
frasl; U+02044
frown; U+02322
fscr; U+1D4BB 𝒻
gE; U+02267
gEl; U+02A8C
gacute; U+001F5 ǵ
gamma; U+003B3 γ
gammad; U+003DD ϝ
gap; U+02A86
gbreve; U+0011F ğ
gcirc; U+0011D ĝ
gcy; U+00433 г
gdot; U+00121 ġ
ge; U+02265
gel; U+022DB
geq; U+02265
geqq; U+02267
geqslant; U+02A7E
ges; U+02A7E
gescc; U+02AA9
gesdot; U+02A80
gesdoto; U+02A82
gesdotol; U+02A84
gesl; U+022DB U+0FE00 ⋛︀
gesles; U+02A94
gfr; U+1D524 𝔤
gg; U+0226B
ggg; U+022D9
gimel; U+02137
gjcy; U+00453 ѓ
gl; U+02277
glE; U+02A92
gla; U+02AA5
glj; U+02AA4
gnE; U+02269
gnap; U+02A8A
gnapprox; U+02A8A
gne; U+02A88
gneq; U+02A88
gneqq; U+02269
gnsim; U+022E7
gopf; U+1D558 𝕘
grave; U+00060 `
gscr; U+0210A
gsim; U+02273
gsime; U+02A8E
gsiml; U+02A90
gt; U+0003E >
gtcc; U+02AA7
gtcir; U+02A7A
gtdot; U+022D7
gtlPar; U+02995
gtquest; U+02A7C
gtrapprox; U+02A86
gtrarr; U+02978
gtrdot; U+022D7
gtreqless; U+022DB
gtreqqless; U+02A8C
gtrless; U+02277
gtrsim; U+02273
gvertneqq; U+02269 U+0FE00 ≩︀
gvnE; U+02269 U+0FE00 ≩︀
hArr; U+021D4
hairsp; U+0200A
half; U+000BD ½
hamilt; U+0210B
hardcy; U+0044A ъ
harr; U+02194
harrcir; U+02948
harrw; U+021AD
hbar; U+0210F
hcirc; U+00125 ĥ
hearts; U+02665
heartsuit; U+02665
hellip; U+02026
hercon; U+022B9
hfr; U+1D525 𝔥
hksearow; U+02925
hkswarow; U+02926
hoarr; U+021FF
homtht; U+0223B
hookleftarrow; U+021A9
hookrightarrow; U+021AA
hopf; U+1D559 𝕙
horbar; U+02015
hscr; U+1D4BD 𝒽
hslash; U+0210F
hstrok; U+00127 ħ
hybull; U+02043
hyphen; U+02010
iacute; U+000ED í
ic; U+02063
icirc; U+000EE î
icy; U+00438 и
iecy; U+00435 е
iexcl; U+000A1 ¡
iff; U+021D4
ifr; U+1D526 𝔦
igrave; U+000EC ì
ii; U+02148
iiiint; U+02A0C
iiint; U+0222D
iinfin; U+029DC
iiota; U+02129
ijlig; U+00133 ij
imacr; U+0012B ī
image; U+02111
imagline; U+02110
imagpart; U+02111
imath; U+00131 ı
imof; U+022B7
imped; U+001B5 Ƶ
in; U+02208
incare; U+02105
infin; U+0221E
infintie; U+029DD
inodot; U+00131 ı
int; U+0222B
intcal; U+022BA
integers; U+02124
intercal; U+022BA
intlarhk; U+02A17
intprod; U+02A3C
iocy; U+00451 ё
iogon; U+0012F į
iopf; U+1D55A 𝕚
iota; U+003B9 ι
iprod; U+02A3C
iquest; U+000BF ¿
iscr; U+1D4BE 𝒾
isin; U+02208
isinE; U+022F9
isindot; U+022F5
isins; U+022F4
isinsv; U+022F3
isinv; U+02208
it; U+02062
itilde; U+00129 ĩ
iukcy; U+00456 і
iuml; U+000EF ï
jcirc; U+00135 ĵ
jcy; U+00439 й
jfr; U+1D527 𝔧
jmath; U+00237 ȷ
jopf; U+1D55B 𝕛
jscr; U+1D4BF 𝒿
jsercy; U+00458 ј
jukcy; U+00454 є
kappa; U+003BA κ
kappav; U+003F0 ϰ
kcedil; U+00137 ķ
kcy; U+0043A к
kfr; U+1D528 𝔨
kgreen; U+00138 ĸ
khcy; U+00445 х
kjcy; U+0045C ќ
kopf; U+1D55C 𝕜
kscr; U+1D4C0 𝓀
lAarr; U+021DA
lArr; U+021D0
lAtail; U+0291B
lBarr; U+0290E
lE; U+02266
lEg; U+02A8B
lHar; U+02962
lacute; U+0013A ĺ
laemptyv; U+029B4
lagran; U+02112
lambda; U+003BB λ
lang; U+027E8
langd; U+02991
langle; U+027E8
lap; U+02A85
laquo; U+000AB «
larr; U+02190
larrb; U+021E4
larrbfs; U+0291F
larrfs; U+0291D
larrhk; U+021A9
larrlp; U+021AB
larrpl; U+02939
larrsim; U+02973
larrtl; U+021A2
lat; U+02AAB
latail; U+02919
late; U+02AAD
lates; U+02AAD U+0FE00 ⪭︀
lbarr; U+0290C
lbbrk; U+02772
lbrace; U+0007B {
lbrack; U+0005B [
lbrke; U+0298B
lbrksld; U+0298F
lbrkslu; U+0298D
lcaron; U+0013E ľ
lcedil; U+0013C ļ
lceil; U+02308
lcub; U+0007B {
lcy; U+0043B л
ldca; U+02936
ldquo; U+0201C
ldquor; U+0201E
ldrdhar; U+02967
ldrushar; U+0294B
ldsh; U+021B2
le; U+02264
leftarrow; U+02190
leftarrowtail; U+021A2
leftharpoondown; U+021BD
leftharpoonup; U+021BC
leftleftarrows; U+021C7
leftrightarrow; U+02194
leftrightarrows; U+021C6
leftrightharpoons; U+021CB
leftrightsquigarrow; U+021AD
leftthreetimes; U+022CB
leg; U+022DA
leq; U+02264
leqq; U+02266
leqslant; U+02A7D
les; U+02A7D
lescc; U+02AA8
lesdot; U+02A7F ⩿
lesdoto; U+02A81
lesdotor; U+02A83
lesg; U+022DA U+0FE00 ⋚︀
lesges; U+02A93
lessapprox; U+02A85
lessdot; U+022D6
lesseqgtr; U+022DA
lesseqqgtr; U+02A8B
lessgtr; U+02276
lesssim; U+02272
lfisht; U+0297C
lfloor; U+0230A
lfr; U+1D529 𝔩
lg; U+02276
lgE; U+02A91
lhard; U+021BD
lharu; U+021BC
lharul; U+0296A
lhblk; U+02584
ljcy; U+00459 љ
ll; U+0226A
llarr; U+021C7
llcorner; U+0231E
llhard; U+0296B
lltri; U+025FA
lmidot; U+00140 ŀ
lmoust; U+023B0
lmoustache; U+023B0
lnE; U+02268
lnap; U+02A89
lnapprox; U+02A89
lne; U+02A87
lneq; U+02A87
lneqq; U+02268
lnsim; U+022E6
loang; U+027EC
loarr; U+021FD
lobrk; U+027E6
longleftarrow; U+027F5
longleftrightarrow; U+027F7
longmapsto; U+027FC
longrightarrow; U+027F6
looparrowleft; U+021AB
looparrowright; U+021AC
lopar; U+02985
lopf; U+1D55D 𝕝
loplus; U+02A2D
lotimes; U+02A34
lowast; U+02217
lowbar; U+0005F _
loz; U+025CA
lozenge; U+025CA
lozf; U+029EB
lpar; U+00028 (
lparlt; U+02993
lrarr; U+021C6
lrcorner; U+0231F
lrhar; U+021CB
lrhard; U+0296D
lrm; U+0200E
lrtri; U+022BF
lsaquo; U+02039
lscr; U+1D4C1 𝓁
lsh; U+021B0
lsim; U+02272
lsime; U+02A8D
lsimg; U+02A8F
lsqb; U+0005B [
lsquo; U+02018
lsquor; U+0201A
lstrok; U+00142 ł
lt; U+0003C <
ltcc; U+02AA6
ltcir; U+02A79
ltdot; U+022D6
lthree; U+022CB
ltimes; U+022C9
ltlarr; U+02976
ltquest; U+02A7B
ltrPar; U+02996
ltri; U+025C3
ltrie; U+022B4
ltrif; U+025C2
lurdshar; U+0294A
luruhar; U+02966
lvertneqq; U+02268 U+0FE00 ≨︀
lvnE; U+02268 U+0FE00 ≨︀
mDDot; U+0223A
macr; U+000AF ¯
male; U+02642
malt; U+02720
maltese; U+02720
map; U+021A6
mapsto; U+021A6
mapstodown; U+021A7
mapstoleft; U+021A4
mapstoup; U+021A5
marker; U+025AE
mcomma; U+02A29
mcy; U+0043C м
mdash; U+02014
measuredangle; U+02221
mfr; U+1D52A 𝔪
mho; U+02127
micro; U+000B5 µ
mid; U+02223
midast; U+0002A *
midcir; U+02AF0
middot; U+000B7 ·
minus; U+02212
minusb; U+0229F
minusd; U+02238
minusdu; U+02A2A
mlcp; U+02ADB
mldr; U+02026
mnplus; U+02213
models; U+022A7
mopf; U+1D55E 𝕞
mp; U+02213
mscr; U+1D4C2 𝓂
mstpos; U+0223E
mu; U+003BC μ
multimap; U+022B8
mumap; U+022B8
nGg; U+022D9 U+00338 ⋙̸
nGt; U+0226B U+020D2 ≫⃒
nGtv; U+0226B U+00338 ≫̸
nLeftarrow; U+021CD
nLeftrightarrow; U+021CE
nLl; U+022D8 U+00338 ⋘̸
nLt; U+0226A U+020D2 ≪⃒
nLtv; U+0226A U+00338 ≪̸
nRightarrow; U+021CF
nVDash; U+022AF
nVdash; U+022AE
nabla; U+02207
nacute; U+00144 ń
nang; U+02220 U+020D2 ∠⃒
nap; U+02249
napE; U+02A70 U+00338 ⩰̸
napid; U+0224B U+00338 ≋̸
napos; U+00149 ʼn
napprox; U+02249
natur; U+0266E
natural; U+0266E
naturals; U+02115
nbsp; U+000A0  
nbump; U+0224E U+00338 ≎̸
nbumpe; U+0224F U+00338 ≏̸
ncap; U+02A43
ncaron; U+00148 ň
ncedil; U+00146 ņ
ncong; U+02247
ncongdot; U+02A6D U+00338 ⩭̸
ncup; U+02A42
ncy; U+0043D н
ndash; U+02013
ne; U+02260
neArr; U+021D7
nearhk; U+02924
nearr; U+02197
nearrow; U+02197
nedot; U+02250 U+00338 ≐̸
nequiv; U+02262
nesear; U+02928
nesim; U+02242 U+00338 ≂̸
nexist; U+02204
nexists; U+02204
nfr; U+1D52B 𝔫
ngE; U+02267 U+00338 ≧̸
nge; U+02271
ngeq; U+02271
ngeqq; U+02267 U+00338 ≧̸
ngeqslant; U+02A7E U+00338 ⩾̸
nges; U+02A7E U+00338 ⩾̸
ngsim; U+02275
ngt; U+0226F
ngtr; U+0226F
nhArr; U+021CE
nharr; U+021AE
nhpar; U+02AF2
ni; U+0220B
nis; U+022FC
nisd; U+022FA
niv; U+0220B
njcy; U+0045A њ
nlArr; U+021CD
nlE; U+02266 U+00338 ≦̸
nlarr; U+0219A
nldr; U+02025
nle; U+02270
nleftarrow; U+0219A
nleftrightarrow; U+021AE
nleq; U+02270
nleqq; U+02266 U+00338 ≦̸
nleqslant; U+02A7D U+00338 ⩽̸
nles; U+02A7D U+00338 ⩽̸
nless; U+0226E
nlsim; U+02274
nlt; U+0226E
nltri; U+022EA
nltrie; U+022EC
nmid; U+02224
nopf; U+1D55F 𝕟
not; U+000AC ¬
notin; U+02209
notinE; U+022F9 U+00338 ⋹̸
notindot; U+022F5 U+00338 ⋵̸
notinva; U+02209
notinvb; U+022F7
notinvc; U+022F6
notni; U+0220C
notniva; U+0220C
notnivb; U+022FE
notnivc; U+022FD
npar; U+02226
nparallel; U+02226
nparsl; U+02AFD U+020E5 ⫽⃥
npart; U+02202 U+00338 ∂̸
npolint; U+02A14
npr; U+02280
nprcue; U+022E0
npre; U+02AAF U+00338 ⪯̸
nprec; U+02280
npreceq; U+02AAF U+00338 ⪯̸
nrArr; U+021CF
nrarr; U+0219B
nrarrc; U+02933 U+00338 ⤳̸
nrarrw; U+0219D U+00338 ↝̸
nrightarrow; U+0219B
nrtri; U+022EB
nrtrie; U+022ED
nsc; U+02281
nsccue; U+022E1
nsce; U+02AB0 U+00338 ⪰̸
nscr; U+1D4C3 𝓃
nshortmid; U+02224
nshortparallel; U+02226
nsim; U+02241
nsime; U+02244
nsimeq; U+02244
nsmid; U+02224
nspar; U+02226
nsqsube; U+022E2
nsqsupe; U+022E3
nsub; U+02284
nsubE; U+02AC5 U+00338 ⫅̸
nsube; U+02288
nsubset; U+02282 U+020D2 ⊂⃒
nsubseteq; U+02288
nsubseteqq; U+02AC5 U+00338 ⫅̸
nsucc; U+02281
nsucceq; U+02AB0 U+00338 ⪰̸
nsup; U+02285
nsupE; U+02AC6 U+00338 ⫆̸
nsupe; U+02289
nsupset; U+02283 U+020D2 ⊃⃒
nsupseteq; U+02289
nsupseteqq; U+02AC6 U+00338 ⫆̸
ntgl; U+02279
ntilde; U+000F1 ñ
ntlg; U+02278
ntriangleleft; U+022EA
ntrianglelefteq; U+022EC
ntriangleright; U+022EB
ntrianglerighteq; U+022ED
nu; U+003BD ν
num; U+00023 #
numero; U+02116
numsp; U+02007
nvDash; U+022AD
nvHarr; U+02904
nvap; U+0224D U+020D2 ≍⃒
nvdash; U+022AC
nvge; U+02265 U+020D2 ≥⃒
nvgt; U+0003E U+020D2 >⃒
nvinfin; U+029DE
nvlArr; U+02902
nvle; U+02264 U+020D2 ≤⃒
nvlt; U+0003C U+020D2 <⃒
nvltrie; U+022B4 U+020D2 ⊴⃒
nvrArr; U+02903
nvrtrie; U+022B5 U+020D2 ⊵⃒
nvsim; U+0223C U+020D2 ∼⃒
nwArr; U+021D6
nwarhk; U+02923
nwarr; U+02196
nwarrow; U+02196
nwnear; U+02927
oS; U+024C8
oacute; U+000F3 ó
oast; U+0229B
ocir; U+0229A
ocirc; U+000F4 ô
ocy; U+0043E о
odash; U+0229D
odblac; U+00151 ő
odiv; U+02A38
odot; U+02299
odsold; U+029BC
oelig; U+00153 œ
ofcir; U+029BF ⦿
ofr; U+1D52C 𝔬
ogon; U+002DB ˛
ograve; U+000F2 ò
ogt; U+029C1
ohbar; U+029B5
ohm; U+003A9 Ω
oint; U+0222E
olarr; U+021BA
olcir; U+029BE
olcross; U+029BB
oline; U+0203E
olt; U+029C0
omacr; U+0014D ō
omega; U+003C9 ω
omicron; U+003BF ο
omid; U+029B6
ominus; U+02296
oopf; U+1D560 𝕠
opar; U+029B7
operp; U+029B9
oplus; U+02295
or; U+02228
orarr; U+021BB
ord; U+02A5D
order; U+02134
orderof; U+02134
ordf; U+000AA ª
ordm; U+000BA º
origof; U+022B6
oror; U+02A56
orslope; U+02A57
orv; U+02A5B
oscr; U+02134
oslash; U+000F8 ø
osol; U+02298
otilde; U+000F5 õ
otimes; U+02297
otimesas; U+02A36
ouml; U+000F6 ö
ovbar; U+0233D
par; U+02225
para; U+000B6
parallel; U+02225
parsim; U+02AF3
parsl; U+02AFD
part; U+02202
pcy; U+0043F п
percnt; U+00025 %
period; U+0002E .
permil; U+02030
perp; U+022A5
pertenk; U+02031
pfr; U+1D52D 𝔭
phi; U+003C6 φ
phiv; U+003D5 ϕ
phmmat; U+02133
phone; U+0260E
pi; U+003C0 π
pitchfork; U+022D4
piv; U+003D6 ϖ
planck; U+0210F
planckh; U+0210E
plankv; U+0210F
plus; U+0002B +
plusacir; U+02A23
plusb; U+0229E
pluscir; U+02A22
plusdo; U+02214
plusdu; U+02A25
pluse; U+02A72
plusmn; U+000B1 ±
plussim; U+02A26
plustwo; U+02A27
pm; U+000B1 ±
pointint; U+02A15
popf; U+1D561 𝕡
pound; U+000A3 £
pr; U+0227A
prE; U+02AB3
prap; U+02AB7
prcue; U+0227C
pre; U+02AAF
prec; U+0227A
precapprox; U+02AB7
preccurlyeq; U+0227C
preceq; U+02AAF
precnapprox; U+02AB9
precneqq; U+02AB5
precnsim; U+022E8
precsim; U+0227E
prime; U+02032
primes; U+02119
prnE; U+02AB5
prnap; U+02AB9
prnsim; U+022E8
prod; U+0220F
profalar; U+0232E
profline; U+02312
profsurf; U+02313
prop; U+0221D
propto; U+0221D
prsim; U+0227E
prurel; U+022B0
pscr; U+1D4C5 𝓅
psi; U+003C8 ψ
puncsp; U+02008
qfr; U+1D52E 𝔮
qint; U+02A0C
qopf; U+1D562 𝕢
qprime; U+02057
qscr; U+1D4C6 𝓆
quaternions; U+0210D
quatint; U+02A16
quest; U+0003F ?
questeq; U+0225F
quot; U+00022 "
rAarr; U+021DB
rArr; U+021D2
rAtail; U+0291C
rBarr; U+0290F
rHar; U+02964
race; U+0223D U+00331 ∽̱
racute; U+00155 ŕ
radic; U+0221A
raemptyv; U+029B3
rang; U+027E9
rangd; U+02992
range; U+029A5
rangle; U+027E9
raquo; U+000BB »
rarr; U+02192
rarrap; U+02975
rarrb; U+021E5
rarrbfs; U+02920
rarrc; U+02933
rarrfs; U+0291E
rarrhk; U+021AA
rarrlp; U+021AC
rarrpl; U+02945
rarrsim; U+02974
rarrtl; U+021A3
rarrw; U+0219D
ratail; U+0291A
ratio; U+02236
rationals; U+0211A
rbarr; U+0290D
rbbrk; U+02773
rbrace; U+0007D }
rbrack; U+0005D ]
rbrke; U+0298C
rbrksld; U+0298E
rbrkslu; U+02990
rcaron; U+00159 ř
rcedil; U+00157 ŗ
rceil; U+02309
rcub; U+0007D }
rcy; U+00440 р
rdca; U+02937
rdldhar; U+02969
rdquo; U+0201D
rdquor; U+0201D
rdsh; U+021B3
real; U+0211C
realine; U+0211B
realpart; U+0211C
reals; U+0211D
rect; U+025AD
reg; U+000AE ®
rfisht; U+0297D
rfloor; U+0230B
rfr; U+1D52F 𝔯
rhard; U+021C1
rharu; U+021C0
rharul; U+0296C
rho; U+003C1 ρ
rhov; U+003F1 ϱ
rightarrow; U+02192
rightarrowtail; U+021A3
rightharpoondown; U+021C1
rightharpoonup; U+021C0
rightleftarrows; U+021C4
rightleftharpoons; U+021CC
rightrightarrows; U+021C9
rightsquigarrow; U+0219D
rightthreetimes; U+022CC
ring; U+002DA ˚
risingdotseq; U+02253
rlarr; U+021C4
rlhar; U+021CC
rlm; U+0200F
rmoust; U+023B1
rmoustache; U+023B1
rnmid; U+02AEE
roang; U+027ED
roarr; U+021FE
robrk; U+027E7
ropar; U+02986
ropf; U+1D563 𝕣
roplus; U+02A2E
rotimes; U+02A35
rpar; U+00029 )
rpargt; U+02994
rppolint; U+02A12
rrarr; U+021C9
rsaquo; U+0203A
rscr; U+1D4C7 𝓇
rsh; U+021B1
rsqb; U+0005D ]
rsquo; U+02019
rsquor; U+02019
rthree; U+022CC
rtimes; U+022CA
rtri; U+025B9
rtrie; U+022B5
rtrif; U+025B8
rtriltri; U+029CE
ruluhar; U+02968
rx; U+0211E
sacute; U+0015B ś
sbquo; U+0201A
sc; U+0227B
scE; U+02AB4
scap; U+02AB8
scaron; U+00161 š
sccue; U+0227D
sce; U+02AB0
scedil; U+0015F ş
scirc; U+0015D ŝ
scnE; U+02AB6
scnap; U+02ABA
scnsim; U+022E9
scpolint; U+02A13
scsim; U+0227F
scy; U+00441 с
sdot; U+022C5
sdotb; U+022A1
sdote; U+02A66
seArr; U+021D8
searhk; U+02925
searr; U+02198
searrow; U+02198
sect; U+000A7 §
semi; U+0003B ;
seswar; U+02929
setminus; U+02216
setmn; U+02216
sext; U+02736
sfr; U+1D530 𝔰
sfrown; U+02322
sharp; U+0266F
shchcy; U+00449 щ
shcy; U+00448 ш
shortmid; U+02223
shortparallel; U+02225
shy; U+000AD ­
sigma; U+003C3 σ
sigmaf; U+003C2 ς
sigmav; U+003C2 ς
sim; U+0223C
simdot; U+02A6A
sime; U+02243
simeq; U+02243
simg; U+02A9E
simgE; U+02AA0
siml; U+02A9D
simlE; U+02A9F
simne; U+02246
simplus; U+02A24
simrarr; U+02972
slarr; U+02190
smallsetminus; U+02216
smashp; U+02A33
smeparsl; U+029E4
smid; U+02223
smile; U+02323
smt; U+02AAA
smte; U+02AAC
smtes; U+02AAC U+0FE00 ⪬︀
softcy; U+0044C ь
sol; U+0002F /
solb; U+029C4
solbar; U+0233F
sopf; U+1D564 𝕤
spades; U+02660
spadesuit; U+02660
spar; U+02225
sqcap; U+02293
sqcaps; U+02293 U+0FE00 ⊓︀
sqcup; U+02294
sqcups; U+02294 U+0FE00 ⊔︀
sqsub; U+0228F
sqsube; U+02291
sqsubset; U+0228F
sqsubseteq; U+02291
sqsup; U+02290
sqsupe; U+02292
sqsupset; U+02290
sqsupseteq; U+02292
squ; U+025A1
square; U+025A1
squarf; U+025AA
squf; U+025AA
srarr; U+02192
sscr; U+1D4C8 𝓈
ssetmn; U+02216
ssmile; U+02323
sstarf; U+022C6
star; U+02606
starf; U+02605
straightepsilon; U+003F5 ϵ
straightphi; U+003D5 ϕ
strns; U+000AF ¯
sub; U+02282
subE; U+02AC5
subdot; U+02ABD
sube; U+02286
subedot; U+02AC3
submult; U+02AC1
subnE; U+02ACB
subne; U+0228A
subplus; U+02ABF ⪿
subrarr; U+02979
subset; U+02282
subseteq; U+02286
subseteqq; U+02AC5
subsetneq; U+0228A
subsetneqq; U+02ACB
subsim; U+02AC7
subsub; U+02AD5
subsup; U+02AD3
succ; U+0227B
succapprox; U+02AB8
succcurlyeq; U+0227D
succeq; U+02AB0
succnapprox; U+02ABA
succneqq; U+02AB6
succnsim; U+022E9
succsim; U+0227F
sum; U+02211
sung; U+0266A
sup; U+02283
sup1; U+000B9 ¹
sup2; U+000B2 ²
sup3; U+000B3 ³
supE; U+02AC6
supdot; U+02ABE
supdsub; U+02AD8
supe; U+02287
supedot; U+02AC4
suphsol; U+027C9
suphsub; U+02AD7
suplarr; U+0297B
supmult; U+02AC2
supnE; U+02ACC
supne; U+0228B
supplus; U+02AC0
supset; U+02283
supseteq; U+02287
supseteqq; U+02AC6
supsetneq; U+0228B
supsetneqq; U+02ACC
supsim; U+02AC8
supsub; U+02AD4
supsup; U+02AD6
swArr; U+021D9
swarhk; U+02926
swarr; U+02199
swarrow; U+02199
swnwar; U+0292A
szlig; U+000DF ß
target; U+02316
tau; U+003C4 τ
tbrk; U+023B4
tcaron; U+00165 ť
tcedil; U+00163 ţ
tcy; U+00442 т
tdot; U+020DB ◌⃛
telrec; U+02315
tfr; U+1D531 𝔱
there4; U+02234
therefore; U+02234
theta; U+003B8 θ
thetasym; U+003D1 ϑ
thetav; U+003D1 ϑ
thickapprox; U+02248
thicksim; U+0223C
thinsp; U+02009
thkap; U+02248
thksim; U+0223C
thorn; U+000FE þ
tilde; U+002DC ˜
times; U+000D7 ×
timesb; U+022A0
timesbar; U+02A31
timesd; U+02A30
tint; U+0222D
toea; U+02928
top; U+022A4
topbot; U+02336
topcir; U+02AF1
topf; U+1D565 𝕥
topfork; U+02ADA
tosa; U+02929
tprime; U+02034
trade; U+02122
triangle; U+025B5
triangledown; U+025BF
triangleleft; U+025C3
trianglelefteq; U+022B4
triangleq; U+0225C
triangleright; U+025B9
trianglerighteq; U+022B5
tridot; U+025EC
trie; U+0225C
triminus; U+02A3A
triplus; U+02A39
trisb; U+029CD
tritime; U+02A3B
trpezium; U+023E2
tscr; U+1D4C9 𝓉
tscy; U+00446 ц
tshcy; U+0045B ћ
tstrok; U+00167 ŧ
twixt; U+0226C
twoheadleftarrow; U+0219E
twoheadrightarrow; U+021A0
uArr; U+021D1
uHar; U+02963
uacute; U+000FA ú
uarr; U+02191
ubrcy; U+0045E ў
ubreve; U+0016D ŭ
ucirc; U+000FB û
ucy; U+00443 у
udarr; U+021C5
udblac; U+00171 ű
udhar; U+0296E
ufisht; U+0297E
ufr; U+1D532 𝔲
ugrave; U+000F9 ù
uharl; U+021BF
uharr; U+021BE
uhblk; U+02580
ulcorn; U+0231C
ulcorner; U+0231C
ulcrop; U+0230F
ultri; U+025F8
umacr; U+0016B ū
uml; U+000A8 ¨
uogon; U+00173 ų
uopf; U+1D566 𝕦
uparrow; U+02191
updownarrow; U+02195
upharpoonleft; U+021BF
upharpoonright; U+021BE
uplus; U+0228E
upsi; U+003C5 υ
upsih; U+003D2 ϒ
upsilon; U+003C5 υ
upuparrows; U+021C8
urcorn; U+0231D
urcorner; U+0231D
urcrop; U+0230E
uring; U+0016F ů
urtri; U+025F9
uscr; U+1D4CA 𝓊
utdot; U+022F0
utilde; U+00169 ũ
utri; U+025B5
utrif; U+025B4
uuarr; U+021C8
uuml; U+000FC ü
uwangle; U+029A7
vArr; U+021D5
vBar; U+02AE8
vBarv; U+02AE9
vDash; U+022A8
vangrt; U+0299C
varepsilon; U+003F5 ϵ
varkappa; U+003F0 ϰ
varnothing; U+02205
varphi; U+003D5 ϕ
varpi; U+003D6 ϖ
varpropto; U+0221D
varr; U+02195
varrho; U+003F1 ϱ
varsigma; U+003C2 ς
varsubsetneq; U+0228A U+0FE00 ⊊︀
varsubsetneqq; U+02ACB U+0FE00 ⫋︀
varsupsetneq; U+0228B U+0FE00 ⊋︀
varsupsetneqq; U+02ACC U+0FE00 ⫌︀
vartheta; U+003D1 ϑ
vartriangleleft; U+022B2
vartriangleright; U+022B3
vcy; U+00432 в
vdash; U+022A2
vee; U+02228
veebar; U+022BB
veeeq; U+0225A
vellip; U+022EE
verbar; U+0007C |
vert; U+0007C |
vfr; U+1D533 𝔳
vltri; U+022B2
vnsub; U+02282 U+020D2 ⊂⃒
vnsup; U+02283 U+020D2 ⊃⃒
vopf; U+1D567 𝕧
vprop; U+0221D
vrtri; U+022B3
vscr; U+1D4CB 𝓋
vsubnE; U+02ACB U+0FE00 ⫋︀
vsubne; U+0228A U+0FE00 ⊊︀
vsupnE; U+02ACC U+0FE00 ⫌︀
vsupne; U+0228B U+0FE00 ⊋︀
vzigzag; U+0299A
wcirc; U+00175 ŵ
wedbar; U+02A5F
wedge; U+02227
wedgeq; U+02259
weierp; U+02118
wfr; U+1D534 𝔴
wopf; U+1D568 𝕨
wp; U+02118
wr; U+02240
wreath; U+02240
wscr; U+1D4CC 𝓌
xcap; U+022C2
xcirc; U+025EF
xcup; U+022C3
xdtri; U+025BD
xfr; U+1D535 𝔵
xhArr; U+027FA
xharr; U+027F7
xi; U+003BE ξ
xlArr; U+027F8
xlarr; U+027F5
xmap; U+027FC
xnis; U+022FB
xodot; U+02A00
xopf; U+1D569 𝕩
xoplus; U+02A01
xotime; U+02A02
xrArr; U+027F9
xrarr; U+027F6
xscr; U+1D4CD 𝓍
xsqcup; U+02A06
xuplus; U+02A04
xutri; U+025B3
xvee; U+022C1
xwedge; U+022C0
yacute; U+000FD ý
yacy; U+0044F я
ycirc; U+00177 ŷ
ycy; U+0044B ы
yen; U+000A5 ¥
yfr; U+1D536 𝔶
yicy; U+00457 ї
yopf; U+1D56A 𝕪
yscr; U+1D4CE 𝓎
yucy; U+0044E ю
yuml; U+000FF ÿ
zacute; U+0017A ź
zcaron; U+0017E ž
zcy; U+00437 з
zdot; U+0017C ż
zeetrf; U+02128
zeta; U+003B6 ζ
zfr; U+1D537 𝔷
zhcy; U+00436 ж
zigrarr; U+021DD
zopf; U+1D56B 𝕫
zscr; U+1D4CF 𝓏
zwj; U+0200D
zwnj; U+0200C
AElig U+000C6 Æ
AMP U+00026 &
Aacute U+000C1 Á
Acirc U+000C2 Â
Agrave U+000C0 À
Aring U+000C5 Å
Atilde U+000C3 Ã
Auml U+000C4 Ä
COPY U+000A9 ©
Ccedil U+000C7 Ç
ETH U+000D0 Ð
Eacute U+000C9 É
Ecirc U+000CA Ê
Egrave U+000C8 È
Euml U+000CB Ë
GT U+0003E >
Iacute U+000CD Í
Icirc U+000CE Î
Igrave U+000CC Ì
Iuml U+000CF Ï
LT U+0003C <
Ntilde U+000D1 Ñ
Oacute U+000D3 Ó
Ocirc U+000D4 Ô
Ograve U+000D2 Ò
Oslash U+000D8 Ø
Otilde U+000D5 Õ
Ouml U+000D6 Ö
QUOT U+00022 "
REG U+000AE ®
THORN U+000DE Þ
Uacute U+000DA Ú
Ucirc U+000DB Û
Ugrave U+000D9 Ù
Uuml U+000DC Ü
Yacute U+000DD Ý
aacute U+000E1 á
acirc U+000E2 â
acute U+000B4 ´
aelig U+000E6 æ
agrave U+000E0 à
amp U+00026 &
aring U+000E5 å
atilde U+000E3 ã
auml U+000E4 ä
brvbar U+000A6 ¦
ccedil U+000E7 ç
cedil U+000B8 ¸
cent U+000A2 ¢
copy U+000A9 ©
curren U+000A4 ¤
deg U+000B0 °
divide U+000F7 ÷
eacute U+000E9 é
ecirc U+000EA ê
egrave U+000E8 è
eth U+000F0 ð
euml U+000EB ë
frac12 U+000BD ½
frac14 U+000BC ¼
frac34 U+000BE ¾
gt U+0003E >
iacute U+000ED í
icirc U+000EE î
iexcl U+000A1 ¡
igrave U+000EC ì
iquest U+000BF ¿
iuml U+000EF ï
laquo U+000AB «
lt U+0003C <
macr U+000AF ¯
micro U+000B5 µ
middot U+000B7 ·
nbsp U+000A0  
not U+000AC ¬
ntilde U+000F1 ñ
oacute U+000F3 ó
ocirc U+000F4 ô
ograve U+000F2 ò
ordf U+000AA ª
ordm U+000BA º
oslash U+000F8 ø
otilde U+000F5 õ
ouml U+000F6 ö
para U+000B6
plusmn U+000B1 ±
pound U+000A3 £
quot U+00022 "
raquo U+000BB »
reg U+000AE ®
sect U+000A7 §
shy U+000AD ­
sup1 U+000B9 ¹
sup2 U+000B2 ²
sup3 U+000B3 ³
szlig U+000DF ß
thorn U+000FE þ
times U+000D7 ×
uacute U+000FA ú
ucirc U+000FB û
ugrave U+000F9 ù
uml U+000A8 ¨
uuml U+000FC ü
yacute U+000FD ý
yen U+000A5 ¥
yuml U+000FF ÿ
"), set the escape flag to false. + + In any case, emit the input character as a character token. Stay + in the data state. + + EOF + Emit an end-of-file token. + + Anything else + Emit the input character as a character token. Stay in the data + state. + + 8.2.4.2 Character reference data state + + (This cannot happen if the content model flag is set to the CDATA + state.) + + Attempt to consume a character reference, with no additional allowed + character. + + If nothing is returned, emit a U+0026 AMPERSAND character token. + + Otherwise, emit the character token that was returned. + + Finally, switch to the data state. + + 8.2.4.3 Tag open state + + The behavior of this state depends on the content model flag. + + If the content model flag is set to the RCDATA or CDATA states + Consume the next input character. If it is a U+002F SOLIDUS (/) + character, switch to the close tag open state. Otherwise, emit a + U+003C LESS-THAN SIGN character token and reconsume the current + input character in the data state. + + If the content model flag is set to the PCDATA state + Consume the next input character: + + U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. + + U+002F SOLIDUS (/) + Switch to the close tag open state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL + LETTER Z + Create a new start tag token, set its tag name to the + lowercase version of the input character (add 0x0020 to + the character's code point), then switch to the tag name + state. (Don't emit the token yet; further details will be + filled in before it is emitted.) + + U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z + Create a new start tag token, set its tag name to the + input character, then switch to the tag name state. (Don't + emit the token yet; further details will be filled in + before it is emitted.) + + U+003E GREATER-THAN SIGN (>) + Parse error. 
Emit a U+003C LESS-THAN SIGN character token + and a U+003E GREATER-THAN SIGN character token. Switch to + the data state. + + U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. + + Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token + and reconsume the current input character in the data + state. + + 8.2.4.4 Close tag open state + + If the content model flag is set to the RCDATA or CDATA states but no + start tag token has ever been emitted by this instance of the tokeniser + (fragment case), or, if the content model flag is set to the RCDATA or + CDATA states and the next few characters do not match the tag name of + the last start tag token emitted (compared in an ASCII case-insensitive + manner), or if they do but they are not immediately followed by one of + the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + + ...then emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS + character token, and switch to the data state to process the next input + character. + + Otherwise, if the content model flag is set to the PCDATA state, or if + the next few characters do match that tag name, consume the next input + character: + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Create a new end tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's + code point), then switch to the tag name state. (Don't emit the + token yet; further details will be filled in before it is + emitted.) + + U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z + Create a new end tag token, set its tag name to the input + character, then switch to the tag name state. (Don't emit the + token yet; further details will be filled in before it is + emitted.) + + U+003E GREATER-THAN SIGN (>) + Parse error. 
Switch to the data state. + + EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+002F SOLIDUS character token. Reconsume the EOF character in + the data state. + + Anything else + Parse error. Switch to the bogus comment state. + + 8.2.4.5 Tag name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. + + U+002F SOLIDUS (/) + Switch to the self-closing start tag state. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Append the lowercase version of the current input character (add + 0x0020 to the character's code point) to the current tag token's + tag name. Stay in the tag name state. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. + + Anything else + Append the current input character to the current tag token's + tag name. Stay in the tag name state. + + 8.2.4.6 Before attribute name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. + + U+002F SOLIDUS (/) + Switch to the self-closing start tag state. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Start a new attribute in the current tag token. Set that + attribute's name to the lowercase version of the current input + character (add 0x0020 to the character's code point), and its + value to the empty string. Switch to the attribute name state. + + U+0022 QUOTATION MARK (") + U+0027 APOSTROPHE (') + U+003D EQUALS SIGN (=) + Parse error. Treat it as per the "anything else" entry below. + + EOF + Parse error. Emit the current tag token. 
Reconsume the EOF + character in the data state. + + Anything else + Start a new attribute in the current tag token. Set that + attribute's name to the current input character, and its value + to the empty string. Switch to the attribute name state. + + 8.2.4.7 Attribute name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the after attribute name state. + + U+002F SOLIDUS (/) + Switch to the self-closing start tag state. + + U+003D EQUALS SIGN (=) + Switch to the before attribute value state. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Append the lowercase version of the current input character (add + 0x0020 to the character's code point) to the current attribute's + name. Stay in the attribute name state. + + U+0022 QUOTATION MARK (") + U+0027 APOSTROPHE (') + Parse error. Treat it as per the "anything else" entry below. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. + + Anything else + Append the current input character to the current attribute's + name. Stay in the attribute name state. + + When the user agent leaves the attribute name state (and before + emitting the tag token, if appropriate), the complete attribute's name + must be compared to the other attributes on the same token; if there is + already an attribute on the token with the exact same name, then this + is a parse error and the new attribute must be dropped, along with the + value that gets associated with it (if any). + + 8.2.4.8 After attribute name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. + + U+002F SOLIDUS (/) + Switch to the self-closing start tag state. 
+ + U+003D EQUALS SIGN (=) + Switch to the before attribute value state. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Start a new attribute in the current tag token. Set that + attribute's name to the lowercase version of the current input + character (add 0x0020 to the character's code point), and its + value to the empty string. Switch to the attribute name state. + + U+0022 QUOTATION MARK (") + U+0027 APOSTROPHE (') + Parse error. Treat it as per the "anything else" entry below. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. + + Anything else + Start a new attribute in the current tag token. Set that + attribute's name to the current input character, and its value + to the empty string. Switch to the attribute name state. + + 8.2.4.9 Before attribute value state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. + + U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. + + U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. + + U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Emit the current tag token. Switch to the data + state. + + U+003D EQUALS SIGN (=) + Parse error. Treat it as per the "anything else" entry below. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF character + in the data state. + + Anything else + Append the current input character to the current attribute's + value. Switch to the attribute value (unquoted) state. 
+ + 8.2.4.10 Attribute value (double-quoted) state + + Consume the next input character: + + U+0022 QUOTATION MARK (") + Switch to the after attribute value (quoted) state. + + U+0026 AMPERSAND (&) + Switch to the character reference in attribute value state, with + the additional allowed character being U+0022 QUOTATION MARK + ("). + + EOF + Parse error. Emit the current tag token. Reconsume the EOF character + in the data state. + + Anything else + Append the current input character to the current attribute's + value. Stay in the attribute value (double-quoted) state. + + 8.2.4.11 Attribute value (single-quoted) state + + Consume the next input character: + + U+0027 APOSTROPHE (') + Switch to the after attribute value (quoted) state. + + U+0026 AMPERSAND (&) + Switch to the character reference in attribute value state, with + the additional allowed character being U+0027 APOSTROPHE ('). + + EOF + Parse error. Emit the current tag token. Reconsume the EOF character + in the data state. + + Anything else + Append the current input character to the current attribute's + value. Stay in the attribute value (single-quoted) state. + + 8.2.4.12 Attribute value (unquoted) state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. + + U+0026 AMPERSAND (&) + Switch to the character reference in attribute value state, with + no additional allowed character. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + U+0022 QUOTATION MARK (") + U+0027 APOSTROPHE (') + U+003D EQUALS SIGN (=) + Parse error. Treat it as per the "anything else" entry below. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF character + in the data state. + + Anything else + Append the current input character to the current attribute's + value. Stay in the attribute value (unquoted) state. 
+ + 8.2.4.13 Character reference in attribute value state + + Attempt to consume a character reference. + + If nothing is returned, append a U+0026 AMPERSAND character to the + current attribute's value. + + Otherwise, append the returned character token to the current + attribute's value. + + Finally, switch back to the attribute value state that you were in when + you were switched into this state. + + 8.2.4.14 After attribute value (quoted) state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. + + U+002F SOLIDUS (/) + Switch to the self-closing start tag state. + + U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. + + Anything else + Parse error. Reconsume the character in the before attribute + name state. + + 8.2.4.15 Self-closing start tag state + + Consume the next input character: + + U+003E GREATER-THAN SIGN (>) + Set the self-closing flag of the current tag token. Emit the + current tag token. Switch to the data state. + + EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. + + Anything else + Parse error. Reconsume the character in the before attribute + name state. + + 8.2.4.16 Bogus comment state + + (This can only happen if the content model flag is set to the PCDATA + state.) + + Consume every character up to and including the first U+003E + GREATER-THAN SIGN character (>) or the end of the file (EOF), whichever + comes first. Emit a comment token whose data is the concatenation of + all the characters starting from and including the character that + caused the state machine to switch into the bogus comment state, up to + and including the character immediately before the last consumed + character (i.e.
up to the character just before the U+003E or EOF + character). (If the comment was started by the end of the file (EOF), + the token is empty.) + + Switch to the data state. + + If the end of the file was reached, reconsume the EOF character. + + 8.2.4.17 Markup declaration open state + + (This can only happen if the content model flag is set to the PCDATA + state.) + + If the next two characters are both U+002D HYPHEN-MINUS (-) characters, + consume those two characters, create a comment token whose data is the + empty string, and switch to the comment start state. + + Otherwise, if the next seven characters are an ASCII case-insensitive + match for the word "DOCTYPE", then consume those characters and switch + to the DOCTYPE state. + + Otherwise, if the insertion mode is "in foreign content" and the + current node is not an element in the HTML namespace and the next seven + characters are an ASCII case-sensitive match for the string "[CDATA[" + (the five uppercase letters "CDATA" with a U+005B LEFT SQUARE BRACKET + character before and after), then consume those characters and switch + to the CDATA section state (which is unrelated to the content model + flag's CDATA state). + + Otherwise, this is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. + + 8.2.4.18 Comment start state + + Consume the next input character: + + U+002D HYPHEN-MINUS (-) + Switch to the comment start dash state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Emit the comment token. Switch to the data state. + + EOF + Parse error. Emit the comment token. Reconsume the EOF character + in the data state. + + Anything else + Append the input character to the comment token's data. Switch + to the comment state. + + 8.2.4.19 Comment start dash state + + Consume the next input character: + + U+002D HYPHEN-MINUS (-) + Switch to the comment end state + + U+003E GREATER-THAN SIGN (>) + Parse error. 
Emit the comment token. Switch to the data state. + + EOF + Parse error. Emit the comment token. Reconsume the EOF character + in the data state. + + Anything else + Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment + state. + + 8.2.4.20 Comment state + + Consume the next input character: + + U+002D HYPHEN-MINUS (-) + Switch to the comment end dash state + + EOF + Parse error. Emit the comment token. Reconsume the EOF character + in the data state. + + Anything else + Append the input character to the comment token's data. Stay in + the comment state. + + 8.2.4.21 Comment end dash state + + Consume the next input character: + + U+002D HYPHEN-MINUS (-) + Switch to the comment end state + + EOF + Parse error. Emit the comment token. Reconsume the EOF character + in the data state. + + Anything else + Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment + state. + + 8.2.4.22 Comment end state + + Consume the next input character: + + U+003E GREATER-THAN SIGN (>) + Emit the comment token. Switch to the data state. + + U+002D HYPHEN-MINUS (-) + Parse error. Append a U+002D HYPHEN-MINUS (-) character to the + comment token's data. Stay in the comment end state. + + EOF + Parse error. Emit the comment token. Reconsume the EOF character + in the data state. + + Anything else + Parse error. Append two U+002D HYPHEN-MINUS (-) characters and + the input character to the comment token's data. Switch to the + comment state. + + 8.2.4.23 DOCTYPE state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before DOCTYPE name state. + + Anything else + Parse error. Reconsume the current character in the before + DOCTYPE name state. 
+ + 8.2.4.24 Before DOCTYPE name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before DOCTYPE name state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Create a new DOCTYPE token. Set its force-quirks + flag to on. Emit the token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Create a new DOCTYPE token. Set the token's name to the + lowercase version of the input character (add 0x0020 to the + character's code point). Switch to the DOCTYPE name state. + + EOF + Parse error. Create a new DOCTYPE token. Set its force-quirks + flag to on. Emit the token. Reconsume the EOF character in the + data state. + + Anything else + Create a new DOCTYPE token. Set the token's name to the current + input character. Switch to the DOCTYPE name state. + + 8.2.4.25 DOCTYPE name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the after DOCTYPE name state. + + U+003E GREATER-THAN SIGN (>) + Emit the current DOCTYPE token. Switch to the data state. + + U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z + Append the lowercase version of the input character (add 0x0020 + to the character's code point) to the current DOCTYPE token's + name. Stay in the DOCTYPE name state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Append the current input character to the current DOCTYPE + token's name. Stay in the DOCTYPE name state. + + 8.2.4.26 After DOCTYPE name state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after DOCTYPE name state. 
+ + U+003E GREATER-THAN SIGN (>) + Emit the current DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + If the six characters starting from the current input character + are an ASCII case-insensitive match for the word "PUBLIC", then + consume those characters and switch to the before DOCTYPE public + identifier state. + + Otherwise, if the six characters starting from the current input + character are an ASCII case-insensitive match for the word + "SYSTEM", then consume those characters and switch to the before + DOCTYPE system identifier state. + + Otherwise, this is a parse error. Set the DOCTYPE token's + force-quirks flag to on. Switch to the bogus DOCTYPE state. + + 8.2.4.27 Before DOCTYPE public identifier state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before DOCTYPE public identifier state. + + U+0022 QUOTATION MARK (") + Set the DOCTYPE token's public identifier to the empty string + (not missing), then switch to the DOCTYPE public identifier + (double-quoted) state. + + U+0027 APOSTROPHE (') + Set the DOCTYPE token's public identifier to the empty string + (not missing), then switch to the DOCTYPE public identifier + (single-quoted) state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Switch to the bogus DOCTYPE state.
+ + 8.2.4.28 DOCTYPE public identifier (double-quoted) state + + Consume the next input character: + + U+0022 QUOTATION MARK (") + Switch to the after DOCTYPE public identifier state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Append the current input character to the current DOCTYPE + token's public identifier. Stay in the DOCTYPE public identifier + (double-quoted) state. + + 8.2.4.29 DOCTYPE public identifier (single-quoted) state + + Consume the next input character: + + U+0027 APOSTROPHE (') + Switch to the after DOCTYPE public identifier state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Append the current input character to the current DOCTYPE + token's public identifier. Stay in the DOCTYPE public identifier + (single-quoted) state. + + 8.2.4.30 After DOCTYPE public identifier state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after DOCTYPE public identifier state. + + U+0022 QUOTATION MARK (") + Set the DOCTYPE token's system identifier to the empty string + (not missing), then switch to the DOCTYPE system identifier + (double-quoted) state. + + U+0027 APOSTROPHE (') + Set the DOCTYPE token's system identifier to the empty string + (not missing), then switch to the DOCTYPE system identifier + (single-quoted) state. + + U+003E GREATER-THAN SIGN (>) + Emit the current DOCTYPE token. Switch to the data state. 
+ + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Switch to the bogus DOCTYPE state. + + 8.2.4.31 Before DOCTYPE system identifier state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before DOCTYPE system identifier state. + + U+0022 QUOTATION MARK (") + Set the DOCTYPE token's system identifier to the empty string + (not missing), then switch to the DOCTYPE system identifier + (double-quoted) state. + + U+0027 APOSTROPHE (') + Set the DOCTYPE token's system identifier to the empty string + (not missing), then switch to the DOCTYPE system identifier + (single-quoted) state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Switch to the bogus DOCTYPE state. + + 8.2.4.32 DOCTYPE system identifier (double-quoted) state + + Consume the next input character: + + U+0022 QUOTATION MARK (") + Switch to the after DOCTYPE system identifier state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Append the current input character to the current DOCTYPE + token's system identifier. Stay in the DOCTYPE system identifier + (double-quoted) state. 
+ + 8.2.4.33 DOCTYPE system identifier (single-quoted) state + + Consume the next input character: + + U+0027 APOSTROPHE (') + Switch to the after DOCTYPE system identifier state. + + U+003E GREATER-THAN SIGN (>) + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Append the current input character to the current DOCTYPE + token's system identifier. Stay in the DOCTYPE system identifier + (single-quoted) state. + + 8.2.4.34 After DOCTYPE system identifier state + + Consume the next input character: + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after DOCTYPE system identifier state. + + U+003E GREATER-THAN SIGN (>) + Emit the current DOCTYPE token. Switch to the data state. + + EOF + Parse error. Set the DOCTYPE token's force-quirks flag to on. + Emit that DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Parse error. Switch to the bogus DOCTYPE state. (This does not + set the DOCTYPE token's force-quirks flag to on.) + + 8.2.4.35 Bogus DOCTYPE state + + Consume the next input character: + + U+003E GREATER-THAN SIGN (>) + Emit the DOCTYPE token. Switch to the data state. + + EOF + Emit the DOCTYPE token. Reconsume the EOF character in the data + state. + + Anything else + Stay in the bogus DOCTYPE state. + + 8.2.4.36 CDATA section state + + (This can only happen if the content model flag is set to the PCDATA + state, and is unrelated to the content model flag's CDATA state.) + + Consume every character up to the next occurrence of the three + character sequence U+005D RIGHT SQUARE BRACKET U+005D RIGHT SQUARE + BRACKET U+003E GREATER-THAN SIGN (]]>), or the end of the file (EOF), + whichever comes first. 
Emit a series of character tokens consisting of + all the characters consumed except the matching three character + sequence at the end (if one was found before the end of the file). + + Switch to the data state. + + If the end of the file was reached, reconsume the EOF character. + + 8.2.4.37 Tokenizing character references + + This section defines how to consume a character reference. This + definition is used when parsing character references in text and in + attributes. + + The behavior depends on the identity of the next character (the one + immediately after the U+0026 AMPERSAND character): + + U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000C FORM FEED (FF) + U+0020 SPACE + U+003C LESS-THAN SIGN + U+0026 AMPERSAND + EOF + The additional allowed character, if there is one + Not a character reference. No characters are consumed, and + nothing is returned. (This is not an error, either.) + + U+0023 NUMBER SIGN (#) + Consume the U+0023 NUMBER SIGN. + + The behavior further depends on the character after the U+0023 + NUMBER SIGN: + + U+0078 LATIN SMALL LETTER X + U+0058 LATIN CAPITAL LETTER X + Consume the X. + + Follow the steps below, but using the range of characters + U+0030 DIGIT ZERO through to U+0039 DIGIT NINE, U+0061 + LATIN SMALL LETTER A through to U+0066 LATIN SMALL LETTER + F, and U+0041 LATIN CAPITAL LETTER A, through to U+0046 + LATIN CAPITAL LETTER F (in other words, 0-9, A-F, a-f). + + When it comes to interpreting the number, interpret it as + a hexadecimal number. + + Anything else + Follow the steps below, but using the range of characters + U+0030 DIGIT ZERO through to U+0039 DIGIT NINE (i.e. just + 0-9). + + When it comes to interpreting the number, interpret it as + a decimal number. + + Consume as many characters as match the range of characters + given above. + + If no characters match the range, then don't consume any + characters (and unconsume the U+0023 NUMBER SIGN character and, + if appropriate, the X character). 
This is a parse error; nothing + is returned. + + Otherwise, if the next character is a U+003B SEMICOLON, consume + that too. If it isn't, there is a parse error. + + If one or more characters match the range, then take them all + and interpret the string of characters as a number (either + hexadecimal or decimal as appropriate). + + If that number is one of the numbers in the first column of the + following table, then this is a parse error. Find the row with + that number in the first column, and return a character token + for the Unicode character given in the second column of that + row. + + Number Unicode character + 0x0D U+000A LINE FEED (LF) + 0x80 U+20AC EURO SIGN ('€') + 0x81 U+FFFD REPLACEMENT CHARACTER + 0x82 U+201A SINGLE LOW-9 QUOTATION MARK ('‚') + 0x83 U+0192 LATIN SMALL LETTER F WITH HOOK ('ƒ') + 0x84 U+201E DOUBLE LOW-9 QUOTATION MARK ('„') + 0x85 U+2026 HORIZONTAL ELLIPSIS ('…') + 0x86 U+2020 DAGGER ('†') + 0x87 U+2021 DOUBLE DAGGER ('‡') + 0x88 U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ') + 0x89 U+2030 PER MILLE SIGN ('‰') + 0x8A U+0160 LATIN CAPITAL LETTER S WITH CARON ('Š') + 0x8B U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹') + 0x8C U+0152 LATIN CAPITAL LIGATURE OE ('Œ') + 0x8D U+FFFD REPLACEMENT CHARACTER + 0x8E U+017D LATIN CAPITAL LETTER Z WITH CARON ('Ž') + 0x8F U+FFFD REPLACEMENT CHARACTER + 0x90 U+FFFD REPLACEMENT CHARACTER + 0x91 U+2018 LEFT SINGLE QUOTATION MARK ('‘') + 0x92 U+2019 RIGHT SINGLE QUOTATION MARK ('’') + 0x93 U+201C LEFT DOUBLE QUOTATION MARK ('“') + 0x94 U+201D RIGHT DOUBLE QUOTATION MARK ('”') + 0x95 U+2022 BULLET ('•') + 0x96 U+2013 EN DASH ('–') + 0x97 U+2014 EM DASH ('—') + 0x98 U+02DC SMALL TILDE ('˜') + 0x99 U+2122 TRADE MARK SIGN ('™') + 0x9A U+0161 LATIN SMALL LETTER S WITH CARON ('š') + 0x9B U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›') + 0x9C U+0153 LATIN SMALL LIGATURE OE ('œ') + 0x9D U+FFFD REPLACEMENT CHARACTER + 0x9E U+017E LATIN SMALL LETTER Z WITH CARON ('ž') + 0x9F U+0178 LATIN CAPITAL 
LETTER Y WITH DIAERESIS ('Ÿ') + + Otherwise, if the number is in the range 0x0000 to 0x0008, + 0x000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to + 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, + 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, + 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, + 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, + 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this is + a parse error; return a character token for the U+FFFD + REPLACEMENT CHARACTER character instead. + + Otherwise, return a character token for the Unicode character + whose code point is that number. + + Anything else + Consume the maximum number of characters possible, with the + consumed characters matching one of the identifiers in the first + column of the named character references table (in a + case-sensitive manner). + + If no match can be made, then this is a parse error. No + characters are consumed, and nothing is returned. + + If the last character matched is not a U+003B SEMICOLON (;), + there is a parse error. + + If the character reference is being consumed as part of an + attribute, and the last character matched is not a U+003B + SEMICOLON (;), and the next character is in the range U+0030 + DIGIT ZERO to U+0039 DIGIT NINE, U+0041 LATIN CAPITAL LETTER A + to U+005A LATIN CAPITAL LETTER Z, or U+0061 LATIN SMALL LETTER A + to U+007A LATIN SMALL LETTER Z, then, for historical reasons, + all the characters that were matched after the U+0026 AMPERSAND + (&) must be unconsumed, and nothing is returned. + + Otherwise, return a character token for the character + corresponding to the character reference name (as given by the + second column of the named character references table). + + If the markup contains I'm &notit; I tell you, the character + reference is parsed as "not", as in, I'm ¬it; I tell you.
But if + the markup was I'm &notin; I tell you, the character reference + would be parsed as "notin;", resulting in I'm ∉ I tell you. diff --git a/parser/html/java/htmlparser/doc/tree-construction.txt b/parser/html/java/htmlparser/doc/tree-construction.txt new file mode 100644 index 0000000000..0febf147ae --- /dev/null +++ b/parser/html/java/htmlparser/doc/tree-construction.txt @@ -0,0 +1,2201 @@ + #8.2.4 Tokenization Table of contents 8.4 Serializing HTML fragments + + WHATWG + +HTML 5 + +Draft Recommendation — 13 January 2009 + + ← 8.2.4 Tokenization – Table of contents – 8.4 Serializing HTML + fragments → + + 8.2.5 Tree construction + + The input to the tree construction stage is a sequence of tokens from + the tokenization stage. The tree construction stage is associated with + a DOM Document object when a parser is created. The "output" of this + stage consists of dynamically modifying or extending that document's + DOM tree. + + This specification does not define when an interactive user agent has + to render the Document so that it is available to the user, or when it + has to begin accepting user input. + + As each token is emitted from the tokeniser, the user agent must + process the token according to the rules given in the section + corresponding to the current insertion mode. + + When the steps below require the UA to insert a character into a node, + if that node has a child immediately before where the character is to + be inserted, and that child is a Text node, and that Text node was the + last node that the parser inserted into the document, then the + character must be appended to that Text node; otherwise, a new Text + node whose data is just that character must be inserted in the + appropriate place. + + DOM mutation events must not fire for changes caused by the UA parsing + the document. (Conceptually, the parser is not mutating the DOM, it is + constructing it.)
This includes the parsing of any content inserted + using document.write() and document.writeln() calls. [DOM3EVENTS] + + Not all of the tag names mentioned below are conformant tag names in + this specification; many are included to handle legacy content. They + still form part of the algorithm that implementations are required to + implement to claim conformance. + + The algorithm described below places no limit on the depth of the DOM + tree generated, or on the length of tag names, attribute names, + attribute values, text nodes, etc. While implementors are encouraged to + avoid arbitrary limits, it is recognized that practical concerns will + likely force user agents to impose nesting depths. + + 8.2.5.1 Creating and inserting elements + + When the steps below require the UA to create an element for a token in + a particular namespace, the UA must create a node implementing the + interface appropriate for the element type corresponding to the tag + name of the token in the given namespace (as given in the specification + that defines that element, e.g. for an a element in the HTML namespace, + this specification defines it to be the HTMLAnchorElement interface), + with the tag name being the name of that element, with the node being + in the given namespace, and with the attributes on the node being those + given in the given token. + + The interface appropriate for an element in the HTML namespace that is + not defined in this specification is HTMLElement. The interface + appropriate for an element in another namespace that is not defined by + that namespace's specification is Element. + + When a resettable element is created in this manner, its reset + algorithm must be invoked once the attributes are set. (This + initializes the element's value and checkedness based on the element's + attributes.) 
+ __________________________________________________________________ + + When the steps below require the UA to insert an HTML element for a + token, the UA must first create an element for the token in the HTML + namespace, and then append this node to the current node, and push it + onto the stack of open elements so that it is the new current node. + + The steps below may also require that the UA insert an HTML element in + a particular place, in which case the UA must follow the same steps + except that it must insert or append the new node in the location + specified instead of appending it to the current node. (This happens in + particular during the parsing of tables with invalid content.) + + If an element created by the insert an HTML element algorithm is a + form-associated element, and the form element pointer is not null, and + the newly created element doesn't have a form attribute, the user agent + must associate the newly created element with the form element pointed + to by the form element pointer before inserting it wherever it is to be + inserted. + __________________________________________________________________ + + When the steps below require the UA to insert a foreign element for a + token, the UA must first create an element for the token in the given + namespace, and then append this node to the current node, and push it + onto the stack of open elements so that it is the new current node. If + the newly created element has an xmlns attribute in the XMLNS namespace + whose value is not exactly the same as the element's namespace, that is + a parse error. + + When the steps below require the user agent to adjust MathML attributes + for a token, then, if the token has an attribute named definitionurl, + change its name to definitionURL (note the case difference). 
+ + When the steps below require the user agent to adjust foreign + attributes for a token, then, if any of the attributes on the token + match the strings given in the first column of the following table, let + the attribute be a namespaced attribute, with the prefix being the + string given in the corresponding cell in the second column, the local + name being the string given in the corresponding cell in the third + column, and the namespace being the namespace given in the + corresponding cell in the fourth column. (This fixes the use of + namespaced attributes, in particular xml:lang.) + + Attribute name Prefix Local name Namespace + xlink:actuate xlink actuate XLink namespace + xlink:arcrole xlink arcrole XLink namespace + xlink:href xlink href XLink namespace + xlink:role xlink role XLink namespace + xlink:show xlink show XLink namespace + xlink:title xlink title XLink namespace + xlink:type xlink type XLink namespace + xml:base xml base XML namespace + xml:lang xml lang XML namespace + xml:space xml space XML namespace + xmlns (none) xmlns XMLNS namespace + xmlns:xlink xmlns xlink XMLNS namespace + __________________________________________________________________ + + The generic CDATA element parsing algorithm and the generic RCDATA + element parsing algorithm consist of the following steps. These + algorithms are always invoked in response to a start tag token. + 1. Insert an HTML element for the token. + 2. If the algorithm that was invoked is the generic CDATA element + parsing algorithm, switch the tokeniser's content model flag to the + CDATA state; otherwise the algorithm invoked was the generic RCDATA + element parsing algorithm, switch the tokeniser's content model + flag to the RCDATA state. + 3. Let the original insertion mode be the current insertion mode. + 4. Then, switch the insertion mode to "in CDATA/RCDATA". 
+ + 8.2.5.2 Closing elements that have implied end tags + + When the steps below require the UA to generate implied end tags, then, + while the current node is a dd element, a dt element, an li element, an + option element, an optgroup element, a p element, an rp element, or an + rt element, the UA must pop the current node off the stack of open + elements. + + If a step requires the UA to generate implied end tags but lists an + element to exclude from the process, then the UA must perform the above + steps as if that element was not in the above list. + + 8.2.5.3 Foster parenting + + Foster parenting happens when content is misnested in tables. + + When a node node is to be foster parented, the node node must be + inserted into the foster parent element, and the current table must be + marked as tainted. (Once the current table has been tainted, whitespace + characters are inserted into the foster parent element instead of the + current node.) + + The foster parent element is the parent element of the last table + element in the stack of open elements, if there is a table element and + it has such a parent element. If there is no table element in the stack + of open elements (fragment case), then the foster parent element is the + first element in the stack of open elements (the html element). + Otherwise, if there is a table element in the stack of open elements, + but the last table element in the stack of open elements has no parent, + or its parent node is not an element, then the foster parent element is + the element before the last table element in the stack of open + elements. + + If the foster parent element is the parent element of the last table + element in the stack of open elements, then node must be inserted + immediately before the last table element in the stack of open elements + in the foster parent element; otherwise, node must be appended to the + foster parent element. 
+ + 8.2.5.4 The "initial" insertion mode + + When the insertion mode is "initial", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Ignore the token. + + A comment token + Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + If the DOCTYPE token's name is not a case-sensitive match for + the string "html", or if the token's public identifier is + neither missing nor a case-sensitive match for the string + "XSLT-compat", or if the token's system identifier is not + missing, then there is a parse error (this is the DOCTYPE parse + error). Conformance checkers may, instead of reporting this + error, switch to a conformance checking mode for another + language (e.g. based on the DOCTYPE token a conformance checker + could recognize that the document is an HTML4-era document, and + defer to an HTML4 conformance checker.) + + Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token; the + publicId attribute set to the public identifier given in the + DOCTYPE token, or the empty string if the public identifier was + missing; the systemId attribute set to the system identifier + given in the DOCTYPE token, or the empty string if the system + identifier was missing; and the other attributes specific to + DocumentType objects set to null and empty lists as appropriate. + Associate the DocumentType node with the Document object so that + it is returned as the value of the doctype attribute of the + Document object. + + Then, if the DOCTYPE token matches one of the conditions in the + following list, then set the document to quirks mode: + + + The force-quirks flag is set to on. + + The name is set to anything other than "HTML".
+ + The public identifier starts with: "+//Silmaril//dtd html Pro + v0r11 19970101//" + + The public identifier starts with: "-//AdvaSoft Ltd//DTD HTML + 3.0 asWedit + extensions//" + + The public identifier starts with: "-//AS//DTD HTML 3.0 + asWedit + extensions//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0 + Level 1//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0 + Level 2//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0 + Strict Level 1//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0 + Strict Level 2//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0 + Strict//" + + The public identifier starts with: "-//IETF//DTD HTML 2.0//" + + The public identifier starts with: "-//IETF//DTD HTML 2.1E//" + + The public identifier starts with: "-//IETF//DTD HTML 3.0//" + + The public identifier starts with: "-//IETF//DTD HTML 3.2 + Final//" + + The public identifier starts with: "-//IETF//DTD HTML 3.2//" + + The public identifier starts with: "-//IETF//DTD HTML 3//" + + The public identifier starts with: "-//IETF//DTD HTML Level + 0//" + + The public identifier starts with: "-//IETF//DTD HTML Level + 1//" + + The public identifier starts with: "-//IETF//DTD HTML Level + 2//" + + The public identifier starts with: "-//IETF//DTD HTML Level + 3//" + + The public identifier starts with: "-//IETF//DTD HTML Strict + Level 0//" + + The public identifier starts with: "-//IETF//DTD HTML Strict + Level 1//" + + The public identifier starts with: "-//IETF//DTD HTML Strict + Level 2//" + + The public identifier starts with: "-//IETF//DTD HTML Strict + Level 3//" + + The public identifier starts with: "-//IETF//DTD HTML + Strict//" + + The public identifier starts with: "-//IETF//DTD HTML//" + + The public identifier starts with: "-//Metrius//DTD Metrius + Presentational//" + + The public identifier starts with: "-//Microsoft//DTD Internet + Explorer 2.0 HTML Strict//" + + The public identifier starts with: 
"-//Microsoft//DTD Internet + Explorer 2.0 HTML//" + + The public identifier starts with: "-//Microsoft//DTD Internet + Explorer 2.0 Tables//" + + The public identifier starts with: "-//Microsoft//DTD Internet + Explorer 3.0 HTML Strict//" + + The public identifier starts with: "-//Microsoft//DTD Internet + Explorer 3.0 HTML//" + + The public identifier starts with: "-//Microsoft//DTD Internet + Explorer 3.0 Tables//" + + The public identifier starts with: "-//Netscape Comm. + Corp.//DTD HTML//" + + The public identifier starts with: "-//Netscape Comm. + Corp.//DTD Strict HTML//" + + The public identifier starts with: "-//O'Reilly and + Associates//DTD HTML 2.0//" + + The public identifier starts with: "-//O'Reilly and + Associates//DTD HTML Extended 1.0//" + + The public identifier starts with: "-//O'Reilly and + Associates//DTD HTML Extended Relaxed 1.0//" + + The public identifier starts with: "-//SoftQuad Software//DTD + HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//" + + The public identifier starts with: "-//SoftQuad//DTD HoTMetaL + PRO 4.0::19971010::extensions to HTML 4.0//" + + The public identifier starts with: "-//Spyglass//DTD HTML 2.0 + Extended//" + + The public identifier starts with: "-//SQ//DTD HTML 2.0 + HoTMetaL + extensions//" + + The public identifier starts with: "-//Sun Microsystems + Corp.//DTD HotJava HTML//" + + The public identifier starts with: "-//Sun Microsystems + Corp.//DTD HotJava Strict HTML//" + + The public identifier starts with: "-//W3C//DTD HTML 3 + 1995-03-24//" + + The public identifier starts with: "-//W3C//DTD HTML 3.2 + Draft//" + + The public identifier starts with: "-//W3C//DTD HTML 3.2 + Final//" + + The public identifier starts with: "-//W3C//DTD HTML 3.2//" + + The public identifier starts with: "-//W3C//DTD HTML 3.2S + Draft//" + + The public identifier starts with: "-//W3C//DTD HTML 4.0 + Frameset//" + + The public identifier starts with: "-//W3C//DTD HTML 4.0 + Transitional//" + + The public identifier 
starts with: "-//W3C//DTD HTML + Experimental 19960712//" + + The public identifier starts with: "-//W3C//DTD HTML + Experimental 970421//" + + The public identifier starts with: "-//W3C//DTD W3 HTML//" + + The public identifier starts with: "-//W3O//DTD W3 HTML 3.0//" + + The public identifier is set to: "-//W3O//DTD W3 HTML Strict + 3.0//EN//" + + The public identifier starts with: "-//WebTechs//DTD Mozilla + HTML 2.0//" + + The public identifier starts with: "-//WebTechs//DTD Mozilla + HTML//" + + The public identifier is set to: "-/W3C/DTD HTML 4.0 + Transitional/EN" + + The public identifier is set to: "HTML" + + The system identifier is set to: + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" + + The system identifier is missing and the public identifier + starts with: "-//W3C//DTD HTML 4.01 Frameset//" + + The system identifier is missing and the public identifier + starts with: "-//W3C//DTD HTML 4.01 Transitional//" + + Otherwise, if the DOCTYPE token matches one of the conditions in + the following list, then set the document to limited quirks + mode: + + + The public identifier starts with: "-//W3C//DTD XHTML 1.0 + Frameset//" + + The public identifier starts with: "-//W3C//DTD XHTML 1.0 + Transitional//" + + The system identifier is not missing and the public identifier + starts with: "-//W3C//DTD HTML 4.01 Frameset//" + + The system identifier is not missing and the public identifier + starts with: "-//W3C//DTD HTML 4.01 Transitional//" + + The name, system identifier, and public identifier strings must + be compared to the values given in the lists above in an ASCII + case-insensitive manner. A system identifier whose value is the + empty string is not considered missing for the purposes of the + conditions above. + + Then, switch the insertion mode to "before html". + + Anything else + Parse error. + + Set the document to quirks mode. + + Switch the insertion mode to "before html", then reprocess the + current token. 
+ + 8.2.5.5 The "before html" insertion mode + + When the insertion mode is "before html", tokens must be handled as + follows: + + A DOCTYPE token + Parse error. Ignore the token. + + A comment token + Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Ignore the token. + + A start tag whose tag name is "html" + Create an element for the token in the HTML namespace. Append it + to the Document object. Put this element in the stack of open + elements. + + If the token has an attribute "manifest", then resolve the value + of that attribute to an absolute URL, and if that is successful, + run the application cache selection algorithm with the resulting + absolute URL. Otherwise, if there is no such attribute or + resolving it fails, run the application cache selection + algorithm with no manifest. The algorithm must be passed the + Document object. + + Switch the insertion mode to "before head". + + Anything else + Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Put this element in + the stack of open elements. + + Run the application cache selection algorithm with no manifest, + passing it the Document object. + + Switch the insertion mode to "before head", then reprocess the + current token. + + Should probably make end tags be ignored, so that "</head><!-- --><html>" puts the comment before the root node (or should we?) + + The root element can end up being removed from the Document object, + e.g. by scripts; nothing in particular happens in such cases, content + continues being appended to the nodes as described in the next section.
+ + 8.2.5.6 The "before head" insertion mode + + When the insertion mode is "before head", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Ignore the token. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + A start tag whose tag name is "head" + Insert an HTML element for the token. + + Set the head element pointer to the newly created head element. + + Switch the insertion mode to "in head". + + An end tag whose tag name is one of: "head", "br" + Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. + + Any other end tag + Parse error. Ignore the token. + + Anything else + Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. + + This will result in an empty head element being generated, with + the current token being reprocessed in the "after head" + insertion mode. + + 8.2.5.7 The "in head" insertion mode + + When the insertion mode is "in head", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode.
+ + A start tag whose tag name is one of: "base", "command", "eventsource", + "link" + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + A start tag whose tag name is "meta" + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + If the element has a charset attribute, and its value is a + supported encoding, and the confidence is currently tentative, + then change the encoding to the encoding given by the value of + the charset attribute. + + Otherwise, if the element has a content attribute, and applying + the algorithm for extracting an encoding from a Content-Type to + its value returns a supported encoding encoding, and the + confidence is currently tentative, then change the encoding to + the encoding encoding. + + A start tag whose tag name is "title" + Follow the generic RCDATA element parsing algorithm. + + A start tag whose tag name is "noscript", if the scripting flag is + enabled + + A start tag whose tag name is one of: "noframes", "style" + Follow the generic CDATA element parsing algorithm. + + A start tag whose tag name is "noscript", if the scripting flag is + disabled + Insert an HTML element for the token. + + Switch the insertion mode to "in head noscript". + + A start tag whose tag name is "script" + + 1. Create an element for the token in the HTML namespace. + 2. Mark the element as being "parser-inserted". + This ensures that, if the script is external, any + document.write() calls in the script will execute in-line, + instead of blowing the document away, as would happen in most + other cases. It also prevents the script from executing until + the end tag is seen. + 3. If the parser was originally created for the HTML fragment + parsing algorithm, then mark the script element as "already + executed". 
(fragment case) + 4. Append the new element to the current node. + 5. Switch the tokeniser's content model flag to the CDATA state. + 6. Let the original insertion mode be the current insertion mode. + 7. Switch the insertion mode to "in CDATA/RCDATA". + + An end tag whose tag name is "head" + Pop the current node (which will be the head element) off the + stack of open elements. + + Switch the insertion mode to "after head". + + An end tag whose tag name is "br" + Act as described in the "anything else" entry below. + + A start tag whose tag name is "head" + Any other end tag + Parse error. Ignore the token. + + Anything else + Act as if an end tag token with the tag name "head" had been + seen, and reprocess the current token. + + In certain UAs, some elements don't trigger the "in body" mode + straight away, but instead get put into the head. Do we want to + copy that? + + 8.2.5.8 The "in head noscript" insertion mode + + When the insertion mode is "in head noscript", tokens must be handled + as follows: + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + An end tag whose tag name is "noscript" + Pop the current node (which will be a noscript element) from the + stack of open elements; the new current node will be a head + element. + + Switch the insertion mode to "in head". + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + + A comment token + A start tag whose tag name is one of: "link", "meta", "noframes", + "style" + Process the token using the rules for the "in head" insertion + mode. + + An end tag whose tag name is "br" + Act as described in the "anything else" entry below. + + A start tag whose tag name is one of: "head", "noscript" + Any other end tag + Parse error. Ignore the token. + + Anything else + Parse error.
Act as if an end tag with the tag name "noscript" + had been seen and reprocess the current token. + + 8.2.5.9 The "after head" insertion mode + + When the insertion mode is "after head", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + A start tag whose tag name is "body" + Insert an HTML element for the token. + + Switch the insertion mode to "in body". + + A start tag whose tag name is "frameset" + Insert an HTML element for the token. + + Switch the insertion mode to "in frameset". + + A start tag token whose tag name is one of: "base", "link", "meta", + "noframes", "script", "style", "title" + Parse error. + + Push the node pointed to by the head element pointer onto the + stack of open elements. + + Process the token using the rules for the "in head" insertion + mode. + + Remove the node pointed to by the head element pointer from the + stack of open elements. + + An end tag whose tag name is "br" + Act as described in the "anything else" entry below. + + A start tag whose tag name is "head" + Any other end tag + Parse error. Ignore the token. + + Anything else + Act as if a start tag token with the tag name "body" and no + attributes had been seen, and then reprocess the current token. + + 8.2.5.10 The "in body" insertion mode + + When the insertion mode is "in body", tokens must be handled as + follows: + + A character token + Reconstruct the active formatting elements, if any. + + Insert the token's character into the current node.
+ + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Parse error. For each attribute on the token, check to see if + the attribute is already present on the top element of the stack + of open elements. If it is not, add the attribute and its + corresponding value to that element. + + A start tag token whose tag name is one of: "base", "command", + "eventsource", "link", "meta", "noframes", "script", "style", + "title" + Process the token using the rules for the "in head" insertion + mode. + + A start tag whose tag name is "body" + Parse error. + + If the second element on the stack of open elements is not a + body element, or, if the stack of open elements has only one + node on it, then ignore the token. (fragment case) + + Otherwise, for each attribute on the token, check to see if the + attribute is already present on the body element (the second + element) on the stack of open elements. If it is not, add the + attribute and its corresponding value to that element. + + An end-of-file token + If there is a node in the stack of open elements that is not + either a dd element, a dt element, an li element, a p element, a + tbody element, a td element, a tfoot element, a th element, a + thead element, a tr element, the body element, or the html + element, then this is a parse error. + + Stop parsing. + + An end tag whose tag name is "body" + If the stack of open elements does not have a body element in + scope, this is a parse error; ignore the token. + + Otherwise, if there is a node in the stack of open elements that + is not either a dd element, a dt element, an li element, a p + element, a tbody element, a td element, a tfoot element, a th + element, a thead element, a tr element, the body element, or the + html element, then this is a parse error. 
+ + Switch the insertion mode to "after body". + + An end tag whose tag name is "html" + Act as if an end tag with tag name "body" had been seen, then, + if that token wasn't ignored, reprocess the current token. + + The fake end tag token here can only be ignored in the fragment + case. + + A start tag whose tag name is one of: "address", "article", "aside", + "blockquote", "center", "datagrid", "details", "dialog", "dir", + "div", "dl", "fieldset", "figure", "footer", "header", "menu", + "nav", "ol", "p", "section", "ul" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token. + + A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", + "h6" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + If the current node is an element whose tag name is one of "h1", + "h2", "h3", "h4", "h5", or "h6", then this is a parse error; pop + the current node off the stack of open elements. + + Insert an HTML element for the token. + + A start tag whose tag name is one of: "pre", "listing" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token. + + If the next token is a U+000A LINE FEED (LF) character token, + then ignore that token and move on to the next one. (Newlines at + the start of pre blocks are ignored as an authoring + convenience.) + + A start tag whose tag name is "form" + If the form element pointer is not null, then this is a parse + error; ignore the token. + + Otherwise: + + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token, and set the form element + pointer to point to the element created. 
+ + A start tag whose tag name is "li" + Run the following algorithm: + + 1. Initialize node to be the current node (the bottommost node of + the stack). + 2. If node is an li element, then act as if an end tag with the + tag name "li" had been seen, then jump to the last step. + 3. If node is not in the formatting category, and is not in the + phrasing category, and is not an address, div, or p element, + then jump to the last step. + 4. Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. + 5. This is the last step. + If the stack of open elements has a p element in scope, then + act as if an end tag with the tag name "p" had been seen. + Finally, insert an HTML element for the token. + + A start tag whose tag name is one of: "dd", "dt" + Run the following algorithm: + + 1. Initialize node to be the current node (the bottommost node of + the stack). + 2. If node is a dd or dt element, then act as if an end tag with + the same tag name as node had been seen, then jump to the last + step. + 3. If node is not in the formatting category, and is not in the + phrasing category, and is not an address, div, or p element, + then jump to the last step. + 4. Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. + 5. This is the last step. + If the stack of open elements has a p element in scope, then + act as if an end tag with the tag name "p" had been seen. + Finally, insert an HTML element for the token. + + A start tag whose tag name is "plaintext" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token. + + Switch the content model flag to the PLAINTEXT state. 
+ + Once a start tag with the tag name "plaintext" has been seen, + that will be the last token ever seen other than character + tokens (and the end-of-file token), because there is no way to + switch the content model flag out of the PLAINTEXT state. + + An end tag whose tag name is one of: "address", "article", "aside", + "blockquote", "center", "datagrid", "details", "dialog", "dir", + "div", "dl", "fieldset", "figure", "footer", "header", + "listing", "menu", "nav", "ol", "pre", "section", "ul" + If the stack of open elements does not have an element in scope + with the same tag name as that of the token, then this is a + parse error; ignore the token. + + Otherwise, run these steps: + + 1. Generate implied end tags. + 2. If the current node is not an element with the same tag name + as that of the token, then this is a parse error. + 3. Pop elements from the stack of open elements until an element + with the same tag name as the token has been popped from the + stack. + + An end tag whose tag name is "form" + Let node be the element that the form element pointer is set to. + + Set the form element pointer to null. + + If node is null or the stack of open elements does not have node + in scope, then this is a parse error; ignore the token. + + Otherwise, run these steps: + + 1. Generate implied end tags. + 2. If the current node is not node, then this is a parse error. + 3. Remove node from the stack of open elements. + + An end tag whose tag name is "p" + If the stack of open elements does not have an element in scope + with the same tag name as that of the token, then this is a + parse error; act as if a start tag with the tag name p had been + seen, then reprocess the current token. + + Otherwise, run these steps: + + 1. Generate implied end tags, except for elements with the same + tag name as the token. + 2. If the current node is not an element with the same tag name + as that of the token, then this is a parse error. + 3. 
Pop elements from the stack of open elements until an element + with the same tag name as the token has been popped from the + stack. + + An end tag whose tag name is one of: "dd", "dt", "li" + If the stack of open elements does not have an element in scope + with the same tag name as that of the token, then this is a + parse error; ignore the token. + + Otherwise, run these steps: + + 1. Generate implied end tags, except for elements with the same + tag name as the token. + 2. If the current node is not an element with the same tag name + as that of the token, then this is a parse error. + 3. Pop elements from the stack of open elements until an element + with the same tag name as the token has been popped from the + stack. + + An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" + If the stack of open elements does not have an element in scope + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", + then this is a parse error; ignore the token. + + Otherwise, run these steps: + + 1. Generate implied end tags. + 2. If the current node is not an element with the same tag name + as that of the token, then this is a parse error. + 3. Pop elements from the stack of open elements until an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6" + has been popped from the stack. + + An end tag whose tag name is "sarcasm" + Take a deep breath, then act as described in the "any other end + tag" entry below. 
+ + A start tag whose tag name is "a" + If the list of active formatting elements contains an element + whose tag name is "a" between the end of the list and the last + marker on the list (or the start of the list if there is no + marker on the list), then this is a parse error; act as if an + end tag with the tag name "a" had been seen, then remove that + element from the list of active formatting elements and the + stack of open elements if the end tag didn't already remove it + (it might not have if the element is not in table scope). + + In the non-conforming stream + <a href="a">a<table><a href="b">b</table>
x, the first a element + would be closed upon seeing the second one, and the "x" + character would be inside a link to "b", not to "a". This is + despite the fact that the outer a element is not in table scope + (meaning that a regular end tag at the start of the table + wouldn't close the outer a element). + + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. Add that element to the + list of active formatting elements. + + A start tag whose tag name is one of: "b", "big", "em", "font", "i", + "s", "small", "strike", "strong", "tt", "u" + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. Add that element to the + list of active formatting elements. + + A start tag whose tag name is "nobr" + Reconstruct the active formatting elements, if any. + + If the stack of open elements has a nobr element in scope, then + this is a parse error; act as if an end tag with the tag name + "nobr" had been seen, then once again reconstruct the active + formatting elements, if any. + + Insert an HTML element for the token. Add that element to the + list of active formatting elements. + + An end tag whose tag name is one of: "a", "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" + Follow these steps: + + 1. Let the formatting element be the last element in the list of + active formatting elements that: + o is between the end of the list and the last scope marker + in the list, if any, or the start of the list otherwise, + and + o has the same tag name as the token. + If there is no such node, or, if that node is also in the + stack of open elements but the element is not in scope, then + this is a parse error; ignore the token, and abort these + steps. + Otherwise, if there is such a node, but that node is not in + the stack of open elements, then this is a parse error; remove + the element from the list, and abort these steps.
+ Otherwise, there is a formatting element and that element is + in the stack and is in scope. If the element is not the + current node, this is a parse error. In any case, proceed with + the algorithm as written in the following steps. + 2. Let the furthest block be the topmost node in the stack of + open elements that is lower in the stack than the formatting + element, and is not an element in the phrasing or formatting + categories. There might not be one. + 3. If there is no furthest block, then the UA must skip the + subsequent steps and instead just pop all the nodes from the + bottom of the stack of open elements, from the current node up + to and including the formatting element, and remove the + formatting element from the list of active formatting + elements. + 4. Let the common ancestor be the element immediately above the + formatting element in the stack of open elements. + 5. If the furthest block has a parent node, then remove the + furthest block from its parent node. + 6. Let a bookmark note the position of the formatting element in + the list of active formatting elements relative to the + elements on either side of it in the list. + 7. Let node and last node be the furthest block. Follow these + steps: + 1. Let node be the element immediately above node in the + stack of open elements. + 2. If node is not in the list of active formatting elements, + then remove node from the stack of open elements and then + go back to step 1. + 3. Otherwise, if node is the formatting element, then go to + the next step in the overall algorithm. + 4. Otherwise, if last node is the furthest block, then move + the aforementioned bookmark to be immediately after the + node in the list of active formatting elements. + 5. 
If node has any children, perform a shallow clone of + node, replace the entry for node in the list of active + formatting elements with an entry for the clone, replace + the entry for node in the stack of open elements with an + entry for the clone, and let node be the clone. + 6. Insert last node into node, first removing it from its + previous parent node if any. + 7. Let last node be node. + 8. Return to step 1 of this inner set of steps. + 8. If the common ancestor node is a table, tbody, tfoot, thead, + or tr element, then, foster parent whatever last node ended up + being in the previous step. + Otherwise, append whatever last node ended up being in the + previous step to the common ancestor node, first removing it + from its previous parent node if any. + 9. Perform a shallow clone of the formatting element. + 10. Take all of the child nodes of the furthest block and append + them to the clone created in the last step. + 11. Append that clone to the furthest block. + 12. Remove the formatting element from the list of active + formatting elements, and insert the clone into the list of + active formatting elements at the position of the + aforementioned bookmark. + 13. Remove the formatting element from the stack of open elements, + and insert the clone into the stack of open elements + immediately below the position of the furthest block in that + stack. + 14. Jump back to step 1 in this series of steps. + + The way these steps are defined, only elements in the formatting + category ever get cloned by this algorithm. + + Because of the way this algorithm causes elements to change + parents, it has been dubbed the "adoption agency algorithm" (in + contrast with other possible algorithms for dealing with + misnested content, which included the "incest algorithm", the + "secret affair algorithm", and the "Heisenberg algorithm").
+ + A start tag whose tag name is "button" + If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. + + Otherwise: + + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. + + Insert a marker at the end of the list of active formatting + elements. + + A start tag token whose tag name is one of: "applet", "marquee", + "object" + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. + + Insert a marker at the end of the list of active formatting + elements. + + An end tag token whose tag name is one of: "applet", "button", + "marquee", "object" + If the stack of open elements does not have an element in scope + with the same tag name as that of the token, then this is a + parse error; ignore the token. + + Otherwise, run these steps: + + 1. Generate implied end tags. + 2. If the current node is not an element with the same tag name + as that of the token, then this is a parse error. + 3. Pop elements from the stack of open elements until an element + with the same tag name as the token has been popped from the + stack. + 4. Clear the list of active formatting elements up to the last + marker. + + A start tag whose tag name is "xmp" + Reconstruct the active formatting elements, if any. + + Follow the generic CDATA element parsing algorithm. + + A start tag whose tag name is "table" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token. + + Switch the insertion mode to "in table". + + A start tag whose tag name is one of: "area", "basefont", "bgsound", + "br", "embed", "img", "input", "spacer", "wbr" + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. 
Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + A start tag whose tag name is one of: "param", "source" + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + A start tag whose tag name is "hr" + If the stack of open elements has a p element in scope, then act + as if an end tag with the tag name "p" had been seen. + + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + A start tag whose tag name is "image" + Parse error. Change the token's tag name to "img" and reprocess + it. (Don't ask.) + + A start tag whose tag name is "isindex" + Parse error. + + If the form element pointer is not null, then ignore the token. + + Otherwise: + + Acknowledge the token's self-closing flag, if it is set. + + Act as if a start tag token with the tag name "form" had been + seen. + + If the token has an attribute called "action", set the action + attribute on the resulting form element to the value of the + "action" attribute of the token. + + Act as if a start tag token with the tag name "hr" had been + seen. + + Act as if a start tag token with the tag name "p" had been seen. + + Act as if a start tag token with the tag name "label" had been + seen. + + Act as if a stream of character tokens had been seen (see below + for what they should say). + + Act as if a start tag token with the tag name "input" had been + seen, with all the attributes from the "isindex" token except + "name", "action", and "prompt". Set the name attribute of the + resulting input element to the value "isindex". + + Act as if a stream of character tokens had been seen (see below + for what they should say). + + Act as if an end tag token with the tag name "label" had been + seen. 
+ + Act as if an end tag token with the tag name "p" had been seen. + + Act as if a start tag token with the tag name "hr" had been + seen. + + Act as if an end tag token with the tag name "form" had been + seen. + + If the token has an attribute with the name "prompt", then the + first stream of characters must be the same string as given in + that attribute, and the second stream of characters must be + empty. Otherwise, the two streams of character tokens together + should, together with the input element, express the equivalent + of "This is a searchable index. Insert your search keywords + here: (input field)" in the user's preferred language. + + A start tag whose tag name is "textarea" + + 1. Insert an HTML element for the token. + 2. If the next token is a U+000A LINE FEED (LF) character token, + then ignore that token and move on to the next one. (Newlines + at the start of textarea elements are ignored as an authoring + convenience.) + 3. Switch the tokeniser's content model flag to the RCDATA state. + 4. Let the original insertion mode be the current insertion mode. + 5. Switch the insertion mode to "in CDATA/RCDATA". + + A start tag whose tag name is one of: "iframe", "noembed" + A start tag whose tag name is "noscript", if the scripting flag is + enabled + Follow the generic CDATA element parsing algorithm. + + A start tag whose tag name is "select" + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. + + If the insertion mode is one of "in table", "in caption", "in + column group", "in table body", "in row", or "in cell", then + switch the insertion mode to "in select in table". Otherwise, + switch the insertion mode to "in select". + + A start tag whose tag name is one of: "optgroup", "option" + If the stack of open elements has an option element in scope, + then act as if an end tag with the tag name "option" had been + seen. + + Reconstruct the active formatting elements, if any.
+ + Insert an HTML element for the token. + + A start tag whose tag name is one of: "rp", "rt" + If the stack of open elements has a ruby element in scope, then + generate implied end tags. If the current node is not then a + ruby element, this is a parse error; pop all the nodes from the + current node up to the node immediately before the bottommost + ruby element on the stack of open elements. + + Insert an HTML element for the token. + + An end tag whose tag name is "br" + Parse error. Act as if a start tag token with the tag name "br" + had been seen. Ignore the end tag token. + + A start tag whose tag name is "math" + Reconstruct the active formatting elements, if any. + + Adjust MathML attributes for the token. (This fixes the case of + MathML attributes that are not all lowercase.) + + Adjust foreign attributes for the token. (This fixes the use of + namespaced attributes, in particular XLink.) + + Insert a foreign element for the token, in the MathML namespace. + + If the token has its self-closing flag set, pop the current node + off the stack of open elements and acknowledge the token's + self-closing flag. + + Otherwise, let the secondary insertion mode be the current + insertion mode, and then switch the insertion mode to "in + foreign content". + + A start tag whose tag name is one of: "caption", "col", "colgroup", + "frame", "frameset", "head", "tbody", "td", "tfoot", "th", + "thead", "tr" + Parse error. Ignore the token. + + Any other start tag + Reconstruct the active formatting elements, if any. + + Insert an HTML element for the token. + + This element will be a phrasing element. + + Any other end tag + Run the following steps: + + 1. Initialize node to be the current node (the bottommost node of + the stack). + 2. If node has the same tag name as the end tag token, then: + 1. Generate implied end tags. + 2. If the tag name of the end tag token does not match the + tag name of the current node, this is a parse error. + 3. 
Pop all the nodes from the current node up to node, + including node, then stop these steps. + 3. Otherwise, if node is in neither the formatting category nor + the phrasing category, then this is a parse error; ignore the + token, and abort these steps. + 4. Set node to the previous entry in the stack of open elements. + 5. Return to step 2. + + 8.2.5.11 The "in CDATA/RCDATA" insertion mode + + When the insertion mode is "in CDATA/RCDATA", tokens must be handled as + follows: + + A character token + Insert the token's character into the current node. + + An end-of-file token + Parse error. + + If the current node is a script element, mark the script element + as "already executed". + + Pop the current node off the stack of open elements. + + Switch the insertion mode to the original insertion mode and + reprocess the current token. + + An end tag whose tag name is "script" + Let script be the current node (which will be a script element). + + Pop the current node off the stack of open elements. + + Switch the insertion mode to the original insertion mode. + + Let the old insertion point have the same value as the current + insertion point. Let the insertion point be just before the next + input character. + + Increment the parser's script nesting level by one. + + Run the script. This might cause some script to execute, which + might cause new characters to be inserted into the tokeniser, + and might cause the tokeniser to output more tokens, resulting + in a reentrant invocation of the parser. + + Decrement the parser's script nesting level by one. If the + parser's script nesting level is zero, then set the parser pause + flag to false. + + Let the insertion point have the value of the old insertion + point. (In other words, restore the insertion point to the value + it had before the previous paragraph. This value might be the + "undefined" value.) 
+ + At this stage, if there is a pending external script, then: + + If the tree construction stage is being called reentrantly, say + from a call to document.write(): + Set the parser pause flag to true, and abort the + processing of any nested invocations of the tokeniser, + yielding control back to the caller. (Tokenization will + resume when the caller returns to the "outer" tree + construction stage.) + + Otherwise: + Follow these steps: + + 1. Let the script be the pending external script. There is + no longer a pending external script. + 2. Pause until the script has completed loading. + 3. Let the insertion point be just before the next input + character. + 4. Execute the script. + 5. Let the insertion point be undefined again. + 6. If there is once again a pending external script, then + repeat these steps from step 1. + + Any other end tag + Pop the current node off the stack of open elements. + + Switch the insertion mode to the original insertion mode. + + 8.2.5.12 The "in table" insertion mode + + When the insertion mode is "in table", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + If the current table is tainted, then act as described in the + "anything else" entry below. + + Otherwise, insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "caption" + Clear the stack back to a table context. (See below.) + + Insert a marker at the end of the list of active formatting + elements. + + Insert an HTML element for the token, then switch the insertion + mode to "in caption". + + A start tag whose tag name is "colgroup" + Clear the stack back to a table context. (See below.)
+ + Insert an HTML element for the token, then switch the insertion + mode to "in column group". + + A start tag whose tag name is "col" + Act as if a start tag token with the tag name "colgroup" had + been seen, then reprocess the current token. + + A start tag whose tag name is one of: "tbody", "tfoot", "thead" + Clear the stack back to a table context. (See below.) + + Insert an HTML element for the token, then switch the insertion + mode to "in table body". + + A start tag whose tag name is one of: "td", "th", "tr" + Act as if a start tag token with the tag name "tbody" had been + seen, then reprocess the current token. + + A start tag whose tag name is "table" + Parse error. Act as if an end tag token with the tag name + "table" had been seen, then, if that token wasn't ignored, + reprocess the current token. + + The fake end tag token here can only be ignored in the fragment + case. + + An end tag whose tag name is "table" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. (fragment case) + + Otherwise: + + Pop elements from this stack until a table element has been + popped from the stack. + + Reset the insertion mode appropriately. + + An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" + Parse error. Ignore the token. + + A start tag whose tag name is one of: "style", "script" + If the current table is tainted then act as described in the + "anything else" entry below. + + Otherwise, process the token using the rules for the "in head" + insertion mode. + + A start tag whose tag name is "input" + If the token does not have an attribute with the name "type", or + if it does, but that attribute's value is not an ASCII + case-insensitive match for the string "hidden", or, if the + current table is tainted, then: act as described in the + "anything else" entry below. 
+ + Otherwise: + + Parse error. + + Insert an HTML element for the token. + + Pop that input element off the stack of open elements. + + An end-of-file token + If the current node is not the root html element, then this is a + parse error. + + It can only be the current node in the fragment case. + + Stop parsing. + + Anything else + Parse error. Process the token using the rules for the "in body" + insertion mode, except that if the current node is a table, + tbody, tfoot, thead, or tr element, then, whenever a node would + be inserted into the current node, it must instead be foster + parented. + + When the steps above require the UA to clear the stack back to a table + context, it means that the UA must, while the current node is not a + table element or an html element, pop elements from the stack of open + elements. + + The current node being an html element after this process is a fragment + case. + + 8.2.5.13 The "in caption" insertion mode + + When the insertion mode is "in caption", tokens must be handled as + follows: + + An end tag whose tag name is "caption" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. (fragment case) + + Otherwise: + + Generate implied end tags. + + Now, if the current node is not a caption element, then this is + a parse error. + + Pop elements from this stack until a caption element has been + popped from the stack. + + Clear the list of active formatting elements up to the last + marker. + + Switch the insertion mode to "in table". + + A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" + + An end tag whose tag name is "table" + Parse error. Act as if an end tag with the tag name "caption" + had been seen, then, if that token wasn't ignored, reprocess the + current token. + + The fake end tag token here can only be ignored in the fragment + case. 
+ + An end tag whose tag name is one of: "body", "col", "colgroup", "html", + "tbody", "td", "tfoot", "th", "thead", "tr" + Parse error. Ignore the token. + + Anything else + Process the token using the rules for the "in body" insertion + mode. + + 8.2.5.14 The "in column group" insertion mode + + When the insertion mode is "in column group", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + A start tag whose tag name is "col" + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + An end tag whose tag name is "colgroup" + If the current node is the root html element, then this is a + parse error; ignore the token. (fragment case) + + Otherwise, pop the current node (which will be a colgroup + element) from the stack of open elements. Switch the insertion + mode to "in table". + + An end tag whose tag name is "col" + Parse error. Ignore the token. + + An end-of-file token + If the current node is the root html element, then stop parsing. + (fragment case) + + Otherwise, act as described in the "anything else" entry below. + + Anything else + Act as if an end tag with the tag name "colgroup" had been seen, + and then, if that token wasn't ignored, reprocess the current + token. + + The fake end tag token here can only be ignored in the fragment + case.
+ + 8.2.5.15 The "in table body" insertion mode + + When the insertion mode is "in table body", tokens must be handled as + follows: + + A start tag whose tag name is "tr" + Clear the stack back to a table body context. (See below.) + + Insert an HTML element for the token, then switch the insertion + mode to "in row". + + A start tag whose tag name is one of: "th", "td" + Parse error. Act as if a start tag with the tag name "tr" had + been seen, then reprocess the current token. + + An end tag whose tag name is one of: "tbody", "tfoot", "thead" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. + + Otherwise: + + Clear the stack back to a table body context. (See below.) + + Pop the current node from the stack of open elements. Switch the + insertion mode to "in table". + + A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead" + + An end tag whose tag name is "table" + If the stack of open elements does not have a tbody, thead, or + tfoot element in table scope, this is a parse error. Ignore the + token. (fragment case) + + Otherwise: + + Clear the stack back to a table body context. (See below.) + + Act as if an end tag with the same tag name as the current node + ("tbody", "tfoot", or "thead") had been seen, then reprocess the + current token. + + An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th", "tr" + Parse error. Ignore the token. + + Anything else + Process the token using the rules for the "in table" insertion + mode. + + When the steps above require the UA to clear the stack back to a table + body context, it means that the UA must, while the current node is not + a tbody, tfoot, thead, or html element, pop elements from the stack of + open elements. + + The current node being an html element after this process is a fragment + case. 
+ + 8.2.5.16 The "in row" insertion mode + + When the insertion mode is "in row", tokens must be handled as follows: + + A start tag whose tag name is one of: "th", "td" + Clear the stack back to a table row context. (See below.) + + Insert an HTML element for the token, then switch the insertion + mode to "in cell". + + Insert a marker at the end of the list of active formatting + elements. + + An end tag whose tag name is "tr" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. (fragment case) + + Otherwise: + + Clear the stack back to a table row context. (See below.) + + Pop the current node (which will be a tr element) from the stack + of open elements. Switch the insertion mode to "in table body". + + A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", "tr" + + An end tag whose tag name is "table" + Act as if an end tag with the tag name "tr" had been seen, then, + if that token wasn't ignored, reprocess the current token. + + The fake end tag token here can only be ignored in the fragment + case. + + An end tag whose tag name is one of: "tbody", "tfoot", "thead" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. + + Otherwise, act as if an end tag with the tag name "tr" had been + seen, then reprocess the current token. + + An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th" + Parse error. Ignore the token. + + Anything else + Process the token using the rules for the "in table" insertion + mode. + + When the steps above require the UA to clear the stack back to a table + row context, it means that the UA must, while the current node is not a + tr element or an html element, pop elements from the stack of open + elements. 
+ + The current node being an html element after this process is a fragment + case. + + 8.2.5.17 The "in cell" insertion mode + + When the insertion mode is "in cell", tokens must be handled as + follows: + + An end tag whose tag name is one of: "td", "th" + If the stack of open elements does not have an element in table + scope with the same tag name as that of the token, then this is + a parse error and the token must be ignored. + + Otherwise: + + Generate implied end tags. + + Now, if the current node is not an element with the same tag + name as the token, then this is a parse error. + + Pop elements from this stack until an element with the same tag + name as the token has been popped from the stack. + + Clear the list of active formatting elements up to the last + marker. + + Switch the insertion mode to "in row". (The current node will be + a tr element at this point.) + + A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" + If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (fragment case) + + Otherwise, close the cell (see below) and reprocess the current + token. + + An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html" + Parse error. Ignore the token. + + An end tag whose tag name is one of: "table", "tbody", "tfoot", + "thead", "tr" + If the stack of open elements does not have an element in table + scope with the same tag name as that of the token (which can + only happen for "tbody", "tfoot" and "thead", or, in the + fragment case), then this is a parse error and the token must be + ignored. + + Otherwise, close the cell (see below) and reprocess the current + token. + + Anything else + Process the token using the rules for the "in body" insertion + mode. + + Where the steps above say to close the cell, they mean to run the + following algorithm: + 1. 
If the stack of open elements has a td element in table scope, then + act as if an end tag token with the tag name "td" had been seen. + 2. Otherwise, the stack of open elements will have a th element in + table scope; act as if an end tag token with the tag name "th" had + been seen. + + The stack of open elements cannot have both a td and a th element in + table scope at the same time, nor can it have neither when the + insertion mode is "in cell". + + 8.2.5.18 The "in select" insertion mode + + When the insertion mode is "in select", tokens must be handled as + follows: + + A character token + Insert the token's character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + A start tag whose tag name is "option" + If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. + + Insert an HTML element for the token. + + A start tag whose tag name is "optgroup" + If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. + + If the current node is an optgroup element, act as if an end tag + with the tag name "optgroup" had been seen. + + Insert an HTML element for the token. + + An end tag whose tag name is "optgroup" + First, if the current node is an option element, and the node + immediately before it in the stack of open elements is an + optgroup element, then act as if an end tag with the tag name + "option" had been seen. + + If the current node is an optgroup element, then pop that node + from the stack of open elements. Otherwise, this is a parse + error; ignore the token. 
+ + An end tag whose tag name is "option" + If the current node is an option element, then pop that node + from the stack of open elements. Otherwise, this is a parse + error; ignore the token. + + An end tag whose tag name is "select" + If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse + error. Ignore the token. (fragment case) + + Otherwise: + + Pop elements from the stack of open elements until a select + element has been popped from the stack. + + Reset the insertion mode appropriately. + + A start tag whose tag name is "select" + Parse error. Act as if the token had been an end tag with the + tag name "select" instead. + + A start tag whose tag name is one of: "input", "textarea" + Parse error. Act as if an end tag with the tag name "select" had + been seen, and reprocess the token. + + A start tag token whose tag name is "script" + Process the token using the rules for the "in head" insertion + mode. + + An end-of-file token + If the current node is not the root html element, then this is a + parse error. + + It can only be the current node in the fragment case. + + Stop parsing. + + Anything else + Parse error. Ignore the token. + + 8.2.5.19 The "in select in table" insertion mode + + When the insertion mode is "in select in table", tokens must be handled + as follows: + + A start tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" + Parse error. Act as if an end tag with the tag name "select" had + been seen, and reprocess the token. + + An end tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" + Parse error. + + If the stack of open elements has an element in table scope with + the same tag name as that of the token, then act as if an end + tag with the tag name "select" had been seen, and reprocess the + token. Otherwise, ignore the token. 
+ + Anything else + Process the token using the rules for the "in select" insertion + mode. + + 8.2.5.20 The "in foreign content" insertion mode + + When the insertion mode is "in foreign content", tokens must be handled + as follows: + + A character token + Insert the token's character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is neither "mglyph" nor "malignmark", if the + current node is an mi element in the MathML namespace. + + A start tag whose tag name is neither "mglyph" nor "malignmark", if the + current node is an mo element in the MathML namespace. + + A start tag whose tag name is neither "mglyph" nor "malignmark", if the + current node is an mn element in the MathML namespace. + + A start tag whose tag name is neither "mglyph" nor "malignmark", if the + current node is an ms element in the MathML namespace. + + A start tag whose tag name is neither "mglyph" nor "malignmark", if the + current node is an mtext element in the MathML namespace. + + A start tag, if the current node is an element in the HTML namespace. + An end tag + Process the token using the rules for the secondary insertion + mode. + + If, after doing so, the insertion mode is still "in foreign + content", but there is no element in scope that has a namespace + other than the HTML namespace, switch the insertion mode to the + secondary insertion mode. 
+ + A start tag whose tag name is one of: "b", "big", "blockquote", "body", + "br", "center", "code", "dd", "div", "dl", "dt", "em", "embed", + "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img", + "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", + "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", + "table", "tt", "u", "ul", "var" + + A start tag whose tag name is "font", if the token has any attributes + named "color", "face", or "size" + + An end-of-file token + Parse error. + + Pop elements from the stack of open elements until the current + node is in the HTML namespace. + + Switch the insertion mode to the secondary insertion mode, and + reprocess the token. + + Any other start tag + If the current node is an element in the MathML namespace, + adjust MathML attributes for the token. (This fixes the case of + MathML attributes that are not all lowercase.) + + Adjust foreign attributes for the token. (This fixes the use of + namespaced attributes, in particular XLink in SVG.) + + Insert a foreign element for the token, in the same namespace as + the current node. + + If the token has its self-closing flag set, pop the current node + off the stack of open elements and acknowledge the token's + self-closing flag. + + 8.2.5.21 The "after body" insertion mode + + When the insertion mode is "after body", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Process the token using the rules for the "in body" insertion + mode. + + A comment token + Append a Comment node to the first element in the stack of open + elements (the html element), with the data attribute set to the + data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode.
+ + An end tag whose tag name is "html" + If the parser was originally created as part of the HTML + fragment parsing algorithm, this is a parse error; ignore the + token. (fragment case) + + Otherwise, switch the insertion mode to "after after body". + + An end-of-file token + Stop parsing. + + Anything else + Parse error. Switch the insertion mode to "in body" and + reprocess the token. + + 8.2.5.22 The "in frameset" insertion mode + + When the insertion mode is "in frameset", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + A start tag whose tag name is "frameset" + Insert an HTML element for the token. + + An end tag whose tag name is "frameset" + If the current node is the root html element, then this is a + parse error; ignore the token. (fragment case) + + Otherwise, pop the current node from the stack of open elements. + + If the parser was not originally created as part of the HTML + fragment parsing algorithm (fragment case), and the current node + is no longer a frameset element, then switch the insertion mode + to "after frameset". + + A start tag whose tag name is "frame" + Insert an HTML element for the token. Immediately pop the + current node off the stack of open elements. + + Acknowledge the token's self-closing flag, if it is set. + + A start tag whose tag name is "noframes" + Process the token using the rules for the "in head" insertion + mode. + + An end-of-file token + If the current node is not the root html element, then this is a + parse error.
+ + It can only be the current node in the fragment case. + + Stop parsing. + + Anything else + Parse error. Ignore the token. + + 8.2.5.23 The "after frameset" insertion mode + + When the insertion mode is "after frameset", tokens must be handled as + follows: + + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + Insert the character into the current node. + + A comment token + Append a Comment node to the current node with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + Parse error. Ignore the token. + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + An end tag whose tag name is "html" + Switch the insertion mode to "after after frameset". + + A start tag whose tag name is "noframes" + Process the token using the rules for the "in head" insertion + mode. + + An end-of-file token + Stop parsing. + + Anything else + Parse error. Ignore the token. + + This doesn't handle UAs that don't support frames, or that do support + frames but want to show the NOFRAMES content. Supporting the former is + easy; supporting the latter is harder. + + 8.2.5.24 The "after after body" insertion mode + + When the insertion mode is "after after body", tokens must be handled + as follows: + + A comment token + Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + An end-of-file token + Stop parsing. + + Anything else + Parse error. Switch the insertion mode to "in body" and + reprocess the token.
+ + 8.2.5.25 The "after after frameset" insertion mode + + When the insertion mode is "after after frameset", tokens must be + handled as follows: + + A comment token + Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. + + A DOCTYPE token + A character token that is one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE + + A start tag whose tag name is "html" + Process the token using the rules for the "in body" insertion + mode. + + An end-of-file token + Stop parsing. + + A start tag whose tag name is "noframes" + Process the token using the rules for the "in head" insertion + mode. + + Anything else + Parse error. Ignore the token. + + 8.2.6 The end + + Once the user agent stops parsing the document, the user agent must + follow the steps in this section. + + First, the current document readiness must be set to "interactive". + + Then, the rules for when a script completes loading start applying + (script execution is no longer managed by the parser). + + If any of the scripts in the list of scripts that will execute as soon + as possible have completed loading, or if the list of scripts that will + execute asynchronously is not empty and the first script in that list + has completed loading, then the user agent must act as if those scripts + just completed loading, following the rules given for that in the + script element definition. + + Then, if the list of scripts that will execute when the document has + finished parsing is not empty, and the first item in this list has + already completed loading, then the user agent must act as if that + script just finished loading. + + By this point, there will be no scripts that have loaded but have not + yet been executed. + + The user agent must then fire a simple event called DOMContentLoaded at + the Document. 
+ + Once everything that delays the load event has completed, the user + agent must set the current document readiness to "complete", and then + fire a load event at the body element. + + delaying the load event for things like image loads allows for intranet + port scans (even without javascript!). Should we really encode that + into the spec? + + 8.2.7 Coercing an HTML DOM into an infoset + + When an application uses an HTML parser in conjunction with an XML + pipeline, it is possible that the constructed DOM is not compatible + with the XML tool chain in certain subtle ways. For example, an XML + toolchain might not be able to represent attributes with the name + xmlns, since they conflict with the Namespaces in XML syntax. There is + also some data that the HTML parser generates that isn't included in + the DOM itself. This section specifies some rules for handling these + issues. + + If the XML API being used doesn't support DOCTYPEs, the tool may drop + DOCTYPEs altogether. + + If the XML API doesn't support attributes in no namespace that are + named "xmlns", attributes whose names start with "xmlns:", or + attributes in the XMLNS namespace, then the tool may drop such + attributes. + + The tool may annotate the output with any namespace declarations + required for proper operation. + + If the XML API being used restricts the allowable characters in the + local names of elements and attributes, then the tool may map all + element and attribute local names that the API wouldn't support to a + set of names that are allowed, by replacing any character that isn't + supported with the uppercase letter U and the five digits of the + character's Unicode codepoint when expressed in hexadecimal, using + digits 0-9 and capital letters A-F as the symbols, in increasing + numeric order. 
+ + For example, the element name foo<bar [...] a <a::> start tag will be closed + by a </a::> end tag, and never by a </aU0003AU0003A> end tag, even if + the user agent is using the rules above to then generate an actual + element in the DOM with the name aU0003AU0003A for that start tag. + + 8.3 Namespaces + + The HTML namespace is: http://www.w3.org/1999/xhtml + + The MathML namespace is: http://www.w3.org/1998/Math/MathML + + The SVG namespace is: http://www.w3.org/2000/svg + + The XLink namespace is: http://www.w3.org/1999/xlink + + The XML namespace is: http://www.w3.org/XML/1998/namespace + + The XMLNS namespace is: http://www.w3.org/2000/xmlns/ diff --git a/parser/html/java/htmlparser/generate-encoding-data.py b/parser/html/java/htmlparser/generate-encoding-data.py new file mode 100644 index 0000000000..69b2fdc309 --- /dev/null +++ b/parser/html/java/htmlparser/generate-encoding-data.py @@ -0,0 +1,745 @@ +#!/usr/bin/python + +# Copyright (c) 2013-2015 Mozilla Foundation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import json + +class Label: + def __init__(self, label, preferred): + self.label = label + self.preferred = preferred + def __cmp__(self, other): + return cmp(self.label, other.label) + +# If a multi-byte encoding is on this list, it is assumed to have a +# non-generated decoder implementation class. Otherwise, the JDK default +# decoder is used as a placeholder. +MULTI_BYTE_DECODER_IMPLEMENTED = [ + u"x-user-defined", + u"replacement", + u"big5", +] + +MULTI_BYTE_ENCODER_IMPLEMENTED = [ + u"big5", +] + +preferred = [] + +labels = [] + +data = json.load(open("../encoding/encodings.json", "r")) + +indexes = json.load(open("../encoding/indexes.json", "r")) + +single_byte = [] + +multi_byte = [] + +def to_camel_name(name): + if name == u"iso-8859-8-i": + return u"Iso8I" + if name.startswith(u"iso-8859-"): + return name.replace(u"iso-8859-", u"Iso") + return name.title().replace(u"X-", u"").replace(u"-", u"").replace(u"_", u"") + +def to_constant_name(name): + return name.replace(u"-", u"_").upper() + +# Encoding.java + +for group in data: + if group["heading"] == "Legacy single-byte encodings": + single_byte = group["encodings"] + else: + multi_byte.extend(group["encodings"]) + for encoding in group["encodings"]: + preferred.append(encoding["name"]) + for label in encoding["labels"]: + labels.append(Label(label, encoding["name"])) + +preferred.sort() +labels.sort() + +label_file = open("src/nu/validator/encoding/Encoding.java", "w") + +label_file.write("""/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.spi.CharsetProvider; +import java.util.Arrays; +import java.util.Collections; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Represents an encoding + * as defined in the Encoding + * Standard, provides access to each encoding defined in the Encoding + * Standard via a static constant and provides the + * "get an + * encoding" algorithm defined in the Encoding Standard. + * + *

This class inherits from {@link Charset} to allow the Encoding + * Standard-compliant encodings to be used in contexts that support + * Charset instances. However, by design, the Encoding + * Standard-compliant encodings are not supplied via a {@link CharsetProvider} + * and, therefore, are not available via and do not interfere with the static + * methods provided by Charset. (This class provides methods of + * the same name to hide each static method of Charset to help + * avoid accidental calls to the static methods of the superclass when working + * with Encoding Standard-compliant encodings.) + * + *

When an application needs to use a particular encoding, such as utf-8 + * or windows-1252, the corresponding constant, i.e. + * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252} + * respectively, should be used. However, when the application receives an + * encoding label from external input, the method {@link #forName(String) + * forName()} should be used to obtain the object representing the encoding + * identified by the label. In contexts where labels that map to the + * replacement + * encoding should be treated as unknown, the method {@link + * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead. + * + * + * @author hsivonen + */ +public abstract class Encoding extends Charset { + + private static final String[] LABELS = { +""") + +for label in labels: + label_file.write(" \"%s\",\n" % label.label) + +label_file.write(""" }; + + private static final Encoding[] ENCODINGS_FOR_LABELS = { +""") + +for label in labels: + label_file.write(" %s.INSTANCE,\n" % to_camel_name(label.preferred)) + +label_file.write(""" }; + + private static final Encoding[] ENCODINGS = { +""") + +for label in preferred: + label_file.write(" %s.INSTANCE,\n" % to_camel_name(label)) + +label_file.write(""" }; + +""") + +for label in preferred: + label_file.write(""" /** + * The %s encoding. 
+ */ + public static final Encoding %s = %s.INSTANCE; + +""" % (label, to_constant_name(label), to_camel_name(label))) + +label_file.write(""" +private static SortedMap encodings = null; + + protected Encoding(String canonicalName, String[] aliases) { + super(canonicalName, aliases); + } + + private enum State { + HEAD, LABEL, TAIL + }; + + public static Encoding forName(String label) { + if (label == null) { + throw new IllegalArgumentException("Label must not be null."); + } + if (label.length() == 0) { + throw new IllegalCharsetNameException(label); + } + // First try the fast path + int index = Arrays.binarySearch(LABELS, label); + if (index >= 0) { + return ENCODINGS_FOR_LABELS[index]; + } + // Else, slow path + StringBuilder sb = new StringBuilder(); + State state = State.HEAD; + for (int i = 0; i < label.length(); i++) { + char c = label.charAt(i); + if ((c == ' ') || (c == '\\n') || (c == '\\r') || (c == '\\t') + || (c == '\\u000C')) { + if (state == State.LABEL) { + state = State.TAIL; + } + continue; + } + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + switch (state) { + case HEAD: + state = State.LABEL; + // Fall through + case LABEL: + sb.append(c); + continue; + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + if (c >= 'A' && c <= 'Z') { + c += 0x20; + switch (state) { + case HEAD: + state = State.LABEL; + // Fall through + case LABEL: + sb.append(c); + continue; + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + if ((c == '-') || (c == '+') || (c == '.') || (c == ':') + || (c == '_')) { + switch (state) { + case LABEL: + sb.append(c); + continue; + case HEAD: + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + throw new IllegalCharsetNameException(label); + } + index = Arrays.binarySearch(LABELS, sb.toString()); + if (index >= 0) { + return ENCODINGS_FOR_LABELS[index]; + } + throw new UnsupportedCharsetException(label); + } + + public static Encoding forNameNoReplacement(String 
label) { + Encoding encoding = Encoding.forName(label); + if (encoding == Encoding.REPLACEMENT) { + throw new UnsupportedCharsetException(label); + } + return encoding; + } + + public static boolean isSupported(String label) { + try { + Encoding.forName(label); + } catch (UnsupportedCharsetException e) { + return false; + } + return true; + } + + public static boolean isSupportedNoReplacement(String label) { + try { + Encoding.forNameNoReplacement(label); + } catch (UnsupportedCharsetException e) { + return false; + } + return true; + } + + public static SortedMap availableCharsets() { + if (encodings == null) { + TreeMap map = new TreeMap(); + for (Encoding encoding : ENCODINGS) { + map.put(encoding.name(), encoding); + } + encodings = Collections.unmodifiableSortedMap(map); + } + return encodings; + } + + public static Encoding defaultCharset() { + return WINDOWS_1252; + } + + @Override public boolean canEncode() { + return false; + } + + @Override public boolean contains(Charset cs) { + return false; + } + + @Override public CharsetEncoder newEncoder() { + throw new UnsupportedOperationException("Encoder not implemented."); + } +} +""") + +label_file.close() + +# Single-byte encodings + +for encoding in single_byte: + name = encoding["name"] + labels = encoding["labels"] + labels.sort() + class_name = to_camel_name(name) + mapping_name = name + if mapping_name == u"iso-8859-8-i": + mapping_name = u"iso-8859-8" + mapping = indexes[mapping_name] + class_file = open("src/nu/validator/encoding/%s.java" % class_name, "w") + class_file.write('''/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class ''') + class_file.write(class_name) + class_file.write(''' extends Encoding { + + private static final char[] TABLE = {''') + fallible = False + comma = False + for code_point in mapping: + # XXX should we have error reporting? 
+ if not code_point: + code_point = 0xFFFD + fallible = True + if comma: + class_file.write(",") + class_file.write("\n '\u%04x'" % code_point); + comma = True + class_file.write(''' + }; + + private static final String[] LABELS = {''') + + comma = False + for label in labels: + if comma: + class_file.write(",") + class_file.write("\n \"%s\"" % label); + comma = True + class_file.write(''' + }; + + private static final String NAME = "''') + class_file.write(name) + class_file.write('''"; + + static final Encoding INSTANCE = new ''') + class_file.write(class_name) + class_file.write('''(); + + private ''') + class_file.write(class_name) + class_file.write('''() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new ''') + class_file.write("Fallible" if fallible else "Infallible") + class_file.write('''SingleByteDecoder(this, TABLE); + } + +} +''') + class_file.close() + +# Multi-byte encodings + +for encoding in multi_byte: + name = encoding["name"] + labels = encoding["labels"] + labels.sort() + class_name = to_camel_name(name) + class_file = open("src/nu/validator/encoding/%s.java" % class_name, "w") + class_file.write('''/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class ''') + class_file.write(class_name) + class_file.write(''' extends Encoding { + + private static final String[] LABELS = {''') + + comma = False + for label in labels: + if comma: + class_file.write(",") + class_file.write("\n \"%s\"" % label); + comma = True + class_file.write(''' + }; + + private static final String NAME = "''') + class_file.write(name) + class_file.write('''"; + + static final ''') + class_file.write(class_name) + class_file.write(''' INSTANCE = new ''') + class_file.write(class_name) + class_file.write('''(); + + private ''') + class_file.write(class_name) + class_file.write('''() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + ''') + if name == "gbk": + class_file.write('''return Charset.forName("gb18030").newDecoder();''') + elif name in MULTI_BYTE_DECODER_IMPLEMENTED: + class_file.write("return new %sDecoder(this);" % class_name) + else: + class_file.write('''return Charset.forName(NAME).newDecoder();''') + class_file.write(''' + } + + @Override public CharsetEncoder newEncoder() { + ''') + if name in MULTI_BYTE_ENCODER_IMPLEMENTED: + class_file.write("return new %sEncoder(this);" % class_name) + else: + class_file.write('''return 
Charset.forName(NAME).newEncoder();''') + class_file.write(''' + } +} +''') + class_file.close() + +# Big5 + +def null_to_zero(code_point): + if not code_point: + code_point = 0 + return code_point + +index = [] + +for code_point in indexes["big5"]: + index.append(null_to_zero(code_point)) + +# There are four major gaps consisting of more than 4 consecutive invalid pointers +gaps = [] +consecutive = 0 +consecutive_start = 0 +offset = 0 +for code_point in index: + if code_point == 0: + if consecutive == 0: + consecutive_start = offset + consecutive +=1 + else: + if consecutive > 4: + gaps.append((consecutive_start, consecutive_start + consecutive)) + consecutive = 0 + offset += 1 + +def invert_ranges(ranges, cap): + inverted = [] + invert_start = 0 + for (start, end) in ranges: + if start != 0: + inverted.append((invert_start, start)) + invert_start = end + inverted.append((invert_start, cap)) + return inverted + +cap = len(index) +ranges = invert_ranges(gaps, cap) + +# Now compute a compressed lookup table for astralness + +gaps = [] +consecutive = 0 +consecutive_start = 0 +offset = 0 +for code_point in index: + if code_point <= 0xFFFF: + if consecutive == 0: + consecutive_start = offset + consecutive +=1 + else: + if consecutive > 40: + gaps.append((consecutive_start, consecutive_start + consecutive)) + consecutive = 0 + offset += 1 + +astral_ranges = invert_ranges(gaps, cap) + +class_file = open("src/nu/validator/encoding/Big5Data.java", "w") +class_file.write('''/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * 
+ * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +final class Big5Data { + + private static final String ASTRALNESS = "''') + +bits = [] +for (low, high) in astral_ranges: + for i in xrange(low, high): + bits.append(1 if index[i] > 0xFFFF else 0) +# pad length to multiple of 16 +for j in xrange(16 - (len(bits) % 16)): + bits.append(0) + +i = 0 +while i < len(bits): + accu = 0 + for j in xrange(16): + accu |= bits[i + j] << j + if accu == 0x22: + class_file.write('\\"') + else: + class_file.write('\\u%04X' % accu) + i += 16 + +class_file.write('''"; + +''') + +j = 0 +for (low, high) in ranges: + class_file.write(''' private static final String TABLE%d = "''' % j) + for i in xrange(low, high): + class_file.write('\\u%04X' % (index[i] & 0xFFFF)) + class_file.write('''"; + +''') + j += 1 + +class_file.write(''' private static boolean readBit(int i) { + return (ASTRALNESS.charAt(i >> 4) & (1 << (i & 0xF))) != 0; + } + + static char lowBits(int pointer) { +''') + +j = 0 +for (low, high) in ranges: + class_file.write(''' if (pointer < %d) { + return '\\u0000'; + } + if (pointer < %d) { + return TABLE%d.charAt(pointer - %d); + } +''' % (low, high, j, low)) + j += 1 + +class_file.write(''' return '\\u0000'; + } + + static boolean isAstral(int 
pointer) { +''') + +base = 0 +for (low, high) in astral_ranges: + if high - low == 1: + class_file.write(''' if (pointer < %d) { + return false; + } + if (pointer == %d) { + return true; + } +''' % (low, low)) + else: + class_file.write(''' if (pointer < %d) { + return false; + } + if (pointer < %d) { + return readBit(%d + (pointer - %d)); + } +''' % (low, high, base, low)) + base += (high - low) + +class_file.write(''' return false; + } + + public static int findPointer(char lowBits, boolean isAstral) { + if (!isAstral) { + switch (lowBits) { +''') + +hkscs_bound = (0xA1 - 0x81) * 157 + +prefer_last = [ + 0x2550, + 0x255E, + 0x2561, + 0x256A, + 0x5341, + 0x5345, +] + +for code_point in prefer_last: + # Python lists don't have .rindex() :-( + for i in xrange(len(index) - 1, -1, -1): + candidate = index[i] + if candidate == code_point: + class_file.write(''' case 0x%04X: + return %d; +''' % (code_point, i)) + break + +class_file.write(''' default: + break; + } + }''') + +j = 0 +for (low, high) in ranges: + if high > hkscs_bound: + start = 0 + if low <= hkscs_bound and hkscs_bound < high: + # This is the first range we don't ignore and the + # range that contains the first non-HKSCS pointer. + # Avoid searching HKSCS. 
+ start = hkscs_bound - low + class_file.write(''' + for (int i = %d; i < TABLE%d.length(); i++) { + if (TABLE%d.charAt(i) == lowBits) { + int pointer = i + %d; + if (isAstral == isAstral(pointer)) { + return pointer; + } + } + }''' % (start, j, j, low)) + j += 1 + +class_file.write(''' + return 0; + } +} +''') +class_file.close() diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml new file mode 100644 index 0000000000..1eab09c21a --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java new file mode 100644 index 0000000000..29ef2a43a0 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.impl.CoalescingTreeBuilder; +import nu.validator.htmlparser.impl.HtmlAttributes; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.JavaScriptException; +import com.google.gwt.core.client.JavaScriptObject; + +class BrowserTreeBuilder extends CoalescingTreeBuilder { + + private JavaScriptObject document; + + private JavaScriptObject script; + + private JavaScriptObject placeholder; + + private boolean readyToRun; + + private final LinkedList scriptStack = new LinkedList(); + + private class ScriptHolder { + private final JavaScriptObject script; + + private final JavaScriptObject placeholder; + + /** + * @param script + * @param placeholder + */ + public ScriptHolder(JavaScriptObject script, + JavaScriptObject placeholder) { + this.script = script; + this.placeholder = placeholder; + } + + /** + * Returns the script. + * + * @return the script + */ + public JavaScriptObject getScript() { + return script; + } + + /** + * Returns the placeholder. 
+ * + * @return the placeholder + */ + public JavaScriptObject getPlaceholder() { + return placeholder; + } + } + + protected BrowserTreeBuilder(JavaScriptObject document) { + super(); + this.document = document; + installExplorerCreateElementNS(document); + } + + private static native boolean installExplorerCreateElementNS( + JavaScriptObject doc) /*-{ + if (!doc.createElementNS) { + doc.createElementNS = function (uri, local) { + if ("http://www.w3.org/1999/xhtml" == uri) { + return doc.createElement(local); + } else if ("http://www.w3.org/1998/Math/MathML" == uri) { + if (!doc.mathplayerinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "mathplayer"); + obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer"); + doc.mathplayerinitialized = true; + } + return doc.createElement("m:" + local); + } else if ("http://www.w3.org/2000/svg" == uri) { + if (!doc.renesisinitialized) { + var obj = document.createElement("object"); + obj.setAttribute("id", "renesis"); + obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2"); + document.getElementsByTagName("head")[0].appendChild(obj); + document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis"); + doc.renesisinitialized = true; + } + return doc.createElement("s:" + local); + } else { + // throw + } + } + } + }-*/; + + private static native boolean hasAttributeNS(JavaScriptObject element, + String uri, String localName) /*-{ + return element.hasAttributeNS(uri, localName); + }-*/; + + private static native void setAttributeNS(JavaScriptObject element, + String uri, String localName, String value) /*-{ + element.setAttributeNS(uri, localName, value); + }-*/; + + @Override protected void addAttributesToElement(JavaScriptObject element, + HtmlAttributes attributes) throws SAXException { + try { + for 
(int i = 0; i < attributes.getLength(); i++) { + String localName = attributes.getLocalNameNoBoundsCheck(i); + String uri = attributes.getURINoBoundsCheck(i); + if (!hasAttributeNS(element, uri, localName)) { + setAttributeNS(element, uri, localName, + attributes.getValueNoBoundsCheck(i)); + } + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void appendChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.appendChild(child); + }-*/; + + private static native JavaScriptObject createTextNode(JavaScriptObject doc, + String text) /*-{ + return doc.createTextNode(text); + }-*/; + + private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{ + return node.lastChild; + }-*/; + + private static native void extendTextNode(JavaScriptObject node, String text) /*-{ + node.data += text; + }-*/; + + @Override protected void appendCharacters(JavaScriptObject parent, + String text) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createTextNode(document, text)); + + } + JavaScriptObject lastChild = getLastChild(parent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(parent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native boolean hasChildNodes(JavaScriptObject element) /*-{ + return element.hasChildNodes(); + }-*/; + + private static native JavaScriptObject getFirstChild( + JavaScriptObject element) /*-{ + return element.firstChild; + }-*/; + + @Override protected void appendChildrenToNewParent( + JavaScriptObject oldParent, JavaScriptObject newParent) + throws SAXException { + try { + while (hasChildNodes(oldParent)) { + appendChild(newParent, getFirstChild(oldParent)); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createComment(JavaScriptObject doc, + String text) /*-{ + return 
doc.createComment(text); + }-*/; + + @Override protected void appendComment(JavaScriptObject parent, + String comment) throws SAXException { + try { + if (parent == placeholder) { + appendChild(script, createComment(document, comment)); + } + appendChild(parent, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void appendCommentToDocument(String comment) + throws SAXException { + try { + appendChild(document, createComment(document, comment)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native JavaScriptObject createElementNS( + JavaScriptObject doc, String ns, String local) /*-{ + return doc.createElementNS(ns, local); + }-*/; + + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, ns, name); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + + if ("script" == name) { + if (placeholder != null) { + scriptStack.addLast(new ScriptHolder(script, placeholder)); + } + script = rv; + placeholder = createElementNS(document, + "http://n.validator.nu/placeholder/", "script"); + rv = placeholder; + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + } + + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override protected JavaScriptObject createHtmlElementSetAsRoot( + HtmlAttributes attributes) throws SAXException { + try { + JavaScriptObject rv = createElementNS(document, + "http://www.w3.org/1999/xhtml", "html"); + for (int i = 0; i < attributes.getLength(); i++) { + setAttributeNS(rv, 
attributes.getURINoBoundsCheck(i), + attributes.getLocalNameNoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i)); + } + appendChild(document, rv); + return rv; + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native JavaScriptObject getParentNode( + JavaScriptObject element) /*-{ + return element.parentNode; + }-*/; + + @Override protected void appendElement(JavaScriptObject child, + JavaScriptObject newParent) throws SAXException { + try { + if (newParent == placeholder) { + appendChild(script, cloneNodeDeep(child)); + } + appendChild(newParent, child); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected boolean hasChildren(JavaScriptObject element) + throws SAXException { + try { + return hasChildNodes(element); + } catch (JavaScriptException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + private static native void insertBeforeNative(JavaScriptObject parent, + JavaScriptObject child, JavaScriptObject sibling) /*-{ + parent.insertBefore(child, sibling); + }-*/; + + private static native int getNodeType(JavaScriptObject node) /*-{ + return node.nodeType; + }-*/; + + private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{ + return node.cloneNode(true); + }-*/; + + /** + * Returns the document. 
+ * + * @return the document + */ + JavaScriptObject getDocument() { + JavaScriptObject rv = document; + document = null; + return rv; + } + + private static native JavaScriptObject createDocumentFragment( + JavaScriptObject doc) /*-{ + return doc.createDocumentFragment(); + }-*/; + + JavaScriptObject getDocumentFragment() { + JavaScriptObject rv = createDocumentFragment(document); + JavaScriptObject rootElt = getFirstChild(document); + while (hasChildNodes(rootElt)) { + appendChild(rv, getFirstChild(rootElt)); + } + document = null; + return rv; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String, + * java.lang.String, org.xml.sax.Attributes, java.lang.Object) + */ + @Override protected JavaScriptObject createElement(String ns, String name, + HtmlAttributes attributes, JavaScriptObject form) + throws SAXException { + try { + JavaScriptObject rv = createElement(ns, name, attributes); + // rv.setUserData("nu.validator.form-pointer", form, null); + return rv; + } catch (JavaScriptException e) { + fatal(e); + return null; + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override protected void start(boolean fragment) throws SAXException { + script = null; + placeholder = null; + readyToRun = false; + } + + protected void documentMode(DocumentMode mode, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + // document.setUserData("nu.validator.document-mode", mode, null); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, + * java.lang.String, java.lang.Object) + */ + @Override protected void elementPopped(String ns, String name, + JavaScriptObject node) throws SAXException { + if (node == placeholder) { + readyToRun = true; + requestSuspension(); + } + } + + private static native void replace(JavaScriptObject oldNode, + JavaScriptObject newNode) /*-{ + 
oldNode.parentNode.replaceChild(newNode, oldNode); + }-*/; + + private static native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{ + return node.previousSibling; + }-*/; + + void maybeRunScript() { + if (readyToRun) { + readyToRun = false; + replace(placeholder, script); + if (scriptStack.isEmpty()) { + script = null; + placeholder = null; + } else { + ScriptHolder scriptHolder = scriptStack.removeLast(); + script = scriptHolder.getScript(); + placeholder = scriptHolder.getPlaceholder(); + } + } + } + + @Override protected void insertFosterParentedCharacters(String text, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + try { + JavaScriptObject parent = getParentNode(table); + if (parent != null) { // always an element if not null + JavaScriptObject previousSibling = getPreviousSibling(table); + if (previousSibling != null + && getNodeType(previousSibling) == 3) { + extendTextNode(previousSibling, text); + return; + } + insertBeforeNative(parent, createTextNode(document, text), table); + return; + } + JavaScriptObject lastChild = getLastChild(stackParent); + if (lastChild != null && getNodeType(lastChild) == 3) { + extendTextNode(lastChild, text); + return; + } + appendChild(stackParent, createTextNode(document, text)); + } catch (JavaScriptException e) { + fatal(e); + } + } + + @Override protected void insertFosterParentedChild(JavaScriptObject child, + JavaScriptObject table, JavaScriptObject stackParent) + throws SAXException { + JavaScriptObject parent = getParentNode(table); + try { + if (parent != null && getNodeType(parent) == 1) { + insertBeforeNative(parent, child, table); + } else { + appendChild(stackParent, child); + } + } catch (JavaScriptException e) { + fatal(e); + } + } + + private static native void removeChild(JavaScriptObject parent, + JavaScriptObject child) /*-{ + parent.removeChild(child); + }-*/; + + @Override protected void detachFromParent(JavaScriptObject element) + throws SAXException { + 
try { + JavaScriptObject parent = getParentNode(element); + if (parent != null) { + removeChild(parent, element); + } + } catch (JavaScriptException e) { + fatal(e); + } + } +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java new file mode 100644 index 0000000000..1d71cdfd62 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import java.util.LinkedList; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.UTF16Buffer; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import com.google.gwt.core.client.JavaScriptObject; +import com.google.gwt.user.client.Timer; + +/** + * This class implements an HTML5 parser that exposes data through the DOM + * interface. + * + *

By default, when using the constructor without arguments, the + * this parser treats XML 1.0-incompatible infosets as fatal errors. + * This corresponds to + * FATAL as the general XML violation policy. To make the parser + * support non-conforming HTML fully per the HTML 5 spec while on the other + * hand potentially violating the DOM API contract, set the general XML + * violation policy to ALLOW. This does not work with a standard + * DOM implementation. Handling all input without fatal errors and without + * violating the DOM API contract is possible by setting + * the general XML violation policy to ALTER_INFOSET. This + * makes the parser non-conforming but is probably the most useful + * setting for most applications. + * + *

The doctype is not represented in the tree. + * + *

The document mode is represented as user data DocumentMode + * object with the key nu.validator.document-mode on the document + * node. + * + *

The form pointer is also stored as user data with the key + * nu.validator.form-pointer. + * + * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $ + * @author hsivonen + */ +public class HtmlParser { + + private static final int CHUNK_SIZE = 512; + + private final Tokenizer tokenizer; + + private final BrowserTreeBuilder domTreeBuilder; + + private final StringBuilder documentWriteBuffer = new StringBuilder(); + + private ErrorHandler errorHandler; + + private UTF16Buffer stream; + + private int streamLength; + + private boolean lastWasCR; + + private boolean ending; + + private ParseEndListener parseEndListener; + + private final LinkedList bufferStack = new LinkedList(); + + /** + * Instantiates the parser + * + * @param implementation + * the DOM implementation + * @param xmlPolicy the policy + */ + public HtmlParser(JavaScriptObject document) { + this.domTreeBuilder = new BrowserTreeBuilder(document); + this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder); + this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); + this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); + } + + /** + * Parses a document from a SAX InputSource. 
+ * @param is the source + * @return the doc + * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) + */ + public void parse(String source, ParseEndListener callback) throws SAXException { + parseEndListener = callback; + domTreeBuilder.setFragmentContext(null); + tokenize(source, null); + } + + /** + * @param is + * @throws SAXException + * @throws IOException + * @throws MalformedURLException + */ + private void tokenize(String source, String context) throws SAXException { + lastWasCR = false; + ending = false; + documentWriteBuffer.setLength(0); + streamLength = source.length(); + stream = new UTF16Buffer(source.toCharArray(), 0, + (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE)); + bufferStack.clear(); + push(stream); + domTreeBuilder.setFragmentContext(context == null ? null : context.intern()); + tokenizer.start(); + pump(); + } + + private void pump() throws SAXException { + if (ending) { + tokenizer.end(); + domTreeBuilder.getDocument(); // drops the internal reference + parseEndListener.parseComplete(); + // Don't schedule timeout + return; + } + + int docWriteLen = documentWriteBuffer.length(); + if (docWriteLen > 0) { + char[] newBuf = new char[docWriteLen]; + documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0); + push(new UTF16Buffer(newBuf, 0, docWriteLen)); + documentWriteBuffer.setLength(0); + } + + for (;;) { + UTF16Buffer buffer = peek(); + if (!buffer.hasMore()) { + if (buffer == stream) { + if (buffer.getEnd() == streamLength) { + // Stop parsing + tokenizer.eof(); + ending = true; + break; + } else { + int newEnd = buffer.getStart() + CHUNK_SIZE; + buffer.setEnd(newEnd < streamLength ? 
newEnd + : streamLength); + continue; + } + } else { + pop(); + continue; + } + } + // now we have a non-empty buffer + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + break; + } else { + continue; + } + } + + // schedule + Timer timer = new Timer() { + + @Override public void run() { + try { + pump(); + } catch (SAXException e) { + ending = true; + if (errorHandler != null) { + try { + errorHandler.fatalError(new SAXParseException( + e.getMessage(), null, null, -1, -1, e)); + } catch (SAXException e1) { + } + } + } + } + + }; + timer.schedule(1); + } + + private void push(UTF16Buffer buffer) { + bufferStack.addLast(buffer); + } + + private UTF16Buffer peek() { + return bufferStack.getLast(); + } + + private void pop() { + bufferStack.removeLast(); + } + + public void documentWrite(String text) throws SAXException { + UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length()); + while (buffer.hasMore()) { + buffer.adjust(lastWasCR); + lastWasCR = false; + if (buffer.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(buffer); + domTreeBuilder.maybeRunScript(); + } + } + } + + /** + * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + domTreeBuilder.setErrorHandler(errorHandler); + tokenizer.setErrorHandler(errorHandler); + } + + /** + * Sets whether comment nodes appear in the tree. + * @param ignoreComments true to ignore comments + * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) + */ + public void setIgnoringComments(boolean ignoreComments) { + domTreeBuilder.setIgnoringComments(ignoreComments); + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. 
+ * @param scriptingEnabled true to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + domTreeBuilder.setScriptingEnabled(scriptingEnabled); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java new file mode 100644 index 0000000000..255a02d134 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import org.xml.sax.SAXException; + +import com.google.gwt.core.client.EntryPoint; +import com.google.gwt.core.client.JavaScriptObject; + +public class HtmlParserModule implements EntryPoint { + + private static native void zapChildren(JavaScriptObject node) /*-{ + while (node.hasChildNodes()) { + node.removeChild(node.lastChild); + } + }-*/; + + private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{ + doc.write = function() { + if (arguments.length == 0) { + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + doc.writeln = function() { + if (arguments.length == 0) { + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)("\n"); + return; + } + var text = arguments[0]; + for (var i = 1; i < arguments.length; i++) { + text += arguments[i]; + } + text += "\n"; + parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); + } + }-*/; + + @SuppressWarnings("unused") + private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException { + if (readyCallback == null) { + readyCallback = JavaScriptObject.createFunction(); + } + zapChildren(document); + HtmlParser parser = new HtmlParser(document); + parser.setScriptingEnabled(true); + // XXX error handler + + installDocWrite(document, parser); + + parser.parse(source, new ParseEndListener(readyCallback)); + } + + private static native void exportEntryPoints() /*-{ + $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;); + }-*/; + + + public void 
onModuleLoad() { + exportEntryPoints(); + } + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java new file mode 100644 index 0000000000..43235c5be9 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.gwt; + +import com.google.gwt.core.client.JavaScriptObject; + +public class ParseEndListener { + + private final JavaScriptObject callback; + + /** + * @param callback + */ + public ParseEndListener(JavaScriptObject callback) { + this.callback = callback; + } + + public void parseComplete() { + call(callback); + } + + private static native void call(JavaScriptObject callback) /*-{ + callback(); + }-*/; + +} diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html new file mode 100644 index 0000000000..4d9cde81c3 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/HtmlParser.html @@ -0,0 +1,225 @@ + + + + Live DOM Viewer + + + + + + +

Live DOM Viewer

+

Markup to test (, upload, download, hide):

+

+

DOM view (hide, refresh):

+ +

Rendered view: (hide):

+

+

innerHTML view: (show, refresh):

+ +

Log: (hide):

+
Script not loaded.
+ +

This script puts a function w(s) into the + global scope of the test page, where s is a string to + output to the log. Also, five files are accessible in the current + directory for test purposes: image (a GIF image), + flash (a Flash file), script (a JS file), + style (a CSS file), and document (an HTML + file).

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt new file mode 100644 index 0000000000..bd2f4fcf13 --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt @@ -0,0 +1,25 @@ +From: +http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE +regarding the upstream of HtmlParser.html: + +The MIT License + +Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html new file mode 100644 index 0000000000..a8756c9f7a --- /dev/null +++ b/parser/html/java/htmlparser/gwt-src/nu/validator/htmlparser/public/blank.html @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/README.txt b/parser/html/java/htmlparser/mozilla-export-scripts/README.txt new file mode 100644 index 0000000000..3567b846c1 --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/README.txt @@ -0,0 +1,25 @@ +These scripts export the Java-to-C++ translator and the java source files that +implement the HTML5 parser. The exported translator may be used (with no +external dependencies) to translate the exported java source files into Gecko- +compatible C++. + +Hacking the translator itself still requires a working copy of the Java HTML5 +parser repository, but hacking the parser (modifying the Java source files and +performing the translation) should now be possible using only files committed +to the mozilla source tree. + +Run any of these scripts without arguments to receive usage instructions. + + make-translator-jar.sh: compiles the Java-to-C++ translator into a .jar file + export-java-srcs.sh: exports minimal java source files implementing the + HTML5 parser + export-translator.sh: exports the compiled translator and javaparser.jar + export-all.sh: runs the previous two scripts + util.sh: provides various shell utility functions to the + scripts listed above (does nothing if run directly) + +All path arguments may be either absolute or relative. This includes the path +to the script itself ($0), so the directory from which you run these scripts +doesn't matter. 
+ +Ben Newman (7 July 2009) diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh new file mode 100644 index 0000000000..9ae07d33da --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-all.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env sh + +SCRIPT_DIR=`dirname $0` +source $SCRIPT_DIR/util.sh +SCRIPT_DIR=`abs $SCRIPT_DIR` + +if [ $# -eq 1 ] +then + MOZ_PARSER_PATH=`abs $1` +else + echo + echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" + echo "Note that relative paths will work just fine." + echo + exit 1 +fi + +$SCRIPT_DIR/export-translator.sh $MOZ_PARSER_PATH +$SCRIPT_DIR/export-java-srcs.sh $MOZ_PARSER_PATH + +echo +echo "Now go to $MOZ_PARSER_PATH and run" +echo " java -jar javalib/translator.jar javasrc . nsHtml5AtomList.h" +echo diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh new file mode 100644 index 0000000000..6d32b07da1 --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-java-srcs.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env sh + +SCRIPT_DIR=`dirname $0` +source $SCRIPT_DIR/util.sh +SCRIPT_DIR=`abs $SCRIPT_DIR` + +SRCDIR=`abs $SCRIPT_DIR/../src/nu/validator/htmlparser/impl` + +if [ $# -eq 1 ] +then + MOZ_PARSER_PATH=`abs $1` +else + echo + echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" + echo "Note that relative paths will work just fine." 
+ echo + exit 1 +fi + +SRCTARGET=$MOZ_PARSER_PATH/javasrc + +rm -rf $SRCTARGET +mkdir $SRCTARGET +# Avoid copying the .svn directory: +cp -rv $SRCDIR/*.java $SRCTARGET diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh b/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh new file mode 100644 index 0000000000..d1f4f1c392 --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/export-translator.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env sh + +SCRIPT_DIR=`dirname $0` +source $SCRIPT_DIR/util.sh +SCRIPT_DIR=`abs $SCRIPT_DIR` + +LIBDIR=`abs $SCRIPT_DIR/../translator-lib` + +if [ $# -eq 1 ] +then + MOZ_PARSER_PATH=`abs $1` +else + echo + echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" + echo "Note that relative paths will work just fine." + echo "Be sure that you have run `dirname $0`/make-translator-jar.sh before running this script." + echo + exit 1 +fi + +LIBTARGET=$MOZ_PARSER_PATH/javalib + +rm -rf $LIBTARGET +cp -rv $LIBDIR $LIBTARGET diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh b/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh new file mode 100644 index 0000000000..4f21ae6650 --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/make-translator-jar.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env sh + +SCRIPT_DIR=`dirname $0` +source $SCRIPT_DIR/util.sh +SCRIPT_DIR=`abs $SCRIPT_DIR` + +SRCDIR=`abs $SCRIPT_DIR/../translator-src` +BINDIR=`abs $SCRIPT_DIR/../translator-bin` +LIBDIR=`abs $SCRIPT_DIR/../translator-lib` + +if [ $# -eq 1 ] +then + JAVAPARSER_JAR_PATH=`abs $1` +else + echo + echo "Usage: sh `basename $0` /path/to/javaparser-1.0.7.jar" + echo "Note that relative paths will work just fine." 
+ echo "Obtain javaparser-1.0.7.jar from http://code.google.com/p/javaparser" + echo + exit 1 +fi + +set_up() { + rm -rf $BINDIR; mkdir $BINDIR + rm -rf $LIBDIR; mkdir $LIBDIR + cp $JAVAPARSER_JAR_PATH $LIBDIR/javaparser.jar +} + +write_manifest() { + rm -f $LIBDIR/manifest + echo "Main-Class: nu.validator.htmlparser.cpptranslate.Main" > $LIBDIR/manifest + echo "Class-Path: javaparser.jar" >> $LIBDIR/manifest +} + +compile_translator() { + find $SRCDIR -name "*.java" | \ + xargs javac -cp $LIBDIR/javaparser.jar -g -d $BINDIR +} + +generate_jar() { + jar cvfm $LIBDIR/translator.jar $LIBDIR/manifest -C $BINDIR . +} + +clean_up() { + rm -f $LIBDIR/manifest +} + +success_message() { + echo + echo "Successfully generated directory \"$LIBDIR\" with contents:" + echo + ls -al $LIBDIR + echo + echo "Now run `dirname $0`/export-all.sh with no arguments and follow the usage instructions." + echo +} + +set_up && \ + compile_translator && \ + write_manifest && \ + generate_jar && \ + clean_up && \ + success_message diff --git a/parser/html/java/htmlparser/mozilla-export-scripts/util.sh b/parser/html/java/htmlparser/mozilla-export-scripts/util.sh new file mode 100644 index 0000000000..348ca14f9f --- /dev/null +++ b/parser/html/java/htmlparser/mozilla-export-scripts/util.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env sh + +abs() { + local rel + local p + if [ $# -ne 1 ] + then + rel=. 
+ else + rel=$1 + fi + if [ -d $rel ] + then + pushd $rel > /dev/null + p=`pwd` + popd > /dev/null + else + pushd `dirname $rel` > /dev/null + p=`pwd`/`basename $rel` + popd > /dev/null + fi + echo $p +} diff --git a/parser/html/java/htmlparser/pom.xml b/parser/html/java/htmlparser/pom.xml new file mode 100644 index 0000000000..41f46725f8 --- /dev/null +++ b/parser/html/java/htmlparser/pom.xml @@ -0,0 +1,240 @@ + + + 4.0.0 + nu.validator.htmlparser + htmlparser + bundle + 1.4 + htmlparser + http://about.validator.nu/htmlparser/ + The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. + + + + hsivonen + Henri Sivonen + hsivonen@iki.fi + http://hsivonen.iki.fi/ + + + + + The MIT License + http://www.opensource.org/licenses/mit-license.php + repo + + + The (New) BSD License + http://www.opensource.org/licenses/bsd-license.php + repo + + + + scm:hg:http://hg.mozilla.org/projects/htmlparser/ + http://hg.mozilla.org/projects/htmlparser/ + + + ${project.build.directory}/src + ${basedir}/test-src + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.5 + 1.5 + + + + maven-antrun-plugin + 1.7 + + + com.sun + tools + 1.5.0 + system + ${java.home}/../lib/tools.jar + + + + + intitialize-sources + initialize + + run + + + + + + + + + + + + + tokenizer-hotspot-workaround + process-sources + + run + + + + + + + + + + + + + + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.felix + maven-bundle-plugin + 2.3.7 + true + + + false + + + ${project.name} + nu.validator.htmlparser + ${project.version} + J2SE-1.5 + <_removeheaders>Built-By,Bnd-LastModified + + + + + org.codehaus.mojo + rpm-maven-plugin + + 1 + The MIT License + Development/Java + /var/tmp/${project.build.finalName} + + 
import java.util.HashSet;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.Element;

/**
 * Static DOM helpers: a registry that holds references to documents, plus
 * utilities for reading and replacing the text content of an element.
 */
public class DomUtils {

  // Documents added by pin() stay in this set until unpin() removes them.
  private static final HashSet<Document> pinned_list = new HashSet<Document>();

  /**
   * Adds a document to the pinned set.
   *
   * @param d the document to pin
   */
  public static synchronized void pin(Document d) {
    pinned_list.add(d);
  }

  /**
   * Removes a document from the pinned set.
   *
   * @param d the document to unpin
   */
  public static synchronized void unpin(Document d) {
    pinned_list.remove(d);
  }

  /**
   * Appends all the text content contained by a single element, in document
   * order, descending into child elements.
   *
   * @param e the element whose text content is collected
   * @param b the buffer the text is appended to
   */
  public static void getElementContent(Element e, StringBuffer b) {
    for (Node n = e.getFirstChild(); n != null; n = n.getNextSibling()) {
      short type = n.getNodeType();
      if (type == Node.TEXT_NODE) {
        b.append(n.getNodeValue());
      } else if (type == Node.ELEMENT_NODE) {
        // Recurse into the child. Passing the parent "e" here would restart
        // the same loop forever and overflow the stack.
        getElementContent((Element) n, b);
      }
    }
  }

  /**
   * Replaces all child nodes of a given element with a single text node.
   *
   * @param e the element whose children are replaced
   * @param s the text for the new text node
   */
  public static void setElementContent(Element e, String s) {
    while (e.hasChildNodes()) {
      e.removeChild(e.getFirstChild());
    }
    e.appendChild(e.getOwnerDocument().createTextNode(s));
  }
}
b/parser/html/java/htmlparser/ruby-gcj/README new file mode 100644 index 0000000000..b368437f77 --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/README @@ -0,0 +1,65 @@ +Disclaimer: + + This code is experimental. + + When some people say experimental, they mean "it may not do what it is + intended to do; in fact, it might even wipe out your hard drive". I mean + that too. But I mean something more than that. + + In this case, experimental means that I don't even know what it is intended + to do. I just have a vague vision, and I am trying out various things in + the hopes that one of them will work out. + +Vision: + + My vague vision is that I would like to see HTML 5 be a success. For me to + consider it to be a success, it needs to be a standard, be interoperable, + and be ubiquitous. + + I believe that the Validator.nu parser can be used to bootstrap that + process. It is written in Java. Has been compiled into JavaScript. Has + been translated into C++ based on the Mozilla libraries with the intent of + being included in Firefox. It very closely tracks to the standard. + + For the moment, the effort is on extending that to another language (Ruby) + on a single environment (i.e., Linux). Once that is complete, intent is to + evaluate the results, decide what needs to be changed, and what needs to be + done to support other languages and environments. + + The bar I'm setting for myself isn't just another SWIG generated low level + interface to a DOM, but rather a best of breed interface; which for Ruby + seems to be the one pioneered by Hpricot and adopted by Nokogiri. Success + will mean passing all of the tests from one of those two parsers as well as + all of the HTML5 tests. + +Build instructions: + + You'll need icu4j and chardet jars. 
If you checked out and ran dldeps you + are already all set: + + svn co http://svn.versiondude.net/whattf/build/trunk/ build + python build/build.py checkout dldeps + + Fedora 11: + + yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++ + + Ubuntu 9.04: + + apt-get install ruby ruby1.8-dev rake gcj g++ + + Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply + because the javac that comes with gcj doesn't support -sourcepath, and + I haven't spent the time to find a replacement. + + Finally, make sure that libjaxp1.3-java is *not* installed. + + http://gcc.gnu.org/ml/java/2009-06/msg00055.html + + If this is done, you should be all set. + + cd htmlparser/ruby-gcj + rake test + + If things are successful, the last lines of the output will list the + font attributes and values found in the test/google.html file. diff --git a/parser/html/java/htmlparser/ruby-gcj/Rakefile b/parser/html/java/htmlparser/ruby-gcj/Rakefile new file mode 100644 index 0000000000..7b51802539 --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/Rakefile @@ -0,0 +1,77 @@ +deps = ENV['deps'] || '../../dependencies' +icu4j = "#{deps}/icu4j-4_0.jar" +chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar" +libgcj = Dir['/usr/share/java/libgcj*.jar'].grep(/gcj[-\d.]*jar$/).sort.last + +task :default => %w(headers libs Makefile validator.so) + +# headers + +hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder' +task :headers => %W(headers/DomUtils.h headers/#{hdb}.h) + +file 'headers/DomUtils.h' => 'DomUtils.java' do |t| + mkdir_p %w(classes headers), :verbose => false + sh "javac -d classes #{t.prerequisites.first}" + sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils" +end + +file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t| + mkdir_p %w(classes headers), :verbose => false + sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " + + t.prerequisites.first + sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " + + 
hdb.gsub('/','.') +end + +# libs + +task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"} + +htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? '/xom/'} +file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t| + mkdir_p 'lib', :verbose => false + sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " + + "-o #{t.name} #{t.prerequisites.join(' ')}" +end + +file 'lib/libnu-chardet.so' => chardet do |t| + mkdir_p 'lib', :verbose => false + sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" +end + +file 'lib/libnu-icu.so' => icu4j do |t| + mkdir_p 'lib', :verbose => false + sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" +end + +# module + +file 'Makefile' do + sh "ruby extconf.rb --with-gcj=#{libgcj}" +end + +file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do + system 'make' +end + +file 'nu/validator.so' do + mkdir_p 'nu', :verbose => false + system 'ln -s -t nu ../validator.so' +end + +# tasks + +task :test => [:default, 'nu/validator.so'] do + ENV['LD_LIBRARY_PATH']='lib' + sh 'ruby test/fonts.rb test/google.html' +end + +task :clean do + rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) + + Dir['*.o'] + Dir['*.so'] +end + +task :clobber => :clean do + rm_rf %w(headers Makefile) +end diff --git a/parser/html/java/htmlparser/ruby-gcj/extconf.rb b/parser/html/java/htmlparser/ruby-gcj/extconf.rb new file mode 100644 index 0000000000..415cf430af --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/extconf.rb @@ -0,0 +1,45 @@ +require 'mkmf' + +# system dependencies +gcj = with_config('gcj', '/usr/share/java/libgcj.jar') + +# headers for JAXP +CONFIG['CC'] = 'g++' +with_cppflags('-xc++') do + + unless find_header('org/w3c/dom/Document.h', 'headers') + + `jar tf #{gcj}`.split.each do |file| + next unless file =~ /\.class$/ + next unless file =~ /^(javax|org)\/(w3c|xml)/ + next if file.include? 
'$' + + dest = 'headers/' + file.sub(/\.class$/,'.h') + name = file.sub(/\.class$/,'').gsub('/','.') + + next if File.exist? dest + + cmd = "gcjh -cp #{gcj} -o #{dest} #{name}" + puts cmd + break unless system cmd + system "ruby -pi -e '$_.sub!(/namespace namespace$/," + + "\"namespace namespace$\")' #{dest}" + system "ruby -pi -e '$_.sub!(/::namespace::/," + + "\"::namespace$::\")' #{dest}" + end + + exit unless find_header('org/w3c/dom/Document.h', 'headers') + end + + find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers' +end + +# Java libraries +Config::CONFIG['CC'] = 'g++ -shared' +dir_config('nu-htmlparser', nil, 'lib') +have_library 'nu-htmlparser' +have_library 'nu-icu' +have_library 'nu-chardet' + +# Ruby library +create_makefile 'nu/validator' diff --git a/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb new file mode 100644 index 0000000000..1beb94c10e --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb @@ -0,0 +1,5 @@ +require 'nu/validator' + +ARGV.each do |arg| + puts Nu::Validator::parse(open(arg)).root.name +end diff --git a/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb new file mode 100644 index 0000000000..595e3ae062 --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb @@ -0,0 +1,11 @@ +require 'nu/validator' +require 'open-uri' + +ARGV.each do |arg| + doc = Nu::Validator::parse(open(arg)) + doc.xpath("//*[local-name()='font']").each do |font| + font.attributes.each do |name, attr| + puts "#{name} => #{attr.value}" + end + end +end diff --git a/parser/html/java/htmlparser/ruby-gcj/test/google.html b/parser/html/java/htmlparser/ruby-gcj/test/google.html new file mode 100644 index 0000000000..8d2183b295 --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/test/google.html @@ -0,0 +1,10 @@ +Google



 
  Advanced Search
  Preferences
  Language Tools

Find an opportunity to volunteer in your community today.


Advertising Programs - Business Solutions - About Google

©2009 - Privacy

\ No newline at end of file diff --git a/parser/html/java/htmlparser/ruby-gcj/test/greek.xml b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml new file mode 100644 index 0000000000..a14d23eb1a --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml @@ -0,0 +1,2 @@ + + diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp new file mode 100644 index 0000000000..aadd24abe6 --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp @@ -0,0 +1,210 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h" + +#include "DomUtils.h" + +#include "ruby.h" + +using namespace java::io; +using namespace java::lang; +using namespace java::util; +using namespace javax::xml::parsers; +using namespace javax::xml::xpath; +using namespace nu::validator::htmlparser::dom; +using namespace org::w3c::dom; +using namespace org::xml::sax; + +static VALUE jaxp_Document; +static VALUE jaxp_Attr; +static VALUE jaxp_Element; +static ID ID_read; +static ID ID_doc; +static ID ID_element; + +// convert a Java string into a Ruby string +static VALUE j2r(String *string) { + if (string == NULL) return Qnil; + jint len = JvGetStringUTFLength(string); + char buf[len]; + JvGetStringUTFRegion(string, 0, len, buf); + return rb_str_new(buf, len); +} + +// convert a Ruby string into a Java string +static String *r2j(VALUE string) { + return JvNewStringUTF(RSTRING(string)->ptr); +} + +// release the Java Document associated with this Ruby Document +static void vnu_document_free(Document *doc) { + DomUtils::unpin(doc); +} + +// Nu::Validator::parse( string|file ) +static VALUE vnu_parse(VALUE self, VALUE input) { + HtmlDocumentBuilder *parser = new HtmlDocumentBuilder(); + + // read file-like objects into memory. 
TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + // convert input in to a ByteArrayInputStream + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + InputSource *source = new InputSource(new ByteArrayInputStream(bytes)); + + // parse, pin, and wrap + Document *doc = parser->parse(source); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); +} + +// Jaxp::parse( string|file ) +static VALUE jaxp_parse(VALUE self, VALUE input) { + DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance(); + DocumentBuilder *parser = factory->newDocumentBuilder(); + + // read file-like objects into memory. TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + try { + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + Document *doc = parser->parse(new ByteArrayInputStream(bytes)); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); + } catch (java::lang::Throwable *ex) { + ex->printStackTrace(); + return Qnil; + } +} + + +// Nu::Validator::Document#encoding +static VALUE jaxp_document_encoding(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + return j2r(jdoc->getXmlEncoding()); +} + +// Nu::Validator::Document#root +static VALUE jaxp_document_root(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element *jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement); + rb_ivar_set(relement, ID_doc, rdoc); + return relement; +} + +// Nu::Validator::Document#xpath +static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element 
*jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + XPath *xpath = XPathFactory::newInstance()->newXPath(); + XPathExpression *expr = xpath->compile(r2j(path)); + NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET); + + VALUE result = rb_ary_new(); + for (int i=0; igetLength(); i++) { + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i)); + rb_ivar_set(relement, ID_doc, rdoc); + rb_ary_push(result, relement); + } + return result; +} + +// Nu::Validator::Element#name +static VALUE jaxp_element_name(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + return j2r(jelement->getNodeName()); +} + +// Nu::Validator::Element#attributes +static VALUE jaxp_element_attributes(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + VALUE result = rb_hash_new(); + NamedNodeMap *map = jelement->getAttributes(); + for (int i=0; igetLength(); i++) { + Attr *jattr = (Attr *) map->item(i); + VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr); + rb_ivar_set(rattr, ID_element, relement); + rb_hash_aset(result, j2r(jattr->getName()), rattr); + } + return result; +} + +// Nu::Validator::Attribute#value +static VALUE jaxp_attribute_value(VALUE rattribute) { + Attr *jattribute; + Data_Get_Struct(rattribute, Attr, jattribute); + return j2r(jattribute->getValue()); +} + +typedef VALUE (ruby_method)(...); + +// Nu::Validator module initialization +extern "C" void Init_validator() { + JvCreateJavaVM(NULL); + JvAttachCurrentThread(NULL, NULL); + JvInitClass(&DomUtils::class$); + JvInitClass(&XPathFactory::class$); + JvInitClass(&XPathConstants::class$); + + VALUE jaxp = rb_define_module("Jaxp"); + rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1); + + VALUE nu = rb_define_module("Nu"); + VALUE validator = rb_define_module_under(nu, "Validator"); + rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1); 
+ + jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject); + rb_define_method(jaxp_Document, "encoding", + (ruby_method*)&jaxp_document_encoding, 0); + rb_define_method(jaxp_Document, "root", + (ruby_method*)&jaxp_document_root, 0); + rb_define_method(jaxp_Document, "xpath", + (ruby_method*)&jaxp_document_xpath, 1); + + jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject); + rb_define_method(jaxp_Element, "name", + (ruby_method*)&jaxp_element_name, 0); + rb_define_method(jaxp_Element, "attributes", + (ruby_method*)&jaxp_element_attributes, 0); + + jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject); + rb_define_method(jaxp_Attr, "value", + (ruby_method*)&jaxp_attribute_value, 0); + + ID_read = rb_intern("read"); + ID_doc = rb_intern("@doc"); + ID_element = rb_intern("@element"); +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java new file mode 100644 index 0000000000..00e5f7ca72 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Big5 extends Encoding { + + private static final String[] LABELS = { + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + }; + + private static final String NAME = "big5"; + + static final Big5 INSTANCE = new Big5(); + + private Big5() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new Big5Decoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return new Big5Encoder(this); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java new file mode 100644 index 0000000000..9f35be3411 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +final class Big5Data { + + private static final String ASTRALNESS = "\uF829\u7A22\u1290\uC5C4\u0007\u0200\u7549\"\u0000\uA000\u3859\u0300\u002C\u573E\uF72B\u6EFC\u90F2\u3B7B\u83E9\uF049\u9DA6\uBBFC\uBEF7\uFDFE\u0C83\uABD1\u7BFF\u7FBF\u1804\u002C\u4840\u2046\u0408\u2A22\u4858\u091A\u5100\u3122\uC000\u5000\uC00D\u6110\uD44C\u9A24\u0180\u0004\u92B2\u0209\u8631\u1242\u8140\u0351\uAB48\u7460\uD5A2\u3E5C\uE361\u1083\u720B\u08A0\u51D6\uE00A\u8100\u1686\uC443\u1135\u6037\u7AE6\u056D\u7D0C\u0E66\u81E0\u7F88\u2420\u2406\u1D03\u340C\u4268\u454A\uF13F\u080D\u8084\uBB00\u0C4D\u6ED6\u97D7\u41DF\u5D3E\uDA68\u305C\uB800\u26E9\u80BC\u0151\uE078\u89A1\u59C0\u9679\u3BCC\u5EDE\uBC2C\uDF9B\u6C5D\u046D\u6043\u4A36\uD860\u073E\uC8C4\u6C69\uD8B1\u8302\u0F88\u0973\u806E\u3B6B\u5A17\uA503\u2D52\u3F40\u1120\u4101\u5024\uB903\u90EE\u1079\u5CAD\u1820\uDA0A\u8060\u9E26\u6E73\u1021\u080E\u4368\u6FB2\u161F\u8AFE\u76B6\u763A\u8262\u1894\u1801\uFE7D\u578D\u1327\u5BD2\u1937\uDB8C\u4862\u0024\u0000\u0010\u8000\u0000\u0000\u0038\u3800\uB9E2\uFD7D\u75F8\uDCF7\u6FF3\uBBF2\uFF4A\uAE3F\u9FC5\uEAFF\uBABA\uBC5D\u9F73\uD8FA\uDED6\u4B25\u975E\u2ADA\u6DB9\u06E6\u9D36\u53F9\u6FC5\uF98A\u49BF\uDB5D\uFFF8\u14A6\uE605\u96F7\u0A99\u00E5\u0800\u3D81\u5002\u0102\uBF49\u475E\u036F\u6280\uEECA\u4819\u6081\u205A\u24F7\u0000\u0004\u0000\u2804\u22C8\u0200\u0000\u2010\u5082\u30
40\u0001\u0010\u1284\u0041\u0504\u2000\uC100\u3F7F\uB059\u8AC1\uAFAF\uAC05\u033F\u0204\u7280\u420A\u0426\u02D0\u0EC3\u0958\u0A80\u20B5\u9206\u8B77\u0560\u21C9\u4606\u6038\uC048\u24B4\u84DE\uC0E0\u3364\u3154\u300D\u688A\u5F2B\u0626\u8496\uB108\uE890\uA394\u734F\u50B8\u0D11\uDFA4\u4003\u5D20\u8480\u6160\u51CE\u800A\u58B7\u0050\uE862\u6750\u7220\u1228"; + + private static final String TABLE0 = "\u43F0\u4C32\u4603\u45A6\u4578\u7267\u4D77\u45B3\u7CB1\u4CE2\u7CC5\u3B95\u4736\u4744\u4C47\u4C40\u42BF\u3617\u7352\u6E8B\u70D2\u4C57\uA351\u474F\u45DA\u4C85\u7C6C\u4D07\u4AA4\u46A1\u6B23\u7225\u5A54\u1A63\u3E06\u3F61\u664D\u56FB\u0000\u7D95\u591D\u8BB9\u3DF4\u9734\u7BEF\u5BDB\u1D5E\u5AA4\u3625\u9EB0\u5AD1\u5BB7\u5CFC\u676E\u8593\u9945\u7461\u749D\u3875\u1D53\u369E\u6021\u3EEC\u58DE\u3AF5\u7AFC\u9F97\u4161\u890D\u31EA\u0A8A\u325E\u430A\u8484\u9F96\u942F\u4930\u8613\u5896\u974A\u9218\u79D0\u7A32\u6660\u6A29\u889D\u744C\u7BC5\u6782\u7A2C\u524F\u9046\u34E6\u73C4\u5DB9\u74C6\u9FC7\u57B3\u492F\u544C\u4131\u368E\u5818\u7A72\u7B65\u8B8F\u46AE\u6E88\u4181\u5D99\u7BAE\u24BC\u9FC8\u24C1\u24C9\u24CC\u9FC9\u8504\u35BB\u40B4\u9FCA\u44E1\uADFF\u62C1\u706E\u9FCB"; + + private static final String TABLE1 = "\u31C0\u31C1\u31C2\u31C3\u31C4\u010C\u31C5\u00D1\u00CD\u31C6\u31C7\u00CB\u1FE8\u31C8\u00CA\u31C9\u31CA\u31CB\u31CC\u010E\u31CD\u31CE\u0100\u00C1\u01CD\u00C0\u0112\u00C9\u011A\u00C8\u014C\u00D3\u01D1\u00D2\u0000\u1EBE\u0000\u1EC0\u00CA\u0101\u00E1\u01CE\u00E0\u0251\u0113\u00E9\u011B\u00E8\u012B\u00ED\u01D0\u00EC\u014D\u00F3\u01D2\u00F2\u016B\u00FA\u01D4\u00F9\u01D6\u01D8\u01DA\u01DC\u00FC\u0000\u1EBF\u0000\u1EC1\u00EA\u0261\u23DA\u23DB"; + + private static final String TABLE2 = 
"\uA3A9\u1145\u0000\u650A\u0000\u0000\u4E3D\u6EDD\u9D4E\u91DF\u0000\u0000\u7735\u6491\u4F1A\u4F28\u4FA8\u5156\u5174\u519C\u51E4\u52A1\u52A8\u533B\u534E\u53D1\u53D8\u56E2\u58F0\u5904\u5907\u5932\u5934\u5B66\u5B9E\u5B9F\u5C9A\u5E86\u603B\u6589\u67FE\u6804\u6865\u6D4E\u70BC\u7535\u7EA4\u7EAC\u7EBA\u7EC7\u7ECF\u7EDF\u7F06\u7F37\u827A\u82CF\u836F\u89C6\u8BBE\u8BE2\u8F66\u8F67\u8F6E\u7411\u7CFC\u7DCD\u6946\u7AC9\u5227\u0000\u0000\u0000\u0000\u918C\u78B8\u915E\u80BC\u0000\u8D0B\u80F6\u09E7\u0000\u0000\u809F\u9EC7\u4CCD\u9DC9\u9E0C\u4C3E\u9DF6\u700E\u9E0A\uA133\u35C1\u0000\u6E9A\u823E\u7519\u0000\u4911\u9A6C\u9A8F\u9F99\u7987\u846C\u1DCA\u05D0\u2AE6\u4E24\u4E81\u4E80\u4E87\u4EBF\u4EEB\u4F37\u344C\u4FBD\u3E48\u5003\u5088\u347D\u3493\u34A5\u5186\u5905\u51DB\u51FC\u5205\u4E89\u5279\u5290\u5327\u35C7\u53A9\u3551\u53B0\u3553\u53C2\u5423\u356D\u3572\u3681\u5493\u54A3\u54B4\u54B9\u54D0\u54EF\u5518\u5523\u5528\u3598\u553F\u35A5\u35BF\u55D7\u35C5\u7D84\u5525\u0000\u0C42\u0D15\u512B\u5590\u2CC6\u39EC\u0341\u8E46\u4DB8\u94E5\u4053\u80BE\u777A\u2C38\u3A34\u47D5\u815D\u69F2\u4DEA\u64DD\u0D7C\u0FB4\u0CD5\u10F4\u648D\u8E7E\u0E96\u0C0B\u0F64\u2CA9\u8256\u44D3\u0000\u0D46\u9A4D\u80E9\u47F4\u4EA7\u2CC2\u9AB2\u3A67\u95F4\u3FED\u3506\u52C7\u97D4\u78C8\u2D44\u9D6E\u9815\u0000\u43D9\u60A5\u64B4\u54E3\u2D4C\u2BCA\u1077\u39FB\u106F\u66DA\u6716\u79A0\u64EA\u5052\u0C43\u8E68\u21A1\u8B4C\u0731\u0000\u480B\u01A9\u3FFA\u5873\u2D8D\u0000\u45C8\u04FC\u6097\u0F4C\u0D96\u5579\u40BB\u43BA\u0000\u4AB4\u2A66\u109D\u81AA\u98F5\u0D9C\u6379\u39FE\u2775\u8DC0\u56A1\u647C\u3E43\u0000\uA601\u0E09\u2ACF\u2CC9\u0000\u10C8\u39C2\u3992\u3A06\u829B\u3578\u5E49\u20C7\u5652\u0F31\u2CB2\u9720\u34BC\u6C3D\u4E3B\u0000\u0000\u7574\u2E8B\u2208\uA65B\u8CCD\u0E7A\u0C34\u681C\u7F93\u10CF\u2803\u2939\u35FB\u51E3\u0E8C\u0F8D\u0EAA\u3F93\u0F30\u0D47\u114F\u0E4C\u0000\u0EAB\u0BA9\u0D48\u10C0\u113D\u3FF9\u2696\u6432\u0FAD\u33F4\u7639\u2BCE\u0D7E\u0D7F\u2C51\u2C55\u3A18\u0E98\u10C7\u0F2E\uA632\u6B50\u8CD2\u8D99\u8CCA\u95AA\u54CC\u82C4\
u55B9\u0000\u9EC3\u9C26\u9AB6\u775E\u2DEE\u7140\u816D\u80EC\u5C1C\u6572\u8134\u3797\u535F\u80BD\u91B6\u0EFA\u0E0F\u0E77\u0EFB\u35DD\u4DEB\u3609\u0CD6\u56AF\u27B5\u10C9\u0E10\u0E78\u1078\u1148\u8207\u1455\u0E79\u4E50\u2DA4\u5A54\u101D\u101E\u10F5\u10F6\u579C\u0E11\u7694\u82CD\u0FB5\u0E7B\u517E\u3703\u0FB6\u1180\u52D8\uA2BD\u49DA\u183A\u4177\u827C\u5899\u5268\u361A\u573D\u7BB2\u5B68\u4800\u4B2C\u9F27\u49E7\u9C1F\u9B8D\u5B74\u313D\u55FB\u35F2\u5689\u4E28\u5902\u1BC1\uF878\u9751\u0086\u4E5B\u4EBB\u353E\u5C23\u5F51\u5FC4\u38FA\u624C\u6535\u6B7A\u6C35\u6C3A\u706C\u722B\u4E2C\u72AD\u48E9\u7F52\u793B\u7CF9\u7F53\u626A\u34C1\u0000\u634B\u8002\u8080\u6612\u6951\u535D\u8864\u89C1\u78B2\u8BA0\u8D1D\u9485\u9578\u957F\u95E8\u8E0F\u97E6\u9875\u98CE\u98DE\u9963\u9810\u9C7C\u9E1F\u9EC4\u6B6F\uF907\u4E37\u0087\u961D\u6237\u94A2\u0000\u503B\u6DFE\u9C73\u9FA6\u3DC9\u888F\u414E\u7077\u5CF5\u4B20\u51CD\u3559\u5D30\u6122\u8A32\u8FA7\u91F6\u7191\u6719\u73BA\u3281\uA107\u3C8B\u1980\u4B10\u78E4\u7402\u51AE\u870F\u4009\u6A63\uA2BA\u4223\u860F\u0A6F\u7A2A\u9947\u8AEA\u9755\u704D\u5324\u207E\u93F4\u76D9\u89E3\u9FA7\u77DD\u4EA3\u4FF0\u50BC\u4E2F\u4F17\u9FA8\u5434\u7D8B\u5892\u58D0\u1DB6\u5E92\u5E99\u5FC2\u2712\u658B\u33F9\u6919\u6A43\u3C63\u6CFF\u0000\u7200\u4505\u738C\u3EDB\u4A13\u5B15\u74B9\u8B83\u5CA4\u5695\u7A93\u7BEC\u7CC3\u7E6C\u82F8\u8597\u9FA9\u8890\u9FAA\u8EB9\u9FAB\u8FCF\u855F\u99E0\u9221\u9FAC\u8DB9\u143F\u4071\u42A2\u5A1A\u0000\u0000\u0000\u9868\u676B\u4276\u573D\u0000\u85D6\u497B\u82BF\u710D\u4C81\u6D74\u5D7B\u6B15\u6FBE\u9FAD\u9FAE\u5B96\u9FAF\u66E7\u7E5B\u6E57\u79CA\u3D88\u44C3\u3256\u2796\u439A\u4536\u0000\u5CD5\u3B1A\u8AF9\u5C78\u3D12\u3551\u5D78\u9FB2\u7157\u4558\u40EC\u1E23\u4C77\u3978\u344A\u01A4\u6C41\u8ACC\u4FB4\u0239\u59BF\u816C\u9856\u98FA\u5F3B\u0B9F\u0000\u21C1\u896D\u4102\u46BB\u9079\u3F07\u9FB3\uA1B5\u40F8\u37D6\u46F7\u6C46\u417C\u86B2\u73FF\u456D\u38D4\u549A\u4561\u451B\u4D89\u4C7B\u4D76\u45EA\u3FC8\u4B0F\u3661\u44DE\u44BD\u41ED\u5D3E\u5D48\u5D56\u3DFC\u380F\u5DA4\u5
DB9\u3820\u3838\u5E42\u5EBD\u5F25\u5F83\u3908\u3914\u393F\u394D\u60D7\u613D\u5CE5\u3989\u61B7\u61B9\u61CF\u39B8\u622C\u6290\u62E5\u6318\u39F8\u56B1\u3A03\u63E2\u63FB\u6407\u645A\u3A4B\u64C0\u5D15\u5621\u9F9F\u3A97\u6586\u3ABD\u65FF\u6653\u3AF2\u6692\u3B22\u6716\u3B42\u67A4\u6800\u3B58\u684A\u6884\u3B72\u3B71\u3B7B\u6909\u6943\u725C\u6964\u699F\u6985\u3BBC\u69D6\u3BDD\u6A65\u6A74\u6A71\u6A82\u3BEC\u6A99\u3BF2\u6AAB\u6AB5\u6AD4\u6AF6\u6B81\u6BC1\u6BEA\u6C75\u6CAA\u3CCB\u6D02\u6D06\u6D26\u6D81\u3CEF\u6DA4\u6DB1\u6E15\u6E18\u6E29\u6E86\u89C0\u6EBB\u6EE2\u6EDA\u9F7F\u6EE8\u6EE9\u6F24\u6F34\u3D46\u3F41\u6F81\u6FBE\u3D6A\u3D75\u71B7\u5C99\u3D8A\u702C\u3D91\u7050\u7054\u706F\u707F\u7089\u0325\u43C1\u35F1\u0ED8\u3ED7\u57BE\u6ED3\u713E\u57E0\u364E\u69A2\u8BE9\u5B74\u7A49\u58E1\u94D9\u7A65\u7A7D\u59AC\u7ABB\u7AB0\u7AC2\u7AC3\u71D1\u648D\u41CA\u7ADA\u7ADD\u7AEA\u41EF\u54B2\u5C01\u7B0B\u7B55\u7B29\u530E\u5CFE\u7BA2\u7B6F\u839C\u5BB4\u6C7F\u7BD0\u8421\u7B92\u7BB8\u5D20\u3DAD\u5C65\u8492\u7BFA\u7C06\u7C35\u5CC1\u7C44\u7C83\u4882\u7CA6\u667D\u4578\u7CC9\u7CC7\u7CE6\u7C74\u7CF3\u7CF5\u7CCE\u7E67\u451D\u6E44\u7D5D\u6ED6\u748D\u7D89\u7DAB\u7135\u7DB3\u7DD2\u4057\u6029\u7DE4\u3D13\u7DF5\u17F9\u7DE5\u836D\u7E1D\u6121\u615A\u7E6E\u7E92\u432B\u946C\u7E27\u7F40\u7F41\u7F47\u7936\u62D0\u99E1\u7F97\u6351\u7FA3\u1661\u0068\u455C\u3766\u4503\u833A\u7FFA\u6489\u8005\u8008\u801D\u8028\u802F\uA087\u6CC3\u803B\u803C\u8061\u2714\u4989\u6626\u3DE3\u66E8\u6725\u80A7\u8A48\u8107\u811A\u58B0\u26F6\u6C7F\u6498\u4FB8\u64E7\u148A\u8218\u185E\u6A53\u4A65\u4A95\u447A\u8229\u0B0D\u6A52\u3D7E\u4FF9\u14FD\u84E2\u8362\u6B0A\u49A7\u3530\u1773\u3DF8\u82AA\u691B\uF994\u41DB\u854B\u82D0\u831A\u0E16\u17B4\u36C1\u317D\u355A\u827B\u82E2\u8318\u3E8B\u6DA3\u6B05\u6B97\u35CE\u3DBF\u831D\u55EC\u8385\u450B\u6DA5\u83AC\u83C1\u83D3\u347E\u6ED4\u6A57\u855A\u3496\u6E42\u2EEF\u8458\u5BE4\u8471\u3DD3\u44E4\u6AA7\u844A\u3CB5\u7958\u84A8\u6B96\u6E77\u6E43\u84DE\u840F\u8391\u44A0\u8493\u84E4\u5C91\u4240\u5CC0\u4543\u8534\u5AF2\u6E9
9\u4527\u8573\u4516\u67BF\u8616\u8625\u863B\u85C1\u7088\u8602\u1582\u70CD\uF9B2\u456A\u8628\u3648\u18A2\u53F7\u739A\u867E\u8771\uA0F8\u87EE\u2C27\u87B1\u87DA\u880F\u5661\u866C\u6856\u460F\u8845\u8846\u75E0\u3DB9\u75E4\u885E\u889C\u465B\u88B4\u88B5\u63C1\u88C5\u7777\u770F\u8987\u898A\u89A6\u89A9\u89A7\u89BC\u8A25\u89E7\u7924\u7ABD\u8A9C\u7793\u91FE\u8A90\u7A59\u7AE9\u7B3A\u3F8F\u4713\u7B38\u717C\u8B0C\u8B1F\u5430\u5565\u8B3F\u8B4C\u8B4D\u8AA9\u4A7A\u8B90\u8B9B\u8AAF\u16DF\u4615\u884F\u8C9B\u7D54\u7D8F\uF9D4\u3725\u7D53\u8CD6\u7D98\u7DBD\u8D12\u8D03\u1910\u8CDB\u705C\u8D11\u4CC9\u3ED0\u8D77\u8DA9\u8002\u1014\u498A\u3B7C\u81BC\u710C\u7AE7\u8EAD\u8EB6\u8EC3\u92D4\u8F19\u8F2D\u8365\u8412\u8FA5\u9303\uA29F\u0A50\u8FB3\u492A\u89DE\u853D\u3DBB\u5EF8\u3262\u8FF9\uA014\u86BC\u8501\u2325\u3980\u6ED7\u9037\u853C\u7ABE\u9061\u856C\u860B\u90A8\u8713\u90C4\u86E6\u90AE\u90FD\u9167\u3AF0\u91A9\u91C4\u7CAC\u8933\u1E89\u920E\u6C9F\u9241\u9262\u55B9\u92B9\u8AC6\u3C9B\u8B0C\u55DB\u0D31\u932C\u936B\u8AE1\u8BEB\u708F\u5AC3\u8AE2\u8AE5\u4965\u9244\u8BEC\u8C39\u8BFF\u9373\u945B\u8EBC\u9585\u95A6\u9426\u95A0\u6FF6\u42B9\u267A\u86D8\u127C\u3E2E\u49DF\u6C1C\u967B\u9696\u416C\u96A3\u6ED5\u61DA\u96B6\u78F5\u8AE0\u96BD\u53CC\u49A1\u6CB8\u0274\u6410\u90AF\u90E5\u4AD1\u1915\u330A\u9731\u8642\u9736\u4A0F\u453D\u4585\u4AE9\u7075\u5B41\u971B\u975C\u91D5\u9757\u5B4A\u91EB\u975F\u9425\u50D0\u30B7\u30BC\u9789\u979F\u97B1\u97BE\u97C0\u97D2\u97E0\u546C\u97EE\u741C\u9433\u97FF\u97F5\u941D\u797A\u4AD1\u9834\u9833\u984B\u9866\u3B0E\u7175\u3D51\u0630\u415C\u5706\u98CA\u98B7\u98C8\u98C7\u4AFF\u6D27\u16D3\u55B0\u98E1\u98E6\u98EC\u9378\u9939\u4A29\u4B72\u9857\u9905\u99F5\u9A0C\u9A3B\u9A10\u9A58\u5725\u36C4\u90B1\u9BD5\u9AE0\u9AE2\u9B05\u9AF4\u4C0E\u9B14\u9B2D\u8600\u5034\u9B34\u69A8\u38C3\u307D\u9B50\u9B40\u9D3E\u5A45\u1863\u9B8E\u424B\u9C02\u9BFF\u9C0C\u9E68\u9DD4\u9FB7\uA192\uA1AB\uA0E1\uA123\uA1DF\u9D7E\u9D83\uA134\u9E0E\u6888\u9DC4\u215B\uA193\uA220\u193B\uA233\u9D39\uA0B9\uA2B4\u9E90\u9E95\u9E9E\u9EA2\u4D34\
u9EAA\u9EAF\u4364\u9EC1\u3B60\u39E5\u3D1D\u4F32\u37BE\u8C2B\u9F02\u9F08\u4B96\u9424\u6DA2\u9F17\u9F16\u9F39\u569F\u568A\u9F45\u99B8\u908B\u97F2\u847F\u9F62\u9F69\u7ADC\u9F8E\u7216\u4BBE\u4975\u49BB\u7177\u49F8\u4348\u4A51\u739E\u8BDA\u18FA\u799F\u897E\u8E36\u9369\u93F3\u8A44\u92EC\u9381\u93CB\u896C\u44B9\u7217\u3EEB\u7772\u7A43\u70D0\u4473\u43F8\u717E\u17EF\u70A3\u18BE\u3599\u3EC7\u1885\u542F\u17F8\u3722\u16FB\u1839\u36E1\u1774\u18D1\u5F4B\u3723\u16C0\u575B\u4A25\u13FE\u12A8\u13C6\u14B6\u8503\u36A6\u8503\u8455\u4994\u7165\u3E31\u555C\u3EFB\u7052\u44F4\u36EE\u999D\u6F26\u67F9\u3733\u3C15\u3DE7\u586C\u1922\u6810\u4057\u373F\u40E1\u408B\u410F\u6C21\u54CB\u569E\u66B1\u5692\u0FDF\u0BA8\u0E0D\u93C6\u8B13\u939C\u4EF8\u512B\u3819\u4436\u4EBC\u0465\u037F\u4F4B\u4F8A\u5651\u5A68\u01AB\u03CB\u3999\u030A\u0414\u3435\u4F29\u02C0\u8EB3\u0275\u8ADA\u020C\u4E98\u50CD\u510D\u4FA2\u4F03\u4A0E\u3E8A\u4F42\u502E\u506C\u5081\u4FCC\u4FE5\u5058\u50FC\u5159\u515B\u515D\u515E\u6E76\u3595\u3E39\u3EBF\u6D72\u1884\u3E89\u51A8\u51C3\u05E0\u44DD\u04A3\u0492\u0491\u8D7A\u8A9C\u070E\u5259\u52A4\u0873\u52E1\u936E\u467A\u718C\u438C\u0C20\u49AC\u10E4\u69D1\u0E1D\u7479\u3EDE\u7499\u7414\u7456\u7398\u4B8E\u4ABC\u408D\u53D0\u3584\u720F\u40C9\u55B4\u0345\u54CD\u0BC6\u571D\u925D\u96F4\u9366\u57DD\u578D\u577F\u363E\u58CB\u5A99\u8A46\u16FA\u176F\u1710\u5A2C\u59B8\u928F\u5A7E\u5ACF\u5A12\u5946\u19F3\u1861\u4295\u36F5\u6D05\u7443\u5A21\u5E83\u5A81\u8BD7\u0413\u93E0\u748C\u1303\u7105\u4972\u9408\u89FB\u93BD\u37A0\u5C1E\u5C9E\u5E5E\u5E48\u1996\u197C\u3AEE\u5ECD\u5B4F\u1903\u1904\u3701\u18A0\u36DD\u16FE\u36D3\u812A\u8A47\u1DBA\u3472\u89A8\u5F0C\u5F0E\u1927\u17AB\u5A6B\u173B\u5B44\u8614\u75FD\u8860\u607E\u2860\u262B\u5FDB\u3EB8\u25AF\u25BE\u9088\u6F73\u61C0\u003E\u0046\u261B\u6199\u6198\u6075\u2C9B\u2D07\u46D4\u914D\u6471\u4665\u2B6A\u3A29\u2B22\u3450\u98EA\u2E78\u6337\uA45B\u64B6\u6331\u63D1\u49E3\u2D67\u62A4\u2CA1\u643B\u656B\u6972\u3BF4\u308E\u32AD\u4989\u32AB\u550D\u32E0\u18D9\u943F\u66CE\u3289\u31B3\u3AE0\u4
190\u5584\u8B22\u558F\u16FC\u555B\u5425\u78EE\u3103\u182A\u3234\u3464\u320F\u3182\u42C9\u668E\u6D24\u666B\u4B93\u6630\u7870\u1DEB\u6663\u32D2\u32E1\u661E\u5872\u38D1\u383A\u37BC\u3B99\u37A2\u33FE\u74D0\u3B96\u678F\u462A\u68B6\u681E\u3BC4\u6ABE\u3863\u37D5\u4487\u6A33\u6A52\u6AC9\u6B05\u1912\u6511\u6898\u6A4C\u3BD7\u6A7A\u6B57\u3FC0\u3C9A\u93A0\u92F2\u8BEA\u8ACB\u9289\u801E\u89DC\u9467\u6DA5\u6F0B\u49EC\u6D67\u3F7F\u3D8F\u6E04\u403C\u5A3D\u6E0A\u5847\u6D24\u7842\u713B\u431A\u4276\u70F1\u7250\u7287\u7294\u478F\u4725\u5179\u4AA4\u05EB\u747A\u3EF8\u365F\u4A4A\u4917\u5FE1\u3F06\u3EB1\u4ADF\u8C23\u3F35\u60A7\u3EF3\u74CC\u743C\u9387\u7437\u449F\u6DEA\u4551\u7583\u3F63\u4CD9\u4D06\u3F58\u7555\u7673\uA5C6\u3B19\u7468\u8ACC\u49AB\u498E\u3AFB\u3DCD\u4A4E\u3EFF\u49C5\u48F3\u91FA\u5732\u9342\u8AE3\u1864\u50DF\u5221\u51E7\u7778\u3232\u770E\u770F\u777B\u4697\u3781\u3A5E\u48F0\u7438\u749B\u3EBF\u4ABA\u4AC7\u40C8\u4A96\u61AE\u9307\u5581\u781E\u788D\u7888\u78D2\u73D0\u7959\u7741\u56E3\u410E\u799B\u8496\u79A5\u6A2D\u3EFA\u7A3A\u79F4\u416E\u16E6\u4132\u9235\u79F1\u0D4C\u498C\u0299\u3DBA\u176E\u3597\u556B\u3570\u36AA\u01D4\u0C0D\u7AE2\u5A59\u26F5\u5AAF\u5A9C\u5A0D\u025B\u78F0\u5A2A\u5BC6\u7AFE\u41F9\u7C5D\u7C6D\u4211\u5BB3\u5EBC\u5EA6\u7CCD\u49F9\u17B0\u7C8E\u7C7C\u7CAE\u6AB2\u7DDC\u7E07\u7DD3\u7F4E\u6261\u615C\u7B48\u7D97\u5E82\u426A\u6B75\u0916\u67D6\u004E\u35CF\u57C4\u6412\u63F8\u4962\u7FDD\u7B27\u082C\u5AE9\u5D43\u7B0C\u5E0E\u99E6\u8645\u9A63\u6A1C\u343F\u39E2\u49F7\u65AD\u9A1F\u65A0\u8480\u7127\u6CD1\u44EA\u8137\u4402\u80C6\u8109\u8142\u67B4\u98C3\u6A42\u8262\u8265\u6A51\u8453\u6DA7\u8610\u721B\u5A86\u417F\u1840\u5B2B\u18A1\u5AE4\u18D8\u86A0\uF9BC\u3D8F\u882D\u7422\u5A02\u886E\u4F45\u8887\u88BF\u88E6\u8965\u894D\u5683\u8954\u7785\u7784\u8BF5\u8BD9\u8B9C\u89F9\u3EAD\u84A3\u46F5\u46CF\u37F2\u8A3D\u8A1C\u9448\u5F4D\u922B\u4284\u65D4\u7129\u70C4\u1845\u9D6D\u8C9F\u8CE9\u7DDC\u599A\u77C3\u59F0\u436E\u36D4\u8E2A\u8EA7\u4C09\u8F30\u8F4A\u42F4\u6C58\u6FBB\u2321\u489B\u6F79\u6E8B\u17DA\u9BE
9\u36B5\u492F\u90BB\u9097\u5571\u4906\u91BB\u9404\u8A4B\u4062\u8AFC\u9427\u8C1D\u8C3B\u84E5\u8A2B\u9599\u95A7\u9597\u9596\u8D34\u7445\u3EC2\u48FF\u4A42\u43EA\u3EE7\u3225\u968F\u8EE7\u8E66\u8E65\u3ECC\u49ED\u4A78\u3FEE\u7412\u746B\u3EFC\u9741\u90B0\u6847\u4A1D\u9093\u57DF\u975D\u9368\u8989\u8C26\u8B2F\u63BE\u92BA\u5B11\u8B69\u493C\u73F9\u421B\u979B\u9771\u9938\u0F26\u5DC1\u8BC5\u4AB2\u981F\u94DA\u92F6\u95D7\u91E5\u44C0\u8B50\u4A67\u8B64\u98DC\u8A45\u3F00\u922A\u4925\u8414\u993B\u994D\u7B06\u3DFD\u999B\u4B6F\u99AA\u9A5C\u8B65\u58C8\u6A8F\u9A21\u5AFE\u9A2F\u98F1\u4B90\u9948\u99BC\u4BBD\u4B97\u937D\u5872\u1302\u5822\u49B8\u14E8\u7844\u271F\u3DB8\u68C5\u3D7D\u9458\u3927\u6150\u2781\u296B\u6107\u9C4F\u9C53\u9C7B\u9C35\u9C10\u9B7F\u9BCF\u9E2D\u9B9F\uA1F5\uA0FE\u9D21\u4CAE\u4104\u9E18\u4CB0\u9D0C\uA1B4\uA0ED\uA0F3\u992F\u9DA5\u84BD\u6E12\u6FDF\u6B82\u85FC\u4533\u6DA4\u6E84\u6DF0\u8420\u85EE\u6E00\u37D7\u6064\u79E2\u359C\u3640\u492D\u49DE\u3D62\u93DB\u92BE\u9348\u02BF\u78B9\u9277\u944D\u4FE4\u3440\u9064\u555D\u783D\u7854\u78B6\u784B\u1757\u31C9\u4941\u369A\u4F72\u6FDA\u6FD9\u701E\u701E\u5414\u41B5\u57BB\u58F3\u578A\u9D16\u57D7\u7134\u34AF\u41AC\u71EB\u6C40\u4F97\u5B28\u17B5\u8A49\u610C\u5ACE\u5A0B\u42BC\u4488\u372C\u4B7B\u89FC\u93BB\u93B8\u18D6\u0F1D\u8472\u6CC0\u1413\u42FA\u2C26\u43C1\u5994\u3DB7\u6741\u7DA8\u615B\u60A4\u49B9\u498B\u89FA\u92E5\u73E2\u3EE9\u74B4\u8B63\u189F\u3EE1\u4AB3\u6AD8\u73F3\u73FB\u3ED6\u4A3E\u4A94\u17D9\u4A66\u03A7\u1424\u49E5\u7448\u4916\u70A5\u4976\u9284\u73E6\u935F\u04FE\u9331\u8ACE\u8A16\u9386\u8BE7\u55D5\u4935\u8A82\u716B\u4943\u0CFF\u56A4\u061A\u0BEB\u0CB8\u5502\u79C4\u17FA\u7DFE\u16C2\u4A50\u1852\u452E\u9401\u370A\u8AC0\u49AD\u59B0\u18BF\u1883\u7484\u5AA1\u36E2\u3D5B\u36B0\u925F\u5A79\u8A81\u1862\u9374\u3CCD\u0AB4\u4A96\u398A\u50F4\u3D69\u3D4C\u139C\u7175\u42FB\u8218\u6E0F\u90E4\u44EB\u6D57\u7E4F\u7067\u6CAF\u3CD6\u3FED\u3E2D\u6E02\u6F0C\u3D6F\u03F5\u7551\u36BC\u34C8\u4680\u3EDA\u4871\u59C4\u926E\u493E\u8F41\u8C1C\u6BC0\u5812\u57C8\u36D6\u1452\
u70FE\u4362\u4A71\u2FE3\u12B0\u23BD\u68B9\u6967\u1398\u34E5\u7BF4\u36DF\u8A83\u37D6\u33FA\u4C9F\u6A1A\u36AD\u6CB7\u843E\u44DF\u44CE\u6D26\u6D51\u6C82\u6FDE\u6F17\u7109\u833D\u173A\u83ED\u6C80\u7053\u17DB\u5989\u5A82\u17B3\u5A61\u5A71\u1905\u41FC\u372D\u59EF\u173C\u36C7\u718E\u9390\u669A\u42A5\u5A6E\u5A2B\u4293\u6A2B\u3EF9\u7736\u445B\u42CA\u711D\u4259\u89E1\u4FB0\u6D28\u5CC2\u44CE\u7E4D\u43BD\u6A0C\u4256\u1304\u70A6\u7133\u43E9\u3DA5\u6CDF\uF825\u4A4F\u7E65\u59EB\u5D2F\u3DF3\u5F5C\u4A5D\u17DF\u7DA4\u8426\u5485\u3AFA\u3300\u0214\u577E\u08D5\u0619\u3FE5\u1F9E\uA2B6\u7003\u915B\u5D70\u738F\u7CD3\u8A59\u9420\u4FC8\u7FE7\u72CD\u7310\u7AF4\u7338\u7339\u56F6\u7341\u7348\u3EA9\u7B18\u906C\u71F5\u48F2\u73E1\u81F6\u3ECA\u770C\u3ED1\u6CA2\u56FD\u7419\u741E\u741F\u3EE2\u3EF0\u3EF4\u3EFA\u74D3\u3F0E\u3F53\u7542\u756D\u7572\u758D\u3F7C\u75C8\u75DC\u3FC0\u764D\u3FD7\u7674\u3FDC\u767A\u4F5C\u7188\u5623\u8980\u5869\u401D\u7743\u4039\u6761\u4045\u35DB\u7798\u406A\u406F\u5C5E\u77BE\u77CB\u58F2\u7818\u70B9\u781C\u40A8\u7839\u7847\u7851\u7866\u8448\u5535\u7933\u6803\u7932\u4103\u4109\u7991\u7999\u8FBB\u7A06\u8FBC\u4167\u7A91\u41B2\u7ABC\u8279\u41C4\u7ACF\u7ADB\u41CF\u4E21\u7B62\u7B6C\u7B7B\u7C12\u7C1B\u4260\u427A\u7C7B\u7C9C\u428C\u7CB8\u4294\u7CED\u8F93\u70C0\u0CCF\u7DCF\u7DD4\u7DD0\u7DFD\u7FAE\u7FB4\u729F\u4397\u8020\u8025\u7B39\u802E\u8031\u8054\u3DCC\u57B4\u70A0\u80B7\u80E9\u43ED\u810C\u732A\u810E\u8112\u7560\u8114\u4401\u3B39\u8156\u8159\u815A\u4413\u583A\u817C\u8184\u4425\u8193\u442D\u81A5\u57EF\u81C1\u81E4\u8254\u448F\u82A6\u8276\u82CA\u82D8\u82FF\u44B0\u8357\u9669\u698A\u8405\u70F5\u8464\u60E3\u8488\u4504\u84BE\u84E1\u84F8\u8510\u8538\u8552\u453B\u856F\u8570\u85E0\u4577\u8672\u8692\u86B2\u86EF\u9645\u878B\u4606\u4617\u88AE\u88FF\u8924\u8947\u8991\u7967\u8A29\u8A38\u8A94\u8AB4\u8C51\u8CD4\u8CF2\u8D1C\u4798\u585F\u8DC3\u47ED\u4EEE\u8E3A\u55D8\u5754\u8E71\u55F5\u8EB0\u4837\u8ECE\u8EE2\u8EE4\u8EED\u8EF2\u8FB7\u8FC1\u8FCA\u8FCC\u9033\u99C4\u48AD\u98E0\u9213\u491E\u9228\u9258\u926B\u9
2B1\u92AE\u92BF\u92E3\u92EB\u92F3\u92F4\u92FD\u9343\u9384\u93AD\u4945\u4951\u9EBF\u9417\u5301\u941D\u942D\u943E\u496A\u9454\u9479\u952D\u95A2\u49A7\u95F4\u9633\u49E5\u67A0\u4A24\u9740\u4A35\u97B2\u97C2\u5654\u4AE4\u60E8\u98B9\u4B19\u98F1\u5844\u990E\u9919\u51B4\u991C\u9937\u9942\u995D\u9962\u4B70\u99C5\u4B9D\u9A3C\u9B0F\u7A83\u9B69\u9B81\u9BDD\u9BF1\u9BF4\u4C6D\u9C20\u376F\u1BC2\u9D49\u9C3A\u9EFE\u5650\u9D93\u9DBD\u9DC0\u9DFC\u94F6\u8FB6\u9E7B\u9EAC\u9EB1\u9EBD\u9EC6\u94DC\u9EE2\u9EF1\u9EF8\u7AC8\u9F44\u0094\u02B7\u03A0\u691A\u94C3\u59AC\u04D7\u5840\u94C1\u37B9\u05D5\u0615\u0676\u16BA\u5757\u7173\u0AC2\u0ACD\u0BBF\u546A\uF83B\u0BCB\u549E\u0BFB\u0C3B\u0C53\u0C65\u0C7C\u60E7\u0C8D\u567A\u0CB5\u0CDD\u0CED\u0D6F\u0DB2\u0DC8\u6955\u9C2F\u87A5\u0E04\u0E0E\u0ED7\u0F90\u0F2D\u0E73\u5C20\u0FBC\u5E0B\u105C\u104F\u1076\u671E\u107B\u1088\u1096\u3647\u10BF\u10D3\u112F\u113B\u5364\u84AD\u12E3\u1375\u1336\u8B81\u1577\u1619\u17C3\u17C7\u4E78\u70BB\u182D\u196A\u1A2D\u1A45\u1C2A\u1C70\u1CAC\u1EC8\u62C3\u1ED5\u1F15\u7198\u6855\u2045\u69E9\u36C8\u227C\u23D7\u23FA\u272A\u2871\u294F\u82FD\u2967\u2993\u2AD5\u89A5\u2AE8\u8FA0\u2B0E\u97B8\u2B3F\u9847\u9ABD\u2C4C\u0000\u2C88\u2CB7\u5BE8\u2D08\u2D12\u2DB7\u2D95\u2E42\u2F74\u2FCC\u3033\u3066\u331F\u33DE\u5FB1\u6648\u66BF\u7A79\u3567\u35F3\u7201\u49BA\u77D7\u361A\u3716\u7E87\u0346\u58B5\u670E\u6918\u3AA7\u7657\u5FE2\u3E11\u3EB9\u75FE\u209A\u48D0\u4AB8\u4119\u8A9A\u42EE\u430D\u403B\u4334\u4396\u4A45\u05CA\u51D2\u0611\u599F\u1EA8\u3BBE\u3CFF\u4404\u44D6\u5788\u4674\u399B\u472F\u85E8\u99C9\u3762\u21C3\u8B5E\u8B4E\u99D6\u4812\u48FB\u4A15\u7209\u4AC0\u0C78\u5965\u4EA5\u4F86\u0779\u8EDA\u502C\u528F\u573F\u7171\u5299\u5419\u3F4A\u4AA7\u55BC\u5446\u546E\u6B52\u91D4\u3473\u553F\u7632\u555E\u4718\u5562\u5566\u57C7\u493F\u585D\u5066\u34FB\u33CC\u60DE\u5903\u477C\u8948\u5AAE\u5B89\u5C06\u1D90\u57A1\u7151\u6FB6\u6102\u7C12\u9056\u61B2\u4F9A\u8B62\u6402\u644A\u5D5B\u6BF7\u8F36\u6484\u191C\u8AEA\u49F6\u6488\u3FEF\u6512\u4BC0\u65BF\u66B5\u271B\u9465\u57E1\u619
5\u5A27\uF8CD\u4FBB\u56B9\u4521\u66FC\u4E6A\u4934\u9656\u6D8F\u6CBD\u3618\u8977\u6799\u686E\u6411\u685E\u71DF\u68C7\u7B42\u90C0\u0A11\u6926\u9104\u6939\u7A45\u9DF0\u69FA\u9A26\u6A2D\u365F\u6469\u0021\u7983\u6A34\u6B5B\u5D2C\u3519\u83CF\u6B9D\u46D0\u6CA4\u753B\u8865\u6DAE\u58B6\u371C\u258D\u704B\u71CD\u3C54\u7280\u7285\u9281\u217A\u728B\u9330\u72E6\u49D0\u6C39\u949F\u7450\u0EF8\u8827\u88F5\u2926\u8473\u17B1\u6EB8\u4A2A\u1820\u39A4\u36B9\u5C10\u79E3\u453F\u66B6\u9CAD\u98A4\u8943\u77CC\u7858\u56D6\u40DF\u160A\u39A1\u372F\u80E8\u13C5\u71AD\u8366\u79DD\u91A8\u5A67\u4CB7\u70AF\u89AB\u79FD\u7A0A\u7B0B\u7D66\u417A\u7B43\u797E\u8009\u6FB5\uA2DF\u6A03\u8318\u53A2\u6E07\u93BF\u6836\u975D\u816F\u8023\u69B5\u13ED\u322F\u8048\u5D85\u8C30\u8083\u5715\u9823\u8949\u5DAB\u4988\u65BE\u69D5\u53D2\u4AA5\u3F81\u3C11\u6736\u8090\u80F4\u812E\u1FA1\u814F\u8189\u81AF\u821A\u8306\u832F\u838A\u35CA\u8468\u86AA\u48FA\u63E6\u8956\u7808\u9255\u89B8\u43F2\u89E7\u43DF\u89E8\u8B46\u8BD4\u59F8\u8C09\u8F0B\u8FC5\u90EC\u7B51\u9110\u913C\u3DF7\u915E\u4ACA\u8FD0\u728F\u568B\u94E7\u95E9\u95B0\u95B8\u9732\u98D1\u9949\u996A\u99C3\u9A28\u9B0E\u9D5A\u9D9B\u7E9F\u9EF8\u9F23\u4CA4\u9547\uA293\u71A2\uA2FF\u4D91\u9012\uA5CB\u4D9C\u0C9C\u8FBE\u55C1\u8FBA\u24B0\u8FB9\u4A93\u4509\u7E7F\u6F56\u6AB1\u4EEA\u34E4\u8B2C\u789D\u373A\u8E80\u17F5\u8024\u8B6C\u8B99\u7A3E\u66AF\u3DEB\u7655\u3CB7\u5635\u5956\u4E9A\u5E81\u6258\u56BF\u0E6D\u8E0E\u5B6D\u3E88\u4C9E\u63DE\u62D0\u17F6\u187B\u6530\u562D\u5C4A\u541A\u5311\u3DC6\u9D98\u4C7D\u5622\u561E\u7F49\u5ED8\u5975\u3D40\u8770\u4E1C\u0FEA\u0D49\u36BA\u8117\u9D5E\u8D18\u763B\u9C45\u764E\u77B9\u9345\u5432\u8148\u82F7\u5625\u8132\u8418\u80BD\u55EA\u7962\u5643\u5416\u0E9D\u35CE\u5605\u55F1\u66F1\u82E2\u362D\u7534\u55F0\u55BA\u5497\u5572\u0C41\u0C96\u5ED0\u5148\u0E76\u2C62\u0EA2\u9EAB\u7D5A\u55DE\u1075\u629D\u976D\u5494\u8CCD\u71F6\u9176\u63FC\u63B9\u63FE\u5569\u2B43\u9C72\u2EB3\u519A\u34DF\u0DA7\u51A7\u544D\u551E\u5513\u7666\u8E2D\u688A\u75B1\u80B6\u8804\u8786\u88C7\u81B6\u841C\u10C1\
u44EC\u7304\u4706\u5B90\u830B\u6893\u567B\u26F4\u7D2F\u41A3\u7D73\u6ED0\u72B6\u9170\u11D9\u9208\u3CFC\uA6A9\u0EAC\u0EF9\u7266\u1CA2\u474E\u4FC2\u7FF9\u0FEB\u40FA\u9C5D\u651F\u2DA0\u48F3\u47E0\u9D7C\u0FEC\u0E0A\u6062\u75A3\u0FED\u0000\u6048\u1187\u71A3\u7E8E\u9D50\u4E1A\u4E04\u3577\u5B0D\u6CB2\u5367\u36AC\u39DC\u537D\u36A5\u4618\u589A\u4B6E\u822D\u544B\u57AA\u5A95\u0979\u0000\u3A52\u2465\u7374\u9EAC\u4D09\u9BED\u3CFE\u9F30\u4C5B\u4FA9\u959E\u9FDE\u845C\u3DB6\u72B2\u67B3\u3720\u632E\u7D25\u3EF7\u3E2C\u3A2A\u9008\u52CC\u3E74\u367A\u45E9\u048E\u7640\u5AF0\u0EB6\u787A\u7F2E\u58A7\u40BF\u567C\u9B8B\u5D74\u7654\uA434\u9E85\u4CE1\u75F9\u37FB\u6119\u30DA\u43F2\u0000\u565D\u12A9\u57A7\u4963\u9E06\u5234\u70AE\u35AD\u6C4A\u9D7C\u7C56\u9B39\u57DE\u176C\u5C53\u64D3\u94D0\u6335\u7164\u86AD\u0D28\u6D22\u4AE2\u0D71\u0000\u51FE\u1F0F\u5D8E\u9703\u1DD1\u9E81\u904C\u7B1F\u9B02\u5CD1\u7BA3\u6268\u6335\u9AFF\u7BCF\u9B2A\u7C7E\u9B2E\u7C42\u7C86\u9C15\u7BFC\u9B09\u9F17\u9C1B\u493E\u9F5A\u5573\u5BC3\u4FFD\u9E98\u4FF2\u5260\u3E06\u52D1\u5767\u5056\u59B7\u5E12\u97C8\u9DAB\u8F5C\u5469\u97B4\u9940\u97BA\u532C\u6130\u692C\u53DA\u9C0A\u9D02\u4C3B\u9641\u6980\u50A6\u7546\u176D\u99DA\u5273\u0000\u9159\u9681\u915C\u0000\u9151\u8E97\u637F\u6D23\u6ACA\u5611\u918E\u757A\u6285\u03FC\u734F\u7C70\u5C21\u3CFD\u0000\u4919\u76D6\u9B9D\u4E2A\u0CD4\u83BE\u8842\u0000\u5C4A\u69C0\u50ED\u577A\u521F\u5DF5\u4ECE\u6C31\u01F2\u4F39\u549C\u54DA\u529A\u8D82\u35FE\u5F0C\u35F3\u0000\u6B52\u917C\u9FA5\u9B97\u982E\u98B4\u9ABA\u9EA8\u9E84\u717A\u7B14\u0000\u6BFA\u8818\u7F78\u0000\u5620\uA64A\u8E77\u9F53\u0000\u8DD4\u8E4F\u9E1C\u8E01\u6282\u837D\u8E28\u8E75\u7AD3\u4A77\u7A3E\u78D8\u6CEA\u8A67\u7607\u8A5A\u9F26\u6CCE\u87D6\u75C3\uA2B2\u7853\uF840\u8D0C\u72E2\u7371\u8B2D\u7302\u74F1\u8CEB\u4ABB\u862F\u5FBA\u88A0\u44B7\u0000\u183B\u6E05\u0000\u8A7E\u251B\u0000\u60FD\u7667\u9AD7\u9D44\u936E\u9B8F\u87F5\u0000\u880F\u8CF7\u732C\u9721\u9BB0\u35D6\u72B2\u4C07\u7C51\u994A\u6159\u6159\u4C04\u9E96\u617D\u0000\u575F\u616F\u62A6\u6239\u6
2CE\u3A5C\u61E2\u53AA\u33F5\u6364\u6802\u35D2\u5D57\u8BC2\u8FDA\u8E39\u0000\u50D9\u1D46\u7906\u5332\u9638\u0F3B\u4065\u0000\u77FE\u0000\u7CC2\u5F1A\u7CDA\u7A2D\u8066\u8063\u7D4D\u7505\u74F2\u8994\u821A\u670C\u8062\u7486\u805B\u74F0\u8103\u7724\u8989\u67CC\u7553\u6ED1\u87A9\u87CE\u81C8\u878C\u8A49\u8CAD\u8B43\u772B\u74F8\u84DA\u3635\u69B2\u8DA6\u0000\u89A9\u7468\u6DB9\u87C1\u4011\u74E7\u3DDB\u7176\u60A4\u619C\u3CD1\u7162\u6077\u0000\u7F71\u8B2D\u7250\u60E9\u4B7E\u5220\u3C18\u3CC7\u5ED7\u7656\u5531\u1944\u12FE\u9903\u6DDC\u70AD\u5CC1\u61AD\u8A0F\u3677\u00EE\u6846\u4F0E\u4562\u5B1F\u634C\u9F50\u9EA6\u626B\u3000\uFF0C\u3001\u3002\uFF0E\u2027\uFF1B\uFF1A\uFF1F\uFF01\uFE30\u2026\u2025\uFE50\uFE51\uFE52\u00B7\uFE54\uFE55\uFE56\uFE57\uFF5C\u2013\uFE31\u2014\uFE33\u2574\uFE34\uFE4F\uFF08\uFF09\uFE35\uFE36\uFF5B\uFF5D\uFE37\uFE38\u3014\u3015\uFE39\uFE3A\u3010\u3011\uFE3B\uFE3C\u300A\u300B\uFE3D\uFE3E\u3008\u3009\uFE3F\uFE40\u300C\u300D\uFE41\uFE42\u300E\u300F\uFE43\uFE44\uFE59\uFE5A\uFE5B\uFE5C\uFE5D\uFE5E\u2018\u2019\u201C\u201D\u301D\u301E\u2035\u2032\uFF03\uFF06\uFF0A\u203B\u00A7\u3003\u25CB\u25CF\u25B3\u25B2\u25CE\u2606\u2605\u25C7\u25C6\u25A1\u25A0\u25BD\u25BC\u32A3\u2105\u00AF\uFFE3\uFF3F\u02CD\uFE49\uFE4A\uFE4D\uFE4E\uFE4B\uFE4C\uFE5F\uFE60\uFE61\uFF0B\uFF0D\u00D7\u00F7\u00B1\u221A\uFF1C\uFF1E\uFF1D\u2266\u2267\u2260\u221E\u2252\u2261\uFE62\uFE63\uFE64\uFE65\uFE66\uFF5E\u2229\u222A\u22A5\u2220\u221F\u22BF\u33D2\u33D1\u222B\u222E\u2235\u2234\u2640\u2642\u2295\u2299\u2191\u2193\u2190\u2192\u2196\u2197\u2199\u2198\u2225\u2223\uFF0F\uFF3C\u2215\uFE68\uFF04\uFFE5\u3012\uFFE0\uFFE1\uFF05\uFF20\u2103\u2109\uFE69\uFE6A\uFE6B\u33D5\u339C\u339D\u339E\u33CE\u33A1\u338E\u338F\u33C4\u00B0\u5159\u515B\u515E\u515D\u5161\u5163\u55E7\u74E9\u7CCE\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588\u258F\u258E\u258D\u258C\u258B\u258A\u2589\u253C\u2534\u252C\u2524\u251C\u2594\u2500\u2502\u2595\u250C\u2510\u2514\u2518\u256D\u256E\u2570\u256F\u2550\u255E\u256A\u2561\u25E2\u25E3\u25E5\u25E4\u257
1\u2572\u2573\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\u2160\u2161\u2162\u2163\u2164\u2165\u2166\u2167\u2168\u2169\u3021\u3022\u3023\u3024\u3025\u3026\u3027\u3028\u3029\u5341\u5344\u5345\uFF21\uFF22\uFF23\uFF24\uFF25\uFF26\uFF27\uFF28\uFF29\uFF2A\uFF2B\uFF2C\uFF2D\uFF2E\uFF2F\uFF30\uFF31\uFF32\uFF33\uFF34\uFF35\uFF36\uFF37\uFF38\uFF39\uFF3A\uFF41\uFF42\uFF43\uFF44\uFF45\uFF46\uFF47\uFF48\uFF49\uFF4A\uFF4B\uFF4C\uFF4D\uFF4E\uFF4F\uFF50\uFF51\uFF52\uFF53\uFF54\uFF55\uFF56\uFF57\uFF58\uFF59\uFF5A\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF\u03C0\u03C1\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u3105\u3106\u3107\u3108\u3109\u310A\u310B\u310C\u310D\u310E\u310F\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117\u3118\u3119\u311A\u311B\u311C\u311D\u311E\u311F\u3120\u3121\u3122\u3123\u3124\u3125\u3126\u3127\u3128\u3129\u02D9\u02C9\u02CA\u02C7\u02CB\u2400\u2401\u2402\u2403\u2404\u2405\u2406\u2407\u2408\u2409\u240A\u240B\u240C\u240D\u240E\u240F\u2410\u2411\u2412\u2413\u2414\u2415\u2416\u2417\u2418\u2419\u241A\u241B\u241C\u241D\u241E\u241F\u2421\u20AC"; + + private static final String TABLE3 = 
"\u4E00\u4E59\u4E01\u4E03\u4E43\u4E5D\u4E86\u4E8C\u4EBA\u513F\u5165\u516B\u51E0\u5200\u5201\u529B\u5315\u5341\u535C\u53C8\u4E09\u4E0B\u4E08\u4E0A\u4E2B\u4E38\u51E1\u4E45\u4E48\u4E5F\u4E5E\u4E8E\u4EA1\u5140\u5203\u52FA\u5343\u53C9\u53E3\u571F\u58EB\u5915\u5927\u5973\u5B50\u5B51\u5B53\u5BF8\u5C0F\u5C22\u5C38\u5C71\u5DDD\u5DE5\u5DF1\u5DF2\u5DF3\u5DFE\u5E72\u5EFE\u5F0B\u5F13\u624D\u4E11\u4E10\u4E0D\u4E2D\u4E30\u4E39\u4E4B\u5C39\u4E88\u4E91\u4E95\u4E92\u4E94\u4EA2\u4EC1\u4EC0\u4EC3\u4EC6\u4EC7\u4ECD\u4ECA\u4ECB\u4EC4\u5143\u5141\u5167\u516D\u516E\u516C\u5197\u51F6\u5206\u5207\u5208\u52FB\u52FE\u52FF\u5316\u5339\u5348\u5347\u5345\u535E\u5384\u53CB\u53CA\u53CD\u58EC\u5929\u592B\u592A\u592D\u5B54\u5C11\u5C24\u5C3A\u5C6F\u5DF4\u5E7B\u5EFF\u5F14\u5F15\u5FC3\u6208\u6236\u624B\u624E\u652F\u6587\u6597\u65A4\u65B9\u65E5\u66F0\u6708\u6728\u6B20\u6B62\u6B79\u6BCB\u6BD4\u6BDB\u6C0F\u6C34\u706B\u722A\u7236\u723B\u7247\u7259\u725B\u72AC\u738B\u4E19\u4E16\u4E15\u4E14\u4E18\u4E3B\u4E4D\u4E4F\u4E4E\u4EE5\u4ED8\u4ED4\u4ED5\u4ED6\u4ED7\u4EE3\u4EE4\u4ED9\u4EDE\u5145\u5144\u5189\u518A\u51AC\u51F9\u51FA\u51F8\u520A\u52A0\u529F\u5305\u5306\u5317\u531D\u4EDF\u534A\u5349\u5361\u5360\u536F\u536E\u53BB\u53EF\u53E4\u53F3\u53EC\u53EE\u53E9\u53E8\u53FC\u53F8\u53F5\u53EB\u53E6\u53EA\u53F2\u53F1\u53F0\u53E5\u53ED\u53FB\u56DB\u56DA\u5916\u592E\u5931\u5974\u5976\u5B55\u5B83\u5C3C\u5DE8\u5DE7\u5DE6\u5E02\u5E03\u5E73\u5E7C\u5F01\u5F18\u5F17\u5FC5\u620A\u6253\u6254\u6252\u6251\u65A5\u65E6\u672E\u672C\u672A\u672B\u672D\u6B63\u6BCD\u6C11\u6C10\u6C38\u6C41\u6C40\u6C3E\u72AF\u7384\u7389\u74DC\u74E6\u7518\u751F\u7528\u7529\u7530\u7531\u7532\u7533\u758B\u767D\u76AE\u76BF\u76EE\u77DB\u77E2\u77F3\u793A\u79BE\u7A74\u7ACB\u4E1E\u4E1F\u4E52\u4E53\u4E69\u4E99\u4EA4\u4EA6\u4EA5\u4EFF\u4F09\u4F19\u4F0A\u4F15\u4F0D\u4F10\u4F11\u4F0F\u4EF2\u4EF6\u4EFB\u4EF0\u4EF3\u4EFD\u4F01\u4F0B\u5149\u5147\u5146\u5148\u5168\u5171\u518D\u51B0\u5217\u5211\u5212\u520E\u5216\u52A3\u5308\u5321\u5320\u5370\u5371\u5409\u540F\u540C\u540A\u5410\
u5401\u540B\u5404\u5411\u540D\u5408\u5403\u540E\u5406\u5412\u56E0\u56DE\u56DD\u5733\u5730\u5728\u572D\u572C\u572F\u5729\u5919\u591A\u5937\u5938\u5984\u5978\u5983\u597D\u5979\u5982\u5981\u5B57\u5B58\u5B87\u5B88\u5B85\u5B89\u5BFA\u5C16\u5C79\u5DDE\u5E06\u5E76\u5E74\u5F0F\u5F1B\u5FD9\u5FD6\u620E\u620C\u620D\u6210\u6263\u625B\u6258\u6536\u65E9\u65E8\u65EC\u65ED\u66F2\u66F3\u6709\u673D\u6734\u6731\u6735\u6B21\u6B64\u6B7B\u6C16\u6C5D\u6C57\u6C59\u6C5F\u6C60\u6C50\u6C55\u6C61\u6C5B\u6C4D\u6C4E\u7070\u725F\u725D\u767E\u7AF9\u7C73\u7CF8\u7F36\u7F8A\u7FBD\u8001\u8003\u800C\u8012\u8033\u807F\u8089\u808B\u808C\u81E3\u81EA\u81F3\u81FC\u820C\u821B\u821F\u826E\u8272\u827E\u866B\u8840\u884C\u8863\u897F\u9621\u4E32\u4EA8\u4F4D\u4F4F\u4F47\u4F57\u4F5E\u4F34\u4F5B\u4F55\u4F30\u4F50\u4F51\u4F3D\u4F3A\u4F38\u4F43\u4F54\u4F3C\u4F46\u4F63\u4F5C\u4F60\u4F2F\u4F4E\u4F36\u4F59\u4F5D\u4F48\u4F5A\u514C\u514B\u514D\u5175\u51B6\u51B7\u5225\u5224\u5229\u522A\u5228\u52AB\u52A9\u52AA\u52AC\u5323\u5373\u5375\u541D\u542D\u541E\u543E\u5426\u544E\u5427\u5446\u5443\u5433\u5448\u5442\u541B\u5429\u544A\u5439\u543B\u5438\u542E\u5435\u5436\u5420\u543C\u5440\u5431\u542B\u541F\u542C\u56EA\u56F0\u56E4\u56EB\u574A\u5751\u5740\u574D\u5747\u574E\u573E\u5750\u574F\u573B\u58EF\u593E\u599D\u5992\u59A8\u599E\u59A3\u5999\u5996\u598D\u59A4\u5993\u598A\u59A5\u5B5D\u5B5C\u5B5A\u5B5B\u5B8C\u5B8B\u5B8F\u5C2C\u5C40\u5C41\u5C3F\u5C3E\u5C90\u5C91\u5C94\u5C8C\u5DEB\u5E0C\u5E8F\u5E87\u5E8A\u5EF7\u5F04\u5F1F\u5F64\u5F62\u5F77\u5F79\u5FD8\u5FCC\u5FD7\u5FCD\u5FF1\u5FEB\u5FF8\u5FEA\u6212\u6211\u6284\u6297\u6296\u6280\u6276\u6289\u626D\u628A\u627C\u627E\u6279\u6273\u6292\u626F\u6298\u626E\u6295\u6293\u6291\u6286\u6539\u653B\u6538\u65F1\u66F4\u675F\u674E\u674F\u6750\u6751\u675C\u6756\u675E\u6749\u6746\u6760\u6753\u6757\u6B65\u6BCF\u6C42\u6C5E\u6C99\u6C81\u6C88\u6C89\u6C85\u6C9B\u6C6A\u6C7A\u6C90\u6C70\u6C8C\u6C68\u6C96\u6C92\u6C7D\u6C83\u6C72\u6C7E\u6C74\u6C86\u6C76\u6C8D\u6C94\u6C98\u6C82\u7076\u707C\u707D\u7078\u7262\u7261\u7260\u7
2C4\u72C2\u7396\u752C\u752B\u7537\u7538\u7682\u76EF\u77E3\u79C1\u79C0\u79BF\u7A76\u7CFB\u7F55\u8096\u8093\u809D\u8098\u809B\u809A\u80B2\u826F\u8292\u828B\u828D\u898B\u89D2\u8A00\u8C37\u8C46\u8C55\u8C9D\u8D64\u8D70\u8DB3\u8EAB\u8ECA\u8F9B\u8FB0\u8FC2\u8FC6\u8FC5\u8FC4\u5DE1\u9091\u90A2\u90AA\u90A6\u90A3\u9149\u91C6\u91CC\u9632\u962E\u9631\u962A\u962C\u4E26\u4E56\u4E73\u4E8B\u4E9B\u4E9E\u4EAB\u4EAC\u4F6F\u4F9D\u4F8D\u4F73\u4F7F\u4F6C\u4F9B\u4F8B\u4F86\u4F83\u4F70\u4F75\u4F88\u4F69\u4F7B\u4F96\u4F7E\u4F8F\u4F91\u4F7A\u5154\u5152\u5155\u5169\u5177\u5176\u5178\u51BD\u51FD\u523B\u5238\u5237\u523A\u5230\u522E\u5236\u5241\u52BE\u52BB\u5352\u5354\u5353\u5351\u5366\u5377\u5378\u5379\u53D6\u53D4\u53D7\u5473\u5475\u5496\u5478\u5495\u5480\u547B\u5477\u5484\u5492\u5486\u547C\u5490\u5471\u5476\u548C\u549A\u5462\u5468\u548B\u547D\u548E\u56FA\u5783\u5777\u576A\u5769\u5761\u5766\u5764\u577C\u591C\u5949\u5947\u5948\u5944\u5954\u59BE\u59BB\u59D4\u59B9\u59AE\u59D1\u59C6\u59D0\u59CD\u59CB\u59D3\u59CA\u59AF\u59B3\u59D2\u59C5\u5B5F\u5B64\u5B63\u5B97\u5B9A\u5B98\u5B9C\u5B99\u5B9B\u5C1A\u5C48\u5C45\u5C46\u5CB7\u5CA1\u5CB8\u5CA9\u5CAB\u5CB1\u5CB3\u5E18\u5E1A\u5E16\u5E15\u5E1B\u5E11\u5E78\u5E9A\u5E97\u5E9C\u5E95\u5E96\u5EF6\u5F26\u5F27\u5F29\u5F80\u5F81\u5F7F\u5F7C\u5FDD\u5FE0\u5FFD\u5FF5\u5FFF\u600F\u6014\u602F\u6035\u6016\u602A\u6015\u6021\u6027\u6029\u602B\u601B\u6216\u6215\u623F\u623E\u6240\u627F\u62C9\u62CC\u62C4\u62BF\u62C2\u62B9\u62D2\u62DB\u62AB\u62D3\u62D4\u62CB\u62C8\u62A8\u62BD\u62BC\u62D0\u62D9\u62C7\u62CD\u62B5\u62DA\u62B1\u62D8\u62D6\u62D7\u62C6\u62AC\u62CE\u653E\u65A7\u65BC\u65FA\u6614\u6613\u660C\u6606\u6602\u660E\u6600\u660F\u6615\u660A\u6607\u670D\u670B\u676D\u678B\u6795\u6771\u679C\u6773\u6777\u6787\u679D\u6797\u676F\u6770\u677F\u6789\u677E\u6790\u6775\u679A\u6793\u677C\u676A\u6772\u6B23\u6B66\u6B67\u6B7F\u6C13\u6C1B\u6CE3\u6CE8\u6CF3\u6CB1\u6CCC\u6CE5\u6CB3\u6CBD\u6CBE\u6CBC\u6CE2\u6CAB\u6CD5\u6CD3\u6CB8\u6CC4\u6CB9\u6CC1\u6CAE\u6CD7\u6CC5\u6CF1\u6CBF\u6CBB\u6CE1\u6CDB\u6CC
A\u6CAC\u6CEF\u6CDC\u6CD6\u6CE0\u7095\u708E\u7092\u708A\u7099\u722C\u722D\u7238\u7248\u7267\u7269\u72C0\u72CE\u72D9\u72D7\u72D0\u73A9\u73A8\u739F\u73AB\u73A5\u753D\u759D\u7599\u759A\u7684\u76C2\u76F2\u76F4\u77E5\u77FD\u793E\u7940\u7941\u79C9\u79C8\u7A7A\u7A79\u7AFA\u7CFE\u7F54\u7F8C\u7F8B\u8005\u80BA\u80A5\u80A2\u80B1\u80A1\u80AB\u80A9\u80B4\u80AA\u80AF\u81E5\u81FE\u820D\u82B3\u829D\u8299\u82AD\u82BD\u829F\u82B9\u82B1\u82AC\u82A5\u82AF\u82B8\u82A3\u82B0\u82BE\u82B7\u864E\u8671\u521D\u8868\u8ECB\u8FCE\u8FD4\u8FD1\u90B5\u90B8\u90B1\u90B6\u91C7\u91D1\u9577\u9580\u961C\u9640\u963F\u963B\u9644\u9642\u96B9\u96E8\u9752\u975E\u4E9F\u4EAD\u4EAE\u4FE1\u4FB5\u4FAF\u4FBF\u4FE0\u4FD1\u4FCF\u4FDD\u4FC3\u4FB6\u4FD8\u4FDF\u4FCA\u4FD7\u4FAE\u4FD0\u4FC4\u4FC2\u4FDA\u4FCE\u4FDE\u4FB7\u5157\u5192\u5191\u51A0\u524E\u5243\u524A\u524D\u524C\u524B\u5247\u52C7\u52C9\u52C3\u52C1\u530D\u5357\u537B\u539A\u53DB\u54AC\u54C0\u54A8\u54CE\u54C9\u54B8\u54A6\u54B3\u54C7\u54C2\u54BD\u54AA\u54C1\u54C4\u54C8\u54AF\u54AB\u54B1\u54BB\u54A9\u54A7\u54BF\u56FF\u5782\u578B\u57A0\u57A3\u57A2\u57CE\u57AE\u5793\u5955\u5951\u594F\u594E\u5950\u59DC\u59D8\u59FF\u59E3\u59E8\u5A03\u59E5\u59EA\u59DA\u59E6\u5A01\u59FB\u5B69\u5BA3\u5BA6\u5BA4\u5BA2\u5BA5\u5C01\u5C4E\u5C4F\u5C4D\u5C4B\u5CD9\u5CD2\u5DF7\u5E1D\u5E25\u5E1F\u5E7D\u5EA0\u5EA6\u5EFA\u5F08\u5F2D\u5F65\u5F88\u5F85\u5F8A\u5F8B\u5F87\u5F8C\u5F89\u6012\u601D\u6020\u6025\u600E\u6028\u604D\u6070\u6068\u6062\u6046\u6043\u606C\u606B\u606A\u6064\u6241\u62DC\u6316\u6309\u62FC\u62ED\u6301\u62EE\u62FD\u6307\u62F1\u62F7\u62EF\u62EC\u62FE\u62F4\u6311\u6302\u653F\u6545\u65AB\u65BD\u65E2\u6625\u662D\u6620\u6627\u662F\u661F\u6628\u6631\u6624\u66F7\u67FF\u67D3\u67F1\u67D4\u67D0\u67EC\u67B6\u67AF\u67F5\u67E9\u67EF\u67C4\u67D1\u67B4\u67DA\u67E5\u67B8\u67CF\u67DE\u67F3\u67B0\u67D9\u67E2\u67DD\u67D2\u6B6A\u6B83\u6B86\u6BB5\u6BD2\u6BD7\u6C1F\u6CC9\u6D0B\u6D32\u6D2A\u6D41\u6D25\u6D0C\u6D31\u6D1E\u6D17\u6D3B\u6D3D\u6D3E\u6D36\u6D1B\u6CF5\u6D39\u6D27\u6D38\u6D29\u6D2E\u6D35\u6D0E\u6D2B\
u70AB\u70BA\u70B3\u70AC\u70AF\u70AD\u70B8\u70AE\u70A4\u7230\u7272\u726F\u7274\u72E9\u72E0\u72E1\u73B7\u73CA\u73BB\u73B2\u73CD\u73C0\u73B3\u751A\u752D\u754F\u754C\u754E\u754B\u75AB\u75A4\u75A5\u75A2\u75A3\u7678\u7686\u7687\u7688\u76C8\u76C6\u76C3\u76C5\u7701\u76F9\u76F8\u7709\u770B\u76FE\u76FC\u7707\u77DC\u7802\u7814\u780C\u780D\u7946\u7949\u7948\u7947\u79B9\u79BA\u79D1\u79D2\u79CB\u7A7F\u7A81\u7AFF\u7AFD\u7C7D\u7D02\u7D05\u7D00\u7D09\u7D07\u7D04\u7D06\u7F38\u7F8E\u7FBF\u8004\u8010\u800D\u8011\u8036\u80D6\u80E5\u80DA\u80C3\u80C4\u80CC\u80E1\u80DB\u80CE\u80DE\u80E4\u80DD\u81F4\u8222\u82E7\u8303\u8305\u82E3\u82DB\u82E6\u8304\u82E5\u8302\u8309\u82D2\u82D7\u82F1\u8301\u82DC\u82D4\u82D1\u82DE\u82D3\u82DF\u82EF\u8306\u8650\u8679\u867B\u867A\u884D\u886B\u8981\u89D4\u8A08\u8A02\u8A03\u8C9E\u8CA0\u8D74\u8D73\u8DB4\u8ECD\u8ECC\u8FF0\u8FE6\u8FE2\u8FEA\u8FE5\u8FED\u8FEB\u8FE4\u8FE8\u90CA\u90CE\u90C1\u90C3\u914B\u914A\u91CD\u9582\u9650\u964B\u964C\u964D\u9762\u9769\u97CB\u97ED\u97F3\u9801\u98A8\u98DB\u98DF\u9996\u9999\u4E58\u4EB3\u500C\u500D\u5023\u4FEF\u5026\u5025\u4FF8\u5029\u5016\u5006\u503C\u501F\u501A\u5012\u5011\u4FFA\u5000\u5014\u5028\u4FF1\u5021\u500B\u5019\u5018\u4FF3\u4FEE\u502D\u502A\u4FFE\u502B\u5009\u517C\u51A4\u51A5\u51A2\u51CD\u51CC\u51C6\u51CB\u5256\u525C\u5254\u525B\u525D\u532A\u537F\u539F\u539D\u53DF\u54E8\u5510\u5501\u5537\u54FC\u54E5\u54F2\u5506\u54FA\u5514\u54E9\u54ED\u54E1\u5509\u54EE\u54EA\u54E6\u5527\u5507\u54FD\u550F\u5703\u5704\u57C2\u57D4\u57CB\u57C3\u5809\u590F\u5957\u5958\u595A\u5A11\u5A18\u5A1C\u5A1F\u5A1B\u5A13\u59EC\u5A20\u5A23\u5A29\u5A25\u5A0C\u5A09\u5B6B\u5C58\u5BB0\u5BB3\u5BB6\u5BB4\u5BAE\u5BB5\u5BB9\u5BB8\u5C04\u5C51\u5C55\u5C50\u5CED\u5CFD\u5CFB\u5CEA\u5CE8\u5CF0\u5CF6\u5D01\u5CF4\u5DEE\u5E2D\u5E2B\u5EAB\u5EAD\u5EA7\u5F31\u5F92\u5F91\u5F90\u6059\u6063\u6065\u6050\u6055\u606D\u6069\u606F\u6084\u609F\u609A\u608D\u6094\u608C\u6085\u6096\u6247\u62F3\u6308\u62FF\u634E\u633E\u632F\u6355\u6342\u6346\u634F\u6349\u633A\u6350\u633D\u632A\u632B\u6328\u6
34D\u634C\u6548\u6549\u6599\u65C1\u65C5\u6642\u6649\u664F\u6643\u6652\u664C\u6645\u6641\u66F8\u6714\u6715\u6717\u6821\u6838\u6848\u6846\u6853\u6839\u6842\u6854\u6829\u68B3\u6817\u684C\u6851\u683D\u67F4\u6850\u6840\u683C\u6843\u682A\u6845\u6813\u6818\u6841\u6B8A\u6B89\u6BB7\u6C23\u6C27\u6C28\u6C26\u6C24\u6CF0\u6D6A\u6D95\u6D88\u6D87\u6D66\u6D78\u6D77\u6D59\u6D93\u6D6C\u6D89\u6D6E\u6D5A\u6D74\u6D69\u6D8C\u6D8A\u6D79\u6D85\u6D65\u6D94\u70CA\u70D8\u70E4\u70D9\u70C8\u70CF\u7239\u7279\u72FC\u72F9\u72FD\u72F8\u72F7\u7386\u73ED\u7409\u73EE\u73E0\u73EA\u73DE\u7554\u755D\u755C\u755A\u7559\u75BE\u75C5\u75C7\u75B2\u75B3\u75BD\u75BC\u75B9\u75C2\u75B8\u768B\u76B0\u76CA\u76CD\u76CE\u7729\u771F\u7720\u7728\u77E9\u7830\u7827\u7838\u781D\u7834\u7837\u7825\u782D\u7820\u781F\u7832\u7955\u7950\u7960\u795F\u7956\u795E\u795D\u7957\u795A\u79E4\u79E3\u79E7\u79DF\u79E6\u79E9\u79D8\u7A84\u7A88\u7AD9\u7B06\u7B11\u7C89\u7D21\u7D17\u7D0B\u7D0A\u7D20\u7D22\u7D14\u7D10\u7D15\u7D1A\u7D1C\u7D0D\u7D19\u7D1B\u7F3A\u7F5F\u7F94\u7FC5\u7FC1\u8006\u8018\u8015\u8019\u8017\u803D\u803F\u80F1\u8102\u80F0\u8105\u80ED\u80F4\u8106\u80F8\u80F3\u8108\u80FD\u810A\u80FC\u80EF\u81ED\u81EC\u8200\u8210\u822A\u822B\u8228\u822C\u82BB\u832B\u8352\u8354\u834A\u8338\u8350\u8349\u8335\u8334\u834F\u8332\u8339\u8336\u8317\u8340\u8331\u8328\u8343\u8654\u868A\u86AA\u8693\u86A4\u86A9\u868C\u86A3\u869C\u8870\u8877\u8881\u8882\u887D\u8879\u8A18\u8A10\u8A0E\u8A0C\u8A15\u8A0A\u8A17\u8A13\u8A16\u8A0F\u8A11\u8C48\u8C7A\u8C79\u8CA1\u8CA2\u8D77\u8EAC\u8ED2\u8ED4\u8ECF\u8FB1\u9001\u9006\u8FF7\u9000\u8FFA\u8FF4\u9003\u8FFD\u9005\u8FF8\u9095\u90E1\u90DD\u90E2\u9152\u914D\u914C\u91D8\u91DD\u91D7\u91DC\u91D9\u9583\u9662\u9663\u9661\u965B\u965D\u9664\u9658\u965E\u96BB\u98E2\u99AC\u9AA8\u9AD8\u9B25\u9B32\u9B3C\u4E7E\u507A\u507D\u505C\u5047\u5043\u504C\u505A\u5049\u5065\u5076\u504E\u5055\u5075\u5074\u5077\u504F\u500F\u506F\u506D\u515C\u5195\u51F0\u526A\u526F\u52D2\u52D9\u52D8\u52D5\u5310\u530F\u5319\u533F\u5340\u533E\u53C3\u66FC\u5546\u556A\u556
6\u5544\u555E\u5561\u5543\u554A\u5531\u5556\u554F\u5555\u552F\u5564\u5538\u552E\u555C\u552C\u5563\u5533\u5541\u5557\u5708\u570B\u5709\u57DF\u5805\u580A\u5806\u57E0\u57E4\u57FA\u5802\u5835\u57F7\u57F9\u5920\u5962\u5A36\u5A41\u5A49\u5A66\u5A6A\u5A40\u5A3C\u5A62\u5A5A\u5A46\u5A4A\u5B70\u5BC7\u5BC5\u5BC4\u5BC2\u5BBF\u5BC6\u5C09\u5C08\u5C07\u5C60\u5C5C\u5C5D\u5D07\u5D06\u5D0E\u5D1B\u5D16\u5D22\u5D11\u5D29\u5D14\u5D19\u5D24\u5D27\u5D17\u5DE2\u5E38\u5E36\u5E33\u5E37\u5EB7\u5EB8\u5EB6\u5EB5\u5EBE\u5F35\u5F37\u5F57\u5F6C\u5F69\u5F6B\u5F97\u5F99\u5F9E\u5F98\u5FA1\u5FA0\u5F9C\u607F\u60A3\u6089\u60A0\u60A8\u60CB\u60B4\u60E6\u60BD\u60C5\u60BB\u60B5\u60DC\u60BC\u60D8\u60D5\u60C6\u60DF\u60B8\u60DA\u60C7\u621A\u621B\u6248\u63A0\u63A7\u6372\u6396\u63A2\u63A5\u6377\u6367\u6398\u63AA\u6371\u63A9\u6389\u6383\u639B\u636B\u63A8\u6384\u6388\u6399\u63A1\u63AC\u6392\u638F\u6380\u637B\u6369\u6368\u637A\u655D\u6556\u6551\u6559\u6557\u555F\u654F\u6558\u6555\u6554\u659C\u659B\u65AC\u65CF\u65CB\u65CC\u65CE\u665D\u665A\u6664\u6668\u6666\u665E\u66F9\u52D7\u671B\u6881\u68AF\u68A2\u6893\u68B5\u687F\u6876\u68B1\u68A7\u6897\u68B0\u6883\u68C4\u68AD\u6886\u6885\u6894\u689D\u68A8\u689F\u68A1\u6882\u6B32\u6BBA\u6BEB\u6BEC\u6C2B\u6D8E\u6DBC\u6DF3\u6DD9\u6DB2\u6DE1\u6DCC\u6DE4\u6DFB\u6DFA\u6E05\u6DC7\u6DCB\u6DAF\u6DD1\u6DAE\u6DDE\u6DF9\u6DB8\u6DF7\u6DF5\u6DC5\u6DD2\u6E1A\u6DB5\u6DDA\u6DEB\u6DD8\u6DEA\u6DF1\u6DEE\u6DE8\u6DC6\u6DC4\u6DAA\u6DEC\u6DBF\u6DE6\u70F9\u7109\u710A\u70FD\u70EF\u723D\u727D\u7281\u731C\u731B\u7316\u7313\u7319\u7387\u7405\u740A\u7403\u7406\u73FE\u740D\u74E0\u74F6\u74F7\u751C\u7522\u7565\u7566\u7562\u7570\u758F\u75D4\u75D5\u75B5\u75CA\u75CD\u768E\u76D4\u76D2\u76DB\u7737\u773E\u773C\u7736\u7738\u773A\u786B\u7843\u784E\u7965\u7968\u796D\u79FB\u7A92\u7A95\u7B20\u7B28\u7B1B\u7B2C\u7B26\u7B19\u7B1E\u7B2E\u7C92\u7C97\u7C95\u7D46\u7D43\u7D71\u7D2E\u7D39\u7D3C\u7D40\u7D30\u7D33\u7D44\u7D2F\u7D42\u7D32\u7D31\u7F3D\u7F9E\u7F9A\u7FCC\u7FCE\u7FD2\u801C\u804A\u8046\u812F\u8116\u8123\u812B\u8129\u8130\
u8124\u8202\u8235\u8237\u8236\u8239\u838E\u839E\u8398\u8378\u83A2\u8396\u83BD\u83AB\u8392\u838A\u8393\u8389\u83A0\u8377\u837B\u837C\u8386\u83A7\u8655\u5F6A\u86C7\u86C0\u86B6\u86C4\u86B5\u86C6\u86CB\u86B1\u86AF\u86C9\u8853\u889E\u8888\u88AB\u8892\u8896\u888D\u888B\u8993\u898F\u8A2A\u8A1D\u8A23\u8A25\u8A31\u8A2D\u8A1F\u8A1B\u8A22\u8C49\u8C5A\u8CA9\u8CAC\u8CAB\u8CA8\u8CAA\u8CA7\u8D67\u8D66\u8DBE\u8DBA\u8EDB\u8EDF\u9019\u900D\u901A\u9017\u9023\u901F\u901D\u9010\u9015\u901E\u9020\u900F\u9022\u9016\u901B\u9014\u90E8\u90ED\u90FD\u9157\u91CE\u91F5\u91E6\u91E3\u91E7\u91ED\u91E9\u9589\u966A\u9675\u9673\u9678\u9670\u9674\u9676\u9677\u966C\u96C0\u96EA\u96E9\u7AE0\u7ADF\u9802\u9803\u9B5A\u9CE5\u9E75\u9E7F\u9EA5\u9EBB\u50A2\u508D\u5085\u5099\u5091\u5080\u5096\u5098\u509A\u6700\u51F1\u5272\u5274\u5275\u5269\u52DE\u52DD\u52DB\u535A\u53A5\u557B\u5580\u55A7\u557C\u558A\u559D\u5598\u5582\u559C\u55AA\u5594\u5587\u558B\u5583\u55B3\u55AE\u559F\u553E\u55B2\u559A\u55BB\u55AC\u55B1\u557E\u5589\u55AB\u5599\u570D\u582F\u582A\u5834\u5824\u5830\u5831\u5821\u581D\u5820\u58F9\u58FA\u5960\u5A77\u5A9A\u5A7F\u5A92\u5A9B\u5AA7\u5B73\u5B71\u5BD2\u5BCC\u5BD3\u5BD0\u5C0A\u5C0B\u5C31\u5D4C\u5D50\u5D34\u5D47\u5DFD\u5E45\u5E3D\u5E40\u5E43\u5E7E\u5ECA\u5EC1\u5EC2\u5EC4\u5F3C\u5F6D\u5FA9\u5FAA\u5FA8\u60D1\u60E1\u60B2\u60B6\u60E0\u611C\u6123\u60FA\u6115\u60F0\u60FB\u60F4\u6168\u60F1\u610E\u60F6\u6109\u6100\u6112\u621F\u6249\u63A3\u638C\u63CF\u63C0\u63E9\u63C9\u63C6\u63CD\u63D2\u63E3\u63D0\u63E1\u63D6\u63ED\u63EE\u6376\u63F4\u63EA\u63DB\u6452\u63DA\u63F9\u655E\u6566\u6562\u6563\u6591\u6590\u65AF\u666E\u6670\u6674\u6676\u666F\u6691\u667A\u667E\u6677\u66FE\u66FF\u671F\u671D\u68FA\u68D5\u68E0\u68D8\u68D7\u6905\u68DF\u68F5\u68EE\u68E7\u68F9\u68D2\u68F2\u68E3\u68CB\u68CD\u690D\u6912\u690E\u68C9\u68DA\u696E\u68FB\u6B3E\u6B3A\u6B3D\u6B98\u6B96\u6BBC\u6BEF\u6C2E\u6C2F\u6C2C\u6E2F\u6E38\u6E54\u6E21\u6E32\u6E67\u6E4A\u6E20\u6E25\u6E23\u6E1B\u6E5B\u6E58\u6E24\u6E56\u6E6E\u6E2D\u6E26\u6E6F\u6E34\u6E4D\u6E3A\u6E2C\u6E43\u6
E1D\u6E3E\u6ECB\u6E89\u6E19\u6E4E\u6E63\u6E44\u6E72\u6E69\u6E5F\u7119\u711A\u7126\u7130\u7121\u7136\u716E\u711C\u724C\u7284\u7280\u7336\u7325\u7334\u7329\u743A\u742A\u7433\u7422\u7425\u7435\u7436\u7434\u742F\u741B\u7426\u7428\u7525\u7526\u756B\u756A\u75E2\u75DB\u75E3\u75D9\u75D8\u75DE\u75E0\u767B\u767C\u7696\u7693\u76B4\u76DC\u774F\u77ED\u785D\u786C\u786F\u7A0D\u7A08\u7A0B\u7A05\u7A00\u7A98\u7A97\u7A96\u7AE5\u7AE3\u7B49\u7B56\u7B46\u7B50\u7B52\u7B54\u7B4D\u7B4B\u7B4F\u7B51\u7C9F\u7CA5\u7D5E\u7D50\u7D68\u7D55\u7D2B\u7D6E\u7D72\u7D61\u7D66\u7D62\u7D70\u7D73\u5584\u7FD4\u7FD5\u800B\u8052\u8085\u8155\u8154\u814B\u8151\u814E\u8139\u8146\u813E\u814C\u8153\u8174\u8212\u821C\u83E9\u8403\u83F8\u840D\u83E0\u83C5\u840B\u83C1\u83EF\u83F1\u83F4\u8457\u840A\u83F0\u840C\u83CC\u83FD\u83F2\u83CA\u8438\u840E\u8404\u83DC\u8407\u83D4\u83DF\u865B\u86DF\u86D9\u86ED\u86D4\u86DB\u86E4\u86D0\u86DE\u8857\u88C1\u88C2\u88B1\u8983\u8996\u8A3B\u8A60\u8A55\u8A5E\u8A3C\u8A41\u8A54\u8A5B\u8A50\u8A46\u8A34\u8A3A\u8A36\u8A56\u8C61\u8C82\u8CAF\u8CBC\u8CB3\u8CBD\u8CC1\u8CBB\u8CC0\u8CB4\u8CB7\u8CB6\u8CBF\u8CB8\u8D8A\u8D85\u8D81\u8DCE\u8DDD\u8DCB\u8DDA\u8DD1\u8DCC\u8DDB\u8DC6\u8EFB\u8EF8\u8EFC\u8F9C\u902E\u9035\u9031\u9038\u9032\u9036\u9102\u90F5\u9109\u90FE\u9163\u9165\u91CF\u9214\u9215\u9223\u9209\u921E\u920D\u9210\u9207\u9211\u9594\u958F\u958B\u9591\u9593\u9592\u958E\u968A\u968E\u968B\u967D\u9685\u9686\u968D\u9672\u9684\u96C1\u96C5\u96C4\u96C6\u96C7\u96EF\u96F2\u97CC\u9805\u9806\u9808\u98E7\u98EA\u98EF\u98E9\u98F2\u98ED\u99AE\u99AD\u9EC3\u9ECD\u9ED1\u4E82\u50AD\u50B5\u50B2\u50B3\u50C5\u50BE\u50AC\u50B7\u50BB\u50AF\u50C7\u527F\u5277\u527D\u52DF\u52E6\u52E4\u52E2\u52E3\u532F\u55DF\u55E8\u55D3\u55E6\u55CE\u55DC\u55C7\u55D1\u55E3\u55E4\u55EF\u55DA\u55E1\u55C5\u55C6\u55E5\u55C9\u5712\u5713\u585E\u5851\u5858\u5857\u585A\u5854\u586B\u584C\u586D\u584A\u5862\u5852\u584B\u5967\u5AC1\u5AC9\u5ACC\u5ABE\u5ABD\u5ABC\u5AB3\u5AC2\u5AB2\u5D69\u5D6F\u5E4C\u5E79\u5EC9\u5EC8\u5F12\u5F59\u5FAC\u5FAE\u611A\u610F\u6148\u611
F\u60F3\u611B\u60F9\u6101\u6108\u614E\u614C\u6144\u614D\u613E\u6134\u6127\u610D\u6106\u6137\u6221\u6222\u6413\u643E\u641E\u642A\u642D\u643D\u642C\u640F\u641C\u6414\u640D\u6436\u6416\u6417\u6406\u656C\u659F\u65B0\u6697\u6689\u6687\u6688\u6696\u6684\u6698\u668D\u6703\u6994\u696D\u695A\u6977\u6960\u6954\u6975\u6930\u6982\u694A\u6968\u696B\u695E\u6953\u6979\u6986\u695D\u6963\u695B\u6B47\u6B72\u6BC0\u6BBF\u6BD3\u6BFD\u6EA2\u6EAF\u6ED3\u6EB6\u6EC2\u6E90\u6E9D\u6EC7\u6EC5\u6EA5\u6E98\u6EBC\u6EBA\u6EAB\u6ED1\u6E96\u6E9C\u6EC4\u6ED4\u6EAA\u6EA7\u6EB4\u714E\u7159\u7169\u7164\u7149\u7167\u715C\u716C\u7166\u714C\u7165\u715E\u7146\u7168\u7156\u723A\u7252\u7337\u7345\u733F\u733E\u746F\u745A\u7455\u745F\u745E\u7441\u743F\u7459\u745B\u745C\u7576\u7578\u7600\u75F0\u7601\u75F2\u75F1\u75FA\u75FF\u75F4\u75F3\u76DE\u76DF\u775B\u776B\u7766\u775E\u7763\u7779\u776A\u776C\u775C\u7765\u7768\u7762\u77EE\u788E\u78B0\u7897\u7898\u788C\u7889\u787C\u7891\u7893\u787F\u797A\u797F\u7981\u842C\u79BD\u7A1C\u7A1A\u7A20\u7A14\u7A1F\u7A1E\u7A9F\u7AA0\u7B77\u7BC0\u7B60\u7B6E\u7B67\u7CB1\u7CB3\u7CB5\u7D93\u7D79\u7D91\u7D81\u7D8F\u7D5B\u7F6E\u7F69\u7F6A\u7F72\u7FA9\u7FA8\u7FA4\u8056\u8058\u8086\u8084\u8171\u8170\u8178\u8165\u816E\u8173\u816B\u8179\u817A\u8166\u8205\u8247\u8482\u8477\u843D\u8431\u8475\u8466\u846B\u8449\u846C\u845B\u843C\u8435\u8461\u8463\u8469\u846D\u8446\u865E\u865C\u865F\u86F9\u8713\u8708\u8707\u8700\u86FE\u86FB\u8702\u8703\u8706\u870A\u8859\u88DF\u88D4\u88D9\u88DC\u88D8\u88DD\u88E1\u88CA\u88D5\u88D2\u899C\u89E3\u8A6B\u8A72\u8A73\u8A66\u8A69\u8A70\u8A87\u8A7C\u8A63\u8AA0\u8A71\u8A85\u8A6D\u8A62\u8A6E\u8A6C\u8A79\u8A7B\u8A3E\u8A68\u8C62\u8C8A\u8C89\u8CCA\u8CC7\u8CC8\u8CC4\u8CB2\u8CC3\u8CC2\u8CC5\u8DE1\u8DDF\u8DE8\u8DEF\u8DF3\u8DFA\u8DEA\u8DE4\u8DE6\u8EB2\u8F03\u8F09\u8EFE\u8F0A\u8F9F\u8FB2\u904B\u904A\u9053\u9042\u9054\u903C\u9055\u9050\u9047\u904F\u904E\u904D\u9051\u903E\u9041\u9112\u9117\u916C\u916A\u9169\u91C9\u9237\u9257\u9238\u923D\u9240\u923E\u925B\u924B\u9264\u9251\u9234\u9249\u924D\
u9245\u9239\u923F\u925A\u9598\u9698\u9694\u9695\u96CD\u96CB\u96C9\u96CA\u96F7\u96FB\u96F9\u96F6\u9756\u9774\u9776\u9810\u9811\u9813\u980A\u9812\u980C\u98FC\u98F4\u98FD\u98FE\u99B3\u99B1\u99B4\u9AE1\u9CE9\u9E82\u9F0E\u9F13\u9F20\u50E7\u50EE\u50E5\u50D6\u50ED\u50DA\u50D5\u50CF\u50D1\u50F1\u50CE\u50E9\u5162\u51F3\u5283\u5282\u5331\u53AD\u55FE\u5600\u561B\u5617\u55FD\u5614\u5606\u5609\u560D\u560E\u55F7\u5616\u561F\u5608\u5610\u55F6\u5718\u5716\u5875\u587E\u5883\u5893\u588A\u5879\u5885\u587D\u58FD\u5925\u5922\u5924\u596A\u5969\u5AE1\u5AE6\u5AE9\u5AD7\u5AD6\u5AD8\u5AE3\u5B75\u5BDE\u5BE7\u5BE1\u5BE5\u5BE6\u5BE8\u5BE2\u5BE4\u5BDF\u5C0D\u5C62\u5D84\u5D87\u5E5B\u5E63\u5E55\u5E57\u5E54\u5ED3\u5ED6\u5F0A\u5F46\u5F70\u5FB9\u6147\u613F\u614B\u6177\u6162\u6163\u615F\u615A\u6158\u6175\u622A\u6487\u6458\u6454\u64A4\u6478\u645F\u647A\u6451\u6467\u6434\u646D\u647B\u6572\u65A1\u65D7\u65D6\u66A2\u66A8\u669D\u699C\u69A8\u6995\u69C1\u69AE\u69D3\u69CB\u699B\u69B7\u69BB\u69AB\u69B4\u69D0\u69CD\u69AD\u69CC\u69A6\u69C3\u69A3\u6B49\u6B4C\u6C33\u6F33\u6F14\u6EFE\u6F13\u6EF4\u6F29\u6F3E\u6F20\u6F2C\u6F0F\u6F02\u6F22\u6EFF\u6EEF\u6F06\u6F31\u6F38\u6F32\u6F23\u6F15\u6F2B\u6F2F\u6F88\u6F2A\u6EEC\u6F01\u6EF2\u6ECC\u6EF7\u7194\u7199\u717D\u718A\u7184\u7192\u723E\u7292\u7296\u7344\u7350\u7464\u7463\u746A\u7470\u746D\u7504\u7591\u7627\u760D\u760B\u7609\u7613\u76E1\u76E3\u7784\u777D\u777F\u7761\u78C1\u789F\u78A7\u78B3\u78A9\u78A3\u798E\u798F\u798D\u7A2E\u7A31\u7AAA\u7AA9\u7AED\u7AEF\u7BA1\u7B95\u7B8B\u7B75\u7B97\u7B9D\u7B94\u7B8F\u7BB8\u7B87\u7B84\u7CB9\u7CBD\u7CBE\u7DBB\u7DB0\u7D9C\u7DBD\u7DBE\u7DA0\u7DCA\u7DB4\u7DB2\u7DB1\u7DBA\u7DA2\u7DBF\u7DB5\u7DB8\u7DAD\u7DD2\u7DC7\u7DAC\u7F70\u7FE0\u7FE1\u7FDF\u805E\u805A\u8087\u8150\u8180\u818F\u8188\u818A\u817F\u8182\u81E7\u81FA\u8207\u8214\u821E\u824B\u84C9\u84BF\u84C6\u84C4\u8499\u849E\u84B2\u849C\u84CB\u84B8\u84C0\u84D3\u8490\u84BC\u84D1\u84CA\u873F\u871C\u873B\u8722\u8725\u8734\u8718\u8755\u8737\u8729\u88F3\u8902\u88F4\u88F9\u88F8\u88FD\u88E8\u891A\u88EF\u8
AA6\u8A8C\u8A9E\u8AA3\u8A8D\u8AA1\u8A93\u8AA4\u8AAA\u8AA5\u8AA8\u8A98\u8A91\u8A9A\u8AA7\u8C6A\u8C8D\u8C8C\u8CD3\u8CD1\u8CD2\u8D6B\u8D99\u8D95\u8DFC\u8F14\u8F12\u8F15\u8F13\u8FA3\u9060\u9058\u905C\u9063\u9059\u905E\u9062\u905D\u905B\u9119\u9118\u911E\u9175\u9178\u9177\u9174\u9278\u9280\u9285\u9298\u9296\u927B\u9293\u929C\u92A8\u927C\u9291\u95A1\u95A8\u95A9\u95A3\u95A5\u95A4\u9699\u969C\u969B\u96CC\u96D2\u9700\u977C\u9785\u97F6\u9817\u9818\u98AF\u98B1\u9903\u9905\u990C\u9909\u99C1\u9AAF\u9AB0\u9AE6\u9B41\u9B42\u9CF4\u9CF6\u9CF3\u9EBC\u9F3B\u9F4A\u5104\u5100\u50FB\u50F5\u50F9\u5102\u5108\u5109\u5105\u51DC\u5287\u5288\u5289\u528D\u528A\u52F0\u53B2\u562E\u563B\u5639\u5632\u563F\u5634\u5629\u5653\u564E\u5657\u5674\u5636\u562F\u5630\u5880\u589F\u589E\u58B3\u589C\u58AE\u58A9\u58A6\u596D\u5B09\u5AFB\u5B0B\u5AF5\u5B0C\u5B08\u5BEE\u5BEC\u5BE9\u5BEB\u5C64\u5C65\u5D9D\u5D94\u5E62\u5E5F\u5E61\u5EE2\u5EDA\u5EDF\u5EDD\u5EE3\u5EE0\u5F48\u5F71\u5FB7\u5FB5\u6176\u6167\u616E\u615D\u6155\u6182\u617C\u6170\u616B\u617E\u61A7\u6190\u61AB\u618E\u61AC\u619A\u61A4\u6194\u61AE\u622E\u6469\u646F\u6479\u649E\u64B2\u6488\u6490\u64B0\u64A5\u6493\u6495\u64A9\u6492\u64AE\u64AD\u64AB\u649A\u64AC\u6499\u64A2\u64B3\u6575\u6577\u6578\u66AE\u66AB\u66B4\u66B1\u6A23\u6A1F\u69E8\u6A01\u6A1E\u6A19\u69FD\u6A21\u6A13\u6A0A\u69F3\u6A02\u6A05\u69ED\u6A11\u6B50\u6B4E\u6BA4\u6BC5\u6BC6\u6F3F\u6F7C\u6F84\u6F51\u6F66\u6F54\u6F86\u6F6D\u6F5B\u6F78\u6F6E\u6F8E\u6F7A\u6F70\u6F64\u6F97\u6F58\u6ED5\u6F6F\u6F60\u6F5F\u719F\u71AC\u71B1\u71A8\u7256\u729B\u734E\u7357\u7469\u748B\u7483\u747E\u7480\u757F\u7620\u7629\u761F\u7624\u7626\u7621\u7622\u769A\u76BA\u76E4\u778E\u7787\u778C\u7791\u778B\u78CB\u78C5\u78BA\u78CA\u78BE\u78D5\u78BC\u78D0\u7A3F\u7A3C\u7A40\u7A3D\u7A37\u7A3B\u7AAF\u7AAE\u7BAD\u7BB1\u7BC4\u7BB4\u7BC6\u7BC7\u7BC1\u7BA0\u7BCC\u7CCA\u7DE0\u7DF4\u7DEF\u7DFB\u7DD8\u7DEC\u7DDD\u7DE8\u7DE3\u7DDA\u7DDE\u7DE9\u7D9E\u7DD9\u7DF2\u7DF9\u7F75\u7F77\u7FAF\u7FE9\u8026\u819B\u819C\u819D\u81A0\u819A\u8198\u8517\u853D\u851A\u84E
E\u852C\u852D\u8513\u8511\u8523\u8521\u8514\u84EC\u8525\u84FF\u8506\u8782\u8774\u8776\u8760\u8766\u8778\u8768\u8759\u8757\u874C\u8753\u885B\u885D\u8910\u8907\u8912\u8913\u8915\u890A\u8ABC\u8AD2\u8AC7\u8AC4\u8A95\u8ACB\u8AF8\u8AB2\u8AC9\u8AC2\u8ABF\u8AB0\u8AD6\u8ACD\u8AB6\u8AB9\u8ADB\u8C4C\u8C4E\u8C6C\u8CE0\u8CDE\u8CE6\u8CE4\u8CEC\u8CED\u8CE2\u8CE3\u8CDC\u8CEA\u8CE1\u8D6D\u8D9F\u8DA3\u8E2B\u8E10\u8E1D\u8E22\u8E0F\u8E29\u8E1F\u8E21\u8E1E\u8EBA\u8F1D\u8F1B\u8F1F\u8F29\u8F26\u8F2A\u8F1C\u8F1E\u8F25\u9069\u906E\u9068\u906D\u9077\u9130\u912D\u9127\u9131\u9187\u9189\u918B\u9183\u92C5\u92BB\u92B7\u92EA\u92AC\u92E4\u92C1\u92B3\u92BC\u92D2\u92C7\u92F0\u92B2\u95AD\u95B1\u9704\u9706\u9707\u9709\u9760\u978D\u978B\u978F\u9821\u982B\u981C\u98B3\u990A\u9913\u9912\u9918\u99DD\u99D0\u99DF\u99DB\u99D1\u99D5\u99D2\u99D9\u9AB7\u9AEE\u9AEF\u9B27\u9B45\u9B44\u9B77\u9B6F\u9D06\u9D09\u9D03\u9EA9\u9EBE\u9ECE\u58A8\u9F52\u5112\u5118\u5114\u5110\u5115\u5180\u51AA\u51DD\u5291\u5293\u52F3\u5659\u566B\u5679\u5669\u5664\u5678\u566A\u5668\u5665\u5671\u566F\u566C\u5662\u5676\u58C1\u58BE\u58C7\u58C5\u596E\u5B1D\u5B34\u5B78\u5BF0\u5C0E\u5F4A\u61B2\u6191\u61A9\u618A\u61CD\u61B6\u61BE\u61CA\u61C8\u6230\u64C5\u64C1\u64CB\u64BB\u64BC\u64DA\u64C4\u64C7\u64C2\u64CD\u64BF\u64D2\u64D4\u64BE\u6574\u66C6\u66C9\u66B9\u66C4\u66C7\u66B8\u6A3D\u6A38\u6A3A\u6A59\u6A6B\u6A58\u6A39\u6A44\u6A62\u6A61\u6A4B\u6A47\u6A35\u6A5F\u6A48\u6B59\u6B77\u6C05\u6FC2\u6FB1\u6FA1\u6FC3\u6FA4\u6FC1\u6FA7\u6FB3\u6FC0\u6FB9\u6FB6\u6FA6\u6FA0\u6FB4\u71BE\u71C9\u71D0\u71D2\u71C8\u71D5\u71B9\u71CE\u71D9\u71DC\u71C3\u71C4\u7368\u749C\u74A3\u7498\u749F\u749E\u74E2\u750C\u750D\u7634\u7638\u763A\u76E7\u76E5\u77A0\u779E\u779F\u77A5\u78E8\u78DA\u78EC\u78E7\u79A6\u7A4D\u7A4E\u7A46\u7A4C\u7A4B\u7ABA\u7BD9\u7C11\u7BC9\u7BE4\u7BDB\u7BE1\u7BE9\u7BE6\u7CD5\u7CD6\u7E0A\u7E11\u7E08\u7E1B\u7E23\u7E1E\u7E1D\u7E09\u7E10\u7F79\u7FB2\u7FF0\u7FF1\u7FEE\u8028\u81B3\u81A9\u81A8\u81FB\u8208\u8258\u8259\u854A\u8559\u8548\u8568\u8569\u8543\u8549\u856D\u856A\u855E\
u8783\u879F\u879E\u87A2\u878D\u8861\u892A\u8932\u8925\u892B\u8921\u89AA\u89A6\u8AE6\u8AFA\u8AEB\u8AF1\u8B00\u8ADC\u8AE7\u8AEE\u8AFE\u8B01\u8B02\u8AF7\u8AED\u8AF3\u8AF6\u8AFC\u8C6B\u8C6D\u8C93\u8CF4\u8E44\u8E31\u8E34\u8E42\u8E39\u8E35\u8F3B\u8F2F\u8F38\u8F33\u8FA8\u8FA6\u9075\u9074\u9078\u9072\u907C\u907A\u9134\u9192\u9320\u9336\u92F8\u9333\u932F\u9322\u92FC\u932B\u9304\u931A\u9310\u9326\u9321\u9315\u932E\u9319\u95BB\u96A7\u96A8\u96AA\u96D5\u970E\u9711\u9716\u970D\u9713\u970F\u975B\u975C\u9766\u9798\u9830\u9838\u983B\u9837\u982D\u9839\u9824\u9910\u9928\u991E\u991B\u9921\u991A\u99ED\u99E2\u99F1\u9AB8\u9ABC\u9AFB\u9AED\u9B28\u9B91\u9D15\u9D23\u9D26\u9D28\u9D12\u9D1B\u9ED8\u9ED4\u9F8D\u9F9C\u512A\u511F\u5121\u5132\u52F5\u568E\u5680\u5690\u5685\u5687\u568F\u58D5\u58D3\u58D1\u58CE\u5B30\u5B2A\u5B24\u5B7A\u5C37\u5C68\u5DBC\u5DBA\u5DBD\u5DB8\u5E6B\u5F4C\u5FBD\u61C9\u61C2\u61C7\u61E6\u61CB\u6232\u6234\u64CE\u64CA\u64D8\u64E0\u64F0\u64E6\u64EC\u64F1\u64E2\u64ED\u6582\u6583\u66D9\u66D6\u6A80\u6A94\u6A84\u6AA2\u6A9C\u6ADB\u6AA3\u6A7E\u6A97\u6A90\u6AA0\u6B5C\u6BAE\u6BDA\u6C08\u6FD8\u6FF1\u6FDF\u6FE0\u6FDB\u6FE4\u6FEB\u6FEF\u6F80\u6FEC\u6FE1\u6FE9\u6FD5\u6FEE\u6FF0\u71E7\u71DF\u71EE\u71E6\u71E5\u71ED\u71EC\u71F4\u71E0\u7235\u7246\u7370\u7372\u74A9\u74B0\u74A6\u74A8\u7646\u7642\u764C\u76EA\u77B3\u77AA\u77B0\u77AC\u77A7\u77AD\u77EF\u78F7\u78FA\u78F4\u78EF\u7901\u79A7\u79AA\u7A57\u7ABF\u7C07\u7C0D\u7BFE\u7BF7\u7C0C\u7BE0\u7CE0\u7CDC\u7CDE\u7CE2\u7CDF\u7CD9\u7CDD\u7E2E\u7E3E\u7E46\u7E37\u7E32\u7E43\u7E2B\u7E3D\u7E31\u7E45\u7E41\u7E34\u7E39\u7E48\u7E35\u7E3F\u7E2F\u7F44\u7FF3\u7FFC\u8071\u8072\u8070\u806F\u8073\u81C6\u81C3\u81BA\u81C2\u81C0\u81BF\u81BD\u81C9\u81BE\u81E8\u8209\u8271\u85AA\u8584\u857E\u859C\u8591\u8594\u85AF\u859B\u8587\u85A8\u858A\u8667\u87C0\u87D1\u87B3\u87D2\u87C6\u87AB\u87BB\u87BA\u87C8\u87CB\u893B\u8936\u8944\u8938\u893D\u89AC\u8B0E\u8B17\u8B19\u8B1B\u8B0A\u8B20\u8B1D\u8B04\u8B10\u8C41\u8C3F\u8C73\u8CFA\u8CFD\u8CFC\u8CF8\u8CFB\u8DA8\u8E49\u8E4B\u8E48\u8E4A\u8F44\u8
F3E\u8F42\u8F45\u8F3F\u907F\u907D\u9084\u9081\u9082\u9080\u9139\u91A3\u919E\u919C\u934D\u9382\u9328\u9375\u934A\u9365\u934B\u9318\u937E\u936C\u935B\u9370\u935A\u9354\u95CA\u95CB\u95CC\u95C8\u95C6\u96B1\u96B8\u96D6\u971C\u971E\u97A0\u97D3\u9846\u98B6\u9935\u9A01\u99FF\u9BAE\u9BAB\u9BAA\u9BAD\u9D3B\u9D3F\u9E8B\u9ECF\u9EDE\u9EDC\u9EDD\u9EDB\u9F3E\u9F4B\u53E2\u5695\u56AE\u58D9\u58D8\u5B38\u5F5D\u61E3\u6233\u64F4\u64F2\u64FE\u6506\u64FA\u64FB\u64F7\u65B7\u66DC\u6726\u6AB3\u6AAC\u6AC3\u6ABB\u6AB8\u6AC2\u6AAE\u6AAF\u6B5F\u6B78\u6BAF\u7009\u700B\u6FFE\u7006\u6FFA\u7011\u700F\u71FB\u71FC\u71FE\u71F8\u7377\u7375\u74A7\u74BF\u7515\u7656\u7658\u7652\u77BD\u77BF\u77BB\u77BC\u790E\u79AE\u7A61\u7A62\u7A60\u7AC4\u7AC5\u7C2B\u7C27\u7C2A\u7C1E\u7C23\u7C21\u7CE7\u7E54\u7E55\u7E5E\u7E5A\u7E61\u7E52\u7E59\u7F48\u7FF9\u7FFB\u8077\u8076\u81CD\u81CF\u820A\u85CF\u85A9\u85CD\u85D0\u85C9\u85B0\u85BA\u85B9\u85A6\u87EF\u87EC\u87F2\u87E0\u8986\u89B2\u89F4\u8B28\u8B39\u8B2C\u8B2B\u8C50\u8D05\u8E59\u8E63\u8E66\u8E64\u8E5F\u8E55\u8EC0\u8F49\u8F4D\u9087\u9083\u9088\u91AB\u91AC\u91D0\u9394\u938A\u9396\u93A2\u93B3\u93AE\u93AC\u93B0\u9398\u939A\u9397\u95D4\u95D6\u95D0\u95D5\u96E2\u96DC\u96D9\u96DB\u96DE\u9724\u97A3\u97A6\u97AD\u97F9\u984D\u984F\u984C\u984E\u9853\u98BA\u993E\u993F\u993D\u992E\u99A5\u9A0E\u9AC1\u9B03\u9B06\u9B4F\u9B4E\u9B4D\u9BCA\u9BC9\u9BFD\u9BC8\u9BC0\u9D51\u9D5D\u9D60\u9EE0\u9F15\u9F2C\u5133\u56A5\u58DE\u58DF\u58E2\u5BF5\u9F90\u5EEC\u61F2\u61F7\u61F6\u61F5\u6500\u650F\u66E0\u66DD\u6AE5\u6ADD\u6ADA\u6AD3\u701B\u701F\u7028\u701A\u701D\u7015\u7018\u7206\u720D\u7258\u72A2\u7378\u737A\u74BD\u74CA\u74E3\u7587\u7586\u765F\u7661\u77C7\u7919\u79B1\u7A6B\u7A69\u7C3E\u7C3F\u7C38\u7C3D\u7C37\u7C40\u7E6B\u7E6D\u7E79\u7E69\u7E6A\u7F85\u7E73\u7FB6\u7FB9\u7FB8\u81D8\u85E9\u85DD\u85EA\u85D5\u85E4\u85E5\u85F7\u87FB\u8805\u880D\u87F9\u87FE\u8960\u895F\u8956\u895E\u8B41\u8B5C\u8B58\u8B49\u8B5A\u8B4E\u8B4F\u8B46\u8B59\u8D08\u8D0A\u8E7C\u8E72\u8E87\u8E76\u8E6C\u8E7A\u8E74\u8F54\u8F4E\u8FAD\u908A\u908B\u91B
1\u91AE\u93E1\u93D1\u93DF\u93C3\u93C8\u93DC\u93DD\u93D6\u93E2\u93CD\u93D8\u93E4\u93D7\u93E8\u95DC\u96B4\u96E3\u972A\u9727\u9761\u97DC\u97FB\u985E\u9858\u985B\u98BC\u9945\u9949\u9A16\u9A19\u9B0D\u9BE8\u9BE7\u9BD6\u9BDB\u9D89\u9D61\u9D72\u9D6A\u9D6C\u9E92\u9E97\u9E93\u9EB4\u52F8\u56A8\u56B7\u56B6\u56B4\u56BC\u58E4\u5B40\u5B43\u5B7D\u5BF6\u5DC9\u61F8\u61FA\u6518\u6514\u6519\u66E6\u6727\u6AEC\u703E\u7030\u7032\u7210\u737B\u74CF\u7662\u7665\u7926\u792A\u792C\u792B\u7AC7\u7AF6\u7C4C\u7C43\u7C4D\u7CEF\u7CF0\u8FAE\u7E7D\u7E7C\u7E82\u7F4C\u8000\u81DA\u8266\u85FB\u85F9\u8611\u85FA\u8606\u860B\u8607\u860A\u8814\u8815\u8964\u89BA\u89F8\u8B70\u8B6C\u8B66\u8B6F\u8B5F\u8B6B\u8D0F\u8D0D\u8E89\u8E81\u8E85\u8E82\u91B4\u91CB\u9418\u9403\u93FD\u95E1\u9730\u98C4\u9952\u9951\u99A8\u9A2B\u9A30\u9A37\u9A35\u9C13\u9C0D\u9E79\u9EB5\u9EE8\u9F2F\u9F5F\u9F63\u9F61\u5137\u5138\u56C1\u56C0\u56C2\u5914\u5C6C\u5DCD\u61FC\u61FE\u651D\u651C\u6595\u66E9\u6AFB\u6B04\u6AFA\u6BB2\u704C\u721B\u72A7\u74D6\u74D4\u7669\u77D3\u7C50\u7E8F\u7E8C\u7FBC\u8617\u862D\u861A\u8823\u8822\u8821\u881F\u896A\u896C\u89BD\u8B74\u8B77\u8B7D\u8D13\u8E8A\u8E8D\u8E8B\u8F5F\u8FAF\u91BA\u942E\u9433\u9435\u943A\u9438\u9432\u942B\u95E2\u9738\u9739\u9732\u97FF\u9867\u9865\u9957\u9A45\u9A43\u9A40\u9A3E\u9ACF\u9B54\u9B51\u9C2D\u9C25\u9DAF\u9DB4\u9DC2\u9DB8\u9E9D\u9EEF\u9F19\u9F5C\u9F66\u9F67\u513C\u513B\u56C8\u56CA\u56C9\u5B7F\u5DD4\u5DD2\u5F4E\u61FF\u6524\u6B0A\u6B61\u7051\u7058\u7380\u74E4\u758A\u766E\u766C\u79B3\u7C60\u7C5F\u807E\u807D\u81DF\u8972\u896F\u89FC\u8B80\u8D16\u8D17\u8E91\u8E93\u8F61\u9148\u9444\u9451\u9452\u973D\u973E\u97C3\u97C1\u986B\u9955\u9A55\u9A4D\u9AD2\u9B1A\u9C49\u9C31\u9C3E\u9C3B\u9DD3\u9DD7\u9F34\u9F6C\u9F6A\u9F94\u56CC\u5DD6\u6200\u6523\u652B\u652A\u66EC\u6B10\u74DA\u7ACA\u7C64\u7C63\u7C65\u7E93\u7E96\u7E94\u81E2\u8638\u863F\u8831\u8B8A\u9090\u908F\u9463\u9460\u9464\u9768\u986F\u995C\u9A5A\u9A5B\u9A57\u9AD3\u9AD4\u9AD1\u9C54\u9C57\u9C56\u9DE5\u9E9F\u9EF4\u56D1\u58E9\u652C\u705E\u7671\u7672\u77D7\u7F50\u7F88\
u8836\u8839\u8862\u8B93\u8B92\u8B96\u8277\u8D1B\u91C0\u946A\u9742\u9748\u9744\u97C6\u9870\u9A5F\u9B22\u9B58\u9C5F\u9DF9\u9DFA\u9E7C\u9E7D\u9F07\u9F77\u9F72\u5EF3\u6B16\u7063\u7C6C\u7C6E\u883B\u89C0\u8EA1\u91C1\u9472\u9470\u9871\u995E\u9AD6\u9B23\u9ECC\u7064\u77DA\u8B9A\u9477\u97C9\u9A62\u9A65\u7E9C\u8B9C\u8EAA\u91C5\u947D\u947E\u947C\u9C77\u9C78\u9EF7\u8C54\u947F\u9E1A\u7228\u9A6A\u9B31\u9E1B\u9E1E\u7C72\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u2474\u2475\u2476\u2477\u2478\u2479\u247A\u247B\u247C\u247D\u2170\u2171\u2172\u2173\u2174\u2175\u2176\u2177\u2178\u2179\u4E36\u4E3F\u4E85\u4EA0\u5182\u5196\u51AB\u52F9\u5338\u5369\u53B6\u590A\u5B80\u5DDB\u2F33\u5E7F\u5EF4\u5F50\u5F61\u6534\u65E0\u7592\u7676\u8FB5\u96B6\u00A8\u02C6\u30FD\u30FE\u309D\u309E\u3003\u4EDD\u3005\u3006\u3007\u30FC\uFF3B\uFF3D\u273D\u3041\u3042\u3043\u3044\u3045\u3046\u3047\u3048\u3049\u304A\u304B\u304C\u304D\u304E\u304F\u3050\u3051\u3052\u3053\u3054\u3055\u3056\u3057\u3058\u3059\u305A\u305B\u305C\u305D\u305E\u305F\u3060\u3061\u3062\u3063\u3064\u3065\u3066\u3067\u3068\u3069\u306A\u306B\u306C\u306D\u306E\u306F\u3070\u3071\u3072\u3073\u3074\u3075\u3076\u3077\u3078\u3079\u307A\u307B\u307C\u307D\u307E\u307F\u3080\u3081\u3082\u3083\u3084\u3085\u3086\u3087\u3088\u3089\u308A\u308B\u308C\u308D\u308E\u308F\u3090\u3091\u3092\u3093\u30A1\u30A2\u30A3\u30A4\u30A5\u30A6\u30A7\u30A8\u30A9\u30AA\u30AB\u30AC\u30AD\u30AE\u30AF\u30B0\u30B1\u30B2\u30B3\u30B4\u30B5\u30B6\u30B7\u30B8\u30B9\u30BA\u30BB\u30BC\u30BD\u30BE\u30BF\u30C0\u30C1\u30C2\u30C3\u30C4\u30C5\u30C6\u30C7\u30C8\u30C9\u30CA\u30CB\u30CC\u30CD\u30CE\u30CF\u30D0\u30D1\u30D2\u30D3\u30D4\u30D5\u30D6\u30D7\u30D8\u30D9\u30DA\u30DB\u30DC\u30DD\u30DE\u30DF\u30E0\u30E1\u30E2\u30E3\u30E4\u30E5\u30E6\u30E7\u30E8\u30E9\u30EA\u30EB\u30EC\u30ED\u30EE\u30EF\u30F0\u30F1\u30F2\u30F3\u30F4\u30F5\u30F6\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0
429\u042A\u042B\u042C\u042D\u042E\u042F\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044A\u044B\u044C\u044D\u044E\u044F\u21E7\u21B8\u21B9\u31CF\u00CC\u4E5A\u008A\u5202\u4491\u9FB0\u5188\u9FB1\u7607"; + + private static final String TABLE4 = "\uFFE2\uFFE4\uFF07\uFF02\u3231\u2116\u2121\u309B\u309C\u2E80\u2E84\u2E86\u2E87\u2E88\u2E8A\u2E8C\u2E8D\u2E95\u2E9C\u2E9D\u2EA5\u2EA7\u2EAA\u2EAC\u2EAE\u2EB6\u2EBC\u2EBE\u2EC6\u2ECA\u2ECC\u2ECD\u2ECF\u2ED6\u2ED7\u2EDE\u2EE3\u0000\u0000\u0000\u0283\u0250\u025B\u0254\u0275\u0153\u00F8\u014B\u028A\u026A\u4E42\u4E5C\u51F5\u531A\u5382\u4E07\u4E0C\u4E47\u4E8D\u56D7\uFA0C\u5C6E\u5F73\u4E0F\u5187\u4E0E\u4E2E\u4E93\u4EC2\u4EC9\u4EC8\u5198\u52FC\u536C\u53B9\u5720\u5903\u592C\u5C10\u5DFF\u65E1\u6BB3\u6BCC\u6C14\u723F\u4E31\u4E3C\u4EE8\u4EDC\u4EE9\u4EE1\u4EDD\u4EDA\u520C\u531C\u534C\u5722\u5723\u5917\u592F\u5B81\u5B84\u5C12\u5C3B\u5C74\u5C73\u5E04\u5E80\u5E82\u5FC9\u6209\u6250\u6C15\u6C36\u6C43\u6C3F\u6C3B\u72AE\u72B0\u738A\u79B8\u808A\u961E\u4F0E\u4F18\u4F2C\u4EF5\u4F14\u4EF1\u4F00\u4EF7\u4F08\u4F1D\u4F02\u4F05\u4F22\u4F13\u4F04\u4EF4\u4F12\u51B1\u5213\u5209\u5210\u52A6\u5322\u531F\u534D\u538A\u5407\u56E1\u56DF\u572E\u572A\u5734\u593C\u5980\u597C\u5985\u597B\u597E\u5977\u597F\u5B56\u5C15\u5C25\u5C7C\u5C7A\u5C7B\u5C7E\u5DDF\u5E75\u5E84\u5F02\u5F1A\u5F74\u5FD5\u5FD4\u5FCF\u625C\u625E\u6264\u6261\u6266\u6262\u6259\u6260\u625A\u6265\u65EF\u65EE\u673E\u6739\u6738\u673B\u673A\u673F\u673C\u6733\u6C18\u6C46\u6C52\u6C5C\u6C4F\u6C4A\u6C54\u6C4B\u6C4C\u7071\u725E\u72B4\u72B5\u738E\u752A\u767F\u7A75\u7F51\u8278\u827C\u8280\u827D\u827F\u864D\u897E\u9099\u9097\u9098\u909B\u9094\u9622\u9624\u9620\u9623\u4F56\u4F3B\u4F62\u4F49\u4F53\u4F64\u4F3E\u4F67\u4F52\u4F5F\u4F41\u4F58\u4F2D\u4F33\u4F3F\u4F61\u518F\u51B9\u521C\u521E\u5221\u52AD\u52AE\u5309\u5363\u5372\u538E\u538F\u5430\u5437\u542A\u5454\u5445\u5419\u541C\u5425\u5418\u543D\u544F\u5441\u
5428\u5424\u5447\u56EE\u56E7\u56E5\u5741\u5745\u574C\u5749\u574B\u5752\u5906\u5940\u59A6\u5998\u59A0\u5997\u598E\u59A2\u5990\u598F\u59A7\u59A1\u5B8E\u5B92\u5C28\u5C2A\u5C8D\u5C8F\u5C88\u5C8B\u5C89\u5C92\u5C8A\u5C86\u5C93\u5C95\u5DE0\u5E0A\u5E0E\u5E8B\u5E89\u5E8C\u5E88\u5E8D\u5F05\u5F1D\u5F78\u5F76\u5FD2\u5FD1\u5FD0\u5FED\u5FE8\u5FEE\u5FF3\u5FE1\u5FE4\u5FE3\u5FFA\u5FEF\u5FF7\u5FFB\u6000\u5FF4\u623A\u6283\u628C\u628E\u628F\u6294\u6287\u6271\u627B\u627A\u6270\u6281\u6288\u6277\u627D\u6272\u6274\u6537\u65F0\u65F4\u65F3\u65F2\u65F5\u6745\u6747\u6759\u6755\u674C\u6748\u675D\u674D\u675A\u674B\u6BD0\u6C19\u6C1A\u6C78\u6C67\u6C6B\u6C84\u6C8B\u6C8F\u6C71\u6C6F\u6C69\u6C9A\u6C6D\u6C87\u6C95\u6C9C\u6C66\u6C73\u6C65\u6C7B\u6C8E\u7074\u707A\u7263\u72BF\u72BD\u72C3\u72C6\u72C1\u72BA\u72C5\u7395\u7397\u7393\u7394\u7392\u753A\u7539\u7594\u7595\u7681\u793D\u8034\u8095\u8099\u8090\u8092\u809C\u8290\u828F\u8285\u828E\u8291\u8293\u828A\u8283\u8284\u8C78\u8FC9\u8FBF\u909F\u90A1\u90A5\u909E\u90A7\u90A0\u9630\u9628\u962F\u962D\u4E33\u4F98\u4F7C\u4F85\u4F7D\u4F80\u4F87\u4F76\u4F74\u4F89\u4F84\u4F77\u4F4C\u4F97\u4F6A\u4F9A\u4F79\u4F81\u4F78\u4F90\u4F9C\u4F94\u4F9E\u4F92\u4F82\u4F95\u4F6B\u4F6E\u519E\u51BC\u51BE\u5235\u5232\u5233\u5246\u5231\u52BC\u530A\u530B\u533C\u5392\u5394\u5487\u547F\u5481\u5491\u5482\u5488\u546B\u547A\u547E\u5465\u546C\u5474\u5466\u548D\u546F\u5461\u5460\u5498\u5463\u5467\u5464\u56F7\u56F9\u576F\u5772\u576D\u576B\u5771\u5770\u5776\u5780\u5775\u577B\u5773\u5774\u5762\u5768\u577D\u590C\u5945\u59B5\u59BA\u59CF\u59CE\u59B2\u59CC\u59C1\u59B6\u59BC\u59C3\u59D6\u59B1\u59BD\u59C0\u59C8\u59B4\u59C7\u5B62\u5B65\u5B93\u5B95\u5C44\u5C47\u5CAE\u5CA4\u5CA0\u5CB5\u5CAF\u5CA8\u5CAC\u5C9F\u5CA3\u5CAD\u5CA2\u5CAA\u5CA7\u5C9D\u5CA5\u5CB6\u5CB0\u5CA6\u5E17\u5E14\u5E19\u5F28\u5F22\u5F23\u5F24\u5F54\u5F82\u5F7E\u5F7D\u5FDE\u5FE5\u602D\u6026\u6019\u6032\u600B\u6034\u600A\u6017\u6033\u601A\u601E\u602C\u6022\u600D\u6010\u602E\u6013\u6011\u600C\u6009\u601C\u6214\u623D\u62AD\u62B4\u62D1\u62BE\u62
AA\u62B6\u62CA\u62AE\u62B3\u62AF\u62BB\u62A9\u62B0\u62B8\u653D\u65A8\u65BB\u6609\u65FC\u6604\u6612\u6608\u65FB\u6603\u660B\u660D\u6605\u65FD\u6611\u6610\u66F6\u670A\u6785\u676C\u678E\u6792\u6776\u677B\u6798\u6786\u6784\u6774\u678D\u678C\u677A\u679F\u6791\u6799\u6783\u677D\u6781\u6778\u6779\u6794\u6B25\u6B80\u6B7E\u6BDE\u6C1D\u6C93\u6CEC\u6CEB\u6CEE\u6CD9\u6CB6\u6CD4\u6CAD\u6CE7\u6CB7\u6CD0\u6CC2\u6CBA\u6CC3\u6CC6\u6CED\u6CF2\u6CD2\u6CDD\u6CB4\u6C8A\u6C9D\u6C80\u6CDE\u6CC0\u6D30\u6CCD\u6CC7\u6CB0\u6CF9\u6CCF\u6CE9\u6CD1\u7094\u7098\u7085\u7093\u7086\u7084\u7091\u7096\u7082\u709A\u7083\u726A\u72D6\u72CB\u72D8\u72C9\u72DC\u72D2\u72D4\u72DA\u72CC\u72D1\u73A4\u73A1\u73AD\u73A6\u73A2\u73A0\u73AC\u739D\u74DD\u74E8\u753F\u7540\u753E\u758C\u7598\u76AF\u76F3\u76F1\u76F0\u76F5\u77F8\u77FC\u77F9\u77FB\u77FA\u77F7\u7942\u793F\u79C5\u7A78\u7A7B\u7AFB\u7C75\u7CFD\u8035\u808F\u80AE\u80A3\u80B8\u80B5\u80AD\u8220\u82A0\u82C0\u82AB\u829A\u8298\u829B\u82B5\u82A7\u82AE\u82BC\u829E\u82BA\u82B4\u82A8\u82A1\u82A9\u82C2\u82A4\u82C3\u82B6\u82A2\u8670\u866F\u866D\u866E\u8C56\u8FD2\u8FCB\u8FD3\u8FCD\u8FD6\u8FD5\u8FD7\u90B2\u90B4\u90AF\u90B3\u90B0\u9639\u963D\u963C\u963A\u9643\u4FCD\u4FC5\u4FD3\u4FB2\u4FC9\u4FCB\u4FC1\u4FD4\u4FDC\u4FD9\u4FBB\u4FB3\u4FDB\u4FC7\u4FD6\u4FBA\u4FC0\u4FB9\u4FEC\u5244\u5249\u52C0\u52C2\u533D\u537C\u5397\u5396\u5399\u5398\u54BA\u54A1\u54AD\u54A5\u54CF\u54C3\u830D\u54B7\u54AE\u54D6\u54B6\u54C5\u54C6\u54A0\u5470\u54BC\u54A2\u54BE\u5472\u54DE\u54B0\u57B5\u579E\u579F\u57A4\u578C\u5797\u579D\u579B\u5794\u5798\u578F\u5799\u57A5\u579A\u5795\u58F4\u590D\u5953\u59E1\u59DE\u59EE\u5A00\u59F1\u59DD\u59FA\u59FD\u59FC\u59F6\u59E4\u59F2\u59F7\u59DB\u59E9\u59F3\u59F5\u59E0\u59FE\u59F4\u59ED\u5BA8\u5C4C\u5CD0\u5CD8\u5CCC\u5CD7\u5CCB\u5CDB\u5CDE\u5CDA\u5CC9\u5CC7\u5CCA\u5CD6\u5CD3\u5CD4\u5CCF\u5CC8\u5CC6\u5CCE\u5CDF\u5CF8\u5DF9\u5E21\u5E22\u5E23\u5E20\u5E24\u5EB0\u5EA4\u5EA2\u5E9B\u5EA3\u5EA5\u5F07\u5F2E\u5F56\u5F86\u6037\u6039\u6054\u6072\u605E\u6045\u6053\u6047\u6049\u605B\u604C\u6040
\u6042\u605F\u6024\u6044\u6058\u6066\u606E\u6242\u6243\u62CF\u630D\u630B\u62F5\u630E\u6303\u62EB\u62F9\u630F\u630C\u62F8\u62F6\u6300\u6313\u6314\u62FA\u6315\u62FB\u62F0\u6541\u6543\u65AA\u65BF\u6636\u6621\u6632\u6635\u661C\u6626\u6622\u6633\u662B\u663A\u661D\u6634\u6639\u662E\u670F\u6710\u67C1\u67F2\u67C8\u67BA\u67DC\u67BB\u67F8\u67D8\u67C0\u67B7\u67C5\u67EB\u67E4\u67DF\u67B5\u67CD\u67B3\u67F7\u67F6\u67EE\u67E3\u67C2\u67B9\u67CE\u67E7\u67F0\u67B2\u67FC\u67C6\u67ED\u67CC\u67AE\u67E6\u67DB\u67FA\u67C9\u67CA\u67C3\u67EA\u67CB\u6B28\u6B82\u6B84\u6BB6\u6BD6\u6BD8\u6BE0\u6C20\u6C21\u6D28\u6D34\u6D2D\u6D1F\u6D3C\u6D3F\u6D12\u6D0A\u6CDA\u6D33\u6D04\u6D19\u6D3A\u6D1A\u6D11\u6D00\u6D1D\u6D42\u6D01\u6D18\u6D37\u6D03\u6D0F\u6D40\u6D07\u6D20\u6D2C\u6D08\u6D22\u6D09\u6D10\u70B7\u709F\u70BE\u70B1\u70B0\u70A1\u70B4\u70B5\u70A9\u7241\u7249\u724A\u726C\u7270\u7273\u726E\u72CA\u72E4\u72E8\u72EB\u72DF\u72EA\u72E6\u72E3\u7385\u73CC\u73C2\u73C8\u73C5\u73B9\u73B6\u73B5\u73B4\u73EB\u73BF\u73C7\u73BE\u73C3\u73C6\u73B8\u73CB\u74EC\u74EE\u752E\u7547\u7548\u75A7\u75AA\u7679\u76C4\u7708\u7703\u7704\u7705\u770A\u76F7\u76FB\u76FA\u77E7\u77E8\u7806\u7811\u7812\u7805\u7810\u780F\u780E\u7809\u7803\u7813\u794A\u794C\u794B\u7945\u7944\u79D5\u79CD\u79CF\u79D6\u79CE\u7A80\u7A7E\u7AD1\u7B00\u7B01\u7C7A\u7C78\u7C79\u7C7F\u7C80\u7C81\u7D03\u7D08\u7D01\u7F58\u7F91\u7F8D\u7FBE\u8007\u800E\u800F\u8014\u8037\u80D8\u80C7\u80E0\u80D1\u80C8\u80C2\u80D0\u80C5\u80E3\u80D9\u80DC\u80CA\u80D5\u80C9\u80CF\u80D7\u80E6\u80CD\u81FF\u8221\u8294\u82D9\u82FE\u82F9\u8307\u82E8\u8300\u82D5\u833A\u82EB\u82D6\u82F4\u82EC\u82E1\u82F2\u82F5\u830C\u82FB\u82F6\u82F0\u82EA\u82E4\u82E0\u82FA\u82F3\u82ED\u8677\u8674\u867C\u8673\u8841\u884E\u8867\u886A\u8869\u89D3\u8A04\u8A07\u8D72\u8FE3\u8FE1\u8FEE\u8FE0\u90F1\u90BD\u90BF\u90D5\u90C5\u90BE\u90C7\u90CB\u90C8\u91D4\u91D3\u9654\u964F\u9651\u9653\u964A\u964E\u501E\u5005\u5007\u5013\u5022\u5030\u501B\u4FF5\u4FF4\u5033\u5037\u502C\u4FF6\u4FF7\u5017\u501C\u5020\u5027\u5035\u502F\u5031\u500E\u
515A\u5194\u5193\u51CA\u51C4\u51C5\u51C8\u51CE\u5261\u525A\u5252\u525E\u525F\u5255\u5262\u52CD\u530E\u539E\u5526\u54E2\u5517\u5512\u54E7\u54F3\u54E4\u551A\u54FF\u5504\u5508\u54EB\u5511\u5505\u54F1\u550A\u54FB\u54F7\u54F8\u54E0\u550E\u5503\u550B\u5701\u5702\u57CC\u5832\u57D5\u57D2\u57BA\u57C6\u57BD\u57BC\u57B8\u57B6\u57BF\u57C7\u57D0\u57B9\u57C1\u590E\u594A\u5A19\u5A16\u5A2D\u5A2E\u5A15\u5A0F\u5A17\u5A0A\u5A1E\u5A33\u5B6C\u5BA7\u5BAD\u5BAC\u5C03\u5C56\u5C54\u5CEC\u5CFF\u5CEE\u5CF1\u5CF7\u5D00\u5CF9\u5E29\u5E28\u5EA8\u5EAE\u5EAA\u5EAC\u5F33\u5F30\u5F67\u605D\u605A\u6067\u6041\u60A2\u6088\u6080\u6092\u6081\u609D\u6083\u6095\u609B\u6097\u6087\u609C\u608E\u6219\u6246\u62F2\u6310\u6356\u632C\u6344\u6345\u6336\u6343\u63E4\u6339\u634B\u634A\u633C\u6329\u6341\u6334\u6358\u6354\u6359\u632D\u6347\u6333\u635A\u6351\u6338\u6357\u6340\u6348\u654A\u6546\u65C6\u65C3\u65C4\u65C2\u664A\u665F\u6647\u6651\u6712\u6713\u681F\u681A\u6849\u6832\u6833\u683B\u684B\u684F\u6816\u6831\u681C\u6835\u682B\u682D\u682F\u684E\u6844\u6834\u681D\u6812\u6814\u6826\u6828\u682E\u684D\u683A\u6825\u6820\u6B2C\u6B2F\u6B2D\u6B31\u6B34\u6B6D\u8082\u6B88\u6BE6\u6BE4\u6BE8\u6BE3\u6BE2\u6BE7\u6C25\u6D7A\u6D63\u6D64\u6D76\u6D0D\u6D61\u6D92\u6D58\u6D62\u6D6D\u6D6F\u6D91\u6D8D\u6DEF\u6D7F\u6D86\u6D5E\u6D67\u6D60\u6D97\u6D70\u6D7C\u6D5F\u6D82\u6D98\u6D2F\u6D68\u6D8B\u6D7E\u6D80\u6D84\u6D16\u6D83\u6D7B\u6D7D\u6D75\u6D90\u70DC\u70D3\u70D1\u70DD\u70CB\u7F39\u70E2\u70D7\u70D2\u70DE\u70E0\u70D4\u70CD\u70C5\u70C6\u70C7\u70DA\u70CE\u70E1\u7242\u7278\u7277\u7276\u7300\u72FA\u72F4\u72FE\u72F6\u72F3\u72FB\u7301\u73D3\u73D9\u73E5\u73D6\u73BC\u73E7\u73E3\u73E9\u73DC\u73D2\u73DB\u73D4\u73DD\u73DA\u73D7\u73D8\u73E8\u74DE\u74DF\u74F4\u74F5\u7521\u755B\u755F\u75B0\u75C1\u75BB\u75C4\u75C0\u75BF\u75B6\u75BA\u768A\u76C9\u771D\u771B\u7710\u7713\u7712\u7723\u7711\u7715\u7719\u771A\u7722\u7727\u7823\u782C\u7822\u7835\u782F\u7828\u782E\u782B\u7821\u7829\u7833\u782A\u7831\u7954\u795B\u794F\u795C\u7953\u7952\u7951\u79EB\u79EC\u79E0\u79EE\u79
ED\u79EA\u79DC\u79DE\u79DD\u7A86\u7A89\u7A85\u7A8B\u7A8C\u7A8A\u7A87\u7AD8\u7B10\u7B04\u7B13\u7B05\u7B0F\u7B08\u7B0A\u7B0E\u7B09\u7B12\u7C84\u7C91\u7C8A\u7C8C\u7C88\u7C8D\u7C85\u7D1E\u7D1D\u7D11\u7D0E\u7D18\u7D16\u7D13\u7D1F\u7D12\u7D0F\u7D0C\u7F5C\u7F61\u7F5E\u7F60\u7F5D\u7F5B\u7F96\u7F92\u7FC3\u7FC2\u7FC0\u8016\u803E\u8039\u80FA\u80F2\u80F9\u80F5\u8101\u80FB\u8100\u8201\u822F\u8225\u8333\u832D\u8344\u8319\u8351\u8325\u8356\u833F\u8341\u8326\u831C\u8322\u8342\u834E\u831B\u832A\u8308\u833C\u834D\u8316\u8324\u8320\u8337\u832F\u8329\u8347\u8345\u834C\u8353\u831E\u832C\u834B\u8327\u8348\u8653\u8652\u86A2\u86A8\u8696\u868D\u8691\u869E\u8687\u8697\u8686\u868B\u869A\u8685\u86A5\u8699\u86A1\u86A7\u8695\u8698\u868E\u869D\u8690\u8694\u8843\u8844\u886D\u8875\u8876\u8872\u8880\u8871\u887F\u886F\u8883\u887E\u8874\u887C\u8A12\u8C47\u8C57\u8C7B\u8CA4\u8CA3\u8D76\u8D78\u8DB5\u8DB7\u8DB6\u8ED1\u8ED3\u8FFE\u8FF5\u9002\u8FFF\u8FFB\u9004\u8FFC\u8FF6\u90D6\u90E0\u90D9\u90DA\u90E3\u90DF\u90E5\u90D8\u90DB\u90D7\u90DC\u90E4\u9150\u914E\u914F\u91D5\u91E2\u91DA\u965C\u965F\u96BC\u98E3\u9ADF\u9B2F\u4E7F\u5070\u506A\u5061\u505E\u5060\u5053\u504B\u505D\u5072\u5048\u504D\u5041\u505B\u504A\u5062\u5015\u5045\u505F\u5069\u506B\u5063\u5064\u5046\u5040\u506E\u5073\u5057\u5051\u51D0\u526B\u526D\u526C\u526E\u52D6\u52D3\u532D\u539C\u5575\u5576\u553C\u554D\u5550\u5534\u552A\u5551\u5562\u5536\u5535\u5530\u5552\u5545\u550C\u5532\u5565\u554E\u5539\u5548\u552D\u553B\u5540\u554B\u570A\u5707\u57FB\u5814\u57E2\u57F6\u57DC\u57F4\u5800\u57ED\u57FD\u5808\u57F8\u580B\u57F3\u57CF\u5807\u57EE\u57E3\u57F2\u57E5\u57EC\u57E1\u580E\u57FC\u5810\u57E7\u5801\u580C\u57F1\u57E9\u57F0\u580D\u5804\u595C\u5A60\u5A58\u5A55\u5A67\u5A5E\u5A38\u5A35\u5A6D\u5A50\u5A5F\u5A65\u5A6C\u5A53\u5A64\u5A57\u5A43\u5A5D\u5A52\u5A44\u5A5B\u5A48\u5A8E\u5A3E\u5A4D\u5A39\u5A4C\u5A70\u5A69\u5A47\u5A51\u5A56\u5A42\u5A5C\u5B72\u5B6E\u5BC1\u5BC0\u5C59\u5D1E\u5D0B\u5D1D\u5D1A\u5D20\u5D0C\u5D28\u5D0D\u5D26\u5D25\u5D0F\u5D30\u5D12\u5D23\u5D1F\u5D2E\u5E3E
\u5E34\u5EB1\u5EB4\u5EB9\u5EB2\u5EB3\u5F36\u5F38\u5F9B\u5F96\u5F9F\u608A\u6090\u6086\u60BE\u60B0\u60BA\u60D3\u60D4\u60CF\u60E4\u60D9\u60DD\u60C8\u60B1\u60DB\u60B7\u60CA\u60BF\u60C3\u60CD\u60C0\u6332\u6365\u638A\u6382\u637D\u63BD\u639E\u63AD\u639D\u6397\u63AB\u638E\u636F\u6387\u6390\u636E\u63AF\u6375\u639C\u636D\u63AE\u637C\u63A4\u633B\u639F\u6378\u6385\u6381\u6391\u638D\u6370\u6553\u65CD\u6665\u6661\u665B\u6659\u665C\u6662\u6718\u6879\u6887\u6890\u689C\u686D\u686E\u68AE\u68AB\u6956\u686F\u68A3\u68AC\u68A9\u6875\u6874\u68B2\u688F\u6877\u6892\u687C\u686B\u6872\u68AA\u6880\u6871\u687E\u689B\u6896\u688B\u68A0\u6889\u68A4\u6878\u687B\u6891\u688C\u688A\u687D\u6B36\u6B33\u6B37\u6B38\u6B91\u6B8F\u6B8D\u6B8E\u6B8C\u6C2A\u6DC0\u6DAB\u6DB4\u6DB3\u6E74\u6DAC\u6DE9\u6DE2\u6DB7\u6DF6\u6DD4\u6E00\u6DC8\u6DE0\u6DDF\u6DD6\u6DBE\u6DE5\u6DDC\u6DDD\u6DDB\u6DF4\u6DCA\u6DBD\u6DED\u6DF0\u6DBA\u6DD5\u6DC2\u6DCF\u6DC9\u6DD0\u6DF2\u6DD3\u6DFD\u6DD7\u6DCD\u6DE3\u6DBB\u70FA\u710D\u70F7\u7117\u70F4\u710C\u70F0\u7104\u70F3\u7110\u70FC\u70FF\u7106\u7113\u7100\u70F8\u70F6\u710B\u7102\u710E\u727E\u727B\u727C\u727F\u731D\u7317\u7307\u7311\u7318\u730A\u7308\u72FF\u730F\u731E\u7388\u73F6\u73F8\u73F5\u7404\u7401\u73FD\u7407\u7400\u73FA\u73FC\u73FF\u740C\u740B\u73F4\u7408\u7564\u7563\u75CE\u75D2\u75CF\u75CB\u75CC\u75D1\u75D0\u768F\u7689\u76D3\u7739\u772F\u772D\u7731\u7732\u7734\u7733\u773D\u7725\u773B\u7735\u7848\u7852\u7849\u784D\u784A\u784C\u7826\u7845\u7850\u7964\u7967\u7969\u796A\u7963\u796B\u7961\u79BB\u79FA\u79F8\u79F6\u79F7\u7A8F\u7A94\u7A90\u7B35\u7B47\u7B34\u7B25\u7B30\u7B22\u7B24\u7B33\u7B18\u7B2A\u7B1D\u7B31\u7B2B\u7B2D\u7B2F\u7B32\u7B38\u7B1A\u7B23\u7C94\u7C98\u7C96\u7CA3\u7D35\u7D3D\u7D38\u7D36\u7D3A\u7D45\u7D2C\u7D29\u7D41\u7D47\u7D3E\u7D3F\u7D4A\u7D3B\u7D28\u7F63\u7F95\u7F9C\u7F9D\u7F9B\u7FCA\u7FCB\u7FCD\u7FD0\u7FD1\u7FC7\u7FCF\u7FC9\u801F\u801E\u801B\u8047\u8043\u8048\u8118\u8125\u8119\u811B\u812D\u811F\u812C\u811E\u8121\u8115\u8127\u811D\u8122\u8211\u8238\u8233\u823A\u8234\u8232\u8274\u
8390\u83A3\u83A8\u838D\u837A\u8373\u83A4\u8374\u838F\u8381\u8395\u8399\u8375\u8394\u83A9\u837D\u8383\u838C\u839D\u839B\u83AA\u838B\u837E\u83A5\u83AF\u8388\u8397\u83B0\u837F\u83A6\u8387\u83AE\u8376\u839A\u8659\u8656\u86BF\u86B7\u86C2\u86C1\u86C5\u86BA\u86B0\u86C8\u86B9\u86B3\u86B8\u86CC\u86B4\u86BB\u86BC\u86C3\u86BD\u86BE\u8852\u8889\u8895\u88A8\u88A2\u88AA\u889A\u8891\u88A1\u889F\u8898\u88A7\u8899\u889B\u8897\u88A4\u88AC\u888C\u8893\u888E\u8982\u89D6\u89D9\u89D5\u8A30\u8A27\u8A2C\u8A1E\u8C39\u8C3B\u8C5C\u8C5D\u8C7D\u8CA5\u8D7D\u8D7B\u8D79\u8DBC\u8DC2\u8DB9\u8DBF\u8DC1\u8ED8\u8EDE\u8EDD\u8EDC\u8ED7\u8EE0\u8EE1\u9024\u900B\u9011\u901C\u900C\u9021\u90EF\u90EA\u90F0\u90F4\u90F2\u90F3\u90D4\u90EB\u90EC\u90E9\u9156\u9158\u915A\u9153\u9155\u91EC\u91F4\u91F1\u91F3\u91F8\u91E4\u91F9\u91EA\u91EB\u91F7\u91E8\u91EE\u957A\u9586\u9588\u967C\u966D\u966B\u9671\u966F\u96BF\u976A\u9804\u98E5\u9997\u509B\u5095\u5094\u509E\u508B\u50A3\u5083\u508C\u508E\u509D\u5068\u509C\u5092\u5082\u5087\u515F\u51D4\u5312\u5311\u53A4\u53A7\u5591\u55A8\u55A5\u55AD\u5577\u5645\u55A2\u5593\u5588\u558F\u55B5\u5581\u55A3\u5592\u55A4\u557D\u558C\u55A6\u557F\u5595\u55A1\u558E\u570C\u5829\u5837\u5819\u581E\u5827\u5823\u5828\u57F5\u5848\u5825\u581C\u581B\u5833\u583F\u5836\u582E\u5839\u5838\u582D\u582C\u583B\u5961\u5AAF\u5A94\u5A9F\u5A7A\u5AA2\u5A9E\u5A78\u5AA6\u5A7C\u5AA5\u5AAC\u5A95\u5AAE\u5A37\u5A84\u5A8A\u5A97\u5A83\u5A8B\u5AA9\u5A7B\u5A7D\u5A8C\u5A9C\u5A8F\u5A93\u5A9D\u5BEA\u5BCD\u5BCB\u5BD4\u5BD1\u5BCA\u5BCE\u5C0C\u5C30\u5D37\u5D43\u5D6B\u5D41\u5D4B\u5D3F\u5D35\u5D51\u5D4E\u5D55\u5D33\u5D3A\u5D52\u5D3D\u5D31\u5D59\u5D42\u5D39\u5D49\u5D38\u5D3C\u5D32\u5D36\u5D40\u5D45\u5E44\u5E41\u5F58\u5FA6\u5FA5\u5FAB\u60C9\u60B9\u60CC\u60E2\u60CE\u60C4\u6114\u60F2\u610A\u6116\u6105\u60F5\u6113\u60F8\u60FC\u60FE\u60C1\u6103\u6118\u611D\u6110\u60FF\u6104\u610B\u624A\u6394\u63B1\u63B0\u63CE\u63E5\u63E8\u63EF\u63C3\u649D\u63F3\u63CA\u63E0\u63F6\u63D5\u63F2\u63F5\u6461\u63DF\u63BE\u63DD\u63DC\u63C4\u63D8\u63D3\u63C2\u63C7\u63
CC\u63CB\u63C8\u63F0\u63D7\u63D9\u6532\u6567\u656A\u6564\u655C\u6568\u6565\u658C\u659D\u659E\u65AE\u65D0\u65D2\u667C\u666C\u667B\u6680\u6671\u6679\u666A\u6672\u6701\u690C\u68D3\u6904\u68DC\u692A\u68EC\u68EA\u68F1\u690F\u68D6\u68F7\u68EB\u68E4\u68F6\u6913\u6910\u68F3\u68E1\u6907\u68CC\u6908\u6970\u68B4\u6911\u68EF\u68C6\u6914\u68F8\u68D0\u68FD\u68FC\u68E8\u690B\u690A\u6917\u68CE\u68C8\u68DD\u68DE\u68E6\u68F4\u68D1\u6906\u68D4\u68E9\u6915\u6925\u68C7\u6B39\u6B3B\u6B3F\u6B3C\u6B94\u6B97\u6B99\u6B95\u6BBD\u6BF0\u6BF2\u6BF3\u6C30\u6DFC\u6E46\u6E47\u6E1F\u6E49\u6E88\u6E3C\u6E3D\u6E45\u6E62\u6E2B\u6E3F\u6E41\u6E5D\u6E73\u6E1C\u6E33\u6E4B\u6E40\u6E51\u6E3B\u6E03\u6E2E\u6E5E\u6E68\u6E5C\u6E61\u6E31\u6E28\u6E60\u6E71\u6E6B\u6E39\u6E22\u6E30\u6E53\u6E65\u6E27\u6E78\u6E64\u6E77\u6E55\u6E79\u6E52\u6E66\u6E35\u6E36\u6E5A\u7120\u711E\u712F\u70FB\u712E\u7131\u7123\u7125\u7122\u7132\u711F\u7128\u713A\u711B\u724B\u725A\u7288\u7289\u7286\u7285\u728B\u7312\u730B\u7330\u7322\u7331\u7333\u7327\u7332\u732D\u7326\u7323\u7335\u730C\u742E\u742C\u7430\u742B\u7416\u741A\u7421\u742D\u7431\u7424\u7423\u741D\u7429\u7420\u7432\u74FB\u752F\u756F\u756C\u75E7\u75DA\u75E1\u75E6\u75DD\u75DF\u75E4\u75D7\u7695\u7692\u76DA\u7746\u7747\u7744\u774D\u7745\u774A\u774E\u774B\u774C\u77DE\u77EC\u7860\u7864\u7865\u785C\u786D\u7871\u786A\u786E\u7870\u7869\u7868\u785E\u7862\u7974\u7973\u7972\u7970\u7A02\u7A0A\u7A03\u7A0C\u7A04\u7A99\u7AE6\u7AE4\u7B4A\u7B3B\u7B44\u7B48\u7B4C\u7B4E\u7B40\u7B58\u7B45\u7CA2\u7C9E\u7CA8\u7CA1\u7D58\u7D6F\u7D63\u7D53\u7D56\u7D67\u7D6A\u7D4F\u7D6D\u7D5C\u7D6B\u7D52\u7D54\u7D69\u7D51\u7D5F\u7D4E\u7F3E\u7F3F\u7F65\u7F66\u7FA2\u7FA0\u7FA1\u7FD7\u8051\u804F\u8050\u80FE\u80D4\u8143\u814A\u8152\u814F\u8147\u813D\u814D\u813A\u81E6\u81EE\u81F7\u81F8\u81F9\u8204\u823C\u823D\u823F\u8275\u833B\u83CF\u83F9\u8423\u83C0\u83E8\u8412\u83E7\u83E4\u83FC\u83F6\u8410\u83C6\u83C8\u83EB\u83E3\u83BF\u8401\u83DD\u83E5\u83D8\u83FF\u83E1\u83CB\u83CE\u83D6\u83F5\u83C9\u8409\u840F\u83DE\u8411\u8406\u83C2\u83F3\u83D5
\u83FA\u83C7\u83D1\u83EA\u8413\u83C3\u83EC\u83EE\u83C4\u83FB\u83D7\u83E2\u841B\u83DB\u83FE\u86D8\u86E2\u86E6\u86D3\u86E3\u86DA\u86EA\u86DD\u86EB\u86DC\u86EC\u86E9\u86D7\u86E8\u86D1\u8848\u8856\u8855\u88BA\u88D7\u88B9\u88B8\u88C0\u88BE\u88B6\u88BC\u88B7\u88BD\u88B2\u8901\u88C9\u8995\u8998\u8997\u89DD\u89DA\u89DB\u8A4E\u8A4D\u8A39\u8A59\u8A40\u8A57\u8A58\u8A44\u8A45\u8A52\u8A48\u8A51\u8A4A\u8A4C\u8A4F\u8C5F\u8C81\u8C80\u8CBA\u8CBE\u8CB0\u8CB9\u8CB5\u8D84\u8D80\u8D89\u8DD8\u8DD3\u8DCD\u8DC7\u8DD6\u8DDC\u8DCF\u8DD5\u8DD9\u8DC8\u8DD7\u8DC5\u8EEF\u8EF7\u8EFA\u8EF9\u8EE6\u8EEE\u8EE5\u8EF5\u8EE7\u8EE8\u8EF6\u8EEB\u8EF1\u8EEC\u8EF4\u8EE9\u902D\u9034\u902F\u9106\u912C\u9104\u90FF\u90FC\u9108\u90F9\u90FB\u9101\u9100\u9107\u9105\u9103\u9161\u9164\u915F\u9162\u9160\u9201\u920A\u9225\u9203\u921A\u9226\u920F\u920C\u9200\u9212\u91FF\u91FD\u9206\u9204\u9227\u9202\u921C\u9224\u9219\u9217\u9205\u9216\u957B\u958D\u958C\u9590\u9687\u967E\u9688\u9689\u9683\u9680\u96C2\u96C8\u96C3\u96F1\u96F0\u976C\u9770\u976E\u9807\u98A9\u98EB\u9CE6\u9EF9\u4E83\u4E84\u4EB6\u50BD\u50BF\u50C6\u50AE\u50C4\u50CA\u50B4\u50C8\u50C2\u50B0\u50C1\u50BA\u50B1\u50CB\u50C9\u50B6\u50B8\u51D7\u527A\u5278\u527B\u527C\u55C3\u55DB\u55CC\u55D0\u55CB\u55CA\u55DD\u55C0\u55D4\u55C4\u55E9\u55BF\u55D2\u558D\u55CF\u55D5\u55E2\u55D6\u55C8\u55F2\u55CD\u55D9\u55C2\u5714\u5853\u5868\u5864\u584F\u584D\u5849\u586F\u5855\u584E\u585D\u5859\u5865\u585B\u583D\u5863\u5871\u58FC\u5AC7\u5AC4\u5ACB\u5ABA\u5AB8\u5AB1\u5AB5\u5AB0\u5ABF\u5AC8\u5ABB\u5AC6\u5AB7\u5AC0\u5ACA\u5AB4\u5AB6\u5ACD\u5AB9\u5A90\u5BD6\u5BD8\u5BD9\u5C1F\u5C33\u5D71\u5D63\u5D4A\u5D65\u5D72\u5D6C\u5D5E\u5D68\u5D67\u5D62\u5DF0\u5E4F\u5E4E\u5E4A\u5E4D\u5E4B\u5EC5\u5ECC\u5EC6\u5ECB\u5EC7\u5F40\u5FAF\u5FAD\u60F7\u6149\u614A\u612B\u6145\u6136\u6132\u612E\u6146\u612F\u614F\u6129\u6140\u6220\u9168\u6223\u6225\u6224\u63C5\u63F1\u63EB\u6410\u6412\u6409\u6420\u6424\u6433\u6443\u641F\u6415\u6418\u6439\u6437\u6422\u6423\u640C\u6426\u6430\u6428\u6441\u6435\u642F\u640A\u641A\u6440\u6425\u
6427\u640B\u63E7\u641B\u642E\u6421\u640E\u656F\u6592\u65D3\u6686\u668C\u6695\u6690\u668B\u668A\u6699\u6694\u6678\u6720\u6966\u695F\u6938\u694E\u6962\u6971\u693F\u6945\u696A\u6939\u6942\u6957\u6959\u697A\u6948\u6949\u6935\u696C\u6933\u693D\u6965\u68F0\u6978\u6934\u6969\u6940\u696F\u6944\u6976\u6958\u6941\u6974\u694C\u693B\u694B\u6937\u695C\u694F\u6951\u6932\u6952\u692F\u697B\u693C\u6B46\u6B45\u6B43\u6B42\u6B48\u6B41\u6B9B\uFA0D\u6BFB\u6BFC\u6BF9\u6BF7\u6BF8\u6E9B\u6ED6\u6EC8\u6E8F\u6EC0\u6E9F\u6E93\u6E94\u6EA0\u6EB1\u6EB9\u6EC6\u6ED2\u6EBD\u6EC1\u6E9E\u6EC9\u6EB7\u6EB0\u6ECD\u6EA6\u6ECF\u6EB2\u6EBE\u6EC3\u6EDC\u6ED8\u6E99\u6E92\u6E8E\u6E8D\u6EA4\u6EA1\u6EBF\u6EB3\u6ED0\u6ECA\u6E97\u6EAE\u6EA3\u7147\u7154\u7152\u7163\u7160\u7141\u715D\u7162\u7172\u7178\u716A\u7161\u7142\u7158\u7143\u714B\u7170\u715F\u7150\u7153\u7144\u714D\u715A\u724F\u728D\u728C\u7291\u7290\u728E\u733C\u7342\u733B\u733A\u7340\u734A\u7349\u7444\u744A\u744B\u7452\u7451\u7457\u7440\u744F\u7450\u744E\u7442\u7446\u744D\u7454\u74E1\u74FF\u74FE\u74FD\u751D\u7579\u7577\u6983\u75EF\u760F\u7603\u75F7\u75FE\u75FC\u75F9\u75F8\u7610\u75FB\u75F6\u75ED\u75F5\u75FD\u7699\u76B5\u76DD\u7755\u775F\u7760\u7752\u7756\u775A\u7769\u7767\u7754\u7759\u776D\u77E0\u7887\u789A\u7894\u788F\u7884\u7895\u7885\u7886\u78A1\u7883\u7879\u7899\u7880\u7896\u787B\u797C\u7982\u797D\u7979\u7A11\u7A18\u7A19\u7A12\u7A17\u7A15\u7A22\u7A13\u7A1B\u7A10\u7AA3\u7AA2\u7A9E\u7AEB\u7B66\u7B64\u7B6D\u7B74\u7B69\u7B72\u7B65\u7B73\u7B71\u7B70\u7B61\u7B78\u7B76\u7B63\u7CB2\u7CB4\u7CAF\u7D88\u7D86\u7D80\u7D8D\u7D7F\u7D85\u7D7A\u7D8E\u7D7B\u7D83\u7D7C\u7D8C\u7D94\u7D84\u7D7D\u7D92\u7F6D\u7F6B\u7F67\u7F68\u7F6C\u7FA6\u7FA5\u7FA7\u7FDB\u7FDC\u8021\u8164\u8160\u8177\u815C\u8169\u815B\u8162\u8172\u6721\u815E\u8176\u8167\u816F\u8144\u8161\u821D\u8249\u8244\u8240\u8242\u8245\u84F1\u843F\u8456\u8476\u8479\u848F\u848D\u8465\u8451\u8440\u8486\u8467\u8430\u844D\u847D\u845A\u8459\u8474\u8473\u845D\u8507\u845E\u8437\u843A\u8434\u847A\u8443\u8478\u8432\u8445\u8429\u83
D9\u844B\u842F\u8442\u842D\u845F\u8470\u8439\u844E\u844C\u8452\u846F\u84C5\u848E\u843B\u8447\u8436\u8433\u8468\u847E\u8444\u842B\u8460\u8454\u846E\u8450\u870B\u8704\u86F7\u870C\u86FA\u86D6\u86F5\u874D\u86F8\u870E\u8709\u8701\u86F6\u870D\u8705\u88D6\u88CB\u88CD\u88CE\u88DE\u88DB\u88DA\u88CC\u88D0\u8985\u899B\u89DF\u89E5\u89E4\u89E1\u89E0\u89E2\u89DC\u89E6\u8A76\u8A86\u8A7F\u8A61\u8A3F\u8A77\u8A82\u8A84\u8A75\u8A83\u8A81\u8A74\u8A7A\u8C3C\u8C4B\u8C4A\u8C65\u8C64\u8C66\u8C86\u8C84\u8C85\u8CCC\u8D68\u8D69\u8D91\u8D8C\u8D8E\u8D8F\u8D8D\u8D93\u8D94\u8D90\u8D92\u8DF0\u8DE0\u8DEC\u8DF1\u8DEE\u8DD0\u8DE9\u8DE3\u8DE2\u8DE7\u8DF2\u8DEB\u8DF4\u8F06\u8EFF\u8F01\u8F00\u8F05\u8F07\u8F08\u8F02\u8F0B\u9052\u903F\u9044\u9049\u903D\u9110\u910D\u910F\u9111\u9116\u9114\u910B\u910E\u916E\u916F\u9248\u9252\u9230\u923A\u9266\u9233\u9265\u925E\u9283\u922E\u924A\u9246\u926D\u926C\u924F\u9260\u9267\u926F\u9236\u9261\u9270\u9231\u9254\u9263\u9250\u9272\u924E\u9253\u924C\u9256\u9232\u959F\u959C\u959E\u959B\u9692\u9693\u9691\u9697\u96CE\u96FA\u96FD\u96F8\u96F5\u9773\u9777\u9778\u9772\u980F\u980D\u980E\u98AC\u98F6\u98F9\u99AF\u99B2\u99B0\u99B5\u9AAD\u9AAB\u9B5B\u9CEA\u9CED\u9CE7\u9E80\u9EFD\u50E6\u50D4\u50D7\u50E8\u50F3\u50DB\u50EA\u50DD\u50E4\u50D3\u50EC\u50F0\u50EF\u50E3\u50E0\u51D8\u5280\u5281\u52E9\u52EB\u5330\u53AC\u5627\u5615\u560C\u5612\u55FC\u560F\u561C\u5601\u5613\u5602\u55FA\u561D\u5604\u55FF\u55F9\u5889\u587C\u5890\u5898\u5886\u5881\u587F\u5874\u588B\u587A\u5887\u5891\u588E\u5876\u5882\u5888\u587B\u5894\u588F\u58FE\u596B\u5ADC\u5AEE\u5AE5\u5AD5\u5AEA\u5ADA\u5AED\u5AEB\u5AF3\u5AE2\u5AE0\u5ADB\u5AEC\u5ADE\u5ADD\u5AD9\u5AE8\u5ADF\u5B77\u5BE0\u5BE3\u5C63\u5D82\u5D80\u5D7D\u5D86\u5D7A\u5D81\u5D77\u5D8A\u5D89\u5D88\u5D7E\u5D7C\u5D8D\u5D79\u5D7F\u5E58\u5E59\u5E53\u5ED8\u5ED1\u5ED7\u5ECE\u5EDC\u5ED5\u5ED9\u5ED2\u5ED4\u5F44\u5F43\u5F6F\u5FB6\u612C\u6128\u6141\u615E\u6171\u6173\u6152\u6153\u6172\u616C\u6180\u6174\u6154\u617A\u615B\u6165\u613B\u616A\u6161\u6156\u6229\u6227\u622B\u642B\u644D\u645B
\u645D\u6474\u6476\u6472\u6473\u647D\u6475\u6466\u64A6\u644E\u6482\u645E\u645C\u644B\u6453\u6460\u6450\u647F\u643F\u646C\u646B\u6459\u6465\u6477\u6573\u65A0\u66A1\u66A0\u669F\u6705\u6704\u6722\u69B1\u69B6\u69C9\u69A0\u69CE\u6996\u69B0\u69AC\u69BC\u6991\u6999\u698E\u69A7\u698D\u69A9\u69BE\u69AF\u69BF\u69C4\u69BD\u69A4\u69D4\u69B9\u69CA\u699A\u69CF\u69B3\u6993\u69AA\u69A1\u699E\u69D9\u6997\u6990\u69C2\u69B5\u69A5\u69C6\u6B4A\u6B4D\u6B4B\u6B9E\u6B9F\u6BA0\u6BC3\u6BC4\u6BFE\u6ECE\u6EF5\u6EF1\u6F03\u6F25\u6EF8\u6F37\u6EFB\u6F2E\u6F09\u6F4E\u6F19\u6F1A\u6F27\u6F18\u6F3B\u6F12\u6EED\u6F0A\u6F36\u6F73\u6EF9\u6EEE\u6F2D\u6F40\u6F30\u6F3C\u6F35\u6EEB\u6F07\u6F0E\u6F43\u6F05\u6EFD\u6EF6\u6F39\u6F1C\u6EFC\u6F3A\u6F1F\u6F0D\u6F1E\u6F08\u6F21\u7187\u7190\u7189\u7180\u7185\u7182\u718F\u717B\u7186\u7181\u7197\u7244\u7253\u7297\u7295\u7293\u7343\u734D\u7351\u734C\u7462\u7473\u7471\u7475\u7472\u7467\u746E\u7500\u7502\u7503\u757D\u7590\u7616\u7608\u760C\u7615\u7611\u760A\u7614\u76B8\u7781\u777C\u7785\u7782\u776E\u7780\u776F\u777E\u7783\u78B2\u78AA\u78B4\u78AD\u78A8\u787E\u78AB\u789E\u78A5\u78A0\u78AC\u78A2\u78A4\u7998\u798A\u798B\u7996\u7995\u7994\u7993\u7997\u7988\u7992\u7990\u7A2B\u7A4A\u7A30\u7A2F\u7A28\u7A26\u7AA8\u7AAB\u7AAC\u7AEE\u7B88\u7B9C\u7B8A\u7B91\u7B90\u7B96\u7B8D\u7B8C\u7B9B\u7B8E\u7B85\u7B98\u5284\u7B99\u7BA4\u7B82\u7CBB\u7CBF\u7CBC\u7CBA\u7DA7\u7DB7\u7DC2\u7DA3\u7DAA\u7DC1\u7DC0\u7DC5\u7D9D\u7DCE\u7DC4\u7DC6\u7DCB\u7DCC\u7DAF\u7DB9\u7D96\u7DBC\u7D9F\u7DA6\u7DAE\u7DA9\u7DA1\u7DC9\u7F73\u7FE2\u7FE3\u7FE5\u7FDE\u8024\u805D\u805C\u8189\u8186\u8183\u8187\u818D\u818C\u818B\u8215\u8497\u84A4\u84A1\u849F\u84BA\u84CE\u84C2\u84AC\u84AE\u84AB\u84B9\u84B4\u84C1\u84CD\u84AA\u849A\u84B1\u84D0\u849D\u84A7\u84BB\u84A2\u8494\u84C7\u84CC\u849B\u84A9\u84AF\u84A8\u84D6\u8498\u84B6\u84CF\u84A0\u84D7\u84D4\u84D2\u84DB\u84B0\u8491\u8661\u8733\u8723\u8728\u876B\u8740\u872E\u871E\u8721\u8719\u871B\u8743\u872C\u8741\u873E\u8746\u8720\u8732\u872A\u872D\u873C\u8712\u873A\u8731\u8735\u8742\u8726\u
8727\u8738\u8724\u871A\u8730\u8711\u88F7\u88E7\u88F1\u88F2\u88FA\u88FE\u88EE\u88FC\u88F6\u88FB\u88F0\u88EC\u88EB\u899D\u89A1\u899F\u899E\u89E9\u89EB\u89E8\u8AAB\u8A99\u8A8B\u8A92\u8A8F\u8A96\u8C3D\u8C68\u8C69\u8CD5\u8CCF\u8CD7\u8D96\u8E09\u8E02\u8DFF\u8E0D\u8DFD\u8E0A\u8E03\u8E07\u8E06\u8E05\u8DFE\u8E00\u8E04\u8F10\u8F11\u8F0E\u8F0D\u9123\u911C\u9120\u9122\u911F\u911D\u911A\u9124\u9121\u911B\u917A\u9172\u9179\u9173\u92A5\u92A4\u9276\u929B\u927A\u92A0\u9294\u92AA\u928D\u92A6\u929A\u92AB\u9279\u9297\u927F\u92A3\u92EE\u928E\u9282\u9295\u92A2\u927D\u9288\u92A1\u928A\u9286\u928C\u9299\u92A7\u927E\u9287\u92A9\u929D\u928B\u922D\u969E\u96A1\u96FF\u9758\u977D\u977A\u977E\u9783\u9780\u9782\u977B\u9784\u9781\u977F\u97CE\u97CD\u9816\u98AD\u98AE\u9902\u9900\u9907\u999D\u999C\u99C3\u99B9\u99BB\u99BA\u99C2\u99BD\u99C7\u9AB1\u9AE3\u9AE7\u9B3E\u9B3F\u9B60\u9B61\u9B5F\u9CF1\u9CF2\u9CF5\u9EA7\u50FF\u5103\u5130\u50F8\u5106\u5107\u50F6\u50FE\u510B\u510C\u50FD\u510A\u528B\u528C\u52F1\u52EF\u5648\u5642\u564C\u5635\u5641\u564A\u5649\u5646\u5658\u565A\u5640\u5633\u563D\u562C\u563E\u5638\u562A\u563A\u571A\u58AB\u589D\u58B1\u58A0\u58A3\u58AF\u58AC\u58A5\u58A1\u58FF\u5AFF\u5AF4\u5AFD\u5AF7\u5AF6\u5B03\u5AF8\u5B02\u5AF9\u5B01\u5B07\u5B05\u5B0F\u5C67\u5D99\u5D97\u5D9F\u5D92\u5DA2\u5D93\u5D95\u5DA0\u5D9C\u5DA1\u5D9A\u5D9E\u5E69\u5E5D\u5E60\u5E5C\u7DF3\u5EDB\u5EDE\u5EE1\u5F49\u5FB2\u618B\u6183\u6179\u61B1\u61B0\u61A2\u6189\u619B\u6193\u61AF\u61AD\u619F\u6192\u61AA\u61A1\u618D\u6166\u61B3\u622D\u646E\u6470\u6496\u64A0\u6485\u6497\u649C\u648F\u648B\u648A\u648C\u64A3\u649F\u6468\u64B1\u6498\u6576\u657A\u6579\u657B\u65B2\u65B3\u66B5\u66B0\u66A9\u66B2\u66B7\u66AA\u66AF\u6A00\u6A06\u6A17\u69E5\u69F8\u6A15\u69F1\u69E4\u6A20\u69FF\u69EC\u69E2\u6A1B\u6A1D\u69FE\u6A27\u69F2\u69EE\u6A14\u69F7\u69E7\u6A40\u6A08\u69E6\u69FB\u6A0D\u69FC\u69EB\u6A09\u6A04\u6A18\u6A25\u6A0F\u69F6\u6A26\u6A07\u69F4\u6A16\u6B51\u6BA5\u6BA3\u6BA2\u6BA6\u6C01\u6C00\u6BFF\u6C02\u6F41\u6F26\u6F7E\u6F87\u6FC6\u6F92\u6F8D\u6F89\u6F8C\u6F
62\u6F4F\u6F85\u6F5A\u6F96\u6F76\u6F6C\u6F82\u6F55\u6F72\u6F52\u6F50\u6F57\u6F94\u6F93\u6F5D\u6F00\u6F61\u6F6B\u6F7D\u6F67\u6F90\u6F53\u6F8B\u6F69\u6F7F\u6F95\u6F63\u6F77\u6F6A\u6F7B\u71B2\u71AF\u719B\u71B0\u71A0\u719A\u71A9\u71B5\u719D\u71A5\u719E\u71A4\u71A1\u71AA\u719C\u71A7\u71B3\u7298\u729A\u7358\u7352\u735E\u735F\u7360\u735D\u735B\u7361\u735A\u7359\u7362\u7487\u7489\u748A\u7486\u7481\u747D\u7485\u7488\u747C\u7479\u7508\u7507\u757E\u7625\u761E\u7619\u761D\u761C\u7623\u761A\u7628\u761B\u769C\u769D\u769E\u769B\u778D\u778F\u7789\u7788\u78CD\u78BB\u78CF\u78CC\u78D1\u78CE\u78D4\u78C8\u78C3\u78C4\u78C9\u799A\u79A1\u79A0\u799C\u79A2\u799B\u6B76\u7A39\u7AB2\u7AB4\u7AB3\u7BB7\u7BCB\u7BBE\u7BAC\u7BCE\u7BAF\u7BB9\u7BCA\u7BB5\u7CC5\u7CC8\u7CCC\u7CCB\u7DF7\u7DDB\u7DEA\u7DE7\u7DD7\u7DE1\u7E03\u7DFA\u7DE6\u7DF6\u7DF1\u7DF0\u7DEE\u7DDF\u7F76\u7FAC\u7FB0\u7FAD\u7FED\u7FEB\u7FEA\u7FEC\u7FE6\u7FE8\u8064\u8067\u81A3\u819F\u819E\u8195\u81A2\u8199\u8197\u8216\u824F\u8253\u8252\u8250\u824E\u8251\u8524\u853B\u850F\u8500\u8529\u850E\u8509\u850D\u851F\u850A\u8527\u851C\u84FB\u852B\u84FA\u8508\u850C\u84F4\u852A\u84F2\u8515\u84F7\u84EB\u84F3\u84FC\u8512\u84EA\u84E9\u8516\u84FE\u8528\u851D\u852E\u8502\u84FD\u851E\u84F6\u8531\u8526\u84E7\u84E8\u84F0\u84EF\u84F9\u8518\u8520\u8530\u850B\u8519\u852F\u8662\u8756\u8763\u8764\u8777\u87E1\u8773\u8758\u8754\u875B\u8752\u8761\u875A\u8751\u875E\u876D\u876A\u8750\u874E\u875F\u875D\u876F\u876C\u877A\u876E\u875C\u8765\u874F\u877B\u8775\u8762\u8767\u8769\u885A\u8905\u890C\u8914\u890B\u8917\u8918\u8919\u8906\u8916\u8911\u890E\u8909\u89A2\u89A4\u89A3\u89ED\u89F0\u89EC\u8ACF\u8AC6\u8AB8\u8AD3\u8AD1\u8AD4\u8AD5\u8ABB\u8AD7\u8ABE\u8AC0\u8AC5\u8AD8\u8AC3\u8ABA\u8ABD\u8AD9\u8C3E\u8C4D\u8C8F\u8CE5\u8CDF\u8CD9\u8CE8\u8CDA\u8CDD\u8CE7\u8DA0\u8D9C\u8DA1\u8D9B\u8E20\u8E23\u8E25\u8E24\u8E2E\u8E15\u8E1B\u8E16\u8E11\u8E19\u8E26\u8E27\u8E14\u8E12\u8E18\u8E13\u8E1C\u8E17\u8E1A\u8F2C\u8F24\u8F18\u8F1A\u8F20\u8F23\u8F16\u8F17\u9073\u9070\u906F\u9067\u906B\u912F\u912B\u9129
\u912A\u9132\u9126\u912E\u9185\u9186\u918A\u9181\u9182\u9184\u9180\u92D0\u92C3\u92C4\u92C0\u92D9\u92B6\u92CF\u92F1\u92DF\u92D8\u92E9\u92D7\u92DD\u92CC\u92EF\u92C2\u92E8\u92CA\u92C8\u92CE\u92E6\u92CD\u92D5\u92C9\u92E0\u92DE\u92E7\u92D1\u92D3\u92B5\u92E1\u92C6\u92B4\u957C\u95AC\u95AB\u95AE\u95B0\u96A4\u96A2\u96D3\u9705\u9708\u9702\u975A\u978A\u978E\u9788\u97D0\u97CF\u981E\u981D\u9826\u9829\u9828\u9820\u981B\u9827\u98B2\u9908\u98FA\u9911\u9914\u9916\u9917\u9915\u99DC\u99CD\u99CF\u99D3\u99D4\u99CE\u99C9\u99D6\u99D8\u99CB\u99D7\u99CC\u9AB3\u9AEC\u9AEB\u9AF3\u9AF2\u9AF1\u9B46\u9B43\u9B67\u9B74\u9B71\u9B66\u9B76\u9B75\u9B70\u9B68\u9B64\u9B6C\u9CFC\u9CFA\u9CFD\u9CFF\u9CF7\u9D07\u9D00\u9CF9\u9CFB\u9D08\u9D05\u9D04\u9E83\u9ED3\u9F0F\u9F10\u511C\u5113\u5117\u511A\u5111\u51DE\u5334\u53E1\u5670\u5660\u566E\u5673\u5666\u5663\u566D\u5672\u565E\u5677\u571C\u571B\u58C8\u58BD\u58C9\u58BF\u58BA\u58C2\u58BC\u58C6\u5B17\u5B19\u5B1B\u5B21\u5B14\u5B13\u5B10\u5B16\u5B28\u5B1A\u5B20\u5B1E\u5BEF\u5DAC\u5DB1\u5DA9\u5DA7\u5DB5\u5DB0\u5DAE\u5DAA\u5DA8\u5DB2\u5DAD\u5DAF\u5DB4\u5E67\u5E68\u5E66\u5E6F\u5EE9\u5EE7\u5EE6\u5EE8\u5EE5\u5F4B\u5FBC\u619D\u61A8\u6196\u61C5\u61B4\u61C6\u61C1\u61CC\u61BA\u61BF\u61B8\u618C\u64D7\u64D6\u64D0\u64CF\u64C9\u64BD\u6489\u64C3\u64DB\u64F3\u64D9\u6533\u657F\u657C\u65A2\u66C8\u66BE\u66C0\u66CA\u66CB\u66CF\u66BD\u66BB\u66BA\u66CC\u6723\u6A34\u6A66\u6A49\u6A67\u6A32\u6A68\u6A3E\u6A5D\u6A6D\u6A76\u6A5B\u6A51\u6A28\u6A5A\u6A3B\u6A3F\u6A41\u6A6A\u6A64\u6A50\u6A4F\u6A54\u6A6F\u6A69\u6A60\u6A3C\u6A5E\u6A56\u6A55\u6A4D\u6A4E\u6A46\u6B55\u6B54\u6B56\u6BA7\u6BAA\u6BAB\u6BC8\u6BC7\u6C04\u6C03\u6C06\u6FAD\u6FCB\u6FA3\u6FC7\u6FBC\u6FCE\u6FC8\u6F5E\u6FC4\u6FBD\u6F9E\u6FCA\u6FA8\u7004\u6FA5\u6FAE\u6FBA\u6FAC\u6FAA\u6FCF\u6FBF\u6FB8\u6FA2\u6FC9\u6FAB\u6FCD\u6FAF\u6FB2\u6FB0\u71C5\u71C2\u71BF\u71B8\u71D6\u71C0\u71C1\u71CB\u71D4\u71CA\u71C7\u71CF\u71BD\u71D8\u71BC\u71C6\u71DA\u71DB\u729D\u729E\u7369\u7366\u7367\u736C\u7365\u736B\u736A\u747F\u749A\u74A0\u7494\u7492\u7495\u74A1\u750B\u
7580\u762F\u762D\u7631\u763D\u7633\u763C\u7635\u7632\u7630\u76BB\u76E6\u779A\u779D\u77A1\u779C\u779B\u77A2\u77A3\u7795\u7799\u7797\u78DD\u78E9\u78E5\u78EA\u78DE\u78E3\u78DB\u78E1\u78E2\u78ED\u78DF\u78E0\u79A4\u7A44\u7A48\u7A47\u7AB6\u7AB8\u7AB5\u7AB1\u7AB7\u7BDE\u7BE3\u7BE7\u7BDD\u7BD5\u7BE5\u7BDA\u7BE8\u7BF9\u7BD4\u7BEA\u7BE2\u7BDC\u7BEB\u7BD8\u7BDF\u7CD2\u7CD4\u7CD7\u7CD0\u7CD1\u7E12\u7E21\u7E17\u7E0C\u7E1F\u7E20\u7E13\u7E0E\u7E1C\u7E15\u7E1A\u7E22\u7E0B\u7E0F\u7E16\u7E0D\u7E14\u7E25\u7E24\u7F43\u7F7B\u7F7C\u7F7A\u7FB1\u7FEF\u802A\u8029\u806C\u81B1\u81A6\u81AE\u81B9\u81B5\u81AB\u81B0\u81AC\u81B4\u81B2\u81B7\u81A7\u81F2\u8255\u8256\u8257\u8556\u8545\u856B\u854D\u8553\u8561\u8558\u8540\u8546\u8564\u8541\u8562\u8544\u8551\u8547\u8563\u853E\u855B\u8571\u854E\u856E\u8575\u8555\u8567\u8560\u858C\u8566\u855D\u8554\u8565\u856C\u8663\u8665\u8664\u879B\u878F\u8797\u8793\u8792\u8788\u8781\u8796\u8798\u8779\u8787\u87A3\u8785\u8790\u8791\u879D\u8784\u8794\u879C\u879A\u8789\u891E\u8926\u8930\u892D\u892E\u8927\u8931\u8922\u8929\u8923\u892F\u892C\u891F\u89F1\u8AE0\u8AE2\u8AF2\u8AF4\u8AF5\u8ADD\u8B14\u8AE4\u8ADF\u8AF0\u8AC8\u8ADE\u8AE1\u8AE8\u8AFF\u8AEF\u8AFB\u8C91\u8C92\u8C90\u8CF5\u8CEE\u8CF1\u8CF0\u8CF3\u8D6C\u8D6E\u8DA5\u8DA7\u8E33\u8E3E\u8E38\u8E40\u8E45\u8E36\u8E3C\u8E3D\u8E41\u8E30\u8E3F\u8EBD\u8F36\u8F2E\u8F35\u8F32\u8F39\u8F37\u8F34\u9076\u9079\u907B\u9086\u90FA\u9133\u9135\u9136\u9193\u9190\u9191\u918D\u918F\u9327\u931E\u9308\u931F\u9306\u930F\u937A\u9338\u933C\u931B\u9323\u9312\u9301\u9346\u932D\u930E\u930D\u92CB\u931D\u92FA\u9325\u9313\u92F9\u92F7\u9334\u9302\u9324\u92FF\u9329\u9339\u9335\u932A\u9314\u930C\u930B\u92FE\u9309\u9300\u92FB\u9316\u95BC\u95CD\u95BE\u95B9\u95BA\u95B6\u95BF\u95B5\u95BD\u96A9\u96D4\u970B\u9712\u9710\u9799\u9797\u9794\u97F0\u97F8\u9835\u982F\u9832\u9924\u991F\u9927\u9929\u999E\u99EE\u99EC\u99E5\u99E4\u99F0\u99E3\u99EA\u99E9\u99E7\u9AB9\u9ABF\u9AB4\u9ABB\u9AF6\u9AFA\u9AF9\u9AF7\u9B33\u9B80\u9B85\u9B87\u9B7C\u9B7E\u9B7B\u9B82\u9B93\u9B92\u9B90\u9B
7A\u9B95\u9B7D\u9B88\u9D25\u9D17\u9D20\u9D1E\u9D14\u9D29\u9D1D\u9D18\u9D22\u9D10\u9D19\u9D1F\u9E88\u9E86\u9E87\u9EAE\u9EAD\u9ED5\u9ED6\u9EFA\u9F12\u9F3D\u5126\u5125\u5122\u5124\u5120\u5129\u52F4\u5693\u568C\u568D\u5686\u5684\u5683\u567E\u5682\u567F\u5681\u58D6\u58D4\u58CF\u58D2\u5B2D\u5B25\u5B32\u5B23\u5B2C\u5B27\u5B26\u5B2F\u5B2E\u5B7B\u5BF1\u5BF2\u5DB7\u5E6C\u5E6A\u5FBE\u5FBB\u61C3\u61B5\u61BC\u61E7\u61E0\u61E5\u61E4\u61E8\u61DE\u64EF\u64E9\u64E3\u64EB\u64E4\u64E8\u6581\u6580\u65B6\u65DA\u66D2\u6A8D\u6A96\u6A81\u6AA5\u6A89\u6A9F\u6A9B\u6AA1\u6A9E\u6A87\u6A93\u6A8E\u6A95\u6A83\u6AA8\u6AA4\u6A91\u6A7F\u6AA6\u6A9A\u6A85\u6A8C\u6A92\u6B5B\u6BAD\u6C09\u6FCC\u6FA9\u6FF4\u6FD4\u6FE3\u6FDC\u6FED\u6FE7\u6FE6\u6FDE\u6FF2\u6FDD\u6FE2\u6FE8\u71E1\u71F1\u71E8\u71F2\u71E4\u71F0\u71E2\u7373\u736E\u736F\u7497\u74B2\u74AB\u7490\u74AA\u74AD\u74B1\u74A5\u74AF\u7510\u7511\u7512\u750F\u7584\u7643\u7648\u7649\u7647\u76A4\u76E9\u77B5\u77AB\u77B2\u77B7\u77B6\u77B4\u77B1\u77A8\u77F0\u78F3\u78FD\u7902\u78FB\u78FC\u78F2\u7905\u78F9\u78FE\u7904\u79AB\u79A8\u7A5C\u7A5B\u7A56\u7A58\u7A54\u7A5A\u7ABE\u7AC0\u7AC1\u7C05\u7C0F\u7BF2\u7C00\u7BFF\u7BFB\u7C0E\u7BF4\u7C0B\u7BF3\u7C02\u7C09\u7C03\u7C01\u7BF8\u7BFD\u7C06\u7BF0\u7BF1\u7C10\u7C0A\u7CE8\u7E2D\u7E3C\u7E42\u7E33\u9848\u7E38\u7E2A\u7E49\u7E40\u7E47\u7E29\u7E4C\u7E30\u7E3B\u7E36\u7E44\u7E3A\u7F45\u7F7F\u7F7E\u7F7D\u7FF4\u7FF2\u802C\u81BB\u81C4\u81CC\u81CA\u81C5\u81C7\u81BC\u81E9\u825B\u825A\u825C\u8583\u8580\u858F\u85A7\u8595\u85A0\u858B\u85A3\u857B\u85A4\u859A\u859E\u8577\u857C\u8589\u85A1\u857A\u8578\u8557\u858E\u8596\u8586\u858D\u8599\u859D\u8581\u85A2\u8582\u8588\u8585\u8579\u8576\u8598\u8590\u859F\u8668\u87BE\u87AA\u87AD\u87C5\u87B0\u87AC\u87B9\u87B5\u87BC\u87AE\u87C9\u87C3\u87C2\u87CC\u87B7\u87AF\u87C4\u87CA\u87B4\u87B6\u87BF\u87B8\u87BD\u87DE\u87B2\u8935\u8933\u893C\u893E\u8941\u8952\u8937\u8942\u89AD\u89AF\u89AE\u89F2\u89F3\u8B1E\u8B18\u8B16\u8B11\u8B05\u8B0B\u8B22\u8B0F\u8B12\u8B15\u8B07\u8B0D\u8B08\u8B06\u8B1C\u8B13\u8B1A\u8C4F\u8C70
\u8C72\u8C71\u8C6F\u8C95\u8C94\u8CF9\u8D6F\u8E4E\u8E4D\u8E53\u8E50\u8E4C\u8E47\u8F43\u8F40\u9085\u907E\u9138\u919A\u91A2\u919B\u9199\u919F\u91A1\u919D\u91A0\u93A1\u9383\u93AF\u9364\u9356\u9347\u937C\u9358\u935C\u9376\u9349\u9350\u9351\u9360\u936D\u938F\u934C\u936A\u9379\u9357\u9355\u9352\u934F\u9371\u9377\u937B\u9361\u935E\u9363\u9367\u9380\u934E\u9359\u95C7\u95C0\u95C9\u95C3\u95C5\u95B7\u96AE\u96B0\u96AC\u9720\u971F\u9718\u971D\u9719\u979A\u97A1\u979C\u979E\u979D\u97D5\u97D4\u97F1\u9841\u9844\u984A\u9849\u9845\u9843\u9925\u992B\u992C\u992A\u9933\u9932\u992F\u992D\u9931\u9930\u9998\u99A3\u99A1\u9A02\u99FA\u99F4\u99F7\u99F9\u99F8\u99F6\u99FB\u99FD\u99FE\u99FC\u9A03\u9ABE\u9AFE\u9AFD\u9B01\u9AFC\u9B48\u9B9A\u9BA8\u9B9E\u9B9B\u9BA6\u9BA1\u9BA5\u9BA4\u9B86\u9BA2\u9BA0\u9BAF\u9D33\u9D41\u9D67\u9D36\u9D2E\u9D2F\u9D31\u9D38\u9D30\u9D45\u9D42\u9D43\u9D3E\u9D37\u9D40\u9D3D\u7FF5\u9D2D\u9E8A\u9E89\u9E8D\u9EB0\u9EC8\u9EDA\u9EFB\u9EFF\u9F24\u9F23\u9F22\u9F54\u9FA0\u5131\u512D\u512E\u5698\u569C\u5697\u569A\u569D\u5699\u5970\u5B3C\u5C69\u5C6A\u5DC0\u5E6D\u5E6E\u61D8\u61DF\u61ED\u61EE\u61F1\u61EA\u61F0\u61EB\u61D6\u61E9\u64FF\u6504\u64FD\u64F8\u6501\u6503\u64FC\u6594\u65DB\u66DA\u66DB\u66D8\u6AC5\u6AB9\u6ABD\u6AE1\u6AC6\u6ABA\u6AB6\u6AB7\u6AC7\u6AB4\u6AAD\u6B5E\u6BC9\u6C0B\u7007\u700C\u700D\u7001\u7005\u7014\u700E\u6FFF\u7000\u6FFB\u7026\u6FFC\u6FF7\u700A\u7201\u71FF\u71F9\u7203\u71FD\u7376\u74B8\u74C0\u74B5\u74C1\u74BE\u74B6\u74BB\u74C2\u7514\u7513\u765C\u7664\u7659\u7650\u7653\u7657\u765A\u76A6\u76BD\u76EC\u77C2\u77BA\u78FF\u790C\u7913\u7914\u7909\u7910\u7912\u7911\u79AD\u79AC\u7A5F\u7C1C\u7C29\u7C19\u7C20\u7C1F\u7C2D\u7C1D\u7C26\u7C28\u7C22\u7C25\u7C30\u7E5C\u7E50\u7E56\u7E63\u7E58\u7E62\u7E5F\u7E51\u7E60\u7E57\u7E53\u7FB5\u7FB3\u7FF7\u7FF8\u8075\u81D1\u81D2\u81D0\u825F\u825E\u85B4\u85C6\u85C0\u85C3\u85C2\u85B3\u85B5\u85BD\u85C7\u85C4\u85BF\u85CB\u85CE\u85C8\u85C5\u85B1\u85B6\u85D2\u8624\u85B8\u85B7\u85BE\u8669\u87E7\u87E6\u87E2\u87DB\u87EB\u87EA\u87E5\u87DF\u87F3\u87E4\u87D4\u
87DC\u87D3\u87ED\u87D8\u87E3\u87A4\u87D7\u87D9\u8801\u87F4\u87E8\u87DD\u8953\u894B\u894F\u894C\u8946\u8950\u8951\u8949\u8B2A\u8B27\u8B23\u8B33\u8B30\u8B35\u8B47\u8B2F\u8B3C\u8B3E\u8B31\u8B25\u8B37\u8B26\u8B36\u8B2E\u8B24\u8B3B\u8B3D\u8B3A\u8C42\u8C75\u8C99\u8C98\u8C97\u8CFE\u8D04\u8D02\u8D00\u8E5C\u8E62\u8E60\u8E57\u8E56\u8E5E\u8E65\u8E67\u8E5B\u8E5A\u8E61\u8E5D\u8E69\u8E54\u8F46\u8F47\u8F48\u8F4B\u9128\u913A\u913B\u913E\u91A8\u91A5\u91A7\u91AF\u91AA\u93B5\u938C\u9392\u93B7\u939B\u939D\u9389\u93A7\u938E\u93AA\u939E\u93A6\u9395\u9388\u9399\u939F\u938D\u93B1\u9391\u93B2\u93A4\u93A8\u93B4\u93A3\u93A5\u95D2\u95D3\u95D1\u96B3\u96D7\u96DA\u5DC2\u96DF\u96D8\u96DD\u9723\u9722\u9725\u97AC\u97AE\u97A8\u97AB\u97A4\u97AA\u97A2\u97A5\u97D7\u97D9\u97D6\u97D8\u97FA\u9850\u9851\u9852\u98B8\u9941\u993C\u993A\u9A0F\u9A0B\u9A09\u9A0D\u9A04\u9A11\u9A0A\u9A05\u9A07\u9A06\u9AC0\u9ADC\u9B08\u9B04\u9B05\u9B29\u9B35\u9B4A\u9B4C\u9B4B\u9BC7\u9BC6\u9BC3\u9BBF\u9BC1\u9BB5\u9BB8\u9BD3\u9BB6\u9BC4\u9BB9\u9BBD\u9D5C\u9D53\u9D4F\u9D4A\u9D5B\u9D4B\u9D59\u9D56\u9D4C\u9D57\u9D52\u9D54\u9D5F\u9D58\u9D5A\u9E8E\u9E8C\u9EDF\u9F01\u9F00\u9F16\u9F25\u9F2B\u9F2A\u9F29\u9F28\u9F4C\u9F55\u5134\u5135\u5296\u52F7\u53B4\u56AB\u56AD\u56A6\u56A7\u56AA\u56AC\u58DA\u58DD\u58DB\u5912\u5B3D\u5B3E\u5B3F\u5DC3\u5E70\u5FBF\u61FB\u6507\u6510\u650D\u6509\u650C\u650E\u6584\u65DE\u65DD\u66DE\u6AE7\u6AE0\u6ACC\u6AD1\u6AD9\u6ACB\u6ADF\u6ADC\u6AD0\u6AEB\u6ACF\u6ACD\u6ADE\u6B60\u6BB0\u6C0C\u7019\u7027\u7020\u7016\u702B\u7021\u7022\u7023\u7029\u7017\u7024\u701C\u702A\u720C\u720A\u7207\u7202\u7205\u72A5\u72A6\u72A4\u72A3\u72A1\u74CB\u74C5\u74B7\u74C3\u7516\u7660\u77C9\u77CA\u77C4\u77F1\u791D\u791B\u7921\u791C\u7917\u791E\u79B0\u7A67\u7A68\u7C33\u7C3C\u7C39\u7C2C\u7C3B\u7CEC\u7CEA\u7E76\u7E75\u7E78\u7E70\u7E77\u7E6F\u7E7A\u7E72\u7E74\u7E68\u7F4B\u7F4A\u7F83\u7F86\u7FB7\u7FFD\u7FFE\u8078\u81D7\u81D5\u8264\u8261\u8263\u85EB\u85F1\u85ED\u85D9\u85E1\u85E8\u85DA\u85D7\u85EC\u85F2\u85F8\u85D8\u85DF\u85E3\u85DC\u85D1\u85F0\u85E6\u85EF\u85
DE\u85E2\u8800\u87FA\u8803\u87F6\u87F7\u8809\u880C\u880B\u8806\u87FC\u8808\u87FF\u880A\u8802\u8962\u895A\u895B\u8957\u8961\u895C\u8958\u895D\u8959\u8988\u89B7\u89B6\u89F6\u8B50\u8B48\u8B4A\u8B40\u8B53\u8B56\u8B54\u8B4B\u8B55\u8B51\u8B42\u8B52\u8B57\u8C43\u8C77\u8C76\u8C9A\u8D06\u8D07\u8D09\u8DAC\u8DAA\u8DAD\u8DAB\u8E6D\u8E78\u8E73\u8E6A\u8E6F\u8E7B\u8EC2\u8F52\u8F51\u8F4F\u8F50\u8F53\u8FB4\u9140\u913F\u91B0\u91AD\u93DE\u93C7\u93CF\u93C2\u93DA\u93D0\u93F9\u93EC\u93CC\u93D9\u93A9\u93E6\u93CA\u93D4\u93EE\u93E3\u93D5\u93C4\u93CE\u93C0\u93D2\u93E7\u957D\u95DA\u95DB\u96E1\u9729\u972B\u972C\u9728\u9726\u97B3\u97B7\u97B6\u97DD\u97DE\u97DF\u985C\u9859\u985D\u9857\u98BF\u98BD\u98BB\u98BE\u9948\u9947\u9943\u99A6\u99A7\u9A1A\u9A15\u9A25\u9A1D\u9A24\u9A1B\u9A22\u9A20\u9A27\u9A23\u9A1E\u9A1C\u9A14\u9AC2\u9B0B\u9B0A\u9B0E\u9B0C\u9B37\u9BEA\u9BEB\u9BE0\u9BDE\u9BE4\u9BE6\u9BE2\u9BF0\u9BD4\u9BD7\u9BEC\u9BDC\u9BD9\u9BE5\u9BD5\u9BE1\u9BDA\u9D77\u9D81\u9D8A\u9D84\u9D88\u9D71\u9D80\u9D78\u9D86\u9D8B\u9D8C\u9D7D\u9D6B\u9D74\u9D75\u9D70\u9D69\u9D85\u9D73\u9D7B\u9D82\u9D6F\u9D79\u9D7F\u9D87\u9D68\u9E94\u9E91\u9EC0\u9EFC\u9F2D\u9F40\u9F41\u9F4D\u9F56\u9F57\u9F58\u5337\u56B2\u56B5\u56B3\u58E3\u5B45\u5DC6\u5DC7\u5EEE\u5EEF\u5FC0\u5FC1\u61F9\u6517\u6516\u6515\u6513\u65DF\u66E8\u66E3\u66E4\u6AF3\u6AF0\u6AEA\u6AE8\u6AF9\u6AF1\u6AEE\u6AEF\u703C\u7035\u702F\u7037\u7034\u7031\u7042\u7038\u703F\u703A\u7039\u7040\u703B\u7033\u7041\u7213\u7214\u72A8\u737D\u737C\u74BA\u76AB\u76AA\u76BE\u76ED\u77CC\u77CE\u77CF\u77CD\u77F2\u7925\u7923\u7927\u7928\u7924\u7929\u79B2\u7A6E\u7A6C\u7A6D\u7AF7\u7C49\u7C48\u7C4A\u7C47\u7C45\u7CEE\u7E7B\u7E7E\u7E81\u7E80\u7FBA\u7FFF\u8079\u81DB\u81D9\u820B\u8268\u8269\u8622\u85FF\u8601\u85FE\u861B\u8600\u85F6\u8604\u8609\u8605\u860C\u85FD\u8819\u8810\u8811\u8817\u8813\u8816\u8963\u8966\u89B9\u89F7\u8B60\u8B6A\u8B5D\u8B68\u8B63\u8B65\u8B67\u8B6D\u8DAE\u8E86\u8E88\u8E84\u8F59\u8F56\u8F57\u8F55\u8F58\u8F5A\u908D\u9143\u9141\u91B7\u91B5\u91B2\u91B3\u940B\u9413\u93FB\u9420\u940F\u9414
\u93FE\u9415\u9410\u9428\u9419\u940D\u93F5\u9400\u93F7\u9407\u940E\u9416\u9412\u93FA\u9409\u93F8\u940A\u93FF\u93FC\u940C\u93F6\u9411\u9406\u95DE\u95E0\u95DF\u972E\u972F\u97B9\u97BB\u97FD\u97FE\u9860\u9862\u9863\u985F\u98C1\u98C2\u9950\u994E\u9959\u994C\u994B\u9953\u9A32\u9A34\u9A31\u9A2C\u9A2A\u9A36\u9A29\u9A2E\u9A38\u9A2D\u9AC7\u9ACA\u9AC6\u9B10\u9B12\u9B11\u9C0B\u9C08\u9BF7\u9C05\u9C12\u9BF8\u9C40\u9C07\u9C0E\u9C06\u9C17\u9C14\u9C09\u9D9F\u9D99\u9DA4\u9D9D\u9D92\u9D98\u9D90\u9D9B\u9DA0\u9D94\u9D9C\u9DAA\u9D97\u9DA1\u9D9A\u9DA2\u9DA8\u9D9E\u9DA3\u9DBF\u9DA9\u9D96\u9DA6\u9DA7\u9E99\u9E9B\u9E9A\u9EE5\u9EE4\u9EE7\u9EE6\u9F30\u9F2E\u9F5B\u9F60\u9F5E\u9F5D\u9F59\u9F91\u513A\u5139\u5298\u5297\u56C3\u56BD\u56BE\u5B48\u5B47\u5DCB\u5DCF\u5EF1\u61FD\u651B\u6B02\u6AFC\u6B03\u6AF8\u6B00\u7043\u7044\u704A\u7048\u7049\u7045\u7046\u721D\u721A\u7219\u737E\u7517\u766A\u77D0\u792D\u7931\u792F\u7C54\u7C53\u7CF2\u7E8A\u7E87\u7E88\u7E8B\u7E86\u7E8D\u7F4D\u7FBB\u8030\u81DD\u8618\u862A\u8626\u861F\u8623\u861C\u8619\u8627\u862E\u8621\u8620\u8629\u861E\u8625\u8829\u881D\u881B\u8820\u8824\u881C\u882B\u884A\u896D\u8969\u896E\u896B\u89FA\u8B79\u8B78\u8B45\u8B7A\u8B7B\u8D10\u8D14\u8DAF\u8E8E\u8E8C\u8F5E\u8F5B\u8F5D\u9146\u9144\u9145\u91B9\u943F\u943B\u9436\u9429\u943D\u943C\u9430\u9439\u942A\u9437\u942C\u9440\u9431\u95E5\u95E4\u95E3\u9735\u973A\u97BF\u97E1\u9864\u98C9\u98C6\u98C0\u9958\u9956\u9A39\u9A3D\u9A46\u9A44\u9A42\u9A41\u9A3A\u9A3F\u9ACD\u9B15\u9B17\u9B18\u9B16\u9B3A\u9B52\u9C2B\u9C1D\u9C1C\u9C2C\u9C23\u9C28\u9C29\u9C24\u9C21\u9DB7\u9DB6\u9DBC\u9DC1\u9DC7\u9DCA\u9DCF\u9DBE\u9DC5\u9DC3\u9DBB\u9DB5\u9DCE\u9DB9\u9DBA\u9DAC\u9DC8\u9DB1\u9DAD\u9DCC\u9DB3\u9DCD\u9DB2\u9E7A\u9E9C\u9EEB\u9EEE\u9EED\u9F1B\u9F18\u9F1A\u9F31\u9F4E\u9F65\u9F64\u9F92\u4EB9\u56C6\u56C5\u56CB\u5971\u5B4B\u5B4C\u5DD5\u5DD1\u5EF2\u6521\u6520\u6526\u6522\u6B0B\u6B08\u6B09\u6C0D\u7055\u7056\u7057\u7052\u721E\u721F\u72A9\u737F\u74D8\u74D5\u74D9\u74D7\u766D\u76AD\u7935\u79B4\u7A70\u7A71\u7C57\u7C5C\u7C59\u7C5B\u7C5A\u7CF4\u
7CF1\u7E91\u7F4F\u7F87\u81DE\u826B\u8634\u8635\u8633\u862C\u8632\u8636\u882C\u8828\u8826\u882A\u8825\u8971\u89BF\u89BE\u89FB\u8B7E\u8B84\u8B82\u8B86\u8B85\u8B7F\u8D15\u8E95\u8E94\u8E9A\u8E92\u8E90\u8E96\u8E97\u8F60\u8F62\u9147\u944C\u9450\u944A\u944B\u944F\u9447\u9445\u9448\u9449\u9446\u973F\u97E3\u986A\u9869\u98CB\u9954\u995B\u9A4E\u9A53\u9A54\u9A4C\u9A4F\u9A48\u9A4A\u9A49\u9A52\u9A50\u9AD0\u9B19\u9B2B\u9B3B\u9B56\u9B55\u9C46\u9C48\u9C3F\u9C44\u9C39\u9C33\u9C41\u9C3C\u9C37\u9C34\u9C32\u9C3D\u9C36\u9DDB\u9DD2\u9DDE\u9DDA\u9DCB\u9DD0\u9DDC\u9DD1\u9DDF\u9DE9\u9DD9\u9DD8\u9DD6\u9DF5\u9DD5\u9DDD\u9EB6\u9EF0\u9F35\u9F33\u9F32\u9F42\u9F6B\u9F95\u9FA2\u513D\u5299\u58E8\u58E7\u5972\u5B4D\u5DD8\u882F\u5F4F\u6201\u6203\u6204\u6529\u6525\u6596\u66EB\u6B11\u6B12\u6B0F\u6BCA\u705B\u705A\u7222\u7382\u7381\u7383\u7670\u77D4\u7C67\u7C66\u7E95\u826C\u863A\u8640\u8639\u863C\u8631\u863B\u863E\u8830\u8832\u882E\u8833\u8976\u8974\u8973\u89FE\u8B8C\u8B8E\u8B8B\u8B88\u8C45\u8D19\u8E98\u8F64\u8F63\u91BC\u9462\u9455\u945D\u9457\u945E\u97C4\u97C5\u9800\u9A56\u9A59\u9B1E\u9B1F\u9B20\u9C52\u9C58\u9C50\u9C4A\u9C4D\u9C4B\u9C55\u9C59\u9C4C\u9C4E\u9DFB\u9DF7\u9DEF\u9DE3\u9DEB\u9DF8\u9DE4\u9DF6\u9DE1\u9DEE\u9DE6\u9DF2\u9DF0\u9DE2\u9DEC\u9DF4\u9DF3\u9DE8\u9DED\u9EC2\u9ED0\u9EF2\u9EF3\u9F06\u9F1C\u9F38\u9F37\u9F36\u9F43\u9F4F\u9F71\u9F70\u9F6E\u9F6F\u56D3\u56CD\u5B4E\u5C6D\u652D\u66ED\u66EE\u6B13\u705F\u7061\u705D\u7060\u7223\u74DB\u74E5\u77D5\u7938\u79B7\u79B6\u7C6A\u7E97\u7F89\u826D\u8643\u8838\u8837\u8835\u884B\u8B94\u8B95\u8E9E\u8E9F\u8EA0\u8E9D\u91BE\u91BD\u91C2\u946B\u9468\u9469\u96E5\u9746\u9743\u9747\u97C7\u97E5\u9A5E\u9AD5\u9B59\u9C63\u9C67\u9C66\u9C62\u9C5E\u9C60\u9E02\u9DFE\u9E07\u9E03\u9E06\u9E05\u9E00\u9E01\u9E09\u9DFF\u9DFD\u9E04\u9EA0\u9F1E\u9F46\u9F74\u9F75\u9F76\u56D4\u652E\u65B8\u6B18\u6B19\u6B17\u6B1A\u7062\u7226\u72AA\u77D8\u77D9\u7939\u7C69\u7C6B\u7CF6\u7E9A\u7E98\u7E9B\u7E99\u81E0\u81E1\u8646\u8647\u8648\u8979\u897A\u897C\u897B\u89FF\u8B98\u8B99\u8EA5\u8EA4\u8EA3\u946E\u946D\u94
6F\u9471\u9473\u9749\u9872\u995F\u9C68\u9C6E\u9C6D\u9E0B\u9E0D\u9E10\u9E0F\u9E12\u9E11\u9EA1\u9EF5\u9F09\u9F47\u9F78\u9F7B\u9F7A\u9F79\u571E\u7066\u7C6F\u883C\u8DB2\u8EA6\u91C3\u9474\u9478\u9476\u9475\u9A60\u9C74\u9C73\u9C71\u9C75\u9E14\u9E13\u9EF6\u9F0A\u9FA4\u7068\u7065\u7CF7\u866A\u883E\u883D\u883F\u8B9E\u8C9C\u8EA9\u8EC9\u974B\u9873\u9874\u98CC\u9961\u99AB\u9A64\u9A66\u9A67\u9B24\u9E15\u9E17\u9F48\u6207\u6B1E\u7227\u864C\u8EA8\u9482\u9480\u9481\u9A69\u9A68\u9B2E\u9E19\u7229\u864B\u8B9F\u9483\u9C79\u9EB7\u7675\u9A6B\u9C7A\u9E1D\u7069\u706A\u9EA4\u9F7E\u9F49\u9F98\u7881\u92B9\u88CF\u58BB\u6052\u7CA7\u5AFA\u2554\u2566\u2557\u2560\u256C\u2563\u255A\u2569\u255D\u2552\u2564\u2555\u255E\u256A\u2561\u2558\u2567\u255B\u2553\u2565\u2556\u255F\u256B\u2562\u2559\u2568\u255C\u2551\u2550\u256D\u256E\u2570\u256F\uFFED\u0547\u92DB\u05DF\u3FC5\u854C\u42B5\u73EF\u51B5\u3649\u4942\u89E4\u9344\u19DB\u82EE\u3CC8\u783C\u6744\u62DF\u4933\u89AA\u02A0\u6BB3\u1305\u4FAB\u24ED\u5008\u6D29\u7A84\u3600\u4AB1\u2513\u5029\u037E\u5FA4\u0380\u0347\u6EDB\u041F\u507D\u5101\u347A\u510E\u986C\u3743\u8416\u49A4\u0487\u5160\u33B4\u516A\u0BFF\u20FC\u02E5\u2530\u058E\u3233\u1983\u5B82\u877D\u05B3\u3C99\u51B2\u51B8\u9D34\u51C9\u51CF\u51D1\u3CDC\u51D3\u4AA6\u51B3\u51E2\u5342\u51ED\u83CD\u693E\u372D\u5F7B\u520B\u5226\u523C\u52B5\u5257\u5294\u52B9\u52C5\u7C15\u8542\u52E0\u860D\u6B13\u5305\u8ADE\u5549\u6ED9\u3F80\u0954\u3FEC\u5333\u5344\u0BE2\u6CCB\u1726\u681B\u73D5\u604A\u3EAA\u38CC\u16E8\u71DD\u44A2\u536D\u5374\u86AB\u537E\u537F\u1596\u1613\u77E6\u5393\u8A9B\u53A0\u53AB\u53AE\u73A7\u5772\u3F59\u739C\u53C1\u53C5\u6C49\u4E49\u57FE\u53D9\u3AAB\u0B8F\u53E0\u3FEB\u2DA3\u53F6\u0C77\u5413\u7079\u552B\u6657\u6D5B\u546D\u6B53\u0D74\u555D\u548F\u54A4\u47A6\u170D\u0EDD\u3DB4\u0D4D\u89BC\u2698\u5547\u4CED\u542F\u7417\u5586\u55A9\u5605\u18D7\u403A\u4552\u4435\u66B3\u10B4\u5637\u66CD\u328A\u66A4\u66AD\u564D\u564F\u78F1\u56F1\u9787\u53FE\u5700\u56EF\u56ED\u8B66\u3623\u124F\u5746\u41A5\u6C6E\u708B\u5742\u36B1\u6C7E\u57E6
\u1416\u5803\u1454\u4363\u5826\u4BF5\u585C\u58AA\u3561\u58E0\u58DC\u123C\u58FB\u5BFF\u5743\uA150\u4278\u93D3\u35A1\u591F\u68A6\u36C3\u6E59\u163E\u5A24\u5553\u1692\u8505\u59C9\u0D4E\u6C81\u6D2A\u17DC\u59D9\u17FB\u17B2\u6DA6\u6D71\u1828\u16D5\u59F9\u6E45\u5AAB\u5A63\u36E6\u49A9\u5A77\u3708\u5A96\u7465\u5AD3\u6FA1\u2554\u3D85\u1911\u3732\u16B8\u5E83\u52D0\u5B76\u6588\u5B7C\u7A0E\u4004\u485D\u0204\u5BD5\u6160\u1A34\u59CC\u05A5\u5BF3\u5B9D\u4D10\u5C05\u1B44\u5C13\u73CE\u5C14\u1CA5\u6B28\u5C49\u48DD\u5C85\u5CE9\u5CEF\u5D8B\u1DF9\u1E37\u5D10\u5D18\u5D46\u1EA4\u5CBA\u5DD7\u82FC\u382D\u4901\u2049\u2173\u8287\u3836\u3BC2\u5E2E\u6A8A\u5E75\u5E7A\u44BC\u0CD3\u53A6\u4EB7\u5ED0\u53A8\u1771\u5E09\u5EF4\u8482\u5EF9\u5EFB\u38A0\u5EFC\u683E\u941B\u5F0D\u01C1\uF894\u3ADE\u48AE\u133A\u5F3A\u6888\u23D0\u5F58\u2471\u5F63\u97BD\u6E6E\u5F72\u9340\u8A36\u5FA7\u5DB6\u3D5F\u5250\u1F6A\u70F8\u2668\u91D6\u029E\u8A29\u6031\u6685\u1877\u3963\u3DC7\u3639\u5790\u27B4\u7971\u3E40\u609E\u60A4\u60B3\u4982\u498F\u7A53\u74A4\u50E1\u5AA0\u6164\u8424\u6142\uF8A6\u6ED2\u6181\u51F4\u0656\u6187\u5BAA\u3FB7\u285F\u61D3\u8B9D\u995D\u61D0\u3932\u2980\u28C1\u6023\u615C\u651E\u638B\u0118\u62C5\u1770\u62D5\u2E0D\u636C\u49DF\u3A17\u6438\u63F8\u138E\u17FC\u6490\u6F8A\u2E36\u9814\u408C\u571D\u64E1\u64E5\u947B\u3A66\u643A\u3A57\u654D\u6F16\u4A28\u4A23\u6585\u656D\u655F\u307E\u65B5\u4940\u4B37\u65D1\u40D8\u1829\u65E0\u65E3\u5FDF\u3400\u6618\u31F7\u31F8\u6644\u31A4\u31A5\u664B\u0E75\u6667\u51E6\u6673\u6674\u1E3D\u3231\u85F4\u31C8\u5313\u77C5\u28F7\u99A4\u6702\u439C\u4A21\u3B2B\u69FA\u37C2\u675E\u6767\u6762\u41CD\u90ED\u67D7\u44E9\u6822\u6E50\u923C\u6801\u33E6\u6DA0\u685D\u346F\u69E1\u6A0B\u8ADF\u6973\u68C3\u35CD\u6901\u6900\u3D32\u3A01\u363C\u3B80\u67AC\u6961\u8A4A\u42FC\u6936\u6998\u3BA1\u03C9\u8363\u5090\u69F9\u3659\u212A\u6A45\u3703\u6A9D\u3BF3\u67B1\u6AC8\u919C\u3C0D\u6B1D\u0923\u60DE\u6B35\u6B74\u27CD\u6EB5\u3ADB\u03B5\u1958\u3740\u5421\u3B5A\u6BE1\u3EFC\u6BDC\u6C37\u248B\u48F1\u6B51\u6C5A\u8226\u6C79\u3DBC\u44C5\u
3DBD\u41A4\u490C\u4900\u3CC9\u36E5\u3CEB\u0D32\u9B83\u31F9\u2491\u7F8F\u6837\u6D25\u6DA1\u6DEB\u6D96\u6D5C\u6E7C\u6F04\u497F\u4085\u6E72\u8533\u6F74\u51C7\u6C9C\u6E1D\u842E\u8B21\u6E2F\u3E2F\u7453\u3F82\u79CC\u6E4F\u5A91\u304B\u6FF8\u370D\u6F9D\u3E30\u6EFA\u1497\u403D\u4555\u93F0\u6F44\u6F5C\u3D4E\u6F74\u9170\u3D3B\u6F9F\u4144\u6FD3\u4091\u4155\u4039\u3FF0\u3FB4\u413F\u51DF\u4156\u4157\u4140\u61DD\u704B\u707E\u70A7\u7081\u70CC\u70D5\u70D6\u70DF\u4104\u3DE8\u71B4\u7196\u4277\u712B\u7145\u5A88\u714A\u716E\u5C9C\u4365\u714F\u9362\u42C1\u712C\u445A\u4A27\u4A22\u71BA\u8BE8\u70BD\u720E\u9442\u7215\u5911\u9443\u7224\u9341\u5605\u722E\u7240\u4974\u68BD\u7255\u7257\u3E55\u3044\u680D\u6F3D\u7282\u732A\u732B\u4823\u882B\u48ED\u8804\u7328\u732E\u73CF\u73AA\u0C3A\u6A2E\u73C9\u7449\u41E2\u16E7\u4A24\u6623\u36C5\u49B7\u498D\u49FB\u73F7\u7415\u6903\u4A26\u7439\u05C3\u3ED7\u745C\u28AD\u7460\u8EB2\u7447\u73E4\u7476\u83B9\u746C\u3730\u7474\u93F1\u6A2C\u7482\u4953\u4A8C\u415F\u4A79\u8B8F\u5B46\u8C03\u189E\u74C8\u1988\u750E\u74E9\u751E\u8ED9\u1A4B\u5BD7\u8EAC\u9385\u754D\u754A\u7567\u756E\u4F82\u3F04\u4D13\u758E\u745D\u759E\u75B4\u7602\u762C\u7651\u764F\u766F\u7676\u63F5\u7690\u81EF\u37F8\u6911\u690E\u76A1\u76A5\u76B7\u76CC\u6F9F\u8462\u509D\u517D\u1E1C\u771E\u7726\u7740\u64AF\u5220\u7758\u32AC\u77AF\u8964\u8968\u16C1\u77F4\u7809\u1376\u4A12\u68CA\u78AF\u78C7\u78D3\u96A5\u792E\u55E0\u78D7\u7934\u78B1\u760C\u8FB8\u8884\u8B2B\u6083\u261C\u7986\u8900\u6902\u7980\u5857\u799D\u7B39\u793C\u79A9\u6E2A\u7126\u3EA8\u79C6\u910D\u79D4"; + + private static boolean readBit(int i) { + return (ASTRALNESS.charAt(i >> 4) & (1 << (i & 0xF))) != 0; + } + + static char lowBits(int pointer) { + if (pointer < 942) { + return '\u0000'; + } + if (pointer < 1068) { + return TABLE0.charAt(pointer - 942); + } + if (pointer < 1099) { + return '\u0000'; + } + if (pointer < 1172) { + return TABLE1.charAt(pointer - 1099); + } + if (pointer < 1256) { + return '\u0000'; + } + if (pointer < 5466) { + return 
TABLE2.charAt(pointer - 1256); + } + if (pointer < 5495) { + return '\u0000'; + } + if (pointer < 11214) { + return TABLE3.charAt(pointer - 5495); + } + if (pointer < 11254) { + return '\u0000'; + } + if (pointer < 19782) { + return TABLE4.charAt(pointer - 11254); + } + return '\u0000'; + } + + static boolean isAstral(int pointer) { + if (pointer < 947) { + return false; + } + if (pointer < 1119) { + return readBit(0 + (pointer - 947)); + } + if (pointer < 1256) { + return false; + } + if (pointer < 1269) { + return readBit(172 + (pointer - 1256)); + } + if (pointer < 1336) { + return false; + } + if (pointer < 1364) { + return readBit(185 + (pointer - 1336)); + } + if (pointer < 1413) { + return false; + } + if (pointer < 1912) { + return readBit(213 + (pointer - 1413)); + } + if (pointer < 2012) { + return false; + } + if (pointer < 3800) { + return readBit(712 + (pointer - 2012)); + } + if (pointer < 3883) { + return false; + } + if (pointer == 3883) { + return true; + } + if (pointer < 3985) { + return false; + } + if (pointer < 5024) { + return readBit(2501 + (pointer - 3985)); + } + if (pointer < 11205) { + return false; + } + if (pointer < 11214) { + return readBit(3540 + (pointer - 11205)); + } + if (pointer < 18997) { + return false; + } + if (pointer < 19782) { + return readBit(3549 + (pointer - 18997)); + } + return false; + } + + public static int findPointer(char lowBits, boolean isAstral) { + if (!isAstral) { + switch (lowBits) { + case 0x2550: + return 18991; + case 0x255E: + return 18975; + case 0x2561: + return 18977; + case 0x256A: + return 18976; + case 0x5341: + return 5512; + case 0x5345: + return 5599; + default: + break; + } + } + for (int i = 3768; i < TABLE2.length(); i++) { + if (TABLE2.charAt(i) == lowBits) { + int pointer = i + 1256; + if (isAstral == isAstral(pointer)) { + return pointer; + } + } + } + for (int i = 0; i < TABLE3.length(); i++) { + if (TABLE3.charAt(i) == lowBits) { + int pointer = i + 5495; + if (isAstral == 
isAstral(pointer)) { + return pointer; + } + } + } + for (int i = 0; i < TABLE4.length(); i++) { + if (TABLE4.charAt(i) == lowBits) { + int pointer = i + 11254; + if (isAstral == isAstral(pointer)) { + return pointer; + } + } + } + return 0; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java new file mode 100644 index 0000000000..cc56b892f3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; + +public class Big5Decoder extends Decoder { + + private int big5Lead = 0; + + private char pendingTrail = '\u0000'; + + protected Big5Decoder(Charset cs) { + super(cs, 0.5f, 1.0f); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + assert !(this.report && (big5Lead != 0)): + "When reporting, this method should never return with big5Lead set."; + if (pendingTrail != '\u0000') { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = '\u0000'; + } + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = ((int) in.get() & 0xFF); + if (big5Lead == 0) { + if (b <= 0x7F) { + out.put((char) b); + continue; + } + if (b >= 0x81 && b <= 0xFE) { + if (this.report && !in.hasRemaining()) { + // The Java API is badly documented. Need to do this + // crazy thing and hope the caller knows about the + // undocumented aspects of the API! + in.position(in.position() - 1); + return CoderResult.UNDERFLOW; + } + big5Lead = b; + continue; + } + if (this.report) { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put('\uFFFD'); + continue; + } + int lead = big5Lead; + big5Lead = 0; + int offset = (b < 0x7F) ? 
0x40 : 0x62; + if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) { + int pointer = (lead - 0x81) * 157 + (b - offset); + char outTrail; + switch (pointer) { + case 1133: + out.put('\u00CA'); + outTrail = '\u0304'; + break; + case 1135: + out.put('\u00CA'); + outTrail = '\u030C'; + break; + case 1164: + out.put('\u00EA'); + outTrail = '\u0304'; + break; + case 1166: + out.put('\u00EA'); + outTrail = '\u030C'; + break; + default: + char lowBits = Big5Data.lowBits(pointer); + if (lowBits == '\u0000') { + // The following |if| block fixes + // https://github.com/whatwg/encoding/issues/5 + if (b <= 0x7F) { + // prepend byte to stream + // Always legal, since we've always just read a byte + // if we come here. + in.position(in.position() - 1); + } + if (this.report) { + // This can go past the start of the buffer + // if the caller does not conform to the + // undocumented aspects of the API. + in.position(in.position() - 1); + return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2); + } + out.put('\uFFFD'); + continue; + } + if (Big5Data.isAstral(pointer)) { + int codePoint = lowBits | 0x20000; + out.put((char) (0xD7C0 + (codePoint >> 10))); + outTrail = (char) (0xDC00 + (codePoint & 0x3FF)); + break; + } + out.put(lowBits); + continue; + } + if (!out.hasRemaining()) { + pendingTrail = outTrail; + return CoderResult.OVERFLOW; + } + out.put(outTrail); + continue; + } + // pointer is null + if (b <= 0x7F) { + // prepend byte to stream + // Always legal, since we've always just read a byte + // if we come here. + in.position(in.position() - 1); + } + if (this.report) { + // if position() == 0, the caller is not using the + // undocumented part of the API right and the line + // below will throw! + in.position(in.position() - 1); + return CoderResult.malformedForLength(b <= 0x7F ? 
1 : 2); + } + out.put('\uFFFD'); + continue; + } + } + + @Override protected CoderResult implFlush(CharBuffer out) { + if (pendingTrail != '\u0000') { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = '\u0000'; + } + if (big5Lead != 0) { + assert !this.report: "How come big5Lead got to be non-zero when decodeLoop() returned in the reporting mode?"; + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put('\uFFFD'); + big5Lead = 0; + } + return CoderResult.UNDERFLOW; + } + + @Override protected void implReset() { + big5Lead = 0; + pendingTrail = '\u0000'; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java new file mode 100644 index 0000000000..de51321514 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; + +public class Big5Encoder extends Encoder { + + private char utf16Lead = '\u0000'; + + private byte pendingTrail = 0; + + protected Big5Encoder(Charset cs) { + super(cs, 1.5f, 2.0f); + } + + @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + assert !((this.reportMalformed || this.reportUnmappable) && (utf16Lead != '\u0000')): + "When reporting, this method should never return with utf16Lead set."; + if (pendingTrail != 0) { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = 0; + } + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + boolean isAstral; // true means Plane 2, false means BMP + char lowBits; // The low 16 bits of the code point + char codeUnit = in.get(); + int highBits = (codeUnit & 0xFC00); + if (highBits == 0xD800) { + // high surrogate + if (utf16Lead != '\u0000') { + // High surrogate follows another high surrogate. The + // *previous* code unit is in error. + if (this.reportMalformed) { + // The caller had better adhere to the API contract. + // Otherwise, this may throw. 
+ in.position(in.position() - 2); + utf16Lead = '\u0000'; + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + } + utf16Lead = codeUnit; + continue; + } + if (highBits == 0xDC00) { + // low surrogate + if (utf16Lead == '\u0000') { + // Got low surrogate without a previous high surrogate + if (this.reportMalformed) { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + continue; + } + int codePoint = (utf16Lead << 10) + codeUnit - 56613888; + utf16Lead = '\u0000'; + // Plane 2 is the only astral plane that has potentially + // Big5-encodable characters. + if ((0xFF0000 & codePoint) != 0x20000) { + if (this.reportUnmappable) { + in.position(in.position() - 2); + return CoderResult.unmappableForLength(2); + } + out.put((byte) '?'); + continue; + } + isAstral = true; + lowBits = (char)(codePoint & 0xFFFF); + } else { + // not a surrogate + if (utf16Lead != '\u0000') { + // Non-surrogate follows a high surrogate. The *previous* + // code unit is in error. + utf16Lead = '\u0000'; + if (this.reportMalformed) { + // The caller had better adhere to the API contract. + // Otherwise, this may throw. + in.position(in.position() - 2); + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + // Let's unconsume this code unit and reloop in order to + // re-check if the output buffer still has space. + in.position(in.position() - 1); + continue; + } + isAstral = false; + lowBits = codeUnit; + } + // isAstral now tells us if we have a Plane 2 or a BMP character. + // lowBits tells us the low 16 bits. + // After all the above setup to deal with UTF-16, we are now + // finally ready to follow the spec. 
+ if (!isAstral && lowBits <= 0x7F) { + out.put((byte)lowBits); + continue; + } + int pointer = Big5Data.findPointer(lowBits, isAstral); + if (pointer == 0) { + if (this.reportUnmappable) { + if (isAstral) { + in.position(in.position() - 2); + return CoderResult.unmappableForLength(2); + } + in.position(in.position() - 1); + return CoderResult.unmappableForLength(1); + } + out.put((byte)'?'); + continue; + } + int lead = pointer / 157 + 0x81; + int trail = pointer % 157; + if (trail < 0x3F) { + trail += 0x40; + } else { + trail += 0x62; + } + out.put((byte)lead); + if (!out.hasRemaining()) { + pendingTrail = (byte)trail; + return CoderResult.OVERFLOW; + } + out.put((byte)trail); + continue; + } + } + + @Override protected CoderResult implFlush(ByteBuffer out) { + if (pendingTrail != 0) { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = 0; + } + if (utf16Lead != '\u0000') { + assert !this.reportMalformed: "How come utf16Lead got to be non-zero when decodeLoop() returned in the reporting mode?"; + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put((byte)'?'); + utf16Lead = '\u0000'; + } + return CoderResult.UNDERFLOW; + } + + @Override protected void implReset() { + utf16Lead = '\u0000'; + pendingTrail = 0; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java new file mode 100644 index 0000000000..41e06c63a8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of 
/**
 * Common superclass of the decoders in this package. It restricts the
 * error-handling configuration of {@link CharsetDecoder}: errors may be
 * reported or replaced but not ignored, and the only replacement string
 * accepted is U+FFFD.
 */
public abstract class Decoder extends CharsetDecoder {

    /**
     * {@code true} while malformed input is to be reported to the caller,
     * {@code false} while it is to be replaced. Starts out {@code true}.
     */
    protected boolean report = true;

    protected Decoder(Charset cs, float averageCharsPerByte, float maxCharsPerByte) {
        super(cs, averageCharsPerByte, maxCharsPerByte);
    }

    /**
     * Records whether malformed input is reported or replaced.
     *
     * @param newAction the action being installed
     * @throws IllegalArgumentException if {@code newAction} is {@code null},
     *         {@code IGNORE} or not a known action
     */
    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
        requireReportOrReplaceCandidate(newAction);
        boolean wantsReport = (newAction == CodingErrorAction.REPORT);
        if (wantsReport || newAction == CodingErrorAction.REPLACE) {
            this.report = wantsReport;
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Validates the action and otherwise does nothing.
     *
     * @param newAction the action being installed
     * @throws IllegalArgumentException if {@code newAction} is {@code null},
     *         {@code IGNORE} or not a known action
     */
    @Override protected final void implOnUnmappableCharacter(
            CodingErrorAction newAction) {
        requireReportOrReplaceCandidate(newAction);
        if (newAction == CodingErrorAction.REPLACE
                || newAction == CodingErrorAction.REPORT) {
            // We don't actually care, since there are no unmappables.
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Accepts only U+FFFD as the replacement.
     *
     * @param newReplacement the replacement string being installed
     * @throws IllegalArgumentException for any string other than U+FFFD
     */
    @Override protected final void implReplaceWith(String newReplacement) {
        boolean isReplacementCharacter = "\uFFFD".equals(newReplacement);
        if (isReplacementCharacter) {
            return;
        }
        throw new IllegalArgumentException("Only U+FFFD is allowed as the replacement.");
    }

    /**
     * Shared guard for both action setters: throws for {@code null} and for
     * {@code IGNORE}, lets everything else through.
     */
    private static void requireReportOrReplaceCandidate(CodingErrorAction action) {
        if (action == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (action == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
    }

    // TODO: Check if the JDK decoders reset the reporting state on reset()
}
/**
 * Common superclass of the encoders in this package. It restricts the
 * error-handling configuration of {@link CharsetEncoder}: errors may be
 * reported or replaced but not ignored, and the only replacement accepted is
 * the single byte {@code '?'}.
 *
 * <p>Subclasses read {@link #reportMalformed} and {@link #reportUnmappable}
 * in their encode loops to decide between returning an error result and
 * writing the replacement themselves.
 */
public abstract class Encoder extends CharsetEncoder {

    /**
     * {@code true} while malformed input is to be reported to the caller,
     * {@code false} while it is to be replaced. Starts out {@code true}.
     */
    boolean reportMalformed = true;

    /**
     * {@code true} while unmappable characters are to be reported to the
     * caller, {@code false} while they are to be replaced. Starts out
     * {@code true}.
     */
    boolean reportUnmappable = true;

    protected Encoder(Charset cs, float averageBytesPerChar,
            float maxBytesPerChar) {
        super(cs, averageBytesPerChar, maxBytesPerChar);
    }

    /**
     * Records whether malformed input is reported or replaced. Only
     * {@link #reportMalformed} is touched.
     *
     * @param newAction the action being installed
     * @throws IllegalArgumentException if {@code newAction} is {@code null},
     *         {@code IGNORE} or not a known action
     */
    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (newAction == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
        if (newAction == CodingErrorAction.REPLACE) {
            this.reportMalformed = false;
            return;
        }
        if (newAction == CodingErrorAction.REPORT) {
            // This used to set reportUnmappable, so switching malformed
            // input back to REPORT had no effect on malformed handling and
            // clobbered the unmappable setting instead.
            this.reportMalformed = true;
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Records whether unmappable characters are reported or replaced. Only
     * {@link #reportUnmappable} is touched.
     *
     * @param newAction the action being installed
     * @throws IllegalArgumentException if {@code newAction} is {@code null},
     *         {@code IGNORE} or not a known action
     */
    @Override protected final void implOnUnmappableCharacter(
            CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (newAction == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
        if (newAction == CodingErrorAction.REPLACE) {
            this.reportUnmappable = false;
            return;
        }
        if (newAction == CodingErrorAction.REPORT) {
            // This used to set reportMalformed; see implOnMalformedInput.
            this.reportUnmappable = true;
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Tells whether {@code repl} is an acceptable replacement.
     *
     * @param repl the candidate replacement bytes; may be {@code null}
     * @return {@code true} only for the one-byte array holding {@code '?'}
     */
    @Override public boolean isLegalReplacement(byte[] repl) {
        return repl != null && repl.length == 1 && repl[0] == '?';
    }

    /**
     * Does nothing: the only replacement {@link #isLegalReplacement(byte[])}
     * accepts is {@code '?'}, which the subclasses visible in this package
     * write themselves.
     *
     * @param newReplacement the replacement being installed; unused
     */
    @Override protected final void implReplaceWith(byte[] newReplacement) {
    }

}
+ */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.spi.CharsetProvider; +import java.util.Arrays; +import java.util.Collections; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Represents an encoding + * as defined in the Encoding + * Standard, provides access to each encoding defined in the Encoding + * Standard via a static constant and provides the + * "get an + * encoding" algorithm defined in the Encoding Standard. + * + *

This class inherits from {@link Charset} to allow the Encoding + * Standard-compliant encodings to be used in contexts that support + * Charset instances. However, by design, the Encoding + * Standard-compliant encodings are not supplied via a {@link CharsetProvider} + * and, therefore, are not available via and do not interfere with the static + * methods provided by Charset. (This class provides methods of + * the same name to hide each static method of Charset to help + * avoid accidental calls to the static methods of the superclass when working + * with Encoding Standard-compliant encodings.) + * + *

When an application needs to use a particular encoding, such as utf-8 + * or windows-1252, the corresponding constant, i.e. + * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252} + * respectively, should be used. However, when the application receives an + * encoding label from external input, the method {@link #forName(String) + * forName()} should be used to obtain the object representing the encoding + * identified by the label. In contexts where labels that map to the + * replacement + * encoding should be treated as unknown, the method {@link + * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead. + * + * + * @author hsivonen + */ +public abstract class Encoding extends Charset { + + private static final String[] LABELS = { + "866", + "ansi_x3.4-1968", + "arabic", + "ascii", + "asmo-708", + "big5", + "big5-hkscs", + "chinese", + "cn-big5", + "cp1250", + "cp1251", + "cp1252", + "cp1253", + "cp1254", + "cp1255", + "cp1256", + "cp1257", + "cp1258", + "cp819", + "cp866", + "csbig5", + "cseuckr", + "cseucpkdfmtjapanese", + "csgb2312", + "csibm866", + "csiso2022jp", + "csiso2022kr", + "csiso58gb231280", + "csiso88596e", + "csiso88596i", + "csiso88598e", + "csiso88598i", + "csisolatin1", + "csisolatin2", + "csisolatin3", + "csisolatin4", + "csisolatin5", + "csisolatin6", + "csisolatin9", + "csisolatinarabic", + "csisolatincyrillic", + "csisolatingreek", + "csisolatinhebrew", + "cskoi8r", + "csksc56011987", + "csmacintosh", + "csshiftjis", + "cyrillic", + "dos-874", + "ecma-114", + "ecma-118", + "elot_928", + "euc-jp", + "euc-kr", + "gb18030", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "greek", + "greek8", + "hebrew", + "hz-gb-2312", + "ibm819", + "ibm866", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-jp", + "iso-2022-kr", + "iso-8859-1", + "iso-8859-10", + "iso-8859-11", + "iso-8859-13", + "iso-8859-14", + "iso-8859-15", + "iso-8859-16", + "iso-8859-2", + "iso-8859-3", + "iso-8859-4", + "iso-8859-5", 
+ "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-8859-7", + "iso-8859-8", + "iso-8859-8-e", + "iso-8859-8-i", + "iso-8859-9", + "iso-ir-100", + "iso-ir-101", + "iso-ir-109", + "iso-ir-110", + "iso-ir-126", + "iso-ir-127", + "iso-ir-138", + "iso-ir-144", + "iso-ir-148", + "iso-ir-149", + "iso-ir-157", + "iso-ir-58", + "iso8859-1", + "iso8859-10", + "iso8859-11", + "iso8859-13", + "iso8859-14", + "iso8859-15", + "iso8859-2", + "iso8859-3", + "iso8859-4", + "iso8859-5", + "iso8859-6", + "iso8859-7", + "iso8859-8", + "iso8859-9", + "iso88591", + "iso885910", + "iso885911", + "iso885913", + "iso885914", + "iso885915", + "iso88592", + "iso88593", + "iso88594", + "iso88595", + "iso88596", + "iso88597", + "iso88598", + "iso88599", + "iso_8859-1", + "iso_8859-15", + "iso_8859-1:1987", + "iso_8859-2", + "iso_8859-2:1987", + "iso_8859-3", + "iso_8859-3:1988", + "iso_8859-4", + "iso_8859-4:1988", + "iso_8859-5", + "iso_8859-5:1988", + "iso_8859-6", + "iso_8859-6:1987", + "iso_8859-7", + "iso_8859-7:1987", + "iso_8859-8", + "iso_8859-8:1988", + "iso_8859-9", + "iso_8859-9:1989", + "koi", + "koi8", + "koi8-r", + "koi8-ru", + "koi8-u", + "koi8_r", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "l1", + "l2", + "l3", + "l4", + "l5", + "l6", + "l9", + "latin1", + "latin2", + "latin3", + "latin4", + "latin5", + "latin6", + "logical", + "mac", + "macintosh", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "sun_eu_greek", + "tis-620", + "unicode-1-1-utf-8", + "us-ascii", + "utf-16", + "utf-16be", + "utf-16le", + "utf-8", + "utf8", + "visual", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-31j", + "windows-874", + "windows-949", + "x-cp1250", + "x-cp1251", + "x-cp1252", + "x-cp1253", + "x-cp1254", + "x-cp1255", + "x-cp1256", + "x-cp1257", + "x-cp1258", + "x-euc-jp", + "x-gbk", + "x-mac-cyrillic", + 
"x-mac-roman", + "x-mac-ukrainian", + "x-sjis", + "x-user-defined", + "x-x-big5", + }; + + private static final Encoding[] ENCODINGS_FOR_LABELS = { + Ibm866.INSTANCE, + Windows1252.INSTANCE, + Iso6.INSTANCE, + Windows1252.INSTANCE, + Iso6.INSTANCE, + Big5.INSTANCE, + Big5.INSTANCE, + Gbk.INSTANCE, + Big5.INSTANCE, + Windows1250.INSTANCE, + Windows1251.INSTANCE, + Windows1252.INSTANCE, + Windows1253.INSTANCE, + Windows1254.INSTANCE, + Windows1255.INSTANCE, + Windows1256.INSTANCE, + Windows1257.INSTANCE, + Windows1258.INSTANCE, + Windows1252.INSTANCE, + Ibm866.INSTANCE, + Big5.INSTANCE, + EucKr.INSTANCE, + EucJp.INSTANCE, + Gbk.INSTANCE, + Ibm866.INSTANCE, + Iso2022Jp.INSTANCE, + Replacement.INSTANCE, + Gbk.INSTANCE, + Iso6.INSTANCE, + Iso6.INSTANCE, + Iso8.INSTANCE, + Iso8I.INSTANCE, + Windows1252.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Windows1254.INSTANCE, + Iso10.INSTANCE, + Iso15.INSTANCE, + Iso6.INSTANCE, + Iso5.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Koi8R.INSTANCE, + EucKr.INSTANCE, + Macintosh.INSTANCE, + ShiftJis.INSTANCE, + Iso5.INSTANCE, + Windows874.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso7.INSTANCE, + EucJp.INSTANCE, + EucKr.INSTANCE, + Gb18030.INSTANCE, + Gbk.INSTANCE, + Gbk.INSTANCE, + Gbk.INSTANCE, + Gbk.INSTANCE, + Iso7.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Replacement.INSTANCE, + Windows1252.INSTANCE, + Ibm866.INSTANCE, + Replacement.INSTANCE, + Replacement.INSTANCE, + Iso2022Jp.INSTANCE, + Replacement.INSTANCE, + Windows1252.INSTANCE, + Iso10.INSTANCE, + Windows874.INSTANCE, + Iso13.INSTANCE, + Iso14.INSTANCE, + Iso15.INSTANCE, + Iso16.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso5.INSTANCE, + Iso6.INSTANCE, + Iso6.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Iso8.INSTANCE, + Iso8I.INSTANCE, + Windows1254.INSTANCE, + Windows1252.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso7.INSTANCE, + Iso6.INSTANCE, + Iso8.INSTANCE, + Iso5.INSTANCE, + 
Windows1254.INSTANCE, + EucKr.INSTANCE, + Iso10.INSTANCE, + Gbk.INSTANCE, + Windows1252.INSTANCE, + Iso10.INSTANCE, + Windows874.INSTANCE, + Iso13.INSTANCE, + Iso14.INSTANCE, + Iso15.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso5.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Windows1254.INSTANCE, + Windows1252.INSTANCE, + Iso10.INSTANCE, + Windows874.INSTANCE, + Iso13.INSTANCE, + Iso14.INSTANCE, + Iso15.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso5.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Windows1254.INSTANCE, + Windows1252.INSTANCE, + Iso15.INSTANCE, + Windows1252.INSTANCE, + Iso2.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso4.INSTANCE, + Iso5.INSTANCE, + Iso5.INSTANCE, + Iso6.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Iso8.INSTANCE, + Windows1254.INSTANCE, + Windows1254.INSTANCE, + Koi8R.INSTANCE, + Koi8R.INSTANCE, + Koi8R.INSTANCE, + Koi8U.INSTANCE, + Koi8U.INSTANCE, + Koi8R.INSTANCE, + EucKr.INSTANCE, + EucKr.INSTANCE, + EucKr.INSTANCE, + EucKr.INSTANCE, + EucKr.INSTANCE, + Windows1252.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Windows1254.INSTANCE, + Iso10.INSTANCE, + Iso15.INSTANCE, + Windows1252.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Windows1254.INSTANCE, + Iso10.INSTANCE, + Iso8I.INSTANCE, + Macintosh.INSTANCE, + Macintosh.INSTANCE, + ShiftJis.INSTANCE, + ShiftJis.INSTANCE, + ShiftJis.INSTANCE, + ShiftJis.INSTANCE, + ShiftJis.INSTANCE, + Iso7.INSTANCE, + Windows874.INSTANCE, + Utf8.INSTANCE, + Windows1252.INSTANCE, + Utf16Le.INSTANCE, + Utf16Be.INSTANCE, + Utf16Le.INSTANCE, + Utf8.INSTANCE, + Utf8.INSTANCE, + Iso8.INSTANCE, + Windows1250.INSTANCE, + Windows1251.INSTANCE, + Windows1252.INSTANCE, + Windows1253.INSTANCE, + Windows1254.INSTANCE, + Windows1255.INSTANCE, + Windows1256.INSTANCE, + Windows1257.INSTANCE, + Windows1258.INSTANCE, + ShiftJis.INSTANCE, + 
Windows874.INSTANCE, + EucKr.INSTANCE, + Windows1250.INSTANCE, + Windows1251.INSTANCE, + Windows1252.INSTANCE, + Windows1253.INSTANCE, + Windows1254.INSTANCE, + Windows1255.INSTANCE, + Windows1256.INSTANCE, + Windows1257.INSTANCE, + Windows1258.INSTANCE, + EucJp.INSTANCE, + Gbk.INSTANCE, + MacCyrillic.INSTANCE, + Macintosh.INSTANCE, + MacCyrillic.INSTANCE, + ShiftJis.INSTANCE, + UserDefined.INSTANCE, + Big5.INSTANCE, + }; + + private static final Encoding[] ENCODINGS = { + Big5.INSTANCE, + EucJp.INSTANCE, + EucKr.INSTANCE, + Gb18030.INSTANCE, + Gbk.INSTANCE, + Ibm866.INSTANCE, + Iso2022Jp.INSTANCE, + Iso10.INSTANCE, + Iso13.INSTANCE, + Iso14.INSTANCE, + Iso15.INSTANCE, + Iso16.INSTANCE, + Iso2.INSTANCE, + Iso3.INSTANCE, + Iso4.INSTANCE, + Iso5.INSTANCE, + Iso6.INSTANCE, + Iso7.INSTANCE, + Iso8.INSTANCE, + Iso8I.INSTANCE, + Koi8R.INSTANCE, + Koi8U.INSTANCE, + Macintosh.INSTANCE, + Replacement.INSTANCE, + ShiftJis.INSTANCE, + Utf16Be.INSTANCE, + Utf16Le.INSTANCE, + Utf8.INSTANCE, + Windows1250.INSTANCE, + Windows1251.INSTANCE, + Windows1252.INSTANCE, + Windows1253.INSTANCE, + Windows1254.INSTANCE, + Windows1255.INSTANCE, + Windows1256.INSTANCE, + Windows1257.INSTANCE, + Windows1258.INSTANCE, + Windows874.INSTANCE, + MacCyrillic.INSTANCE, + UserDefined.INSTANCE, + }; + + /** + * The big5 encoding. + */ + public static final Encoding BIG5 = Big5.INSTANCE; + + /** + * The euc-jp encoding. + */ + public static final Encoding EUC_JP = EucJp.INSTANCE; + + /** + * The euc-kr encoding. + */ + public static final Encoding EUC_KR = EucKr.INSTANCE; + + /** + * The gb18030 encoding. + */ + public static final Encoding GB18030 = Gb18030.INSTANCE; + + /** + * The gbk encoding. + */ + public static final Encoding GBK = Gbk.INSTANCE; + + /** + * The ibm866 encoding. + */ + public static final Encoding IBM866 = Ibm866.INSTANCE; + + /** + * The iso-2022-jp encoding. + */ + public static final Encoding ISO_2022_JP = Iso2022Jp.INSTANCE; + + /** + * The iso-8859-10 encoding. 
+ */ + public static final Encoding ISO_8859_10 = Iso10.INSTANCE; + + /** + * The iso-8859-13 encoding. + */ + public static final Encoding ISO_8859_13 = Iso13.INSTANCE; + + /** + * The iso-8859-14 encoding. + */ + public static final Encoding ISO_8859_14 = Iso14.INSTANCE; + + /** + * The iso-8859-15 encoding. + */ + public static final Encoding ISO_8859_15 = Iso15.INSTANCE; + + /** + * The iso-8859-16 encoding. + */ + public static final Encoding ISO_8859_16 = Iso16.INSTANCE; + + /** + * The iso-8859-2 encoding. + */ + public static final Encoding ISO_8859_2 = Iso2.INSTANCE; + + /** + * The iso-8859-3 encoding. + */ + public static final Encoding ISO_8859_3 = Iso3.INSTANCE; + + /** + * The iso-8859-4 encoding. + */ + public static final Encoding ISO_8859_4 = Iso4.INSTANCE; + + /** + * The iso-8859-5 encoding. + */ + public static final Encoding ISO_8859_5 = Iso5.INSTANCE; + + /** + * The iso-8859-6 encoding. + */ + public static final Encoding ISO_8859_6 = Iso6.INSTANCE; + + /** + * The iso-8859-7 encoding. + */ + public static final Encoding ISO_8859_7 = Iso7.INSTANCE; + + /** + * The iso-8859-8 encoding. + */ + public static final Encoding ISO_8859_8 = Iso8.INSTANCE; + + /** + * The iso-8859-8-i encoding. + */ + public static final Encoding ISO_8859_8_I = Iso8I.INSTANCE; + + /** + * The koi8-r encoding. + */ + public static final Encoding KOI8_R = Koi8R.INSTANCE; + + /** + * The koi8-u encoding. + */ + public static final Encoding KOI8_U = Koi8U.INSTANCE; + + /** + * The macintosh encoding. + */ + public static final Encoding MACINTOSH = Macintosh.INSTANCE; + + /** + * The replacement encoding. + */ + public static final Encoding REPLACEMENT = Replacement.INSTANCE; + + /** + * The shift_jis encoding. + */ + public static final Encoding SHIFT_JIS = ShiftJis.INSTANCE; + + /** + * The utf-16be encoding. + */ + public static final Encoding UTF_16BE = Utf16Be.INSTANCE; + + /** + * The utf-16le encoding. 
+ */ + public static final Encoding UTF_16LE = Utf16Le.INSTANCE; + + /** + * The utf-8 encoding. + */ + public static final Encoding UTF_8 = Utf8.INSTANCE; + + /** + * The windows-1250 encoding. + */ + public static final Encoding WINDOWS_1250 = Windows1250.INSTANCE; + + /** + * The windows-1251 encoding. + */ + public static final Encoding WINDOWS_1251 = Windows1251.INSTANCE; + + /** + * The windows-1252 encoding. + */ + public static final Encoding WINDOWS_1252 = Windows1252.INSTANCE; + + /** + * The windows-1253 encoding. + */ + public static final Encoding WINDOWS_1253 = Windows1253.INSTANCE; + + /** + * The windows-1254 encoding. + */ + public static final Encoding WINDOWS_1254 = Windows1254.INSTANCE; + + /** + * The windows-1255 encoding. + */ + public static final Encoding WINDOWS_1255 = Windows1255.INSTANCE; + + /** + * The windows-1256 encoding. + */ + public static final Encoding WINDOWS_1256 = Windows1256.INSTANCE; + + /** + * The windows-1257 encoding. + */ + public static final Encoding WINDOWS_1257 = Windows1257.INSTANCE; + + /** + * The windows-1258 encoding. + */ + public static final Encoding WINDOWS_1258 = Windows1258.INSTANCE; + + /** + * The windows-874 encoding. + */ + public static final Encoding WINDOWS_874 = Windows874.INSTANCE; + + /** + * The x-mac-cyrillic encoding. + */ + public static final Encoding X_MAC_CYRILLIC = MacCyrillic.INSTANCE; + + /** + * The x-user-defined encoding. 
+ */ + public static final Encoding X_USER_DEFINED = UserDefined.INSTANCE; + + +/** Lazily built, unmodifiable name-to-encoding map returned by availableCharsets(); null until first requested. */ private static SortedMap<String, Charset> encodings = null; + + /** Passes the canonical name and the aliases straight to the Charset constructor. */ protected Encoding(String canonicalName, String[] aliases) { + super(canonicalName, aliases); + } + + /** Position of the label scanner in forName(): before the label, inside it, or in trailing whitespace. */ private enum State { + HEAD, LABEL, TAIL + }; + + /** Returns the encoding for a label. An exact match in LABELS is tried first; otherwise leading and trailing ASCII whitespace is dropped and A-Z is lower-cased before a second lookup. Throws IllegalArgumentException for null, IllegalCharsetNameException for an empty label, a disallowed character, punctuation before the first letter or digit, or content after inner whitespace, and UnsupportedCharsetException when the normalized label is unknown. */ public static Encoding forName(String label) { + if (label == null) { + throw new IllegalArgumentException("Label must not be null."); + } + if (label.length() == 0) { + throw new IllegalCharsetNameException(label); + } + // First try the fast path + int index = Arrays.binarySearch(LABELS, label); + if (index >= 0) { + return ENCODINGS_FOR_LABELS[index]; + } + // Else, slow path + StringBuilder sb = new StringBuilder(); + State state = State.HEAD; + for (int i = 0; i < label.length(); i++) { + char c = label.charAt(i); + if ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t') + || (c == '\u000C')) { + if (state == State.LABEL) { + state = State.TAIL; + } + continue; + } + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + switch (state) { + case HEAD: + state = State.LABEL; + // Fall through + case LABEL: + sb.append(c); + continue; + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + if (c >= 'A' && c <= 'Z') { + c += 0x20; + switch (state) { + case HEAD: + state = State.LABEL; + // Fall through + case LABEL: + sb.append(c); + continue; + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + if ((c == '-') || (c == '+') || (c == '.') || (c == ':') + || (c == '_')) { + switch (state) { + case LABEL: + sb.append(c); + continue; + case HEAD: + case TAIL: + throw new IllegalCharsetNameException(label); + } + } + throw new IllegalCharsetNameException(label); + } + index = Arrays.binarySearch(LABELS, sb.toString()); + if (index >= 0) { + return ENCODINGS_FOR_LABELS[index]; + } + throw new UnsupportedCharsetException(label); + } + + /** Same as forName(), except that a label resolving to the replacement encoding is reported as UnsupportedCharsetException. */ public static Encoding forNameNoReplacement(String label) { + Encoding encoding = Encoding.forName(label);  + if (encoding
== Encoding.REPLACEMENT) { + throw new UnsupportedCharsetException(label); + } + return encoding; + } + + /** Returns whether forName() resolves the label; only UnsupportedCharsetException is turned into false, so the other exceptions of forName() propagate. */ public static boolean isSupported(String label) { + try { + Encoding.forName(label); + } catch (UnsupportedCharsetException e) { + return false; + } + return true; + } + + /** Returns whether forNameNoReplacement() resolves the label; only UnsupportedCharsetException is turned into false. */ public static boolean isSupportedNoReplacement(String label) { + try { + Encoding.forNameNoReplacement(label); + } catch (UnsupportedCharsetException e) { + return false; + } + return true; + } + + /** Returns an unmodifiable sorted map from name() to encoding for every entry of ENCODINGS, built on first call and cached. */ public static SortedMap<String, Charset> availableCharsets() { + if (encodings == null) { + TreeMap<String, Charset> map = new TreeMap<String, Charset>(); + for (Encoding encoding : ENCODINGS) { + map.put(encoding.name(), encoding); + } + encodings = Collections.unmodifiableSortedMap(map); + } + return encodings; + } + + /** Always returns WINDOWS_1252. */ public static Encoding defaultCharset() { + return WINDOWS_1252; + } + + /** Always false in this base class. */ @Override public boolean canEncode() { + return false; + } + + /** Always false in this base class, whatever charset is passed. */ @Override public boolean contains(Charset cs) { + return false; + } + + /** Base implementation: always throws UnsupportedOperationException. */ @Override public CharsetEncoder newEncoder() { + throw new UnsupportedOperationException("Encoder not implemented."); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java new file mode 100644 index 0000000000..05fbef8104 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucJp.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +/** Encoding object for euc-jp; both coder factories delegate to the charset that java.nio.charset.Charset.forName() returns for NAME. */ class EucJp extends Encoding { + + /** Labels handed to the superclass constructor together with NAME. */ private static final String[] LABELS = { + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + }; + + /** Canonical name; also the key used for the Charset.forName() lookups below. */ private static final String NAME = "euc-jp"; + + /** The single shared instance (the constructor is private). */ static final EucJp INSTANCE = new EucJp(); + + private EucJp() { + super(NAME, LABELS); + } + + /** Returns a new decoder from Charset.forName(NAME). */ @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + /** Returns a new encoder from Charset.forName(NAME). */ @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java new file mode 100644 index 0000000000..a3923e2240 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and
to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +/** Encoding object for euc-kr; both coder factories delegate to the charset that java.nio.charset.Charset.forName() returns for NAME. */ class EucKr extends Encoding { + + /** Labels handed to the superclass constructor together with NAME. */ private static final String[] LABELS = { + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + }; + + /** Canonical name; also the key used for the Charset.forName() lookups below. */ private static final String NAME = "euc-kr"; + + /** The single shared instance (the constructor is private). */ static final EucKr INSTANCE = new EucKr(); + + private EucKr() { + super(NAME, LABELS); + } + + /** Returns a new decoder from Charset.forName(NAME). */ @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + /** Returns a new encoder from Charset.forName(NAME). */ @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java new file mode 100644 index 0000000000..34a1f36b5a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013-2015 Mozilla
Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CoderResult; + +/** Single-byte decoder for tables that contain U+FFFD entries: when the inherited report flag is set, a byte mapping to U+FFFD is signalled as malformed input of length 1; otherwise decoding is left to the superclass loop. */ public final class FallibleSingleByteDecoder extends InfallibleSingleByteDecoder { + + /** Passes the encoding and the lookup table for negative (high-bit) byte values to the superclass. */ public FallibleSingleByteDecoder(Encoding cs, char[] upperHalf) { + super(cs, upperHalf); + } + + /** Decodes one byte per iteration; returns UNDERFLOW when input is exhausted, OVERFLOW when output is full (input is checked first), or a malformed-input result positioned at the offending byte. */ @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + if (!this.report) { + return super.decodeLoop(in, out); + } else { + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int) in.get(); /* bytes are signed: 0x80-0xFF arrive as -128..-1 */ + if (b >= 0) { + out.put((char) b); /* 0x00-0x7F map to the same code point */ + } else { + char mapped = this.upperHalf[b + 128]; /* shifts -128..-1 to table index 0..127 */ + if (mapped == '\uFFFD') { + in.position(in.position() - 1); /* un-read the byte so the caller sees it as the malformed input */ + return CoderResult.malformedForLength(1); + } + out.put(mapped); + } + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java new file mode 100644 index 0000000000..fcb090ddec --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +/** Encoding object for gb18030; both coder factories delegate to the charset that java.nio.charset.Charset.forName() returns for NAME. */ class Gb18030 extends Encoding { + + /** Labels handed to the superclass constructor together with NAME. */ private static final String[] LABELS = { + "gb18030" + }; + + /** Canonical name; also the key used for the Charset.forName() lookups below. */ private static final String NAME = "gb18030"; + + /** The single shared instance (the constructor is private). */ static final Gb18030 INSTANCE = new Gb18030(); + + private Gb18030() { + super(NAME, LABELS); + } + + /** Returns a new decoder from Charset.forName(NAME). */ @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + /** Returns a new encoder from Charset.forName(NAME). */ @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java new file mode 100644 index 0000000000..2dc3694edf --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so,
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +/** Encoding object for gbk; note the asymmetry below: decoders come from the gb18030 charset, encoders from the gbk charset. */ class Gbk extends Encoding { + + /** Labels handed to the superclass constructor together with NAME. */ private static final String[] LABELS = { + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + }; + + /** Canonical name; used for the encoder lookup only. */ private static final String NAME = "gbk"; + + /** The single shared instance (the constructor is private). */ static final Gbk INSTANCE = new Gbk(); + + private Gbk() { + super(NAME, LABELS); + } + + /** Returns a new decoder from the gb18030 charset rather than from NAME. NOTE(review): presumably because the Encoding Standard defines gbk's decoder as the gb18030 decoder - confirm before changing. */ @Override public CharsetDecoder newDecoder() { + return Charset.forName("gb18030").newDecoder(); + } + + /** Returns a new encoder from Charset.forName(NAME), i.e. the gbk charset. */ @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java new file mode 100644 index 0000000000..037e628352 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Ibm866 extends Encoding { + + private static final char[] TABLE = { + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u2591', + '\u2592', + '\u2593', + '\u2502', + '\u2524', + '\u2561', + '\u2562', + '\u2556', + '\u2555', + '\u2563', + '\u2551', + '\u2557', + '\u255d', + '\u255c', + '\u255b', + '\u2510', + '\u2514', + '\u2534', + '\u252c', + '\u251c', + '\u2500', + '\u253c', + '\u255e', + '\u255f', + '\u255a', + '\u2554', + '\u2569', + '\u2566', + '\u2560', + '\u2550', + '\u256c', + '\u2567', + '\u2568', + '\u2564', + '\u2565', + '\u2559', + '\u2558', + '\u2552', + '\u2553', + '\u256b', + '\u256a', + '\u2518', + '\u250c', + '\u2588', + '\u2584', + '\u258c', + '\u2590', + '\u2580', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f', + '\u0401', + '\u0451', + '\u0404', + '\u0454', + '\u0407', + '\u0457', + '\u040e', + '\u045e', + '\u00b0', + '\u2219', + '\u00b7', + '\u221a', + '\u2116', + '\u00a4', + '\u25a0', + '\u00a0' + }; + + private static final String[] LABELS = { + "866", + "cp866", + "csibm866", + "ibm866" + }; + + private static final String NAME = "ibm866"; + + static final Encoding INSTANCE = new Ibm866(); + + private Ibm866() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java new file mode 100644 index 0000000000..7cc63072c1 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CoderResult; + +public class InfallibleSingleByteDecoder extends Decoder { + + protected final char[] upperHalf; + + protected InfallibleSingleByteDecoder(Encoding cs, char[] upperHalf) { + super(cs, 1.0f, 1.0f); + this.upperHalf = upperHalf; + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + // TODO figure out if it's worthwhile to optimize the case where both + // buffers are array-backed. + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int) in.get(); + if (b >= 0) { + out.put((char) b); + } else { + out.put(this.upperHalf[b + 128]); + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java new file mode 100644 index 0000000000..895cb5eedd --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso10 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0112', + '\u0122', + '\u012a', + '\u0128', + '\u0136', + '\u00a7', + '\u013b', + '\u0110', + '\u0160', + '\u0166', + '\u017d', + '\u00ad', + '\u016a', + '\u014a', + '\u00b0', + '\u0105', + '\u0113', + '\u0123', + '\u012b', + '\u0129', + '\u0137', + '\u00b7', + '\u013c', + '\u0111', + '\u0161', + '\u0167', + '\u017e', + '\u2015', + '\u016b', + '\u014b', + '\u0100', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u012e', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u0116', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u0145', + '\u014c', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u0168', + '\u00d8', + '\u0172', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u0101', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u012f', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u0117', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u0146', + '\u014d', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u0169', + 
'\u00f8', + '\u0173', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u0138' + }; + + private static final String[] LABELS = { + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + }; + + private static final String NAME = "iso-8859-10"; + + static final Encoding INSTANCE = new Iso10(); + + private Iso10() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java new file mode 100644 index 0000000000..60e6f53399 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. 
PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso13 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u201d', + '\u00a2', + '\u00a3', + '\u00a4', + '\u201e', + '\u00a6', + '\u00a7', + '\u00d8', + '\u00a9', + '\u0156', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00c6', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u201c', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00f8', + '\u00b9', + '\u0157', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00e6', + '\u0104', + '\u012e', + '\u0100', + '\u0106', + '\u00c4', + '\u00c5', + '\u0118', + '\u0112', + '\u010c', + '\u00c9', + '\u0179', + '\u0116', + '\u0122', + '\u0136', + '\u012a', + '\u013b', + '\u0160', + '\u0143', + '\u0145', + '\u00d3', + '\u014c', + '\u00d5', + '\u00d6', + '\u00d7', + '\u0172', + '\u0141', + '\u015a', + '\u016a', + '\u00dc', + '\u017b', + '\u017d', + '\u00df', + '\u0105', + '\u012f', + '\u0101', + '\u0107', + '\u00e4', + '\u00e5', + '\u0119', + '\u0113', + '\u010d', + '\u00e9', + '\u017a', + '\u0117', + '\u0123', + '\u0137', + '\u012b', + '\u013c', + '\u0161', + '\u0144', + '\u0146', + '\u00f3', + '\u014d', + '\u00f5', + '\u00f6', + '\u00f7', + '\u0173', + '\u0142', + '\u015b', + '\u016b', + '\u00fc', + '\u017c', + '\u017e', + '\u2019' + }; + + private static final String[] LABELS = { + "iso-8859-13", + "iso8859-13", + "iso885913" + }; + + private static final String NAME = "iso-8859-13"; + + static final Encoding INSTANCE = new Iso13(); + + private 
Iso13() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java new file mode 100644 index 0000000000..d4a180e6e9 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso14 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u1e02', + '\u1e03', + '\u00a3', + '\u010a', + '\u010b', + '\u1e0a', + '\u00a7', + '\u1e80', + '\u00a9', + '\u1e82', + '\u1e0b', + '\u1ef2', + '\u00ad', + '\u00ae', + '\u0178', + '\u1e1e', + '\u1e1f', + '\u0120', + '\u0121', + '\u1e40', + '\u1e41', + '\u00b6', + '\u1e56', + '\u1e81', + '\u1e57', + '\u1e83', + '\u1e60', + '\u1ef3', + '\u1e84', + '\u1e85', + '\u1e61', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0174', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u1e6a', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u0176', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0175', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u1e6b', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u0177', + '\u00ff' + }; + + private static final String[] LABELS = { + "iso-8859-14", + "iso8859-14", + "iso885914" + }; + + private static final String NAME = "iso-8859-14"; + + static final Encoding INSTANCE = new Iso14(); + + private Iso14() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java new file mode 100644 index 0000000000..a60e4b6ef2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso15 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u20ac', + '\u00a5', + '\u0160', + '\u00a7', + '\u0161', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u017d', + '\u00b5', + '\u00b6', + '\u00b7', + '\u017e', + '\u00b9', + '\u00ba', + '\u00bb', + '\u0152', + '\u0153', + '\u0178', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u00ff' + }; + + private static final String[] LABELS = { + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + }; + + private static final String NAME = "iso-8859-15"; + + static final Encoding INSTANCE = new 
Iso15(); + + private Iso15() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java new file mode 100644 index 0000000000..5eb1926db4 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso16 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0105', + '\u0141', + '\u20ac', + '\u201e', + '\u0160', + '\u00a7', + '\u0161', + '\u00a9', + '\u0218', + '\u00ab', + '\u0179', + '\u00ad', + '\u017a', + '\u017b', + '\u00b0', + '\u00b1', + '\u010c', + '\u0142', + '\u017d', + '\u201d', + '\u00b6', + '\u00b7', + '\u017e', + '\u010d', + '\u0219', + '\u00bb', + '\u0152', + '\u0153', + '\u0178', + '\u017c', + '\u00c0', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0106', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0110', + '\u0143', + '\u00d2', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u015a', + '\u0170', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u0118', + '\u021a', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u0107', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0111', + '\u0144', + '\u00f2', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u015b', + '\u0171', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0119', + '\u021b', + '\u00ff' + }; + + private static final String[] LABELS = { + "iso-8859-16" + }; + + private static final String NAME = "iso-8859-16"; + + static final Encoding INSTANCE = new Iso16(); + + private Iso16() { + super(NAME, LABELS); + } + + @Override 
public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java new file mode 100644 index 0000000000..7a5f6322ae --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso2 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u02d8', + '\u0141', + '\u00a4', + '\u013d', + '\u015a', + '\u00a7', + '\u00a8', + '\u0160', + '\u015e', + '\u0164', + '\u0179', + '\u00ad', + '\u017d', + '\u017b', + '\u00b0', + '\u0105', + '\u02db', + '\u0142', + '\u00b4', + '\u013e', + '\u015b', + '\u02c7', + '\u00b8', + '\u0161', + '\u015f', + '\u0165', + '\u017a', + '\u02dd', + '\u017e', + '\u017c', + '\u0154', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0139', + '\u0106', + '\u00c7', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u011a', + '\u00cd', + '\u00ce', + '\u010e', + '\u0110', + '\u0143', + '\u0147', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u00d7', + '\u0158', + '\u016e', + '\u00da', + '\u0170', + '\u00dc', + '\u00dd', + '\u0162', + '\u00df', + '\u0155', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u013a', + '\u0107', + '\u00e7', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u011b', + '\u00ed', + '\u00ee', + '\u010f', + '\u0111', + '\u0144', + '\u0148', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u00f7', + '\u0159', + '\u016f', + '\u00fa', + '\u0171', + '\u00fc', + '\u00fd', + '\u0163', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + }; + + private static final String NAME = "iso-8859-2"; + 
+ static final Encoding INSTANCE = new Iso2(); + + private Iso2() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java new file mode 100644 index 0000000000..6ebadc947f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Iso2022Jp extends Encoding { + + private static final String[] LABELS = { + "csiso2022jp", + "iso-2022-jp" + }; + + private static final String NAME = "iso-2022-jp"; + + static final Iso2022Jp INSTANCE = new Iso2022Jp(); + + private Iso2022Jp() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java new file mode 100644 index 0000000000..0667a160c0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso3 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0126', + '\u02d8', + '\u00a3', + '\u00a4', + '\ufffd', + '\u0124', + '\u00a7', + '\u00a8', + '\u0130', + '\u015e', + '\u011e', + '\u0134', + '\u00ad', + '\ufffd', + '\u017b', + '\u00b0', + '\u0127', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u0125', + '\u00b7', + '\u00b8', + '\u0131', + '\u015f', + '\u011f', + '\u0135', + '\u00bd', + '\ufffd', + '\u017c', + '\u00c0', + '\u00c1', + '\u00c2', + '\ufffd', + '\u00c4', + '\u010a', + '\u0108', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\ufffd', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u0120', + '\u00d6', + '\u00d7', + '\u011c', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u016c', + '\u015c', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\ufffd', + '\u00e4', + '\u010b', + '\u0109', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\ufffd', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u0121', + '\u00f6', + '\u00f7', + 
'\u011d', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u016d', + '\u015d', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + }; + + private static final String NAME = "iso-8859-3"; + + static final Encoding INSTANCE = new Iso3(); + + private Iso3() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java new file mode 100644 index 0000000000..b954869ab0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso4 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0138', + '\u0156', + '\u00a4', + '\u0128', + '\u013b', + '\u00a7', + '\u00a8', + '\u0160', + '\u0112', + '\u0122', + '\u0166', + '\u00ad', + '\u017d', + '\u00af', + '\u00b0', + '\u0105', + '\u02db', + '\u0157', + '\u00b4', + '\u0129', + '\u013c', + '\u02c7', + '\u00b8', + '\u0161', + '\u0113', + '\u0123', + '\u0167', + '\u014a', + '\u017e', + '\u014b', + '\u0100', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u012e', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u0116', + '\u00cd', + '\u00ce', + '\u012a', + '\u0110', + '\u0145', + '\u014c', + '\u0136', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u0172', + '\u00da', + '\u00db', + '\u00dc', + '\u0168', + '\u016a', + '\u00df', + '\u0101', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u012f', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u0117', + '\u00ed', + '\u00ee', + '\u012b', + '\u0111', + '\u0146', + '\u014d', + '\u0137', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u0173', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0169', + '\u016b', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" 
+ }; + + private static final String NAME = "iso-8859-4"; + + static final Encoding INSTANCE = new Iso4(); + + private Iso4() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java new file mode 100644 index 0000000000..13946cdbb1 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso5 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0401', + '\u0402', + '\u0403', + '\u0404', + '\u0405', + '\u0406', + '\u0407', + '\u0408', + '\u0409', + '\u040a', + '\u040b', + '\u040c', + '\u00ad', + '\u040e', + '\u040f', + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f', + '\u2116', + '\u0451', + '\u0452', + '\u0453', + '\u0454', + '\u0455', + '\u0456', + '\u0457', + '\u0458', + '\u0459', + '\u045a', + '\u045b', + '\u045c', + '\u00a7', + '\u045e', + '\u045f' + }; + + private static final String[] LABELS = { + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + }; + + private static final String NAME = "iso-8859-5"; + 
+ static final Encoding INSTANCE = new Iso5(); + + private Iso5() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java new file mode 100644 index 0000000000..02e6df8baf --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso6 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\ufffd', + '\ufffd', + '\u00a4', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u060c', + '\u00ad', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u061b', + '\ufffd', + '\ufffd', + '\ufffd', + '\u061f', + '\ufffd', + '\u0621', + '\u0622', + '\u0623', + '\u0624', + '\u0625', + '\u0626', + '\u0627', + '\u0628', + '\u0629', + '\u062a', + '\u062b', + '\u062c', + '\u062d', + '\u062e', + '\u062f', + '\u0630', + '\u0631', + '\u0632', + '\u0633', + '\u0634', + '\u0635', + '\u0636', + '\u0637', + '\u0638', + '\u0639', + '\u063a', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u0640', + '\u0641', + '\u0642', + '\u0643', + '\u0644', + '\u0645', + '\u0646', + '\u0647', + '\u0648', + '\u0649', + '\u064a', + '\u064b', + '\u064c', + '\u064d', + '\u064e', + '\u064f', + '\u0650', + '\u0651', + '\u0652', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd' + }; + + private static final String[] LABELS = { + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + 
"iso_8859-6", + "iso_8859-6:1987" + }; + + private static final String NAME = "iso-8859-6"; + + static final Encoding INSTANCE = new Iso6(); + + private Iso6() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java new file mode 100644 index 0000000000..630e702dec --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso7 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u2018', + '\u2019', + '\u00a3', + '\u20ac', + '\u20af', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u037a', + '\u00ab', + '\u00ac', + '\u00ad', + '\ufffd', + '\u2015', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u0384', + '\u0385', + '\u0386', + '\u00b7', + '\u0388', + '\u0389', + '\u038a', + '\u00bb', + '\u038c', + '\u00bd', + '\u038e', + '\u038f', + '\u0390', + '\u0391', + '\u0392', + '\u0393', + '\u0394', + '\u0395', + '\u0396', + '\u0397', + '\u0398', + '\u0399', + '\u039a', + '\u039b', + '\u039c', + '\u039d', + '\u039e', + '\u039f', + '\u03a0', + '\u03a1', + '\ufffd', + '\u03a3', + '\u03a4', + '\u03a5', + '\u03a6', + '\u03a7', + '\u03a8', + '\u03a9', + '\u03aa', + '\u03ab', + '\u03ac', + '\u03ad', + '\u03ae', + '\u03af', + '\u03b0', + '\u03b1', + '\u03b2', + '\u03b3', + '\u03b4', + '\u03b5', + '\u03b6', + '\u03b7', + '\u03b8', + '\u03b9', + '\u03ba', + '\u03bb', + '\u03bc', + '\u03bd', + '\u03be', + '\u03bf', + '\u03c0', + '\u03c1', + '\u03c2', + '\u03c3', + '\u03c4', + '\u03c5', + '\u03c6', + '\u03c7', + '\u03c8', + '\u03c9', + '\u03ca', + '\u03cb', + '\u03cc', + '\u03cd', + '\u03ce', + '\ufffd' + }; + + private static final String[] LABELS = { + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + }; + 
+ private static final String NAME = "iso-8859-7"; + + static final Encoding INSTANCE = new Iso7(); + + private Iso7() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java new file mode 100644 index 0000000000..10ee334865 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso8 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u2017', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + }; + + private 
static final String NAME = "iso-8859-8"; + + static final Encoding INSTANCE = new Iso8(); + + private Iso8() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java new file mode 100644 index 0000000000..732e1c9525 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso8I extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u2017', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "csiso88598i", + "iso-8859-8-i", + "logical" + }; + + private static final String NAME = "iso-8859-8-i"; + + static final Encoding INSTANCE = new Iso8I(); + + private Iso8I() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java new file mode 100644 index 0000000000..b6157bd8ea --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Koi8R extends Encoding { + + private static final char[] TABLE = { + '\u2500', + '\u2502', + '\u250c', + '\u2510', + '\u2514', + '\u2518', + '\u251c', + '\u2524', + '\u252c', + '\u2534', + '\u253c', + '\u2580', + '\u2584', + '\u2588', + '\u258c', + '\u2590', + '\u2591', + '\u2592', + '\u2593', + '\u2320', + '\u25a0', + '\u2219', + '\u221a', + '\u2248', + '\u2264', + '\u2265', + '\u00a0', + '\u2321', + '\u00b0', + '\u00b2', + '\u00b7', + '\u00f7', + '\u2550', + '\u2551', + '\u2552', + '\u0451', + '\u2553', + '\u2554', + '\u2555', + '\u2556', + '\u2557', + '\u2558', + '\u2559', + '\u255a', + '\u255b', + '\u255c', + '\u255d', + '\u255e', + '\u255f', + '\u2560', + '\u2561', + '\u0401', + '\u2562', + '\u2563', + '\u2564', + '\u2565', + '\u2566', + '\u2567', + '\u2568', + '\u2569', + '\u256a', + '\u256b', + '\u256c', + '\u00a9', + '\u044e', + '\u0430', + '\u0431', + '\u0446', + '\u0434', + '\u0435', + '\u0444', + '\u0433', + '\u0445', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u044f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0436', + '\u0432', + '\u044c', + '\u044b', + '\u0437', + '\u0448', + '\u044d', + '\u0449', + '\u0447', + '\u044a', + '\u042e', + '\u0410', + '\u0411', + '\u0426', + '\u0414', + '\u0415', + '\u0424', + '\u0413', + '\u0425', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u042f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0416', + '\u0412', + '\u042c', + '\u042b', + '\u0417', + '\u0428', + '\u042d', + '\u0429', + '\u0427', + '\u042a' + }; + + private static final String[] LABELS = { + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + }; + + private static final String NAME = "koi8-r"; + + static final Encoding INSTANCE = new Koi8R(); + + private Koi8R() { + 
super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java new file mode 100644 index 0000000000..8150838d30 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Koi8U extends Encoding { + + private static final char[] TABLE = { + '\u2500', + '\u2502', + '\u250c', + '\u2510', + '\u2514', + '\u2518', + '\u251c', + '\u2524', + '\u252c', + '\u2534', + '\u253c', + '\u2580', + '\u2584', + '\u2588', + '\u258c', + '\u2590', + '\u2591', + '\u2592', + '\u2593', + '\u2320', + '\u25a0', + '\u2219', + '\u221a', + '\u2248', + '\u2264', + '\u2265', + '\u00a0', + '\u2321', + '\u00b0', + '\u00b2', + '\u00b7', + '\u00f7', + '\u2550', + '\u2551', + '\u2552', + '\u0451', + '\u0454', + '\u2554', + '\u0456', + '\u0457', + '\u2557', + '\u2558', + '\u2559', + '\u255a', + '\u255b', + '\u0491', + '\u045e', + '\u255e', + '\u255f', + '\u2560', + '\u2561', + '\u0401', + '\u0404', + '\u2563', + '\u0406', + '\u0407', + '\u2566', + '\u2567', + '\u2568', + '\u2569', + '\u256a', + '\u0490', + '\u040e', + '\u00a9', + '\u044e', + '\u0430', + '\u0431', + '\u0446', + '\u0434', + '\u0435', + '\u0444', + '\u0433', + '\u0445', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u044f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0436', + '\u0432', + '\u044c', + '\u044b', + '\u0437', + '\u0448', + '\u044d', + '\u0449', + '\u0447', + '\u044a', + '\u042e', + '\u0410', + '\u0411', + '\u0426', + '\u0414', + '\u0415', + '\u0424', + '\u0413', + '\u0425', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u042f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0416', + '\u0412', + '\u042c', + '\u042b', + '\u0417', + '\u0428', + '\u042d', + '\u0429', + '\u0427', + '\u042a' + }; + + private static final String[] LABELS = { + "koi8-ru", + "koi8-u" + }; + + private static final String NAME = "koi8-u"; + + static final Encoding INSTANCE = new Koi8U(); + + private Koi8U() { + super(NAME, LABELS); + } + + @Override 
public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java new file mode 100644 index 0000000000..f46546ce22 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class MacCyrillic extends Encoding { + + private static final char[] TABLE = { + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u2020', + '\u00b0', + '\u0490', + '\u00a3', + '\u00a7', + '\u2022', + '\u00b6', + '\u0406', + '\u00ae', + '\u00a9', + '\u2122', + '\u0402', + '\u0452', + '\u2260', + '\u0403', + '\u0453', + '\u221e', + '\u00b1', + '\u2264', + '\u2265', + '\u0456', + '\u00b5', + '\u0491', + '\u0408', + '\u0404', + '\u0454', + '\u0407', + '\u0457', + '\u0409', + '\u0459', + '\u040a', + '\u045a', + '\u0458', + '\u0405', + '\u00ac', + '\u221a', + '\u0192', + '\u2248', + '\u2206', + '\u00ab', + '\u00bb', + '\u2026', + '\u00a0', + '\u040b', + '\u045b', + '\u040c', + '\u045c', + '\u0455', + '\u2013', + '\u2014', + '\u201c', + '\u201d', + '\u2018', + '\u2019', + '\u00f7', + '\u201e', + '\u040e', + '\u045e', + '\u040f', + '\u045f', + '\u2116', + '\u0401', + '\u0451', + '\u044f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u20ac' + }; + + private static final String[] LABELS = { + "x-mac-cyrillic", + "x-mac-ukrainian" + }; + + private static final String NAME = "x-mac-cyrillic"; + + static final Encoding INSTANCE = new MacCyrillic(); + + private MacCyrillic() { + 
super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java new file mode 100644 index 0000000000..70e356f23c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Macintosh extends Encoding { + + private static final char[] TABLE = { + '\u00c4', + '\u00c5', + '\u00c7', + '\u00c9', + '\u00d1', + '\u00d6', + '\u00dc', + '\u00e1', + '\u00e0', + '\u00e2', + '\u00e4', + '\u00e3', + '\u00e5', + '\u00e7', + '\u00e9', + '\u00e8', + '\u00ea', + '\u00eb', + '\u00ed', + '\u00ec', + '\u00ee', + '\u00ef', + '\u00f1', + '\u00f3', + '\u00f2', + '\u00f4', + '\u00f6', + '\u00f5', + '\u00fa', + '\u00f9', + '\u00fb', + '\u00fc', + '\u2020', + '\u00b0', + '\u00a2', + '\u00a3', + '\u00a7', + '\u2022', + '\u00b6', + '\u00df', + '\u00ae', + '\u00a9', + '\u2122', + '\u00b4', + '\u00a8', + '\u2260', + '\u00c6', + '\u00d8', + '\u221e', + '\u00b1', + '\u2264', + '\u2265', + '\u00a5', + '\u00b5', + '\u2202', + '\u2211', + '\u220f', + '\u03c0', + '\u222b', + '\u00aa', + '\u00ba', + '\u03a9', + '\u00e6', + '\u00f8', + '\u00bf', + '\u00a1', + '\u00ac', + '\u221a', + '\u0192', + '\u2248', + '\u2206', + '\u00ab', + '\u00bb', + '\u2026', + '\u00a0', + '\u00c0', + '\u00c3', + '\u00d5', + '\u0152', + '\u0153', + '\u2013', + '\u2014', + '\u201c', + '\u201d', + '\u2018', + '\u2019', + '\u00f7', + '\u25ca', + '\u00ff', + '\u0178', + '\u2044', + '\u20ac', + '\u2039', + '\u203a', + '\ufb01', + '\ufb02', + '\u2021', + '\u00b7', + '\u201a', + '\u201e', + '\u2030', + '\u00c2', + '\u00ca', + '\u00c1', + '\u00cb', + '\u00c8', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00cc', + '\u00d3', + '\u00d4', + '\uf8ff', + '\u00d2', + '\u00da', + '\u00db', + '\u00d9', + '\u0131', + '\u02c6', + '\u02dc', + '\u00af', + '\u02d8', + '\u02d9', + '\u02da', + '\u00b8', + '\u02dd', + '\u02db', + '\u02c7' + }; + + private static final String[] LABELS = { + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + }; + + private static final String NAME = "macintosh"; + + static final Encoding INSTANCE = new Macintosh(); + + private 
Macintosh() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java new file mode 100644 index 0000000000..abb6e24e71 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Replacement extends Encoding { + + private static final String[] LABELS = { + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr" + }; + + private static final String NAME = "replacement"; + + static final Replacement INSTANCE = new Replacement(); + + private Replacement() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new ReplacementDecoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java new file mode 100644 index 0000000000..f6f2448f65 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; + +class ReplacementDecoder extends Decoder { + + private boolean haveEmitted = false; + + ReplacementDecoder(Charset cs) { + super(cs, 1.0f, 1.0f); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (haveEmitted) { + in.position(in.limit()); + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + in.position(in.limit()); + haveEmitted = true; + if (this.report) { + return CoderResult.malformedForLength(1); + } + out.put('\uFFFD'); + } + } + + /** + * @see java.nio.charset.CharsetDecoder#implFlush(java.nio.CharBuffer) + */ + @Override protected CoderResult implFlush(CharBuffer out) { + // TODO Auto-generated method stub + return super.implFlush(out); + } + + /** + * @see java.nio.charset.CharsetDecoder#implReset() + */ + @Override protected void implReset() { + // TODO Auto-generated method stub + super.implReset(); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java new file mode 100644 index 0000000000..6638eab39b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class ShiftJis extends Encoding { + + private static final String[] LABELS = { + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + }; + + private static final String NAME = "shift_jis"; + + static final ShiftJis INSTANCE = new ShiftJis(); + + private ShiftJis() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java new file mode 100644 index 0000000000..61534cb28a --- /dev/null +++ 
b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class UserDefined extends Encoding { + + private static final String[] LABELS = { + "x-user-defined" + }; + + private static final String NAME = "x-user-defined"; + + static final UserDefined INSTANCE = new UserDefined(); + + private UserDefined() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new UserDefinedDecoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java new file mode 100644 index 0000000000..c14ca8627e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; + +class UserDefinedDecoder extends Decoder { + + UserDefinedDecoder(Charset cs) { + super(cs, 1.0f, 1.0f); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + // TODO figure out if it's worthwhile to optimize the case where both + // buffers are array-backed. + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int)in.get(); + if (b >= 0) { + out.put((char)b); + } else { + out.put((char)(b + 128 + 0xF780)); + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java new file mode 100644 index 0000000000..16c0d2fd51 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf16Be extends Encoding { + + private static final String[] LABELS = { + "utf-16be" + }; + + private static final String NAME = "utf-16be"; + + static final Utf16Be INSTANCE = new Utf16Be(); + + private Utf16Be() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java new file mode 100644 index 0000000000..7381235b5c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf16Le extends Encoding { + + private static final String[] LABELS = { + "utf-16", + "utf-16le" + }; + + private static final String NAME = "utf-16le"; + + static final Utf16Le INSTANCE = new Utf16Le(); + + private Utf16Le() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java new file mode 100644 index 0000000000..d6ea7b5145 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf8 extends Encoding { + + private static final String[] LABELS = { + "unicode-1-1-utf-8", + "utf-8", + "utf8" + }; + + private static final String NAME = "utf-8"; + + static final Utf8 INSTANCE = new Utf8(); + + private Utf8() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java new file mode 100644 index 0000000000..0b3f508754 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + 
* Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1250 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0083', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u0160', + '\u2039', + '\u015a', + '\u0164', + '\u017d', + '\u0179', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u0161', + '\u203a', + '\u015b', + '\u0165', + '\u017e', + '\u017a', + '\u00a0', + '\u02c7', + '\u02d8', + '\u0141', + '\u00a4', + '\u0104', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u015e', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u017b', + '\u00b0', + '\u00b1', + '\u02db', + '\u0142', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u0105', + '\u015f', + '\u00bb', + '\u013d', + '\u02dd', + '\u013e', + '\u017c', + '\u0154', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0139', + '\u0106', + '\u00c7', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u011a', + '\u00cd', + '\u00ce', + '\u010e', + '\u0110', + '\u0143', + '\u0147', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u00d7', + '\u0158', + '\u016e', + '\u00da', + '\u0170', + '\u00dc', + '\u00dd', + '\u0162', + '\u00df', + '\u0155', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u013a', + '\u0107', + '\u00e7', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u011b', + '\u00ed', + '\u00ee', + '\u010f', + '\u0111', + '\u0144', + '\u0148', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u00f7', + '\u0159', + '\u016f', + '\u00fa', + '\u0171', + '\u00fc', + '\u00fd', + '\u0163', + '\u02d9' + }; + + private static final String[] LABELS = { + "cp1250", + "windows-1250", + "x-cp1250" + }; + + private static final String NAME = "windows-1250"; + + static final Encoding INSTANCE = new Windows1250(); + + private Windows1250() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java new file mode 100644 index 0000000000..def5cf11e1 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1251 extends Encoding { + + private static final char[] TABLE = { + '\u0402', + '\u0403', + '\u201a', + '\u0453', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u20ac', + '\u2030', + '\u0409', + '\u2039', + '\u040a', + '\u040c', + '\u040b', + '\u040f', + '\u0452', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u0459', + '\u203a', + '\u045a', + '\u045c', + '\u045b', + '\u045f', + '\u00a0', + '\u040e', + '\u045e', + '\u0408', + '\u00a4', + '\u0490', + '\u00a6', + '\u00a7', + '\u0401', + '\u00a9', + '\u0404', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u0407', + '\u00b0', + '\u00b1', + '\u0406', + '\u0456', + '\u0491', + '\u00b5', + '\u00b6', + '\u00b7', + '\u0451', + '\u2116', + '\u0454', + '\u00bb', + '\u0458', + '\u0405', + '\u0455', + '\u0457', + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f' + }; + + private static final String[] LABELS = { + "cp1251", + "windows-1251", + "x-cp1251" + }; + + private static final String NAME = "windows-1251"; + + static final Encoding INSTANCE = new Windows1251(); + + private Windows1251() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java new file mode 100644 index 0000000000..4b3fa1ffae --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1252 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0160', + '\u2039', + '\u0152', + '\u008d', + '\u017d', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u0161', + '\u203a', + '\u0153', + '\u009d', + '\u017e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u00ff' + }; + + private static final String[] LABELS = { + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + 
"l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + }; + + private static final String NAME = "windows-1252"; + + static final Encoding INSTANCE = new Windows1252(); + + private Windows1252() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java new file mode 100644 index 0000000000..c96e8630cc --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1253 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0385', + '\u0386', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\ufffd', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u2015', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u0384', + '\u00b5', + '\u00b6', + '\u00b7', + '\u0388', + '\u0389', + '\u038a', + '\u00bb', + '\u038c', + '\u00bd', + '\u038e', + '\u038f', + '\u0390', + '\u0391', + '\u0392', + '\u0393', + '\u0394', + '\u0395', + '\u0396', + '\u0397', + '\u0398', + '\u0399', + '\u039a', + '\u039b', + '\u039c', + '\u039d', + '\u039e', + '\u039f', + '\u03a0', + '\u03a1', + '\ufffd', + '\u03a3', + '\u03a4', + '\u03a5', + '\u03a6', + '\u03a7', + '\u03a8', + '\u03a9', + '\u03aa', + '\u03ab', + '\u03ac', + '\u03ad', + '\u03ae', + '\u03af', + '\u03b0', + '\u03b1', + '\u03b2', + '\u03b3', + '\u03b4', + '\u03b5', + '\u03b6', + '\u03b7', + '\u03b8', + '\u03b9', + '\u03ba', + '\u03bb', + '\u03bc', + '\u03bd', + '\u03be', + '\u03bf', + '\u03c0', + '\u03c1', + '\u03c2', + '\u03c3', + '\u03c4', + '\u03c5', + '\u03c6', + '\u03c7', + '\u03c8', + '\u03c9', + '\u03ca', + '\u03cb', + '\u03cc', + '\u03cd', + '\u03ce', + '\ufffd' + }; + + private static final String[] LABELS = { + "cp1253", + "windows-1253", + "x-cp1253" + }; + + private static final String NAME = "windows-1253"; + + static final Encoding INSTANCE = new Windows1253(); + + private Windows1253() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java new file mode 100644 index 0000000000..fc3aa98399 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1254 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0160', + '\u2039', + '\u0152', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u0161', + '\u203a', + '\u0153', + '\u009d', + '\u009e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u011e', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u0130', + '\u015e', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u011f', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0131', + '\u015f', + '\u00ff' + }; + + private static final String[] LABELS = { + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + }; + + 
private static final String NAME = "windows-1254"; + + static final Encoding INSTANCE = new Windows1254(); + + private Windows1254() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java new file mode 100644 index 0000000000..957203d80a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1255 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u20aa', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u05b0', + '\u05b1', + '\u05b2', + '\u05b3', + '\u05b4', + '\u05b5', + '\u05b6', + '\u05b7', + '\u05b8', + '\u05b9', + '\ufffd', + '\u05bb', + '\u05bc', + '\u05bd', + '\u05be', + '\u05bf', + '\u05c0', + '\u05c1', + '\u05c2', + '\u05c3', + '\u05f0', + '\u05f1', + '\u05f2', + '\u05f3', + '\u05f4', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "cp1255", + "windows-1255", + "x-cp1255" + }; + + private static final String NAME = "windows-1255"; + + static final Encoding INSTANCE = new Windows1255(); + + private Windows1255() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java new file mode 100644 index 0000000000..87d805e1e3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1256 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u067e', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0679', + '\u2039', + '\u0152', + '\u0686', + '\u0698', + '\u0688', + '\u06af', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u06a9', + '\u2122', + '\u0691', + '\u203a', + '\u0153', + '\u200c', + '\u200d', + '\u06ba', + '\u00a0', + '\u060c', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u06be', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u061b', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u061f', + '\u06c1', + '\u0621', + '\u0622', + '\u0623', + '\u0624', + '\u0625', + '\u0626', + '\u0627', + '\u0628', + '\u0629', + '\u062a', + '\u062b', + '\u062c', + '\u062d', + '\u062e', + '\u062f', + '\u0630', + '\u0631', + '\u0632', + '\u0633', + '\u0634', + '\u0635', + '\u0636', + '\u00d7', + '\u0637', + '\u0638', + '\u0639', + '\u063a', + '\u0640', + '\u0641', + '\u0642', + '\u0643', + '\u00e0', + '\u0644', + '\u00e2', + '\u0645', + '\u0646', + '\u0647', + '\u0648', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u0649', + '\u064a', + '\u00ee', + '\u00ef', + '\u064b', + '\u064c', + '\u064d', + '\u064e', + '\u00f4', + '\u064f', + '\u0650', + '\u00f7', + '\u0651', + '\u00f9', + '\u0652', + '\u00fb', + '\u00fc', + '\u200e', + '\u200f', + '\u06d2' + }; + + private static final String[] LABELS = { + "cp1256", + "windows-1256", + "x-cp1256" + }; + + private static final String NAME = "windows-1256"; + + static final Encoding INSTANCE = new Windows1256(); + + private Windows1256() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java new file mode 100644 index 0000000000..140e9b4587 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1257 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0083', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u00a8', + '\u02c7', + '\u00b8', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u00af', + '\u02db', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\ufffd', + '\u00a6', + '\u00a7', + '\u00d8', + '\u00a9', + '\u0156', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00c6', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00f8', + '\u00b9', + '\u0157', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00e6', + '\u0104', + '\u012e', + '\u0100', + '\u0106', + '\u00c4', + '\u00c5', + '\u0118', + '\u0112', + '\u010c', + '\u00c9', + '\u0179', + '\u0116', + '\u0122', + '\u0136', + '\u012a', + '\u013b', + '\u0160', + '\u0143', + '\u0145', + '\u00d3', + '\u014c', + '\u00d5', + '\u00d6', + '\u00d7', + '\u0172', + '\u0141', + '\u015a', + '\u016a', + '\u00dc', + '\u017b', + '\u017d', + '\u00df', + '\u0105', + '\u012f', + '\u0101', + '\u0107', + '\u00e4', + '\u00e5', + '\u0119', + '\u0113', + '\u010d', + '\u00e9', + '\u017a', + '\u0117', + '\u0123', + '\u0137', + '\u012b', + '\u013c', + '\u0161', + '\u0144', + '\u0146', + '\u00f3', + '\u014d', + '\u00f5', + '\u00f6', + '\u00f7', + '\u0173', + '\u0142', + '\u015b', + '\u016b', + '\u00fc', + '\u017c', + '\u017e', + '\u02d9' + }; + + private static final String[] LABELS = { + "cp1257", + "windows-1257", + "x-cp1257" + }; + + private static final String NAME = "windows-1257"; + + static final Encoding INSTANCE = new Windows1257(); + + private Windows1257() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java new file mode 100644 index 0000000000..1301077899 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1258 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u008a', + '\u2039', + '\u0152', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u009a', + '\u203a', + '\u0153', + '\u009d', + '\u009e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u0300', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0110', + '\u00d1', + '\u0309', + '\u00d3', + '\u00d4', + '\u01a0', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u01af', + '\u0303', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u0301', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0111', + '\u00f1', + '\u0323', + '\u00f3', + '\u00f4', + '\u01a1', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u01b0', + '\u20ab', + '\u00ff' + }; + + private static final String[] LABELS = { + "cp1258", + "windows-1258", + "x-cp1258" + }; + + private static final String NAME = "windows-1258"; + + static final Encoding INSTANCE = new Windows1258(); + + private Windows1258() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java new file mode 100644 index 0000000000..f93be0175e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows874 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u2026', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0e01', + '\u0e02', + '\u0e03', + '\u0e04', + '\u0e05', + '\u0e06', + '\u0e07', + '\u0e08', + '\u0e09', + '\u0e0a', + '\u0e0b', + '\u0e0c', + '\u0e0d', + '\u0e0e', + '\u0e0f', + '\u0e10', + '\u0e11', + '\u0e12', + '\u0e13', + '\u0e14', + '\u0e15', + '\u0e16', + '\u0e17', + '\u0e18', + '\u0e19', + '\u0e1a', + '\u0e1b', + '\u0e1c', + '\u0e1d', + '\u0e1e', + '\u0e1f', + '\u0e20', + '\u0e21', + '\u0e22', + '\u0e23', + '\u0e24', + '\u0e25', + '\u0e26', + '\u0e27', + '\u0e28', + '\u0e29', + '\u0e2a', + '\u0e2b', + '\u0e2c', + '\u0e2d', + '\u0e2e', + '\u0e2f', + '\u0e30', + '\u0e31', + '\u0e32', + '\u0e33', + '\u0e34', + '\u0e35', + '\u0e36', + '\u0e37', + '\u0e38', + '\u0e39', + '\u0e3a', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u0e3f', + '\u0e40', + '\u0e41', + '\u0e42', + '\u0e43', + '\u0e44', + '\u0e45', + '\u0e46', + '\u0e47', + '\u0e48', + '\u0e49', + '\u0e4a', + '\u0e4b', + '\u0e4c', + '\u0e4d', + '\u0e4e', + '\u0e4f', + '\u0e50', + '\u0e51', + '\u0e52', + '\u0e53', + '\u0e54', + '\u0e55', + '\u0e56', + '\u0e57', + '\u0e58', + '\u0e59', + '\u0e5a', + '\u0e5b', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd' + }; + + private static final String[] LABELS = { + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + }; + + private static final String NAME = "windows-874"; + + static final Encoding INSTANCE = 
new Windows874(); + + private Windows874() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java new file mode 100644 index 0000000000..0967a58149 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.annotation; + +public @interface Auto { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java new file mode 100644 index 0000000000..bcb8a2b00a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.annotation; + +public @interface CharacterName { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java new file mode 100644 index 0000000000..2ba7f418a3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marker for translating into the C++ const keyword on the declaration in + * question. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Const { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java new file mode 100644 index 0000000000..117da8d3ca --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for attribute IDness. (In Java, an interned string + * "CDATA" or "ID".) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface IdType { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java new file mode 100644 index 0000000000..cc0728b1b5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Translates into the C++ inline keyword. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Inline { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java new file mode 100644 index 0000000000..44444d5250 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marks a string type as being the literal string type (typically const char*) + * in C++. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Literal { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java new file mode 100644 index 0000000000..1f91ba93b8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The local name of an element or attribute. Must be comparable with + * == (interned String in Java). 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Local { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java new file mode 100644 index 0000000000..cf011d33e2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The array type marked with this annotation won't have its + * .length read. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface NoLength { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java new file mode 100644 index 0000000000..03baa75f5a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The namespace URI type. (In Java, an interned String.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface NsUri { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java new file mode 100644 index 0000000000..268e531a3a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for namespace prefixes. (In Java, an interned String.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Prefix { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java new file mode 100644 index 0000000000..e6d4807b6b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for qualified names. (In Java, an interned String.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface QName { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java new file mode 100644 index 0000000000..e293e1af5b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marks a method as virtual in C++. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Virtual { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html new file mode 100644 index 0000000000..af15d38270 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html @@ -0,0 +1,30 @@ + + +Package Overview + + + +

This package provides annotations for facilitating automated translation +of the source code into other programming languages.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java new file mode 100644 index 0000000000..f3b3e74ca9 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import java.io.IOException; + +/** + * An interface for providing a method for reading a stream of bytes one byte at + * a time. + * + * @version $Id$ + * @author hsivonen + */ +public interface ByteReadable { + /** + * Returns the value of the next byte as an integer from 0 to 0xFF or -1 if + * the stream has ended. 
+ * + * @return integer from 0 to 0xFF or -1 on EOF + * @throws IOException + */ + public int readByte() throws IOException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java new file mode 100644 index 0000000000..4a5769f545 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for receiving notifications of UTF-16 code units read from a character stream. + * + * @version $Id$ + * @author hsivonen + */ +public interface CharacterHandler { + + /** + * Receive notification of a run of UTF-16 code units. 
+ * @param ch the buffer + * @param start start index in the buffer + * @param length the number of characters to process starting from start + * @throws SAXException if things go wrong + */ + public void characters(char[] ch, int start, int length) + throws SAXException; + + /** + * Signals the end of the stream. Can be used for cleanup. Doesn't mean that the stream ended successfully. + * + * @throws SAXException if things go wrong + */ + public void end() throws SAXException; + + /** + * Signals the start of the stream. Can be used for setup. + * + * @throws SAXException if things go wrong + */ + public void start() throws SAXException; + +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java new file mode 100644 index 0000000000..a34af51fa8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Used for indicating desired behavior with legacy doctypes. + * + * @version $Id$ + * @author hsivonen + */ +public enum DoctypeExpectation { + /** + * Be a pure HTML5 parser. + */ + HTML, + + /** + * Require the HTML 4.01 Transitional public id. Turn on HTML4-specific + * additional errors regardless of doctype. + */ + HTML401_TRANSITIONAL, + + /** + * Require the HTML 4.01 Transitional public id and a system id. Turn on + * HTML4-specific additional errors regardless of doctype. + */ + HTML401_STRICT, + + /** + * Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict + * public id and doctypes with the HTML 4.01 Transitional public id and a + * system id as non-errors. Turn on HTML4-specific additional errors if the + * public id is the HTML 4.01 Strict or Transitional public id. + */ + AUTO, + + /** + * Never enable HTML4-specific error checks. Never report any doctype + * condition as an error. (Doctype tokens in wrong places will be + * reported as errors, though.) The application may decide what to log + * in response to calls to DocumentModeHandler. This mode + * is meant for doing surveys on existing content. 
+ */ + NO_DOCTYPE_ERRORS +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java new file mode 100644 index 0000000000..e30eddd871 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Represents the HTML document compatibility mode. + * + * @version $Id$ + * @author hsivonen + */ +public enum DocumentMode { + /** + * The Standards Mode + */ + STANDARDS_MODE, + + /** + * The Limited Quirks Mode aka. 
The Almost Standards Mode + */ + ALMOST_STANDARDS_MODE, + + /** + * The Quirks Mode + */ + QUIRKS_MODE +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java new file mode 100644 index 0000000000..55377e0e43 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + + +import org.xml.sax.SAXException; + +/** + * A callback interface for receiving notification about the document mode. + * + * @version $Id$ + * @author hsivonen + */ +public interface DocumentModeHandler { + + /** + * Receive notification of the document mode. 
+ * + * @param mode the document mode + * @param publicIdentifier the public id of the doctype or null if unavailable + * @param systemIdentifier the system id of the doctype or null if unavailable + * @param html4SpecificAdditionalErrorChecks true if HTML 4-specific checks were enabled, false otherwise + * @throws SAXException if things go wrong + */ + public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java new file mode 100644 index 0000000000..6f185aeaf2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for communicating about character encoding names with the + * environment of the parser. + * + * @version $Id$ + * @author hsivonen + */ +public interface EncodingDeclarationHandler { + + /** + * Indicates that the parser has found an internal encoding declaration with + * the charset value charset. + * + * @param charset + * the charset name found. + * @return true if the value of charset was an + * encoding name for a supported ASCII-superset encoding. + * @throws SAXException + * if something went wrong + */ + public boolean internalEncodingDeclaration(String charset) throws SAXException; + + /** + * Queries the environment for the encoding in use (for error reporting). 
+ * + * @return the encoding in use + * @throws SAXException + * if something went wrong + */ + public String getCharacterEncoding() throws SAXException; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java new file mode 100644 index 0000000000..40f15ce7de --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Indicates a request for character encoding sniffer choice. + * + * @version $Id$ + * @author hsivonen + */ +public enum Heuristics { + + /** + * Perform no heuristic sniffing. + */ + NONE, + + /** + * Use both jchardet and ICU4J. + */ + ALL, + + /** + * Use jchardet only. 
+ */ + CHARDET, + + /** + * Use ICU4J only. + */ + ICU +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java new file mode 100644 index 0000000000..deab4c60f7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * A placeholder type that translates into the type of the C++ class that + * implements an interning service for local names (@Local in + * Java). 
+ * + * @version $Id$ + * @author hsivonen + */ +public interface Interner { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java new file mode 100644 index 0000000000..18f49e99d4 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.common; + +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.impl.ElementName; +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.Tokenizer; + +import org.xml.sax.SAXException; + +/** + * Tokenizer reports tokens through this interface. + * + * @version $Id$ + * @author hsivonen + */ +public interface TokenHandler { + + /** + * This method is called at the start of tokenization before any other + * methods on this interface are called. Implementations should hold the + * reference to the Tokenizer in order to set the content + * model flag and in order to be able to query for Locator + * data. + * + * @param self + * the Tokenizer. + * @throws SAXException + * if something went wrong + */ + public void startTokenization(Tokenizer self) throws SAXException; + + /** + * If this handler implementation cares about comments, return + * true. If not, return false. + * + * @return whether this handler wants comments + * @throws SAXException + * if something went wrong + */ + public boolean wantsComments() throws SAXException; + + /** + * Receive a doctype token. + * + * @param name + * the name + * @param publicIdentifier + * the public id + * @param systemIdentifier + * the system id + * @param forceQuirks + * whether the token is correct + * @throws SAXException + * if something went wrong + */ + public void doctype(String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) throws SAXException; + + /** + * Receive a start tag token. + * + * @param eltName + * the tag name + * @param attributes + * the attributes + * @param selfClosing + * TODO + * @throws SAXException + * if something went wrong + */ + public void startTag(ElementName eltName, HtmlAttributes attributes, + boolean selfClosing) throws SAXException; + + /** + * Receive an end tag token. 
+ * + * @param eltName + * the tag name + * @throws SAXException + * if something went wrong + */ + public void endTag(ElementName eltName) throws SAXException; + + /** + * Receive a comment token. The data is junk if + * wantsComments() returned false. + * + * @param buf + * a buffer holding the data + * @param start the offset into the buffer + * @param length + * the number of code units to read + * @throws SAXException + * if something went wrong + */ + public void comment(@NoLength char[] buf, int start, int length) throws SAXException; + + /** + * Receive character tokens. This method has the same semantics as the SAX + * method of the same name. + * + * @param buf + * a buffer holding the data + * @param start + * offset into the buffer + * @param length + * the number of code units to read + * @throws SAXException + * if something went wrong + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + public void characters(@Const @NoLength char[] buf, int start, int length) + throws SAXException; + + /** + * Reports a U+0000 that's being turned into a U+FFFD. + * + * @throws SAXException + * if something went wrong + */ + public void zeroOriginatingReplacementCharacter() throws SAXException; + + /** + * The end-of-file token. + * + * @throws SAXException + * if something went wrong + */ + public void eof() throws SAXException; + + /** + * Perform final cleanup. + * + * @throws SAXException + * if something went wrong + */ + public void endTokenization() throws SAXException; + + /** + * Checks if the CDATA sections are allowed. + * + * @return true if CDATA sections are allowed + * @throws SAXException + * if something went wrong + */ + public boolean cdataSectionAllowed() throws SAXException; + + /** + * Notifies the token handler of the worst case amount of data to be + * reported via characters() and + * zeroOriginatingReplacementCharacter(). 
+ * + * @param inputLength the maximum number of chars that can be reported + * via characters() and + * zeroOriginatingReplacementCharacter() before a new call to + * this method. + */ + public void ensureBufferSpace(int inputLength) throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java new file mode 100644 index 0000000000..eec23c71ce --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for intercepting information about the state transitions that + * the tokenizer is making. 
+ * + * @version $Id$ + * @author hsivonen + */ +public interface TransitionHandler { + + /** + * This method is called for every tokenizer state transition. + * + * @param from + * the state the tokenizer is transitioning from + * @param to + * the state being transitioned to + * @param reconsume + * true if the current input character is going to + * be reconsumed in the new state + * @param pos + * the current index into the input stream + * @throws SAXException + * if something went wrong + */ + void transition(int from, int to, boolean reconsume, int pos) + throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java new file mode 100644 index 0000000000..c959df655c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Policy for XML 1.0 violations: what to do when parsed content cannot be + * mapped to XML 1.0. + * + * @version $Id$ + * @author hsivonen + */ +public enum XmlViolationPolicy { + /** + * Conform to HTML 5, allow XML 1.0 to be violated. + */ + ALLOW, + + /** + * Halt when something cannot be mapped to XML 1.0. + */ + FATAL, + + /** + * Be non-conforming and alter the infoset to fit + * XML 1.0 when something would otherwise not be + * mappable to XML 1.0. + */ + ALTER_INFOSET +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html new file mode 100644 index 0000000000..43f141cd8e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html @@ -0,0 +1,29 @@ + + +Package Overview + + + +

This package provides common interfaces and enumerations.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java new file mode 100644 index 0000000000..2b8eff230c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+package nu.validator.htmlparser.dom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+
+import org.w3c.dom.DOMException;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
+import org.xml.sax.SAXException;
+
+/**
+ * The tree builder glue for building a tree through the public DOM APIs.
+ *
+ * Every DOMException raised by the DOM implementation is handed to fatal().
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+class DOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+    /**
+     * The DOM impl.
+     */
+    private DOMImplementation implementation;
+
+    /**
+     * The current doc.
+     */
+    private Document document;
+
+    /**
+     * The constructor.
+     *
+     * @param implementation
+     *            the DOM impl.
+     */
+    protected DOMTreeBuilder(DOMImplementation implementation) {
+        super();
+        this.implementation = implementation;
+    }
+
+    /**
+     * Appends text to the data of the candidate node if it is a text node.
+     *
+     * @param candidate
+     *            the node that may be a text node; may be null
+     * @param text
+     *            the characters to append
+     * @return true if the candidate was a text node and received the text
+     */
+    private static boolean appendToTextNode(Node candidate, String text) {
+        if (candidate != null && candidate.getNodeType() == Node.TEXT_NODE) {
+            Text asText = (Text) candidate;
+            asText.setData(asText.getData() + text);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Sets each attribute on the element unless the element already has an
+     * attribute with the same namespace and local name.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#addAttributesToElement(java.lang.Object,
+     *      nu.validator.htmlparser.impl.HtmlAttributes)
+     */
+    @Override protected void addAttributesToElement(Element element,
+            HtmlAttributes attributes) throws SAXException {
+        try {
+            for (int i = 0; i < attributes.getLength(); i++) {
+                String localName = attributes.getLocalNameNoBoundsCheck(i);
+                String uri = attributes.getURINoBoundsCheck(i);
+                if (!element.hasAttributeNS(uri, localName)) {
+                    element.setAttributeNS(uri, localName,
+                            attributes.getValueNoBoundsCheck(i));
+                }
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends text to the parent, extending the last child when that child
+     * is already a text node.
+     *
+     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCharacters(java.lang.Object,
+     *      java.lang.String)
+     */
+    @Override protected void appendCharacters(Element parent, String text)
+            throws SAXException {
+        try {
+            if (!appendToTextNode(parent.getLastChild(), text)) {
+                parent.appendChild(document.createTextNode(text));
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Moves all children of the old parent to the end of the new parent.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendChildrenToNewParent(java.lang.Object,
+     *      java.lang.Object)
+     */
+    @Override protected void appendChildrenToNewParent(Element oldParent,
+            Element newParent) throws SAXException {
+        try {
+            while (oldParent.hasChildNodes()) {
+                newParent.appendChild(oldParent.getFirstChild());
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends a comment node to the parent.
+     *
+     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendComment(java.lang.Object,
+     *      java.lang.String)
+     */
+    @Override protected void appendComment(Element parent, String comment)
+            throws SAXException {
+        try {
+            parent.appendChild(document.createComment(comment));
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends a comment node to the document node.
+     *
+     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCommentToDocument(java.lang.String)
+     */
+    @Override protected void appendCommentToDocument(String comment)
+            throws SAXException {
+        try {
+            document.appendChild(document.createComment(comment));
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Creates an element with the given attributes; attributes whose type is
+     * "ID" are additionally flagged as ID attributes on the element.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, String, nu.validator.htmlparser.impl.HtmlAttributes, Object)
+     */
+    @Override protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element intendedParent) throws SAXException {
+        try {
+            Element rv = document.createElementNS(ns, name);
+            for (int i = 0; i < attributes.getLength(); i++) {
+                rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+                        attributes.getLocalNameNoBoundsCheck(i),
+                        attributes.getValueNoBoundsCheck(i));
+                // equals() instead of ==: do not depend on string interning.
+                if ("ID".equals(attributes.getTypeNoBoundsCheck(i))) {
+                    rv.setIdAttributeNS(null, attributes.getLocalName(i), true);
+                }
+            }
+            return rv;
+        } catch (DOMException e) {
+            fatal(e);
+            // NOTE(review): fatal() presumably always throws - confirm.
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Creates the root html element in the XHTML namespace and appends it to
+     * the document.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#createHtmlElementSetAsRoot(nu.validator.htmlparser.impl.HtmlAttributes)
+     */
+    @Override protected Element createHtmlElementSetAsRoot(
+            HtmlAttributes attributes) throws SAXException {
+        try {
+            Element rv = document.createElementNS(
+                    "http://www.w3.org/1999/xhtml", "html");
+            for (int i = 0; i < attributes.getLength(); i++) {
+                rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
+                        attributes.getLocalNameNoBoundsCheck(i),
+                        attributes.getValueNoBoundsCheck(i));
+            }
+            document.appendChild(rv);
+            return rv;
+        } catch (DOMException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Appends the child as the last child of the new parent.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendElement(java.lang.Object,
+     *      java.lang.Object)
+     */
+    @Override protected void appendElement(Element child, Element newParent)
+            throws SAXException {
+        try {
+            newParent.appendChild(child);
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Tells whether the element has any child nodes.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#hasChildren(java.lang.Object)
+     */
+    @Override protected boolean hasChildren(Element element)
+            throws SAXException {
+        try {
+            return element.hasChildNodes();
+        } catch (DOMException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Creates an element and stores the form pointer on it as user data
+     * under the key "nu.validator.form-pointer".
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String,
+     *      java.lang.String, org.xml.sax.Attributes, java.lang.Object)
+     */
+    @Override protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+        try {
+            Element rv = createElement(ns, name, attributes, intendedParent);
+            rv.setUserData("nu.validator.form-pointer", form, null);
+            return rv;
+        } catch (DOMException e) {
+            fatal(e);
+            return null;
+        }
+    }
+
+    /**
+     * Creates a fresh, empty document for the parse that is starting.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+     */
+    @Override protected void start(boolean fragment) throws SAXException {
+        document = implementation.createDocument(null, null, null);
+    }
+
+    /**
+     * Stores the document mode on the document node as user data under the
+     * key "nu.validator.document-mode".
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
+     *      java.lang.String, java.lang.String, boolean)
+     */
+    protected void documentMode(DocumentMode mode, String publicIdentifier,
+            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+            throws SAXException {
+        document.setUserData("nu.validator.document-mode", mode, null);
+    }
+
+    /**
+     * Returns the document and releases the builder's reference to it.
+     *
+     * @return the document
+     */
+    Document getDocument() {
+        Document rv = document;
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Return the document fragment: the children of the document's first
+     * child are moved into a new fragment and the builder's reference to the
+     * document is released.
+     *
+     * @return the document fragment
+     */
+    DocumentFragment getDocumentFragment() {
+        DocumentFragment rv = document.createDocumentFragment();
+        Node rootElt = document.getFirstChild();
+        // A document without a root yields an empty fragment instead of an
+        // NullPointerException.
+        while (rootElt != null && rootElt.hasChildNodes()) {
+            rv.appendChild(rootElt.getFirstChild());
+        }
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Creates an element and inserts it before the table when the table has
+     * a parent, or appends it to the stack parent otherwise.
+     */
+    @Override
+    protected Element createAndInsertFosterParentedElement(String ns, String name,
+            HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParentNode();
+            Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+
+            if (parent != null) { // always an element if not null
+                parent.insertBefore(child, table);
+            } else {
+                stackParent.appendChild(child);
+            }
+
+            return child;
+        } catch (DOMException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Inserts text before the table when the table has a parent, or at the
+     * end of the stack parent otherwise; an adjacent text node is extended
+     * instead of creating a new one.
+     */
+    @Override protected void insertFosterParentedCharacters(String text,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParentNode();
+            if (parent != null) { // always an element if not null
+                if (!appendToTextNode(table.getPreviousSibling(), text)) {
+                    parent.insertBefore(document.createTextNode(text), table);
+                }
+                return;
+            }
+            if (!appendToTextNode(stackParent.getLastChild(), text)) {
+                stackParent.appendChild(document.createTextNode(text));
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Inserts the child before the table when the table has a parent, or
+     * appends it to the stack parent otherwise.
+     */
+    @Override protected void insertFosterParentedChild(Element child,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParentNode();
+            if (parent != null) { // always an element if not null
+                parent.insertBefore(child, table);
+            } else {
+                stackParent.appendChild(child);
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Removes the element from its parent, if it has one.
+     */
+    @Override protected void detachFromParent(Element element)
+            throws SAXException {
+        try {
+            Node parent = element.getParentNode();
+            if (parent != null) {
+                parent.removeChild(element);
+            }
+        } catch (DOMException e) {
+            fatal(e);
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
new file mode 100644
index
0000000000..5e366be7be
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.dom;
+
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Replays a DOM tree (or subtree) as a stream of SAX events.
+ *
+ * Comments, CDATA boundaries and the doctype are only reported when a
+ * lexical handler was supplied.
+ */
+public class Dom2Sax {
+
+    /**
+     * Maps a null namespace URI to the empty string SAX uses for "no namespace".
+     */
+    private static String emptyIfNull(String namespaceURI) {
+        return namespaceURI == null ? "" : namespaceURI;
+    }
+
+    /**
+     * Returns the local name of a node, falling back to the node name when the
+     * DOM implementation reports a null local name.
+     */
+    private static String localNameOf(Node node) {
+        String localName = node.getLocalName();
+        return localName == null ? node.getNodeName() : localName;
+    }
+
+    private final NamedNodeMapAttributes attributes = new NamedNodeMapAttributes();
+
+    private final ContentHandler contentHandler;
+
+    private final LexicalHandler lexicalHandler;
+
+    /**
+     * @param contentHandler
+     *            receives the content events; must not be null
+     * @param lexicalHandler
+     *            receives comment, CDATA and DTD events; may be null
+     * @throws IllegalArgumentException
+     *             if contentHandler is null
+     */
+    public Dom2Sax(ContentHandler contentHandler, LexicalHandler lexicalHandler) {
+        if (contentHandler == null) {
+            throw new IllegalArgumentException("ContentHandler must not be null.");
+        }
+        this.contentHandler = contentHandler;
+        this.lexicalHandler = lexicalHandler;
+    }
+
+    /**
+     * Walks the subtree rooted at the given node in document order, without
+     * recursion, and reports it to the handlers.
+     *
+     * @param node
+     *            the root of the subtree to report
+     * @throws SAXException
+     *             if a handler throws it
+     */
+    public void parse(Node node) throws SAXException {
+        Node current = node;
+        Node next;
+        char[] buf;
+        for (;;) {
+            switch (current.getNodeType()) {
+                case Node.ELEMENT_NODE:
+                    attributes.setNamedNodeMap(current.getAttributes());
+                    // To work around severe bogosity in the default DOM
+                    // impl, use the node name if local name is null.
+                    // The node name is also reported as the qName because
+                    // SAX does not allow a null qualified name.
+                    contentHandler.startElement(
+                            emptyIfNull(current.getNamespaceURI()),
+                            localNameOf(current), current.getNodeName(),
+                            attributes);
+                    attributes.clear();
+                    break;
+                case Node.TEXT_NODE:
+                    buf = current.getNodeValue().toCharArray();
+                    contentHandler.characters(buf, 0, buf.length);
+                    break;
+                case Node.CDATA_SECTION_NODE:
+                    if (lexicalHandler != null) {
+                        lexicalHandler.startCDATA();
+                    }
+                    buf = current.getNodeValue().toCharArray();
+                    contentHandler.characters(buf, 0, buf.length);
+                    if (lexicalHandler != null) {
+                        lexicalHandler.endCDATA();
+                    }
+                    break;
+                case Node.COMMENT_NODE:
+                    if (lexicalHandler != null) {
+                        buf = current.getNodeValue().toCharArray();
+                        lexicalHandler.comment(buf, 0, buf.length);
+                    }
+                    break;
+                case Node.DOCUMENT_NODE:
+                    contentHandler.startDocument();
+                    break;
+                case Node.DOCUMENT_TYPE_NODE:
+                    if (lexicalHandler != null) {
+                        DocumentType doctype = (DocumentType) current;
+                        lexicalHandler.startDTD(doctype.getName(),
+                                doctype.getPublicId(), doctype.getSystemId());
+                        lexicalHandler.endDTD();
+                    }
+                    break;
+                case Node.PROCESSING_INSTRUCTION_NODE:
+                    contentHandler.processingInstruction(current.getNodeName(), current.getNodeValue());
+                    break;
+                case Node.ENTITY_REFERENCE_NODE:
+                    contentHandler.skippedEntity(current.getNodeName());
+                    break;
+            }
+            if ((next = current.getFirstChild()) != null) {
+                current = next;
+                continue;
+            }
+            // No (more) children: close nodes while climbing until a next
+            // sibling is found or the starting node has been closed.
+            for (;;) {
+                switch (current.getNodeType()) {
+                    case Node.ELEMENT_NODE:
+                        contentHandler.endElement(
+                                emptyIfNull(current.getNamespaceURI()),
+                                localNameOf(current), current.getNodeName());
+                        break;
+                    case Node.DOCUMENT_NODE:
+                        contentHandler.endDocument();
+                        break;
+                }
+                if (current == node) {
+                    return;
+                }
+                if ((next = current.getNextSibling()) != null) {
+                    current = next;
+                    break;
+                }
+                current = current.getParentNode();
+            }
+        }
+    }
+
+    /**
+     * A reusable SAX view over the attribute map of the element currently being
+     * reported.
+     */
+    private static final class NamedNodeMapAttributes implements Attributes {
+
+        private NamedNodeMap map;
+
+        private int length;
+
+        public void setNamedNodeMap(NamedNodeMap attributes) {
+            this.map = attributes;
+            this.length = attributes.getLength();
+        }
+
+        public void clear() {
+            this.map = null;
+            // Keep the view consistent: no map means no attributes.
+            this.length = 0;
+        }
+
+        /**
+         * Returns the attribute node at the index or null if out of range.
+         */
+        private Node item(int index) {
+            return (index >= 0 && index < length) ? map.item(index) : null;
+        }
+
+        public int getIndex(String qName) {
+            for (int i = 0; i < length; i++) {
+                if (map.item(i).getNodeName().equals(qName)) {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        public int getIndex(String uri, String localName) {
+            for (int i = 0; i < length; i++) {
+                Node n = map.item(i);
+                // localNameOf() guards against null local names, which would
+                // otherwise make this lookup throw.
+                if (localNameOf(n).equals(localName)
+                        && emptyIfNull(n.getNamespaceURI()).equals(uri)) {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        public int getLength() {
+            return length;
+        }
+
+        public String getLocalName(int index) {
+            Node n = item(index);
+            return n == null ? null : localNameOf(n);
+        }
+
+        public String getQName(int index) {
+            Node n = item(index);
+            return n == null ? null : n.getNodeName();
+        }
+
+        public String getType(int index) {
+            Node n = item(index);
+            if (n == null) {
+                return null;
+            }
+            return "id".equals(localNameOf(n)) ? "ID" : "CDATA";
+        }
+
+        public String getType(String qName) {
+            return getType(getIndex(qName));
+        }
+
+        public String getType(String uri, String localName) {
+            return getType(getIndex(uri, localName));
+        }
+
+        public String getURI(int index) {
+            Node n = item(index);
+            return n == null ? null : emptyIfNull(n.getNamespaceURI());
+        }
+
+        public String getValue(int index) {
+            Node n = item(index);
+            return n == null ? null : n.getNodeValue();
+        }
+
+        public String getValue(String qName) {
+            return getValue(getIndex(qName));
+        }
+
+        public String getValue(String uri, String localName) {
+            return getValue(getIndex(uri, localName));
+        }
+
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
new file mode 100644
index 0000000000..f4a307c9f3
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.dom; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.io.Driver; + +import org.w3c.dom.DOMImplementation; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentFragment; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * This class implements an HTML5 parser that exposes data through the DOM + * interface. + * + *

By default, when using the constructor without arguments, + * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible + * infosets. This corresponds to ALTER_INFOSET as the general + * XML violation policy. To make the parser support non-conforming HTML fully + * per the HTML 5 spec while on the other hand potentially violating the SAX2 + * API contract, set the general XML violation policy to ALLOW. + * This does not work with a standard DOM implementation. + * It is possible to treat XML 1.0 infoset violations as fatal by setting + * the general XML violation policy to FATAL. + * + *

The doctype is not represented in the tree. + * + *

The document mode is represented as user data DocumentMode + * object with the key nu.validator.document-mode on the document + * node. + * + *

The form pointer is also stored as user data with the key
+ * nu.validator.form-pointer.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlDocumentBuilder extends DocumentBuilder {
+
+    /**
+     * Returns the JAXP DOM implementation.
+     *
+     * @return the JAXP DOM implementation
+     */
+    private static DOMImplementation jaxpDOMImplementation() {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        factory.setNamespaceAware(true);
+        DocumentBuilder builder;
+        try {
+            builder = factory.newDocumentBuilder();
+        } catch (ParserConfigurationException e) {
+            throw new RuntimeException(e);
+        }
+        return builder.getDOMImplementation();
+    }
+
+    /**
+     * The tokenizer.
+     */
+    private Driver driver;
+
+    /**
+     * The tree builder.
+     */
+    private final DOMTreeBuilder treeBuilder;
+
+    /**
+     * The DOM impl.
+     */
+    private final DOMImplementation implementation;
+
+    /**
+     * The entity resolver.
+     */
+    private EntityResolver entityResolver;
+
+    private ErrorHandler errorHandler = null;
+
+    private DocumentModeHandler documentModeHandler = null;
+
+    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+    private boolean checkingNormalization = false;
+
+    private boolean scriptingEnabled = false;
+
+    /**
+     * The character handlers that lazyInit() registers on each new driver.
+     * The element type is spelled out because lazyInit() iterates the list
+     * with a CharacterHandler loop variable.
+     */
+    private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+    private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+    private boolean mappingLangToXmlLang = false;
+
+    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+    private boolean reportingDoctype = true;
+
+    private ErrorHandler treeBuilderErrorHandler = null;
+
+    private
Heuristics heuristics = Heuristics.NONE;
+
+    private TransitionHandler transitionHandler = null;
+
+    /**
+     * Creates a document builder that uses the given DOM implementation and
+     * the given XML violation policy.
+     *
+     * @param implementation
+     *            the DOM implementation
+     * @param xmlPolicy the policy
+     */
+    public HtmlDocumentBuilder(DOMImplementation implementation,
+            XmlViolationPolicy xmlPolicy) {
+        this.implementation = implementation;
+        this.treeBuilder = new DOMTreeBuilder(implementation);
+        this.driver = null;
+        setXmlPolicy(xmlPolicy);
+    }
+
+    /**
+     * Creates a document builder that uses the given DOM implementation and
+     * the infoset-altering XML violation policy.
+     *
+     * @param implementation
+     *            the DOM implementation
+     */
+    public HtmlDocumentBuilder(DOMImplementation implementation) {
+        this(implementation, XmlViolationPolicy.ALTER_INFOSET);
+    }
+
+    /**
+     * Creates a document builder that uses the JAXP DOM implementation and
+     * the infoset-altering XML violation policy.
+     */
+    public HtmlDocumentBuilder() {
+        this(XmlViolationPolicy.ALTER_INFOSET);
+    }
+
+    /**
+     * Creates a document builder that uses the JAXP DOM implementation and
+     * the given XML violation policy.
+     * @param xmlPolicy the policy
+     */
+    public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) {
+        this(jaxpDOMImplementation(), xmlPolicy);
+    }
+
+
+    /**
+     * Picks the tokenizer implementation: the error-reporting variant is used
+     * as soon as an error handler or a transition handler is set or non-XML
+     * characters are not simply allowed.
+     */
+    private Tokenizer newTokenizer(TokenHandler handler,
+            boolean newAttributesEachTime) {
+        boolean needsErrorReporting = errorHandler != null
+                || transitionHandler != null
+                || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
+        if (needsErrorReporting) {
+            return new ErrorReportingTokenizer(handler, newAttributesEachTime);
+        }
+        return new Tokenizer(handler, newAttributesEachTime);
+    }
+
+    /**
+     * This class wraps different tree builders depending on configuration. This
+     * method does the work of hiding this from the user of the class.
+     *
+     * Does nothing when a driver already exists; otherwise creates one and
+     * copies the stored configuration onto it and onto the tree builder.
+     */
+    private void lazyInit() {
+        if (driver != null) {
+            return;
+        }
+        driver = new Driver(newTokenizer(treeBuilder, false));
+        driver.setErrorHandler(errorHandler);
+        driver.setTransitionHandler(transitionHandler);
+        treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+        driver.setCheckingNormalization(checkingNormalization);
+        driver.setCommentPolicy(commentPolicy);
+        driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+        driver.setContentSpacePolicy(contentSpacePolicy);
+        driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+        driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+        driver.setXmlnsPolicy(xmlnsPolicy);
+        driver.setHeuristics(heuristics);
+        for (CharacterHandler handler : characterHandlers) {
+            driver.addCharacterHandler(handler);
+        }
+        treeBuilder.setDoctypeExpectation(doctypeExpectation);
+        treeBuilder.setDocumentModeHandler(documentModeHandler);
+        treeBuilder.setScriptingEnabled(scriptingEnabled);
+        treeBuilder.setReportingDoctype(reportingDoctype);
+        treeBuilder.setNamePolicy(namePolicy);
+    }
+
+    /**
+     * Tokenizes the input source.
+     *
+     * When the source carries neither a byte stream nor a character stream,
+     * the entity resolver (if any) is consulted first and the system ID is
+     * opened as a URL as the last resort.
+     *
+     * @param is the source
+     * @throws SAXException if stuff goes wrong
+     * @throws IOException if IO goes wrong
+     * @throws MalformedURLException if the system ID is malformed and the entity resolver is null
+     * @throws IllegalArgumentException if the source is null or has no stream and no system ID
+     */
+    private void tokenize(InputSource is) throws SAXException, IOException,
+            MalformedURLException {
+        if (is == null) {
+            throw new IllegalArgumentException("Null input.");
+        }
+        if (is.getByteStream() == null && is.getCharacterStream() == null) {
+            String systemId = is.getSystemId();
+            if (systemId == null) {
+                throw new IllegalArgumentException(
+                        "No byte stream, no character stream nor URI.");
+            }
+            if (entityResolver != null) {
+                InputSource resolved = entityResolver.resolveEntity(
+                        is.getPublicId(), systemId);
+                // A resolver may return null to request the default
+                // handling; keep the caller's source in that case.
+                if (resolved != null) {
+                    is = resolved;
+                }
+            }
+            // Open the system ID only if there is still nothing to read
+            // from. (With "||" a resolved source was thrown away unless it
+            // had both a byte stream and a character stream.)
+            if (is.getByteStream() == null && is.getCharacterStream() == null) {
+                is = new InputSource();
+                is.setSystemId(systemId);
+                is.setByteStream(new URL(systemId).openStream());
+            }
+        }
+        if (driver == null) {
+            lazyInit();
+        }
+        driver.tokenize(is);
+    }
+
+    /**
+     * Returns the DOM implementation
+     * @return the DOM implementation
+     * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
+     */
+    @Override public DOMImplementation getDOMImplementation() {
+        return implementation;
+    }
+
+    /**
+     * Returns true.
+     * @return true
+     * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
+     */
+    @Override public boolean isNamespaceAware() {
+        return true;
+    }
+
+    /**
+     * Returns false
+     * @return false
+     * @see javax.xml.parsers.DocumentBuilder#isValidating()
+     */
+    @Override public boolean isValidating() {
+        return false;
+    }
+
+    /**
+     * For API compatibility.
+     * @see javax.xml.parsers.DocumentBuilder#newDocument()
+     */
+    @Override public Document newDocument() {
+        return implementation.createDocument(null, null, null);
+    }
+
+    /**
+     * Parses a document from a SAX InputSource.
+     * The tree builder is switched to whole-document mode (no fragment
+     * context) before the input is tokenized.
+     * @param is the source
+     * @return the doc
+     * @throws SAXException if stuff goes wrong
+     * @throws IOException if IO goes wrong
+     * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
+     */
+    @Override public Document parse(InputSource is) throws SAXException,
+            IOException {
+        treeBuilder.setFragmentContext(null);
+        tokenize(is);
+        Document parsed = treeBuilder.getDocument();
+        return parsed;
+    }
+
+    /**
+     * Parses a fragment whose context is an element in the HTML namespace.
+     * @param is the source
+     * @param context the context element name (HTML namespace assumed)
+     * @return the document fragment
+     * @throws SAXException if stuff goes wrong
+     * @throws IOException if IO goes wrong
+     */
+    public DocumentFragment parseFragment(InputSource is, String context)
+            throws IOException, SAXException {
+        String internedContext = context.intern();
+        treeBuilder.setFragmentContext(internedContext);
+        tokenize(is);
+        DocumentFragment parsed = treeBuilder.getDocumentFragment();
+        return parsed;
+    }
+
+    /**
+     * Parses a fragment whose context element is given by local name and
+     * namespace.
+     * @param is the source
+     * @param contextLocal the local name of the context element
+     * @param contextNamespace the namespace of the context element
+     * @return the document fragment
+     * @throws SAXException if stuff goes wrong
+     * @throws IOException if IO goes wrong
+     */
+    public DocumentFragment parseFragment(InputSource is, String contextLocal,
+            String contextNamespace) throws IOException, SAXException {
+        String internedLocal = contextLocal.intern();
+        String internedNamespace = contextNamespace.intern();
+        treeBuilder.setFragmentContext(internedLocal, internedNamespace,
+                null, false);
+        tokenize(is);
+        DocumentFragment parsed = treeBuilder.getDocumentFragment();
+        return parsed;
+    }
+
+    /**
+     * Stores the resolver that is consulted for inputs given only by URI.
+     * @param resolver the resolver
+     * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
+     */
+    @Override public void setEntityResolver(EntityResolver resolver) {
+        entityResolver = resolver;
+    }
+
+    /**
+     * Sets the error handler.
+     * The handler is used for both the tokenizer and the tree builder.
+     * @param errorHandler the handler
+     * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
+     */
+    @Override public void setErrorHandler(ErrorHandler errorHandler) {
+        // Remember the handler: lazyInit() configures the driver and the tree
+        // builder from these fields, so a handler that is only pushed to the
+        // live objects is replaced by null on the next initialization.
+        this.errorHandler = errorHandler;
+        this.treeBuilderErrorHandler = errorHandler;
+        treeBuilder.setErrorHandler(errorHandler);
+        // newTokenizer() picks the tokenizer implementation depending on
+        // whether an error handler is present, so the driver is rebuilt.
+        driver = null;
+    }
+
+    /**
+     * Sets the handler that is notified of tokenizer state transitions. The
+     * driver is discarded so that the next parse builds a new one.
+     * @param handler the transition handler
+     */
+    public void setTransitionHander(TransitionHandler handler) {
+        transitionHandler = handler;
+        driver = null;
+    }
+
+    /**
+     * Indicates whether NFC normalization of source is being checked.
+     * @return true if NFC normalization of source is being checked.
+     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+     */
+    public boolean isCheckingNormalization() {
+        return checkingNormalization;
+    }
+
+    /**
+     * Toggles the checking of the NFC normalization of source.
+     * @param enable true to check normalization
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+     */
+    public void setCheckingNormalization(boolean enable) {
+        this.checkingNormalization = enable;
+        if (driver != null) {
+            driver.setCheckingNormalization(checkingNormalization);
+        }
+    }
+
+    /**
+     * Sets the policy for consecutive hyphens in comments.
+     * @param commentPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+        this.commentPolicy = commentPolicy;
+        if (driver != null) {
+            driver.setCommentPolicy(commentPolicy);
+        }
+    }
+
+    /**
+     * Sets the policy for non-XML characters except white space.
+     * @param contentNonXmlCharPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentNonXmlCharPolicy(
+            XmlViolationPolicy contentNonXmlCharPolicy) {
+        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+        driver = null;
+    }
+
+    /**
+     * Sets the policy for non-XML white space.
+     * @param contentSpacePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+        this.contentSpacePolicy = contentSpacePolicy;
+        if (driver != null) {
+            driver.setContentSpacePolicy(contentSpacePolicy);
+        }
+    }
+
+    /**
+     * Returns whether the parser considers scripting to be enabled for noscript treatment.
+     *
+     * @return true if enabled
+     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+     */
+    public boolean isScriptingEnabled() {
+        return scriptingEnabled;
+    }
+
+    /**
+     * Sets whether the parser considers scripting to be enabled for noscript treatment.
+     * @param scriptingEnabled true to enable
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+     */
+    public void setScriptingEnabled(boolean scriptingEnabled) {
+        this.scriptingEnabled = scriptingEnabled;
+        if (treeBuilder != null) {
+            treeBuilder.setScriptingEnabled(scriptingEnabled);
+        }
+    }
+
+    /**
+     * Returns the doctype expectation.
+     *
+     * @return the doctypeExpectation
+     */
+    public DoctypeExpectation getDoctypeExpectation() {
+        return doctypeExpectation;
+    }
+
+    /**
+     * Sets the doctype expectation and forwards it to the tree builder.
+     *
+     * @param doctypeExpectation
+     *            the doctypeExpectation to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+     */
+    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+        this.doctypeExpectation = doctypeExpectation;
+        if (treeBuilder != null) {
+            treeBuilder.setDoctypeExpectation(doctypeExpectation);
+        }
+    }
+
+    /**
+     * Returns the document mode handler.
+     *
+     * @return the documentModeHandler
+     */
+    public DocumentModeHandler getDocumentModeHandler() {
+        return documentModeHandler;
+    }
+
+    /**
+     * Sets the document mode handler.
+     *
+     * @param documentModeHandler
+     *            the documentModeHandler to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+     */
+    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+        this.documentModeHandler = documentModeHandler; // NOTE(review): not forwarded to treeBuilder here, unlike setDoctypeExpectation - confirm it is applied elsewhere
+    }
+
+    /**
+     * Returns the streamabilityViolationPolicy.
+     *
+     * @return the streamabilityViolationPolicy
+     */
+    public XmlViolationPolicy getStreamabilityViolationPolicy() {
+        return streamabilityViolationPolicy;
+    }
+
+    /**
+     * Sets the streamabilityViolationPolicy.
+     *
+     * @param streamabilityViolationPolicy
+     *            the streamabilityViolationPolicy to set
+     */
+    public void setStreamabilityViolationPolicy(
+            XmlViolationPolicy streamabilityViolationPolicy) {
+        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+        driver = null; // drop the current driver instead of updating it in place
+    }
+
+    /**
+     * Sets whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     * @param html4ModeCompatibleWithXhtml1Schemata true to enable that reporting
+     */
+    public void setHtml4ModeCompatibleWithXhtml1Schemata(
+            boolean html4ModeCompatibleWithXhtml1Schemata) {
+        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+        if (driver != null) {
+            driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+        }
+    }
+
+    /**
+     * Returns the Locator during parse.
+     * @return the Locator, or null while this builder has no driver
+     */
+    public Locator getDocumentLocator() {
+        return driver == null ? null : driver.getDocumentLocator(); // driver is reset to null by several setters
+    }
+
+    /**
+     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     *
+     * @return the html4ModeCompatibleWithXhtml1Schemata
+     */
+    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+        return html4ModeCompatibleWithXhtml1Schemata;
+    }
+
+    /**
+     * Whether lang is mapped to xml:lang.
+     * @param mappingLangToXmlLang true to map lang to xml:lang
+     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+     */
+    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+        this.mappingLangToXmlLang = mappingLangToXmlLang;
+        if (driver != null) {
+            driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+        }
+    }
+
+    /**
+     * Whether lang is mapped to xml:lang.
+     *
+     * @return the mappingLangToXmlLang
+     */
+    public boolean isMappingLangToXmlLang() {
+        return mappingLangToXmlLang;
+    }
+
+    /**
+     * Sets whether the xmlns attribute on the root element is passed through.
+     * FATAL is not allowed and is rejected with an IllegalArgumentException.
+     * @param xmlnsPolicy the policy; must not be XmlViolationPolicy.FATAL
+     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+            throw new IllegalArgumentException("Can't use FATAL here.");
+        }
+        this.xmlnsPolicy = xmlnsPolicy;
+        if (driver != null) {
+            driver.setXmlnsPolicy(xmlnsPolicy);
+        }
+    }
+
+    /**
+     * Returns the xmlnsPolicy.
+     *
+     * @return the xmlnsPolicy
+     */
+    public XmlViolationPolicy getXmlnsPolicy() {
+        return xmlnsPolicy;
+    }
+
+    /**
+     * Returns the commentPolicy.
+     *
+     * @return the commentPolicy
+     */
+    public XmlViolationPolicy getCommentPolicy() {
+        return commentPolicy;
+    }
+
+    /**
+     * Returns the contentNonXmlCharPolicy.
+     *
+     * @return the contentNonXmlCharPolicy
+     */
+    public XmlViolationPolicy getContentNonXmlCharPolicy() {
+        return contentNonXmlCharPolicy;
+    }
+
+    /**
+     * Returns the contentSpacePolicy.
+     *
+     * @return the contentSpacePolicy
+     */
+    public XmlViolationPolicy getContentSpacePolicy() {
+        return contentSpacePolicy;
+    }
+
+    /** Sets the reportingDoctype flag and forwards it to the tree builder.
+     * @param reportingDoctype the new value of the flag
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+     */
+    public void setReportingDoctype(boolean reportingDoctype) {
+        this.reportingDoctype = reportingDoctype;
+        if (treeBuilder != null) {
+            treeBuilder.setReportingDoctype(reportingDoctype);
+        }
+    }
+
+    /**
+     * Returns the reportingDoctype.
+     *
+     * @return the reportingDoctype
+     */
+    public boolean isReportingDoctype() {
+        return reportingDoctype;
+    }
+
+    /**
+     * Sets the policy for non-NCName element and attribute names.
+     * @param namePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setNamePolicy(XmlViolationPolicy namePolicy) {
+        this.namePolicy = namePolicy;
+        if (driver != null) {
+            driver.setNamePolicy(namePolicy);
+            treeBuilder.setNamePolicy(namePolicy); // NOTE(review): only reached when a driver exists - confirm intended
+        }
+    }
+
+    /**
+     * Sets the encoding sniffing heuristics.
+     *
+     * @param heuristics the heuristics to set
+     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+     */
+    public void setHeuristics(Heuristics heuristics) {
+        this.heuristics = heuristics;
+        if (driver != null) {
+            driver.setHeuristics(heuristics);
+        }
+    }
+
+    public Heuristics getHeuristics() { // returns the value last passed to setHeuristics
+        return this.heuristics;
+    }
+
+    /**
+     * This is a catch-all convenience method for setting name, xmlns, content space,
+     * content non-XML char and comment policies in one go. This does not affect the
+     * streamability policy or doctype reporting.
+     *
+     * @param xmlPolicy the policy to apply; for the xmlns policy FATAL is replaced by ALTER_INFOSET
+     */
+    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+        setNamePolicy(xmlPolicy);
+        setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ?
XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
+        setContentSpacePolicy(xmlPolicy);
+        setContentNonXmlCharPolicy(xmlPolicy);
+        setCommentPolicy(xmlPolicy);
+    }
+
+    /**
+     * The policy for non-NCName element and attribute names.
+     *
+     * @return the namePolicy
+     */
+    public XmlViolationPolicy getNamePolicy() {
+        return namePolicy;
+    }
+
+    /**
+     * Does nothing.
+     * @deprecated this setting no longer has any effect
+     */
+    public void setBogusXmlnsPolicy(
+            XmlViolationPolicy bogusXmlnsPolicy) {
+    }
+
+    /**
+     * Returns XmlViolationPolicy.ALTER_INFOSET.
+     * @deprecated the value is fixed; setBogusXmlnsPolicy does nothing
+     * @return XmlViolationPolicy.ALTER_INFOSET
+     */
+    public XmlViolationPolicy getBogusXmlnsPolicy() {
+        return XmlViolationPolicy.ALTER_INFOSET;
+    }
+
+    public void addCharacterHandler(CharacterHandler characterHandler) {
+        this.characterHandlers.add(characterHandler); // remembered here as well as handed to a live driver
+        if (driver != null) {
+            driver.addCharacterHandler(characterHandler);
+        }
+    }
+
+
+    /**
+     * Sets whether comment nodes appear in the tree.
+     * @param ignoreComments true to ignore comments
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+     */
+    public void setIgnoringComments(boolean ignoreComments) {
+        treeBuilder.setIgnoringComments(ignoreComments); // forwarded only; no copy of the flag is kept in this class
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
new file mode 100644
index 0000000000..d793bcf867
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html
@@ -0,0 +1,29 @@
+
+
+Package Overview
+
+
+
+

This package provides an HTML5 parser that exposes the document using the DOM API.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java new file mode 100644 index 0000000000..a757503988 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.io.Encoding;
+
+import org.mozilla.intl.chardet.nsDetector;
+import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
+import org.mozilla.intl.chardet.nsPSMDetector;
+
+import com.ibm.icu.text.CharsetDetector;
+
+public class ChardetSniffer implements nsICharsetDetectionObserver {
+
+    private final byte[] source;
+
+    private final int length;
+
+    private Encoding returnValue = null;
+
+    /**
+     * @param source the buffer to sniff; length is the byte count handed to the detector
+     */
+    public ChardetSniffer(final byte[] source, final int length) {
+        this.source = source;
+        this.length = length;
+    }
+
+    public Encoding sniff() throws IOException { // null unless an ASCII superset other than windows-1252 was notified
+        nsDetector detector = new nsDetector(nsPSMDetector.ALL);
+        detector.Init(this); // this object receives the result through Notify()
+        detector.DoIt(source, length, false);
+        detector.DataEnd();
+        if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) {
+            return returnValue;
+        } else {
+            return null;
+        }
+    }
+
+    public static void main(String[] args) { // prints the charsets ICU's CharsetDetector can detect
+        String[] detectable = CharsetDetector.getAllDetectableCharsets();
+        for (int i = 0; i < detectable.length; i++) {
+            String charset = detectable[i];
+            System.out.println(charset);
+        }
+    }
+
+    public void Notify(String charsetName) { // records the named encoding, or null if it is unsupported
+        try {
+            Encoding enc = Encoding.forName(charsetName);
+            Encoding actual = enc.getActualHtmlEncoding();
+            if (actual != null) {
+                enc = actual;
+            }
+            returnValue = enc;
+        } catch (UnsupportedCharsetException e) {
+            returnValue = null;
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
new file mode 100644
index 0000000000..f3caab5c44
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ *
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+package nu.validator.htmlparser.extra;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.io.Encoding;
+
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
+public class IcuDetectorSniffer extends InputStream {
+
+    private final ByteReadable source;
+
+    /**
+     * @param source the byte source that read() pulls from
+     */
+    public IcuDetectorSniffer(final ByteReadable source) {
+        this.source = source;
+    }
+
+    @Override
+    public int read() throws IOException {
+        return source.readByte();
+    }
+
+    public Encoding sniff() throws IOException { // null unless an ASCII superset other than windows-1252 was detected
+        try {
+            CharsetDetector detector = new CharsetDetector();
+            detector.setText(this); // NOTE(review): ICU is believed to need mark/reset on this stream; neither is overridden here - verify
+            CharsetMatch match = detector.detect();
+            Encoding enc = Encoding.forName(match.getName());
+            Encoding actual = enc.getActualHtmlEncoding();
+            if (actual != null) {
+                enc = actual;
+            }
+            if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
+                return enc;
+            } else {
+                return null;
+            }
+        } catch (Exception e) { // best effort: any failure is reported as "nothing sniffed"
+            return null;
+        }
+    }
+
+    public static void main(String[] args) { // prints the charsets ICU's CharsetDetector can detect
+        String[] detectable = CharsetDetector.getAllDetectableCharsets();
+        for (int i = 0; i < detectable.length; i++) {
+            String charset = detectable[i];
+            System.out.println(charset);
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
new file mode 100644
index 0000000000..45df62fb70
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2006, 2007 Henri Sivonen
+ * Copyright (c) 2007 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ *
the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.extra;
+
+import nu.validator.htmlparser.common.CharacterHandler;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.UnicodeSet;
+
+/** Checks that the text passed to characters() is in NFC and reports problems to an ErrorHandler.
+ * @version $Id$
+ * @author hsivonen
+ */
+public final class NormalizationChecker implements CharacterHandler {
+
+    private ErrorHandler errorHandler;
+
+    private Locator locator;
+
+    /**
+     * A thread-safe set of composing characters as per Charmod Norm.
+     */
+    @SuppressWarnings("deprecation")
+    private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet(
+            "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze();
+
+    // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908
+
+    /**
+     * A buffer for holding sequences that overlap the SAX buffer boundary.
+     */
+    private char[] buf = new char[128];
+
+    /**
+     * A holder for the original buffer (for the memory leak prevention
+     * mechanism).
+     */
+    private char[] bufHolder = null;
+
+    /**
+     * The current used length of the buffer, i.e. the index of the first slot
+     * that does not hold current data.
+     */
+    private int pos;
+
+    /**
+     * Indicates whether the next call to characters()
+     * is the first call in a run.
+     */
+    private boolean atStartOfRun;
+
+    /**
+     * Indicates whether the current run has already caused an error.
+     */
+    private boolean alreadyComplainedAboutThisRun;
+
+    /**
+     * Reports an error to the error handler, if one is set. The locator field is attached.
+     *
+     * @param message the error message
+     * @throws SAXException if the error handler throws
+     */
+    public void err(String message) throws SAXException {
+        if (errorHandler != null) {
+            SAXParseException spe = new SAXParseException(message, locator);
+            errorHandler.error(spe);
+        }
+    }
+
+    /**
+     * Returns true if the argument is a composing BMP character
+     * or a surrogate and false otherwise.
+     *
+     * @param c a UTF-16 code unit
+     * @return true if the argument is a composing BMP character
+     * or a surrogate and false otherwise
+     */
+    private static boolean isComposingCharOrSurrogate(char c) {
+        if (UCharacter.isHighSurrogate(c) || UCharacter.isLowSurrogate(c)) {
+            return true;
+        }
+        return isComposingChar(c);
+    }
+
+    /**
+     * Returns true if the argument is a composing character
+     * and false otherwise.
+     *
+     * @param c a Unicode code point
+     * @return true if the argument is a composing character,
+     * false otherwise
+     */
+    private static boolean isComposingChar(int c) {
+        return COMPOSING_CHARACTERS.contains(c);
+    }
+
+    /**
+     * Constructs a checker whose state is reset as by start().
+     *
+     * @param locator a locator for the source being checked;
+     * meant for positioning the errors that err() reports
+     */
+    public NormalizationChecker(Locator locator) {
+        this.locator = locator; // kept so that err() can attach a position to reported errors
+        start();
+    }
+
+    /**
+     * @see nu.validator.htmlparser.common.CharacterHandler#start()
+     */
+    public void start() {
+        atStartOfRun = true;
+        alreadyComplainedAboutThisRun = false;
+        pos = 0;
+    }
+
+    /**
+     * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int)
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        if (alreadyComplainedAboutThisRun || length == 0) {
+            return; // one error per run is enough, and an empty slice has no ch[start] to inspect
+        }
+        if (atStartOfRun) {
+            char c = ch[start];
+            if (pos == 1) {
+                // there's a single high surrogate in buf
+                if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) {
+                    err("Text run starts with a composing character.");
+                }
+                atStartOfRun = false;
+            } else {
+                if (length == 1 && UCharacter.isHighSurrogate(c)) {
+                    buf[0] = c; // park the lone high surrogate until the next call supplies its pair
+                    pos = 1;
+                    return;
+                } else {
+                    if (UCharacter.isHighSurrogate(c)) {
+                        if (isComposingChar(UCharacter.getCodePoint(c,
+                                ch[start + 1]))) {
+                            err("Text run starts with a composing character.");
+                        }
+                    } else {
+                        if (isComposingCharOrSurrogate(c)) {
+                            err("Text run starts with a composing character.");
+                        }
+                    }
+                    atStartOfRun = false;
+                }
+            }
+        }
+        int i = start;
+        int stop = start + length;
+        if (pos > 0) {
+            // there's stuff in buf
+            while (i < stop && isComposingCharOrSurrogate(ch[i])) {
+                i++;
+            }
+            appendToBuf(ch, start, i);
+            if (i == stop) {
+                return; // the whole slice was buffered; check it once more data (or end()) arrives
+            } else {
+                if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+                    errAboutTextRun();
+                }
+                pos = 0;
+            }
+        }
+        if (i < stop) {
+            start = i;
+            i = stop - 1;
+            while (i > start && isComposingCharOrSurrogate(ch[i])) {
+                i--; // back up over trailing composing characters and surrogates
+            }
+            if (i > start) {
+                if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) {
+                    errAboutTextRun();
+                }
+            }
+            appendToBuf(ch, i, stop); // keep the unchecked tail for the next call or end()
+        }
+    }
+
+    /**
+     * Emits an error stating that the current text run or the source
+     * text is not in NFC.
+     *
+     * @throws SAXException if the ErrorHandler throws
+     */
+    private void errAboutTextRun() throws SAXException {
+        err("Source text is not in Unicode Normalization Form C.");
+        alreadyComplainedAboutThisRun = true; // makes characters() and end() skip further checks in this run
+    }
+
+    /**
+     * Appends a slice of a UTF-16 code unit array to the internal
+     * buffer, growing the buffer when the slice does not fit.
+     *
+     * @param ch the array from which to copy
+     * @param start the index of the first element that is copied
+     * @param end the index of the first element that is not copied
+     */
+    private void appendToBuf(char[] ch, int start, int end) {
+        if (start == end) {
+            return;
+        }
+        int neededBufLen = pos + (end - start);
+        if (neededBufLen > buf.length) {
+            char[] newBuf = new char[neededBufLen]; // grows to exactly the size needed
+            System.arraycopy(buf, 0, newBuf, 0, pos);
+            if (bufHolder == null) {
+                bufHolder = buf; // keep the original around
+            }
+            buf = newBuf;
+        }
+        System.arraycopy(ch, start, buf, pos, end - start);
+        pos += (end - start);
+    }
+
+    /**
+     * @see nu.validator.htmlparser.common.CharacterHandler#end()
+     */
+    public void end() throws SAXException {
+        if (!alreadyComplainedAboutThisRun
+                && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
+            errAboutTextRun();
+        }
+        if (bufHolder != null) {
+            // restore the original small buffer to avoid leaking
+            // memory if this checker is recycled
+            buf = bufHolder;
+            bufHolder = null;
+        }
+    }
+
+    public void setErrorHandler(ErrorHandler errorHandler) { // err() reports nothing while this is null
+        this.errorHandler = errorHandler;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
new file mode 100644
index 0000000000..7b889e71e7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java
@@ -0,0 +1,2473 @@
+/*
+ * Copyright (c) 2008-2011 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import java.util.Arrays; + +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.annotation.Prefix; +import nu.validator.htmlparser.annotation.QName; +import nu.validator.htmlparser.annotation.Virtual; +import nu.validator.htmlparser.common.Interner; + +public final class AttributeName +// Uncomment to regenerate +// implements Comparable +{ + // [NOCPP[ + + public static final int NCNAME_HTML = 1; + + public static final int NCNAME_FOREIGN = (1 << 1) | (1 << 2); + + public static final int NCNAME_LANG = (1 << 3); + + public static final int IS_XMLNS = (1 << 4); + + public static final int CASE_FOLDED = (1 << 5); + + public static final int BOOLEAN = (1 << 6); + + // ]NOCPP] + + /** + * An array representing no namespace regardless of namespace mode (HTML, + * SVG, MathML, lang-mapping HTML) used. 
+     */
+    static final @NoLength @NsUri String[] ALL_NO_NS = { "", "", "",
+            // [NOCPP[
+            ""
+    // ]NOCPP]
+    };
+
+    /**
+     * An array that has no namespace for the HTML mode but the XMLNS namespace
+     * for the SVG and MathML modes.
+     */
+    private static final @NoLength @NsUri String[] XMLNS_NS = { "",
+            "http://www.w3.org/2000/xmlns/", "http://www.w3.org/2000/xmlns/",
+            // [NOCPP[
+            ""
+    // ]NOCPP]
+    };
+
+    /**
+     * An array that has no namespace for the HTML mode but the XML namespace
+     * for the SVG and MathML modes.
+     */
+    private static final @NoLength @NsUri String[] XML_NS = { "",
+            "http://www.w3.org/XML/1998/namespace",
+            "http://www.w3.org/XML/1998/namespace",
+            // [NOCPP[
+            ""
+    // ]NOCPP]
+    };
+
+    /**
+     * An array that has no namespace for the HTML mode but the XLink namespace
+     * for the SVG and MathML modes.
+     */
+    private static final @NoLength @NsUri String[] XLINK_NS = { "",
+            "http://www.w3.org/1999/xlink", "http://www.w3.org/1999/xlink",
+            // [NOCPP[
+            ""
+    // ]NOCPP]
+    };
+
+    // [NOCPP[
+    /**
+     * An array that has no namespace for the HTML, SVG and MathML modes but has
+     * the XML namespace for the lang-mapping HTML mode.
+     */
+    private static final @NoLength @NsUri String[] LANG_NS = { "", "", "",
+            "http://www.w3.org/XML/1998/namespace" };
+
+    // ]NOCPP]
+
+    /**
+     * An array for no prefixes in any mode.
+     */
+    static final @NoLength @Prefix String[] ALL_NO_PREFIX = { null, null, null,
+            // [NOCPP[
+            null
+    // ]NOCPP]
+    };
+
+    /**
+     * An array for no prefix in the HTML mode and the xmlns
+     * prefix in the SVG and MathML modes.
+     */
+    private static final @NoLength @Prefix String[] XMLNS_PREFIX = { null,
+            "xmlns", "xmlns",
+            // [NOCPP[
+            null
+    // ]NOCPP]
+    };
+
+    /**
+     * An array for no prefix in the HTML mode and the xlink
+     * prefix in the SVG and MathML modes.
+     */
+    private static final @NoLength @Prefix String[] XLINK_PREFIX = { null,
+            "xlink", "xlink",
+            // [NOCPP[
+            null
+    // ]NOCPP]
+    };
+
+    /**
+     * An array for no prefix in the HTML mode and the xml prefix
+     * in the SVG and MathML modes.
+     */
+    private static final @NoLength @Prefix String[] XML_PREFIX = { null, "xml",
+            "xml",
+            // [NOCPP[
+            null
+    // ]NOCPP]
+    };
+
+    // [NOCPP[
+
+    private static final @NoLength @Prefix String[] LANG_PREFIX = { null, null,
+            null, "xml" };
+
+    private static @QName String[] COMPUTE_QNAME(String[] local, String[] prefix) { // one qname per mode
+        @QName String[] arr = new String[4];
+        for (int i = 0; i < arr.length; i++) {
+            if (prefix[i] == null) {
+                arr[i] = local[i]; // no prefix: the qname is the local name itself
+            } else {
+                arr[i] = (prefix[i] + ':' + local[i]).intern();
+            }
+        }
+        return arr;
+    }
+
+    // ]NOCPP]
+
+    /**
+     * An initialization helper for having one name in the SVG mode and
+     * another name in the other modes.
+     *
+     * @param name
+     *            the name for the non-SVG modes
+     * @param camel
+     *            the name for the SVG mode
+     * @return the initialized name array
+     */
+    private static @NoLength @Local String[] SVG_DIFFERENT(@Local String name,
+            @Local String camel) {
+        @NoLength @Local String[] arr = new String[4];
+        arr[0] = name; // HTML
+        arr[1] = name; // MATHML
+        arr[2] = camel; // SVG
+        // [NOCPP[
+        arr[3] = name; // HTML_LANG
+        // ]NOCPP]
+        return arr;
+    }
+
+    /**
+     * An initialization helper for having one name in the MathML mode and
+     * another name in the other modes.
+     *
+     * @param name
+     *            the name for the non-MathML modes
+     * @param camel
+     *            the name for the MathML mode
+     * @return the initialized name array
+     */
+    private static @NoLength @Local String[] MATH_DIFFERENT(@Local String name,
+            @Local String camel) {
+        @NoLength @Local String[] arr = new String[4];
+        arr[0] = name; // HTML
+        arr[1] = camel; // MATHML
+        arr[2] = name; // SVG
+        // [NOCPP[
+        arr[3] = name; // HTML_LANG
+        // ]NOCPP]
+        return arr;
+    }
+
+    /**
+     * An initialization helper for having a different local name in the HTML
+     * mode and the SVG and MathML modes.
+     *
+     * @param name
+     *            the name for the HTML mode
+     * @param suffix
+     *            the name for the SVG and MathML modes
+     * @return the initialized name array
+     */
+    private static @NoLength @Local String[] COLONIFIED_LOCAL(
+            @Local String name, @Local String suffix) {
+        @NoLength @Local String[] arr = new String[4];
+        arr[0] = name; // HTML
+        arr[1] = suffix; // MATHML
+        arr[2] = suffix; // SVG
+        // [NOCPP[
+        arr[3] = name; // HTML_LANG
+        // ]NOCPP]
+        return arr;
+    }
+
+    /**
+     * An initialization helper for having the same local name in all modes.
+     *
+     * @param name
+     *            the name
+     * @return the initialized name array
+     */
+    static @NoLength @Local String[] SAME_LOCAL(@Local String name) {
+        @NoLength @Local String[] arr = new String[4];
+        arr[0] = name; // HTML
+        arr[1] = name; // MATHML
+        arr[2] = name; // SVG
+        // [NOCPP[
+        arr[3] = name; // HTML_LANG
+        // ]NOCPP]
+        return arr;
+    }
+
+    /**
+     * Returns an attribute name by buffer.
+     *
+     *

+     * C++ ownership: The return value is either released by the caller if the
+     * attribute is a duplicate or the ownership is transferred to
+     * HtmlAttributes and released upon clearing or destroying that object.
+     *
+     * @param buf
+     *            the buffer
+     * @param offset
+     *            offset of the name in the buffer; not used when hashing (see the XXX note)
+     * @param length
+     *            length of data
+     * @param checkNcName
+     *            whether to check ncnameness (interner: passed on when a new local name is made)
+     * @return an AttributeName corresponding to the argument data
+     */
+    static AttributeName nameByBuffer(@NoLength char[] buf, int offset,
+            int length
+            // [NOCPP[
+            , boolean checkNcName
+            // ]NOCPP]
+            , Interner interner) {
+        // XXX deal with offset
+        int hash = AttributeName.bufToHash(buf, length);
+        int index = Arrays.binarySearch(AttributeName.ATTRIBUTE_HASHES, hash);
+        if (index < 0) {
+            return AttributeName.createAttributeName(
+                    Portability.newLocalNameFromBuffer(buf, offset, length,
+                            interner)
+                    // [NOCPP[
+                    , checkNcName
+            // ]NOCPP]
+            );
+        } else {
+            AttributeName attributeName = AttributeName.ATTRIBUTE_NAMES[index];
+            @Local String name = attributeName.getLocal(AttributeName.HTML);
+            if (!Portability.localEqualsBuffer(name, buf, offset, length)) { // same hash, different name
+                return AttributeName.createAttributeName(
+                        Portability.newLocalNameFromBuffer(buf, offset, length,
+                                interner)
+                        // [NOCPP[
+                        , checkNcName
+                // ]NOCPP]
+                );
+            }
+            return attributeName;
+        }
+    }
+
+    /**
+     * This method has to return a unique integer for each well-known
+     * lower-cased attribute name.
+     *
+     * @param buf the buffer holding the name; it is read from index 0
+     * @param len the length of the name
+     * @return a hash of the length and of up to four characters from each end of the name
+     */
+    private static int bufToHash(@NoLength char[] buf, int len) {
+        int hash2 = 0;
+        int hash = len;
+        hash <<= 5;
+        hash += buf[0] - 0x60;
+        int j = len;
+        for (int i = 0; i < 4 && j > 0; i++) {
+            j--;
+            hash <<= 5;
+            hash += buf[j] - 0x60; // characters taken from the end, backwards
+            hash2 <<= 6;
+            hash2 += buf[i] - 0x5F; // characters taken from the start, forwards
+        }
+        return hash ^ hash2;
+    }
+
+    /**
+     * The mode value for HTML.
+     */
+    public static final int HTML = 0;
+
+    /**
+     * The mode value for MathML.
+     */
+    public static final int MATHML = 1;
+
+    /**
+     * The mode value for SVG.
+     */
+    public static final int SVG = 2;
+
+    // [NOCPP[
+
+    /**
+     * The mode value for lang-mapping HTML.
+     */
+    public static final int HTML_LANG = 3;
+
+    // ]NOCPP]
+
+    /**
+     * The namespaces indexable by mode.
+     */
+    private final @NsUri @NoLength String[] uri;
+
+    /**
+     * The local names indexable by mode.
+     */
+    private final @Local @NoLength String[] local;
+
+    /**
+     * The prefixes indexable by mode.
+     */
+    private final @Prefix @NoLength String[] prefix;
+
+    // [NOCPP[
+
+    private final int flags;
+
+    /**
+     * The qnames indexable by mode.
+     */
+    private final @QName @NoLength String[] qName;
+
+    // ]NOCPP]
+
+    /**
+     * The startup-time constructor.
+     *
+     * @param uri
+     *            the namespace
+     * @param local
+     *            the local name
+     * @param prefix
+     *            the prefix
+     * @param flags
+     *            a bitwise OR of the flag constants of this class
+     *            (NCNAME_HTML, NCNAME_FOREIGN, NCNAME_LANG, IS_XMLNS,
+     *            CASE_FOLDED, BOOLEAN); Java only
+     */
+    protected AttributeName(@NsUri @NoLength String[] uri,
+            @Local @NoLength String[] local, @Prefix @NoLength String[] prefix
+            // [NOCPP[
+            , int flags
+    // ]NOCPP]
+    ) {
+        this.uri = uri;
+        this.local = local;
+        this.prefix = prefix;
+        // [NOCPP[
+        this.qName = COMPUTE_QNAME(local, prefix);
+        this.flags = flags;
+        // ]NOCPP]
+    }
+
+    /**
+     * Creates an AttributeName for a local name.
+     *
+     * @param name
+     *            the name
+     * @param checkNcName
+     *            whether to check ncnameness
+     * @return an AttributeName
+     */
+    private static AttributeName createAttributeName(@Local String name
+    // [NOCPP[
+            , boolean checkNcName
+    // ]NOCPP]
+    ) {
+        // [NOCPP[
+        int flags = NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG;
+        if (name.startsWith("xmlns:")) {
+            flags = IS_XMLNS; // replaces the NCName bits rather than adding to them
+        } else if (checkNcName && !NCName.isNCName(name)) {
+            flags = 0;
+        }
+        // ]NOCPP]
+        return new AttributeName(AttributeName.ALL_NO_NS,
+                AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, flags);
+    }
+
+    /**
+     * Deletes runtime-allocated instances in C++.
+ */ + @Virtual void release() { + // No-op in Java. + // Implement as |delete this;| in subclass. + } + + /** + * The C++ destructor. Passes the local array to Portability.deleteArray. + * The method is private; NOTE(review): presumably it is never called from + * the Java side, hence the SuppressWarnings annotation -- confirm. + */ + @SuppressWarnings("unused") @Virtual private void destructor() { + Portability.deleteArray(local); + } + + /** + * Clones the attribute using an interner. Returns this in Java + * and for non-dynamic instances in C++. + * + * @param interner + * an interner; not used by this Java implementation + * @return this instance (in Java) + */ + @Virtual public AttributeName cloneAttributeName(Interner interner) { + return this; + } + + // [NOCPP[ + /** + * Creator for use when the XML violation policy requires an attribute name + * to be changed. + * + * @param name + * the name of the attribute to create + * @return a new AttributeName built from ALL_NO_NS, SAME_LOCAL(name) and + * ALL_NO_PREFIX with NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG + * as its flags + */ + static AttributeName create(@Local String name) { + return new AttributeName(AttributeName.ALL_NO_NS, + AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, + NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + } + + /** + * Queries whether this name is an XML 1.0 4th ed. NCName. + * + * Tests bit (1 << mode) of the flags. NOTE(review): this assumes the + * NCNAME_* constants line up with the mode values as bit positions -- + * confirm against their declarations. + * + * @param mode + * the SVG/MathML/HTML mode + * @return true if this is an NCName in the given mode + */ + public boolean isNcName(int mode) { + return (flags & (1 << mode)) != 0; + } + + /** + * Queries whether this is an xmlns attribute. + * + * @return true if this is an xmlns attribute + */ + public boolean isXmlns() { + return (flags & IS_XMLNS) != 0; + } + + /** + * Queries whether this attribute has a case-folded value in the HTML4 mode + * of the parser.
+ * + * @return true if the value is case-folded + */ + boolean isCaseFolded() { + return (flags & CASE_FOLDED) != 0; + } + + /** + * Queries whether the BOOLEAN flag is set for this attribute. + * + * @return true if the BOOLEAN bit is set in the flags + */ + boolean isBoolean() { + return (flags & BOOLEAN) != 0; + } + + /** + * Returns the qname for the given mode. + * + * @param mode + * the mode value, used as an index into the qName array + * @return the qname stored for that mode + */ + public @QName String getQName(int mode) { + return qName[mode]; + } + + // ]NOCPP] + + /** + * Returns the namespace URI for the given mode. + * + * @param mode + * the mode value, used as an index into the uri array + * @return the namespace URI stored for that mode + */ + public @NsUri String getUri(int mode) { + return uri[mode]; + } + + /** + * Returns the local name for the given mode. + * + * @param mode + * the mode value, used as an index into the local array + * @return the local name stored for that mode + */ + public @Local String getLocal(int mode) { + return local[mode]; + } + + /** + * Returns the prefix for the given mode. + * + * @param mode + * the mode value, used as an index into the prefix array + * @return the prefix stored for that mode + */ + public @Prefix String getPrefix(int mode) { + return prefix[mode]; + } + + /** + * Compares this name with another by the identity (==) of their HTML-mode + * local names. NOTE(review): this presumably relies on local names being + * interned so that identity implies equality -- confirm. + * + * @param another + * the AttributeName to compare with + * @return true if both HTML-mode local names are the same String object + */ + boolean equalsAnother(AttributeName another) { + return this.getLocal(AttributeName.HTML) == another.getLocal(AttributeName.HTML); + } + + // START CODE ONLY USED FOR GENERATING CODE uncomment to regenerate + +// /** +// * @see java.lang.Object#toString() +// */ +// @Override public String toString() { +// return "(" + formatNs() + ", " + formatLocal() + ", " + formatPrefix() +// + ", " + formatFlags() + ")"; +// } +// +// private String formatFlags() { +// StringBuilder builder = new StringBuilder(); +// if ((flags & NCNAME_HTML) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_HTML"); +// } +// if ((flags & NCNAME_FOREIGN) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_FOREIGN"); +// } +// if ((flags & NCNAME_LANG) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_LANG"); +// } +// if (isXmlns()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("IS_XMLNS"); +// } +// if (isCaseFolded()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("CASE_FOLDED"); +// } +// if (isBoolean()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("BOOLEAN"); +// } +// if (builder.length() == 0) { +// return "0"; +// } +// return builder.toString(); +// } +// +// public int compareTo(AttributeName other) { +// int thisHash = this.hash();
+// int otherHash = other.hash(); +// if (thisHash < otherHash) { +// return -1; +// } else if (thisHash == otherHash) { +// return 0; +// } else { +// return 1; +// } +// } +// +// private String formatPrefix() { +// if (prefix[0] == null && prefix[1] == null && prefix[2] == null +// && prefix[3] == null) { +// return "ALL_NO_PREFIX"; +// } else if (prefix[0] == null && prefix[1] == prefix[2] +// && prefix[3] == null) { +// if ("xmlns".equals(prefix[1])) { +// return "XMLNS_PREFIX"; +// } else if ("xml".equals(prefix[1])) { +// return "XML_PREFIX"; +// } else if ("xlink".equals(prefix[1])) { +// return "XLINK_PREFIX"; +// } else { +// throw new IllegalStateException(); +// } +// } else if (prefix[0] == null && prefix[1] == null && prefix[2] == null +// && prefix[3] == "xml") { +// return "LANG_PREFIX"; +// } else { +// throw new IllegalStateException(); +// } +// } +// +// private String formatLocal() { +// if (local[0] == local[1] && local[0] == local[3] +// && local[0] != local[2]) { +// return "SVG_DIFFERENT(\"" + local[0] + "\", \"" + local[2] + "\")"; +// } +// if (local[0] == local[2] && local[0] == local[3] +// && local[0] != local[1]) { +// return "MATH_DIFFERENT(\"" + local[0] + "\", \"" + local[1] + "\")"; +// } +// if (local[0] == local[3] && local[1] == local[2] +// && local[0] != local[1]) { +// return "COLONIFIED_LOCAL(\"" + local[0] + "\", \"" + local[1] +// + "\")"; +// } +// for (int i = 1; i < local.length; i++) { +// if (local[0] != local[i]) { +// throw new IllegalStateException(); +// } +// } +// return "SAME_LOCAL(\"" + local[0] + "\")"; +// } +// +// private String formatNs() { +// if (uri[0] == "" && uri[1] == "" && uri[2] == "" && uri[3] == "") { +// return "ALL_NO_NS"; +// } else if (uri[0] == "" && uri[1] == uri[2] && uri[3] == "") { +// if ("http://www.w3.org/2000/xmlns/".equals(uri[1])) { +// return "XMLNS_NS"; +// } else if ("http://www.w3.org/XML/1998/namespace".equals(uri[1])) { +// return "XML_NS"; +// } else if 
("http://www.w3.org/1999/xlink".equals(uri[1])) { +// return "XLINK_NS"; +// } else { +// throw new IllegalStateException(); +// } +// } else if (uri[0] == "" && uri[1] == "" && uri[2] == "" +// && uri[3] == "http://www.w3.org/XML/1998/namespace") { +// return "LANG_NS"; +// } else { +// throw new IllegalStateException(); +// } +// } +// +// private String constName() { +// String name = getLocal(HTML); +// char[] buf = new char[name.length()]; +// for (int i = 0; i < name.length(); i++) { +// char c = name.charAt(i); +// if (c == '-' || c == ':') { +// buf[i] = '_'; +// } else if (c >= 'a' && c <= 'z') { +// buf[i] = (char) (c - 0x20); +// } else { +// buf[i] = c; +// } +// } +// return new String(buf); +// } +// +// private int hash() { +// String name = getLocal(HTML); +// return bufToHash(name.toCharArray(), name.length()); +// } +// +// /** +// * Regenerate self +// * +// * @param args +// */ +// public static void main(String[] args) { +// Arrays.sort(ATTRIBUTE_NAMES); +// for (int i = 1; i < ATTRIBUTE_NAMES.length; i++) { +// if (ATTRIBUTE_NAMES[i].hash() == ATTRIBUTE_NAMES[i - 1].hash()) { +// System.err.println("Hash collision: " +// + ATTRIBUTE_NAMES[i].getLocal(HTML) + ", " +// + ATTRIBUTE_NAMES[i - 1].getLocal(HTML)); +// return; +// } +// } +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// System.out.println("public static final AttributeName " +// + att.constName() + " = new AttributeName" + att.toString() +// + ";"); +// } +// System.out.println("private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {"); +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// System.out.println(att.constName() + ","); +// } +// System.out.println("};"); +// System.out.println("private final static int[] ATTRIBUTE_HASHES = {"); +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// 
System.out.println(Integer.toString(att.hash()) + ","); +// } +// System.out.println("};"); +// } + + // START GENERATED CODE + public static final AttributeName D = new AttributeName(ALL_NO_NS, SAME_LOCAL("d"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K = new AttributeName(ALL_NO_NS, SAME_LOCAL("k"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName R = new AttributeName(ALL_NO_NS, SAME_LOCAL("r"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X = new AttributeName(ALL_NO_NS, SAME_LOCAL("x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Z = new AttributeName(ALL_NO_NS, SAME_LOCAL("z"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BY = new AttributeName(ALL_NO_NS, SAME_LOCAL("by"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CX = new AttributeName(ALL_NO_NS, SAME_LOCAL("cx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CY = new AttributeName(ALL_NO_NS, SAME_LOCAL("cy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DX = new AttributeName(ALL_NO_NS, SAME_LOCAL("dx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DY = new AttributeName(ALL_NO_NS, SAME_LOCAL("dy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName G2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName G1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g1"), ALL_NO_PREFIX, 
NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FX = new AttributeName(ALL_NO_NS, SAME_LOCAL("fx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K4 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k4"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K3 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k3"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ID = new AttributeName(ALL_NO_NS, SAME_LOCAL("id"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IN = new AttributeName(ALL_NO_NS, SAME_LOCAL("in"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName U2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName U1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RX = new AttributeName(ALL_NO_NS, SAME_LOCAL("rx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RY = new AttributeName(ALL_NO_NS, SAME_LOCAL("ry"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName TO = new AttributeName(ALL_NO_NS, SAME_LOCAL("to"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dir"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName DUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName END = new AttributeName(ALL_NO_NS, SAME_LOCAL("end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("for"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IN2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("in2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOW = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("low"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REL = new AttributeName(ALL_NO_NS, SAME_LOCAL("rel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REV = new AttributeName(ALL_NO_NS, SAME_LOCAL("rev"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("src"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AXIS = new AttributeName(ALL_NO_NS, SAME_LOCAL("axis"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ABBR = new AttributeName(ALL_NO_NS, SAME_LOCAL("abbr"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("bbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CITE = new AttributeName(ALL_NO_NS, SAME_LOCAL("cite"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("code"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BIAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("bias"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("cols"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("char"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASE = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("base"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EDGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("edge"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATA = new AttributeName(ALL_NO_NS, SAME_LOCAL("data"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILL = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FROM = new AttributeName(ALL_NO_NS, SAME_LOCAL("from"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("face"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIGH = new AttributeName(ALL_NO_NS, SAME_LOCAL("high"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("href"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("open"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ICON = new AttributeName(ALL_NO_NS, SAME_LOCAL("icon"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASK = new AttributeName(ALL_NO_NS, SAME_LOCAL("mask"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("link"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LANG = new AttributeName(LANG_NS, SAME_LOCAL("lang"), LANG_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("loop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIST = new AttributeName(ALL_NO_NS, SAME_LOCAL("list"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("type"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName WHEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("when"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("wrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("text"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PING = new AttributeName(ALL_NO_NS, SAME_LOCAL("ping"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refx", "refX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refy", "refY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SIZE = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("seed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("rows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("span"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STEP = new AttributeName(ALL_NO_NS, SAME_LOCAL("step"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ROLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("role"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("xref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ASYNC = new AttributeName(ALL_NO_NS, SAME_LOCAL("async"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("alink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("align"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName CLOSE = new AttributeName(ALL_NO_NS, SAME_LOCAL("close"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLASS = new AttributeName(ALL_NO_NS, SAME_LOCAL("class"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName CLEAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("clear"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName BEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("begin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEPTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("depth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFER = new AttributeName(ALL_NO_NS, SAME_LOCAL("defer"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName FENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("frame"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ISMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ismap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ONEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("index"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("order"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OTHER = new AttributeName(ALL_NO_NS, SAME_LOCAL("other"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NARGS = new AttributeName(ALL_NO_NS, SAME_LOCAL("nargs"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MEDIA = new AttributeName(ALL_NO_NS, SAME_LOCAL("media"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LABEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("label"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOCAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("local"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TITLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("title"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VLINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("vlink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUE = new AttributeName(ALL_NO_NS, SAME_LOCAL("value"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SLOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("slope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SHAPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName SCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName SCALE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("speed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STYLE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RULES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rules"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName STEMH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemh"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SIZES = new AttributeName(ALL_NO_NS, SAME_LOCAL("sizes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STEMV = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName START = new AttributeName(ALL_NO_NS, SAME_LOCAL("start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XMLNS = new AttributeName(XMLNS_NS, SAME_LOCAL("xmlns"), ALL_NO_PREFIX, IS_XMLNS); + public static final AttributeName ACCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ASCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("ascent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("active"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALTIMG = new AttributeName(ALL_NO_NS, SAME_LOCAL("altimg"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("action"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName BORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("border"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CURSOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("cursor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COORDS = new AttributeName(ALL_NO_NS, SAME_LOCAL("coords"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("filter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FORMAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("format"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("hspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAG = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrag"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("origin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONZOOM = new AttributeName(ALL_NO_NS, SAME_LOCAL("onzoom"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | 
NCNAME_LANG); + public static final AttributeName ONHELP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onhelp"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSTOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBLUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onblur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OBJECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("object"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("offset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("orient"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOWRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("nowrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName NOHREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("nohref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MACROS = new AttributeName(ALL_NO_NS, SAME_LOCAL("macros"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName METHOD = new AttributeName(ALL_NO_NS, SAME_LOCAL("method"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName LOWSRC = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("lowsrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName USEMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("usemap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WIDTHS = new AttributeName(ALL_NO_NS, SAME_LOCAL("widths"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGET = new AttributeName(ALL_NO_NS, SAME_LOCAL("target"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUES = new AttributeName(ALL_NO_NS, SAME_LOCAL("values"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("valign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName VSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("vspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POSTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("poster"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("points"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROMPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("prompt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRCDOC = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcdoc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + 
public static final AttributeName SCOPED = new AttributeName(ALL_NO_NS, SAME_LOCAL("scoped"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRING = new AttributeName(ALL_NO_NS, SAME_LOCAL("string"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCHEME = new AttributeName(ALL_NO_NS, SAME_LOCAL("scheme"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RADIUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("radius"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RESULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("result"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRCSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROTATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rotate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALTTEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alttext"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARCHIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("archive"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AZIMUTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("azimuth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLOSURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("closure"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName CLASSID = new AttributeName(ALL_NO_NS, SAME_LOCAL("classid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHAROFF = new AttributeName(ALL_NO_NS, SAME_LOCAL("charoff"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BGCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("bgcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("colspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COMPACT = new AttributeName(ALL_NO_NS, SAME_LOCAL("compact"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName CONTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("content"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENCTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("enctype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName DATASRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("datasrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public 
static final AttributeName DATAFLD = new AttributeName(ALL_NO_NS, SAME_LOCAL("datafld"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DECLARE = new AttributeName(ALL_NO_NS, SAME_LOCAL("declare"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName DISPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("display"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIVISOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("divisor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFAULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("default"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName DESCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("descent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNING = new AttributeName(ALL_NO_NS, SAME_LOCAL("kerning"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HEADERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("headers"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("onclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPTIMUM = new AttributeName(ALL_NO_NS, SAME_LOCAL("optimum"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEGIN = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("onbegin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeyup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONERROR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerror"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONABORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onabort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONRESET = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOSHADE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noshade"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("minsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAXSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LARGEOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("largeop"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGETX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targetx", "targetX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGETY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targety", "targetY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VIEWBOX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewbox", "viewBox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERSION = new AttributeName(ALL_NO_NS, SAME_LOCAL("version"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERN = new AttributeName(ALL_NO_NS, SAME_LOCAL("pattern"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("restart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SANDBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("sandbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SUMMARY = new AttributeName(ALL_NO_NS, SAME_LOCAL("summary"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName STANDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("standby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPLACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("replace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName AUTOPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("autoplay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ADDITIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("additive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CALCMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("calcmode", "calcMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODEBASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codebase"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BEVELLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("bevelled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EXPONENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("exponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EDGEMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("edgemode", "edgeMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENCODING = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("encoding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPHREF = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("glyphref", "glyphRef"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATETIME = new AttributeName(ALL_NO_NS, SAME_LOCAL("datetime"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName FONTSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYTIMES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keytimes", "keyTimes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PANOSE_1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("panose-1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HREFLANG = new AttributeName(ALL_NO_NS, SAME_LOCAL("hreflang"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONRESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBOUNCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbounce"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFINISH = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("onfinish"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSCROLL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onscroll"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPERATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("operator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERFLOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("overflow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onsubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONREPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrepeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("notation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NORESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MANIFEST = new AttributeName(ALL_NO_NS, SAME_LOCAL("manifest"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MULTIPLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("multiple"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName LONGDESC = new AttributeName(ALL_NO_NS, SAME_LOCAL("longdesc"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LANGUAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("language"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TABINDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("tabindex"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROPERTY = new AttributeName(ALL_NO_NS, SAME_LOCAL("property"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ROWLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEAMLESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("seamless"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRETCHY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stretchy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName XML_BASE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:base", 
"base"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XML_LANG = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:lang", "lang"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName X_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("x-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_OWNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-owns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AUTOFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("autofocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ARIA_SORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-sort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCESSKEY = new AttributeName(ALL_NO_NS, SAME_LOCAL("accesskey"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_BUSY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-busy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_GRAB = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-grab"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AMPLITUDE = new AttributeName(ALL_NO_NS, SAME_LOCAL("amplitude"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-live"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP_PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName EQUALROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalrows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ELEVATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("elevation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIRECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("direction"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DRAGGABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("draggable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILL_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontstyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYSYSTEM = new AttributeName(ALL_NO_NS, SAME_LOCAL("keysystem"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYPOINTS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keypoints", "keyPoints"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIDEFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidefocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMESSAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmessage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INTERCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("intercept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGEND = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("ondragend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVEEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmoveend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONINVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninvalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INTEGRITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("integrity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeydown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUSIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INPUTMODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("inputmode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWEXIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowexit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASKUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskunits", "maskUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAXLENGTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxlength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINEBREAK = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("linebreak"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TRANSFORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("transform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("valuetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName POINTSATZ = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatz", "pointsAtZ"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTSATX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatx", "pointsAtX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTSATY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsaty", "pointsAtY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SYMMETRIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("symmetric"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCROLLING = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolling"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName REPEATDUR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatdur", "repeatDur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SELECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("selection"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEPARATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("separator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
XML_SPACE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:space", "space"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName AUTOSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("autosubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTIONTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("actiontype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCUMULATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("accumulate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-level"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CAP_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("cap-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GROUPALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("groupalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTFAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontfamily"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTWEIGHT = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("fontweight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYSPLINES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keysplines", "keySplines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HTTP_EQUIV = new AttributeName(ALL_NO_NS, SAME_LOCAL("http-equiv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OCCURRENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("occurrence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IRRELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("irrelevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDBLCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondblclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragdrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYPRESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeypress"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUSOUT = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NUMOCTAVES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("numoctaves", "numOctaves"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_MID = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-mid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_END = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXTLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("textlength", "textLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VISIBILITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("visibility"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VIEWTARGET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewtarget", "viewTarget"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ADV_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-adv-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATHLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pathlength", "pathLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RADIOGROUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("radiogroup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName STOP_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEPARATORS = new AttributeName(ALL_NO_NS, SAME_LOCAL("separators"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ZOOMANDPAN = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("zoomandpan", "zoomAndPan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XLINK_TYPE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:type", "type"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_ROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:role", "role"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_HREF = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:href", "href"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_SHOW = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:show", "show"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ACCENTUNDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("accentunder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_SECRET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-secret"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_ATOMIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-atomic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_HIDDEN = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("aria-hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_FLOWTO = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-flowto"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARABIC_FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("arabic-form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CELLPADDING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellpadding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CELLSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CROSSORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("crossorigin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("contextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASEPROFILE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("baseprofile", "baseProfile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_FAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-family"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAMEBORDER = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("frameborder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILTERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("filterunits", "filterUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FLOOD_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_WEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-weight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ADV_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-adv-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGLEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousemove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIENTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("orientation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousedown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
ONBEFORECUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFORMINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onforminput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmovestart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerunits", "markerUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHVARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathvariant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARGINWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERWIDTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerwidth", "markerWidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_ANCHOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-anchor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TABLEVALUES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("tablevalues", "tableValues"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTLEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptlevel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEATCOUNT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatcount", "repeatCount"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STITCHTILES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stitchtiles", "stitchTiles"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STARTOFFSET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("startoffset", "startOffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCROLLDELAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolldelay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XMLNS_XLINK = new AttributeName(XMLNS_NS, COLONIFIED_LOCAL("xmlns:xlink", "xlink"), XMLNS_PREFIX, IS_XMLNS); + public static final AttributeName XLINK_TITLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:title", "title"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ARIA_INVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-invalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_PRESSED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-pressed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ARIA_SETSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-setsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CHANNEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-channel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EQUALCOLUMNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalcolumns"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DISPLAYSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("displaystyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATAFORMATAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("dataformatas"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName FILL_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_VARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-variant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_STRETCH = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-stretch"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAMESPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("framespacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNELMATRIX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelmatrix", "kernelMatrix"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWSDELETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsdelete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSELEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFORMCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onformchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCELLCHANGE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("oncellchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEWHEEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousewheel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONAFTERPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFORECOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARGINHEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginheight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERHEIGHT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerheight", "markerHeight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LENGTHADJUST = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("lengthadjust", "lengthAdjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNSELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE_BIDI = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-bidi"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); 
+ public static final AttributeName UNITS_PER_EM = new AttributeName(ALL_NO_NS, SAME_LOCAL("units-per-em"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WORD_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("word-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WRITING_MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("writing-mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patternunits", "patternUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPREADMETHOD = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("spreadmethod", "spreadMethod"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SURFACESCALE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("surfacescale", "surfaceScale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STDDEVIATION = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stddeviation", "stdDeviation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STOP_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CONTROLS = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("aria-controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_HASPOPUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-haspopup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCENT_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUENOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuenow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_RELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-relevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_POSINSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-posinset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUEMAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemax"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_EXPANDED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-expanded"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName ATTRIBUTETYPE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributetype", "attributeType"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ATTRIBUTENAME = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributename", "attributeName"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DATATYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-datatype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUEMIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASEFREQUENCY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("basefrequency", "baseFrequency"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIPPATHUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("clippathunits", "clipPathUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFINITIONURL = new AttributeName(ALL_NO_NS, MATH_DIFFERENT("definitionurl", "definitionURL"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GRADIENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradientunits", "gradientUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FLOOD_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName ONAFTERUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONERRORUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerrorupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforepaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONLOSECAPTURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onlosecapture"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSELECTSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselectstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MOVABLELIMITS = new AttributeName(ALL_NO_NS, SAME_LOCAL("movablelimits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINETHICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("linethickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE_RANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-range"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName THINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRESERVEALPHA = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preservealpha", "preserveAlpha"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTMINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptminsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECIFICATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("specification"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XLINK_ACTUATE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:actuate", "actuate"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_ARCROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:arcrole", "arcrole"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ACCEPT_CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept-charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGNMENTSCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignmentscope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_MULTILINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASELINE_SHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline-shift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ORIGIN_X = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("horiz-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFILTERCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfilterchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWSINSERTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsinserted"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHBACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathbackground"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LETTER_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("letter-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIGHTING_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("lighting-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName THICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName POINTER_EVENTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("pointer-events"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRIMITIVEUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("primitiveunits", "primitiveUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFERRERPOLICY = new AttributeName(ALL_NO_NS, SAME_LOCAL("referrerpolicy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SYSTEMLANGUAGE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("systemlanguage", "systemLanguage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_LINECAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linecap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SUBSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("subscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DROPEFFECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-dropeffect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LABELLEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-labelledby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_TEMPLATEID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-templateid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTENTEDITABLE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("contenteditable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIFFUSECONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("diffuseconstant", "diffuseConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATAAVAILABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondataavailable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCONTROLSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontrolselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IMAGE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("image-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MEDIUMMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mediummathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_DECORATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-decoration"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SHAPE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_LINEJOIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linejoin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DESCRIBEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-describedby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_SIZE_ADJUST = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size-adjust"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNELUNITLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelunitlength", "kernelUnitLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONPROPERTYCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpropertychange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATASETCHANGED = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetchanged"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASKCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskcontentunits", "maskContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterntransform", "patternTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIREDFEATURES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredfeatures", "requiredFeatures"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RENDERING_INTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rendering-intent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECULAREXPONENT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularexponent", "specularExponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECULARCONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularconstant", "specularConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
SUPERSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("superscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_DASHARRAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dasharray"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("xchannelselector", "xChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName YCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("ychannelselector", "yChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENABLE_BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("enable-background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DOMINANT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("dominant-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GRADIENTTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradienttransform", "gradientTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFORDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbefordeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATASETCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetcomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName ONBEFOREEDITFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeeditfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIMITINGCONEANGLE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("limitingconeangle", "limitingConeAngle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_DASHOFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dashoffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_MITERLIMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-miterlimit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGNMENT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignment-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONREADYSTATECHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreadystatechange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNDERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIREDEXTENSIONS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredextensions", "requiredExtensions"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | 
NCNAME_LANG); + public static final AttributeName COLOR_INTERPOLATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNDERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRESERVEASPECTRATIO = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preserveaspectratio", "preserveAspectRatio"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterncontentunits", "patternContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_MULTISELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTSIZEMULTIPLIER = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptsizemultiplier"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_ACTIVEDESCENDANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-activedescendant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYVERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYVERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRIKETHROUGH_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRIKETHROUGH_THICKNESS = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_ORIENTATION_VERTICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-vertical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_INTERPOLATION_FILTERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation-filters"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_ORIENTATION_HORIZONTAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-horizontal"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = { + D, + K, + R, + X, + Y, + Z, + BY, + CX, + CY, + DX, + DY, + G2, + G1, + FX, + FY, + K4, + K2, + K3, + K1, + ID, + IN, + U2, + U1, + RT, + RX, + RY, + TO, + Y2, + Y1, + X1, + X2, + ALT, + DIR, + DUR, + END, + FOR, + IN2, + MAX, + MIN, + LOW, + REL, + REV, + SRC, + AXIS, + ABBR, + BBOX, + CITE, + CODE, + BIAS, + COLS, + CLIP, + CHAR, + BASE, + EDGE, + DATA, + FILL, + FROM, + FORM, + FACE, + HIGH, + HREF, + OPEN, + ICON, + NAME, + MODE, + MASK, + LINK, + LANG, + LOOP, + LIST, + TYPE, + WHEN, + WRAP, + TEXT, + PATH, + PING, + REFX, + REFY, + SIZE, + SEED, + ROWS, + SPAN, + STEP, + ROLE, + XREF, + ASYNC, + ALINK, + ALIGN, + CLOSE, + COLOR, + CLASS, + CLEAR, + BEGIN, + DEPTH, + DEFER, + FENCE, + FRAME, + ISMAP, + ONEND, + INDEX, + ORDER, + OTHER, + ONCUT, + NARGS, + MEDIA, + LABEL, + LOCAL, + WIDTH, + TITLE, + VLINK, + VALUE, + SLOPE, + SHAPE, + SCOPE, + SCALE, + SPEED, + STYLE, + RULES, + STEMH, + SIZES, + STEMV, + START, + XMLNS, + ACCEPT, + ACCENT, + ASCENT, + ACTIVE, + ALTIMG, + ACTION, + BORDER, + CURSOR, + COORDS, + FILTER, + FORMAT, + HIDDEN, + HSPACE, + HEIGHT, + ONMOVE, + ONLOAD, + ONDRAG, + ORIGIN, + ONZOOM, + ONHELP, + ONSTOP, + ONDROP, + ONBLUR, + OBJECT, + OFFSET, + ORIENT, + ONCOPY, 
+ NOWRAP, + NOHREF, + MACROS, + METHOD, + LOWSRC, + LSPACE, + LQUOTE, + USEMAP, + WIDTHS, + TARGET, + VALUES, + VALIGN, + VSPACE, + POSTER, + POINTS, + PROMPT, + SRCDOC, + SCOPED, + STRING, + SCHEME, + STROKE, + RADIUS, + RESULT, + REPEAT, + SRCSET, + RSPACE, + ROTATE, + RQUOTE, + ALTTEXT, + ARCHIVE, + AZIMUTH, + CLOSURE, + CHECKED, + CLASSID, + CHAROFF, + BGCOLOR, + COLSPAN, + CHARSET, + COMPACT, + CONTENT, + ENCTYPE, + DATASRC, + DATAFLD, + DECLARE, + DISPLAY, + DIVISOR, + DEFAULT, + DESCENT, + KERNING, + HANGING, + HEADERS, + ONPASTE, + ONCLICK, + OPTIMUM, + ONBEGIN, + ONKEYUP, + ONFOCUS, + ONERROR, + ONINPUT, + ONABORT, + ONSTART, + ONRESET, + OPACITY, + NOSHADE, + MINSIZE, + MAXSIZE, + LARGEOP, + UNICODE, + TARGETX, + TARGETY, + VIEWBOX, + VERSION, + PATTERN, + PROFILE, + SPACING, + RESTART, + ROWSPAN, + SANDBOX, + SUMMARY, + STANDBY, + REPLACE, + AUTOPLAY, + ADDITIVE, + CALCMODE, + CODETYPE, + CODEBASE, + CONTROLS, + BEVELLED, + BASELINE, + EXPONENT, + EDGEMODE, + ENCODING, + GLYPHREF, + DATETIME, + DISABLED, + FONTSIZE, + KEYTIMES, + PANOSE_1, + HREFLANG, + ONRESIZE, + ONCHANGE, + ONBOUNCE, + ONUNLOAD, + ONFINISH, + ONSCROLL, + OPERATOR, + OVERFLOW, + ONSUBMIT, + ONREPEAT, + ONSELECT, + NOTATION, + NORESIZE, + MANIFEST, + MATHSIZE, + MULTIPLE, + LONGDESC, + LANGUAGE, + TEMPLATE, + TABINDEX, + PROPERTY, + READONLY, + SELECTED, + ROWLINES, + SEAMLESS, + ROWALIGN, + STRETCHY, + REQUIRED, + XML_BASE, + XML_LANG, + X_HEIGHT, + ARIA_OWNS, + AUTOFOCUS, + ARIA_SORT, + ACCESSKEY, + ARIA_BUSY, + ARIA_GRAB, + AMPLITUDE, + ARIA_LIVE, + CLIP_RULE, + CLIP_PATH, + EQUALROWS, + ELEVATION, + DIRECTION, + DRAGGABLE, + FILL_RULE, + FONTSTYLE, + FONT_SIZE, + KEYSYSTEM, + KEYPOINTS, + HIDEFOCUS, + ONMESSAGE, + INTERCEPT, + ONDRAGEND, + ONMOVEEND, + ONINVALID, + INTEGRITY, + ONKEYDOWN, + ONFOCUSIN, + ONMOUSEUP, + INPUTMODE, + ONROWEXIT, + MATHCOLOR, + MASKUNITS, + MAXLENGTH, + LINEBREAK, + TRANSFORM, + V_HANGING, + VALUETYPE, + POINTSATZ, + POINTSATX, + POINTSATY, + SYMMETRIC, + 
SCROLLING, + REPEATDUR, + SELECTION, + SEPARATOR, + XML_SPACE, + AUTOSUBMIT, + ALPHABETIC, + ACTIONTYPE, + ACCUMULATE, + ARIA_LEVEL, + COLUMNSPAN, + CAP_HEIGHT, + BACKGROUND, + GLYPH_NAME, + GROUPALIGN, + FONTFAMILY, + FONTWEIGHT, + FONT_STYLE, + KEYSPLINES, + HTTP_EQUIV, + ONACTIVATE, + OCCURRENCE, + IRRELEVANT, + ONDBLCLICK, + ONDRAGDROP, + ONKEYPRESS, + ONROWENTER, + ONDRAGOVER, + ONFOCUSOUT, + ONMOUSEOUT, + NUMOCTAVES, + MARKER_MID, + MARKER_END, + TEXTLENGTH, + VISIBILITY, + VIEWTARGET, + VERT_ADV_Y, + PATHLENGTH, + REPEAT_MAX, + RADIOGROUP, + STOP_COLOR, + SEPARATORS, + REPEAT_MIN, + ROWSPACING, + ZOOMANDPAN, + XLINK_TYPE, + XLINK_ROLE, + XLINK_HREF, + XLINK_SHOW, + ACCENTUNDER, + ARIA_SECRET, + ARIA_ATOMIC, + ARIA_HIDDEN, + ARIA_FLOWTO, + ARABIC_FORM, + CELLPADDING, + CELLSPACING, + COLUMNWIDTH, + CROSSORIGIN, + COLUMNALIGN, + COLUMNLINES, + CONTEXTMENU, + BASEPROFILE, + FONT_FAMILY, + FRAMEBORDER, + FILTERUNITS, + FLOOD_COLOR, + FONT_WEIGHT, + HORIZ_ADV_X, + ONDRAGLEAVE, + ONMOUSEMOVE, + ORIENTATION, + ONMOUSEDOWN, + ONMOUSEOVER, + ONDRAGENTER, + IDEOGRAPHIC, + ONBEFORECUT, + ONFORMINPUT, + ONDRAGSTART, + ONMOVESTART, + MARKERUNITS, + MATHVARIANT, + MARGINWIDTH, + MARKERWIDTH, + TEXT_ANCHOR, + TABLEVALUES, + SCRIPTLEVEL, + REPEATCOUNT, + STITCHTILES, + STARTOFFSET, + SCROLLDELAY, + XMLNS_XLINK, + XLINK_TITLE, + ARIA_INVALID, + ARIA_PRESSED, + ARIA_CHECKED, + AUTOCOMPLETE, + ARIA_SETSIZE, + ARIA_CHANNEL, + EQUALCOLUMNS, + DISPLAYSTYLE, + DATAFORMATAS, + FILL_OPACITY, + FONT_VARIANT, + FONT_STRETCH, + FRAMESPACING, + KERNELMATRIX, + ONDEACTIVATE, + ONROWSDELETE, + ONMOUSELEAVE, + ONFORMCHANGE, + ONCELLCHANGE, + ONMOUSEWHEEL, + ONMOUSEENTER, + ONAFTERPRINT, + ONBEFORECOPY, + MARGINHEIGHT, + MARKERHEIGHT, + MARKER_START, + MATHEMATICAL, + LENGTHADJUST, + UNSELECTABLE, + UNICODE_BIDI, + UNITS_PER_EM, + WORD_SPACING, + WRITING_MODE, + V_ALPHABETIC, + PATTERNUNITS, + SPREADMETHOD, + SURFACESCALE, + STROKE_WIDTH, + REPEAT_START, + STDDEVIATION, + STOP_OPACITY, + 
ARIA_CONTROLS, + ARIA_HASPOPUP, + ACCENT_HEIGHT, + ARIA_VALUENOW, + ARIA_RELEVANT, + ARIA_POSINSET, + ARIA_VALUEMAX, + ARIA_READONLY, + ARIA_SELECTED, + ARIA_REQUIRED, + ARIA_EXPANDED, + ARIA_DISABLED, + ATTRIBUTETYPE, + ATTRIBUTENAME, + ARIA_DATATYPE, + ARIA_VALUEMIN, + BASEFREQUENCY, + COLUMNSPACING, + COLOR_PROFILE, + CLIPPATHUNITS, + DEFINITIONURL, + GRADIENTUNITS, + FLOOD_OPACITY, + ONAFTERUPDATE, + ONERRORUPDATE, + ONBEFOREPASTE, + ONLOSECAPTURE, + ONCONTEXTMENU, + ONSELECTSTART, + ONBEFOREPRINT, + MOVABLELIMITS, + LINETHICKNESS, + UNICODE_RANGE, + THINMATHSPACE, + VERT_ORIGIN_X, + VERT_ORIGIN_Y, + V_IDEOGRAPHIC, + PRESERVEALPHA, + SCRIPTMINSIZE, + SPECIFICATION, + XLINK_ACTUATE, + XLINK_ARCROLE, + ACCEPT_CHARSET, + ALIGNMENTSCOPE, + ARIA_MULTILINE, + BASELINE_SHIFT, + HORIZ_ORIGIN_X, + HORIZ_ORIGIN_Y, + ONBEFOREUPDATE, + ONFILTERCHANGE, + ONROWSINSERTED, + ONBEFOREUNLOAD, + MATHBACKGROUND, + LETTER_SPACING, + LIGHTING_COLOR, + THICKMATHSPACE, + TEXT_RENDERING, + V_MATHEMATICAL, + POINTER_EVENTS, + PRIMITIVEUNITS, + REFERRERPOLICY, + SYSTEMLANGUAGE, + STROKE_LINECAP, + SUBSCRIPTSHIFT, + STROKE_OPACITY, + ARIA_DROPEFFECT, + ARIA_LABELLEDBY, + ARIA_TEMPLATEID, + COLOR_RENDERING, + CONTENTEDITABLE, + DIFFUSECONSTANT, + ONDATAAVAILABLE, + ONCONTROLSELECT, + IMAGE_RENDERING, + MEDIUMMATHSPACE, + TEXT_DECORATION, + SHAPE_RENDERING, + STROKE_LINEJOIN, + REPEAT_TEMPLATE, + ARIA_DESCRIBEDBY, + FONT_SIZE_ADJUST, + KERNELUNITLENGTH, + ONBEFOREACTIVATE, + ONPROPERTYCHANGE, + ONDATASETCHANGED, + MASKCONTENTUNITS, + PATTERNTRANSFORM, + REQUIREDFEATURES, + RENDERING_INTENT, + SPECULAREXPONENT, + SPECULARCONSTANT, + SUPERSCRIPTSHIFT, + STROKE_DASHARRAY, + XCHANNELSELECTOR, + YCHANNELSELECTOR, + ARIA_AUTOCOMPLETE, + ENABLE_BACKGROUND, + DOMINANT_BASELINE, + GRADIENTTRANSFORM, + ONBEFORDEACTIVATE, + ONDATASETCOMPLETE, + OVERLINE_POSITION, + ONBEFOREEDITFOCUS, + LIMITINGCONEANGLE, + VERYTHINMATHSPACE, + STROKE_DASHOFFSET, + STROKE_MITERLIMIT, + ALIGNMENT_BASELINE, + 
ONREADYSTATECHANGE, + OVERLINE_THICKNESS, + UNDERLINE_POSITION, + VERYTHICKMATHSPACE, + REQUIREDEXTENSIONS, + COLOR_INTERPOLATION, + UNDERLINE_THICKNESS, + PRESERVEASPECTRATIO, + PATTERNCONTENTUNITS, + ARIA_MULTISELECTABLE, + SCRIPTSIZEMULTIPLIER, + ARIA_ACTIVEDESCENDANT, + VERYVERYTHINMATHSPACE, + VERYVERYTHICKMATHSPACE, + STRIKETHROUGH_POSITION, + STRIKETHROUGH_THICKNESS, + GLYPH_ORIENTATION_VERTICAL, + COLOR_INTERPOLATION_FILTERS, + GLYPH_ORIENTATION_HORIZONTAL, + }; + private final static int[] ATTRIBUTE_HASHES = { + 1153, + 1383, + 1601, + 1793, + 1827, + 1857, + 68600, + 69146, + 69177, + 70237, + 70270, + 71572, + 71669, + 72415, + 72444, + 74846, + 74904, + 74943, + 75001, + 75276, + 75590, + 84742, + 84839, + 85575, + 85963, + 85992, + 87204, + 88074, + 88171, + 89130, + 89163, + 3207892, + 3283895, + 3284791, + 3338752, + 3358197, + 3369562, + 3539124, + 3562402, + 3574260, + 3670335, + 3696933, + 3721879, + 135280021, + 135346322, + 136317019, + 136475749, + 136548517, + 136652214, + 136884919, + 136902418, + 136942992, + 137292068, + 139120259, + 139785574, + 142250603, + 142314056, + 142331176, + 142519584, + 144752417, + 145106895, + 146147200, + 146765926, + 148805544, + 149655723, + 149809441, + 150018784, + 150445028, + 150813181, + 150923321, + 152528754, + 152536216, + 152647366, + 152962785, + 155219321, + 155654904, + 157317483, + 157350248, + 157437941, + 157447478, + 157604838, + 157685404, + 157894402, + 158315188, + 166078431, + 169409980, + 169700259, + 169856932, + 170007032, + 170409695, + 170466488, + 170513710, + 170608367, + 173028944, + 173896963, + 176090625, + 176129212, + 179390001, + 179489057, + 179627464, + 179840468, + 179849042, + 180004216, + 181779081, + 183027151, + 183645319, + 183698797, + 185922012, + 185997252, + 188312483, + 188675799, + 190977533, + 190992569, + 191006194, + 191033518, + 191038774, + 191096249, + 191166163, + 191194426, + 191443343, + 191522106, + 191568039, + 200104642, + 202506661, + 202537381, + 
202602917, + 203070590, + 203120766, + 203389054, + 203690071, + 203971238, + 203986524, + 209040857, + 209125756, + 212055489, + 212322418, + 212746849, + 213002877, + 213055164, + 213088023, + 213259873, + 213273386, + 213435118, + 213437318, + 213438231, + 213493071, + 213532268, + 213542834, + 213584431, + 213659891, + 215285828, + 215880731, + 216112976, + 216684637, + 217369699, + 217565298, + 217576549, + 218186795, + 219743185, + 220082234, + 221623802, + 221986406, + 222283890, + 223089542, + 223138630, + 223311265, + 224431494, + 224547358, + 224587256, + 224589550, + 224655650, + 224785518, + 224810917, + 224813302, + 225126263, + 225429618, + 225432950, + 225440869, + 236107233, + 236709921, + 236838947, + 237117095, + 237143271, + 237172455, + 237209953, + 237354143, + 237372743, + 237668065, + 237703073, + 237714273, + 239743521, + 240512803, + 240522627, + 240560417, + 240656513, + 241015715, + 241062755, + 241065383, + 243523041, + 245865199, + 246261793, + 246556195, + 246774817, + 246923491, + 246928419, + 246981667, + 247014847, + 247058369, + 247112833, + 247118177, + 247119137, + 247128739, + 247316903, + 249533729, + 250235623, + 250269543, + 251402351, + 252339047, + 253260911, + 253293679, + 254844367, + 255547879, + 256077281, + 256345377, + 258124199, + 258354465, + 258605063, + 258744193, + 258845603, + 258856961, + 258926689, + 269869248, + 270174334, + 270709417, + 270778994, + 270781796, + 271102503, + 271478858, + 271490090, + 272870654, + 273335275, + 273369140, + 273924313, + 274108530, + 274116736, + 276818662, + 277476156, + 279156579, + 279349675, + 280108533, + 280128712, + 280132869, + 280162403, + 280280292, + 280413430, + 280506130, + 280677397, + 280678580, + 280686710, + 280689066, + 282736758, + 283110901, + 283275116, + 283823226, + 283890012, + 284479340, + 284606461, + 286700477, + 286798916, + 290055764, + 291557706, + 291665349, + 291804100, + 292138018, + 292166446, + 292418738, + 292451039, + 300298041, + 300374839, 
+ 300597935, + 303073389, + 303083839, + 303266673, + 303354997, + 303430688, + 303576261, + 303724281, + 303819694, + 304242723, + 304382625, + 306247792, + 307227811, + 307468786, + 307724489, + 310252031, + 310358241, + 310373094, + 310833159, + 311015256, + 313357609, + 313683893, + 313701861, + 313706996, + 313707317, + 313710350, + 313795700, + 314027746, + 314038181, + 314091299, + 314205627, + 314233813, + 316741830, + 316797986, + 317486755, + 317794164, + 320076137, + 322657125, + 322887778, + 323506876, + 323572412, + 323605180, + 325060058, + 325320188, + 325398738, + 325541490, + 325671619, + 333868843, + 336806130, + 337212108, + 337282686, + 337285434, + 337585223, + 338036037, + 338298087, + 338566051, + 340943551, + 341190970, + 342995704, + 343352124, + 343912673, + 344585053, + 346977248, + 347218098, + 347262163, + 347278576, + 347438191, + 347655959, + 347684788, + 347726430, + 347727772, + 347776035, + 347776629, + 349500753, + 350880161, + 350887073, + 353384123, + 355496998, + 355906922, + 355979793, + 356545959, + 358637867, + 358905016, + 359164318, + 359247286, + 359350571, + 359579447, + 365560330, + 367399355, + 367420285, + 367510727, + 368013212, + 370234760, + 370353345, + 370710317, + 371074566, + 371122285, + 371194213, + 371448425, + 371448430, + 371545055, + 371593469, + 371596922, + 371758751, + 371964792, + 372151328, + 376550136, + 376710172, + 376795771, + 376826271, + 376906556, + 380514830, + 380774774, + 380775037, + 381030322, + 381136500, + 381281631, + 381282269, + 381285504, + 381330595, + 381331422, + 381335911, + 381336484, + 383907298, + 383917408, + 384595009, + 384595013, + 387799894, + 387823201, + 392581647, + 392584937, + 392742684, + 392906485, + 393003349, + 400644707, + 400973830, + 404428547, + 404432113, + 404432865, + 404469244, + 404478897, + 404694860, + 406887479, + 408294949, + 408789955, + 410022510, + 410467324, + 410586448, + 410945965, + 411845275, + 414327152, + 414327932, + 414329781, + 
414346257, + 414346439, + 414639928, + 414835998, + 414894517, + 414986533, + 417465377, + 417465381, + 417492216, + 418259232, + 419310946, + 420103495, + 420242342, + 420380455, + 420658662, + 420717432, + 423183880, + 424539259, + 425929170, + 425972964, + 426050649, + 426126450, + 426142833, + 426607922, + 437289840, + 437347469, + 437412335, + 437423943, + 437455540, + 437462252, + 437597991, + 437617485, + 437986305, + 437986507, + 437986828, + 437987072, + 438015591, + 438034813, + 438038966, + 438179623, + 438347971, + 438483573, + 438547062, + 438895551, + 441592676, + 442032555, + 443548979, + 447881379, + 447881655, + 447881895, + 447887844, + 448416189, + 448445746, + 448449012, + 450942191, + 452816744, + 453668677, + 454434495, + 456610076, + 456642844, + 456738709, + 457544600, + 459451897, + 459680944, + 468058810, + 468083581, + 470964084, + 471470955, + 471567278, + 472267822, + 481177859, + 481210627, + 481435874, + 481455115, + 481485378, + 481490218, + 485105638, + 486005878, + 486383494, + 487988916, + 488103783, + 490661867, + 491574090, + 491578272, + 492891370, + 493041952, + 493441205, + 493582844, + 493716979, + 504577572, + 504740359, + 505091638, + 505592418, + 505656212, + 509516275, + 514998531, + 515571132, + 515594682, + 518712698, + 521362273, + 526592419, + 526807354, + 527348842, + 538294791, + 544689535, + 545535009, + 548544752, + 548563346, + 548595116, + 551679010, + 558034099, + 560329411, + 560356209, + 560671018, + 560671152, + 560692590, + 560845442, + 569212097, + 569474241, + 572252718, + 575326764, + 576174758, + 576190819, + 582099184, + 582099438, + 582372519, + 582558889, + 586552164, + 591325418, + 594231990, + 594243961, + 605711268, + 615672071, + 616086845, + 621792370, + 624879850, + 627432831, + 640040548, + 654392808, + 658675477, + 659420283, + 672891587, + 694768102, + 705890982, + 725543146, + 759097578, + 761686526, + 795383908, + 878105336, + 908643300, + 945213471, + }; +} diff --git 
a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
new file mode 100644
index 0000000000..01d76d7009
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import nu.validator.htmlparser.annotation.NoLength;
+
+import org.xml.sax.SAXException;
+
+/**
+ * A common superclass for tree builders that coalesce their text nodes.
+ *
+ * <p>The {@code char[]}-based append/insert methods below turn their slice
+ * into a {@link String} and forward it to the matching abstract
+ * {@code String}-based method that subclasses implement.
+ *
+ * @param <T> the node handle type, shared with {@code TreeBuilder}
+ * @version $Id$
+ * @author hsivonen
+ */
+public abstract class CoalescingTreeBuilder<T> extends TreeBuilder<T> {
+
+    /**
+     * Appends a slice of {@code buf} to the inherited character buffer.
+     *
+     * <p>No capacity check is made here; presumably the caller guarantees that
+     * {@code charBuffer} has room for {@code length} more characters (TODO
+     * confirm against {@code TreeBuilder}).
+     *
+     * @param buf the source buffer
+     * @param start index of the first character to copy
+     * @param length number of characters to copy
+     * @throws SAXException never thrown by this implementation
+     */
+    protected final void accumulateCharacters(@NoLength char[] buf, int start,
+            int length) throws SAXException {
+        System.arraycopy(buf, start, charBuffer, charBufferLen, length);
+        charBufferLen += length;
+    }
+
+    /**
+     * Appends the given slice of {@code buf} to {@code parent} as a string.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendCharacters(java.lang.Object, char[], int, int)
+     */
+    @Override protected final void appendCharacters(T parent, char[] buf, int start,
+            int length) throws SAXException {
+        appendCharacters(parent, new String(buf, start, length));
+    }
+
+    /**
+     * Appends the fixed English isindex prompt text to {@code parent}.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object)
+     */
+    @Override protected void appendIsindexPrompt(T parent) throws SAXException {
+        appendCharacters(parent, "This is a searchable index. Enter search keywords: ");
+    }
+
+    /**
+     * Appends {@code text} as character data under {@code parent}.
+     *
+     * @param parent the node to append to
+     * @param text the characters to append
+     * @throws SAXException at the implementation's discretion
+     */
+    protected abstract void appendCharacters(T parent, String text) throws SAXException;
+
+    /**
+     * Appends the given slice of {@code buf} to {@code parent} as a comment.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendComment(java.lang.Object, char[], int, int)
+     */
+    @Override protected final void appendComment(T parent, char[] buf, int start,
+            int length) throws SAXException {
+        appendComment(parent, new String(buf, start, length));
+    }
+
+    /**
+     * Appends {@code comment} as a comment under {@code parent}.
+     *
+     * @param parent the node to append to
+     * @param comment the comment text
+     * @throws SAXException at the implementation's discretion
+     */
+    protected abstract void appendComment(T parent, String comment) throws SAXException;
+
+    /**
+     * Appends the given slice of {@code buf} as a document-level comment.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#appendCommentToDocument(char[], int, int)
+     */
+    @Override protected final void appendCommentToDocument(char[] buf, int start,
+            int length) throws SAXException {
+        appendCommentToDocument(new String(buf, start, length));
+    }
+
+    /**
+     * Appends {@code comment} as a document-level comment.
+     *
+     * @param comment the comment text
+     * @throws SAXException at the implementation's discretion
+     */
+    protected abstract void appendCommentToDocument(String comment) throws SAXException;
+
+    /**
+     * Foster-parents the given slice of {@code buf} as a string.
+     *
+     * @see nu.validator.htmlparser.impl.TreeBuilder#insertFosterParentedCharacters(char[], int, int, java.lang.Object, java.lang.Object)
+     */
+    @Override protected final void insertFosterParentedCharacters(char[] buf, int start,
+            int length, T table, T stackParent) throws SAXException {
+        insertFosterParentedCharacters(new String(buf, start, length), table, stackParent);
+    }
+
+    /**
+     * Inserts {@code text} as foster-parented character data.
+     *
+     * @param text the characters to insert
+     * @param table the table node passed through from the buffer-based variant
+     * @param stackParent the stack parent node passed through likewise
+     * @throws SAXException at the implementation's discretion
+     */
+    protected abstract void insertFosterParentedCharacters(String text, T table, T stackParent) throws SAXException;
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
new file mode 100644
index 0000000000..ee551a7377
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java
@@ -0,0 +1,1609 @@
+/*
+ * Copyright (c) 2008-2014 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.impl;
+
+import java.util.Arrays;
+
+import nu.validator.htmlparser.annotation.Inline;
+import nu.validator.htmlparser.annotation.Local;
+import nu.validator.htmlparser.annotation.NoLength;
+import nu.validator.htmlparser.annotation.Virtual;
+import nu.validator.htmlparser.common.Interner;
+
+/**
+ * An element name together with its camel-case form and a packed
+ * {@code flags} word holding the tree builder dispatch group and flag bits.
+ *
+ * <p>Well-known names are pre-interned constants found by hash lookup in
+ * {@code elementNameByBuffer}; any other name yields a new instance marked
+ * {@link #CUSTOM}.
+ */
+public final class ElementName
+// uncomment when regenerating self
+//        implements Comparable<ElementName>
+{
+
+    /**
+     * The mask for extracting the dispatch group (the lowest 7 bits of
+     * {@code flags}).
+     */
+    public static final int GROUP_MASK = 127;
+
+    /**
+     * Indicates that the element is not a pre-interned element. Must not be
+     * set on pre-interned elements.
+     */
+    public static final int CUSTOM = (1 << 30);
+
+    /**
+     * Indicates that the element is in the "special" category. This bit
+     * should not be pre-set on MathML or SVG specials--only on HTML specials.
+     */
+    public static final int SPECIAL = (1 << 29);
+
+    /**
+     * The element is foster-parenting. This bit should be pre-set on elements
+     * that are foster-parenting as HTML.
+     */
+    public static final int FOSTER_PARENTING = (1 << 28);
+
+    /**
+     * The element is scoping. This bit should be pre-set on elements
+     * that are scoping as HTML.
+     */
+    public static final int SCOPING = (1 << 27);
+
+    /**
+     * The element is scoping as SVG.
+     */
+    public static final int SCOPING_AS_SVG = (1 << 26);
+
+    /**
+     * The element is scoping as MathML.
+     */
+    public static final int SCOPING_AS_MATHML = (1 << 25);
+
+    /**
+     * The element is an HTML integration point.
+     */
+    public static final int HTML_INTEGRATION_POINT = (1 << 24);
+
+    /**
+     * The element has an optional end tag.
+     */
+    public static final int OPTIONAL_END_TAG = (1 << 23);
+
+    /**
+     * A placeholder instance whose {@code name} and {@code camelCaseName} are
+     * {@code null}. It is built with the single-argument constructor, so its
+     * flags are {@code TreeBuilder.OTHER | CUSTOM}.
+     */
+    public static final ElementName NULL_ELEMENT_NAME = new ElementName(null);
+
+    /**
+     * The local name; this is the string compared against the input buffer
+     * during lookup.
+     */
+    public final @Local String name;
+
+    /**
+     * The camel-case form of the name. Equal to {@code name} for custom
+     * elements; presumably differs only for mixed-case foreign (SVG) names
+     * (TODO confirm against the generated constants).
+     */
+    public final @Local String camelCaseName;
+
+    /**
+     * The lowest 7 bits are the dispatch group. The high bits are flags.
+     */
+    public final int flags;
+
+    /**
+     * Returns the raw {@link #flags} word (dispatch group plus flag bits).
+     */
+    @Inline public int getFlags() {
+        return flags;
+    }
+
+    /**
+     * Returns the dispatch group, i.e. {@link #flags} masked with
+     * {@link #GROUP_MASK}.
+     */
+    public int getGroup() {
+        return flags & GROUP_MASK;
+    }
+
+    /**
+     * Returns {@code true} if the {@link #CUSTOM} bit is set, i.e. this is
+     * not one of the pre-interned element names.
+     */
+    public boolean isCustom() {
+        return (flags & CUSTOM) != 0;
+    }
+
+    /**
+     * Resolves the characters {@code buf[offset .. offset + length)} to an
+     * element name.
+     *
+     * <p>The slice is hashed and the hash is binary-searched in
+     * {@code ELEMENT_HASHES}. A hit is accepted only if the pre-interned
+     * name really equals the slice; otherwise, and on a miss, a new custom
+     * {@code ElementName} is created from the slice.
+     *
+     * @param buf the buffer holding the lower-cased name
+     * @param offset index of the first character of the name
+     * @param length number of characters in the name; must be at least 1
+     * @param interner passed through to the local name factory
+     * @return the matching pre-interned instance, or a new custom instance
+     */
+    static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) {
+        // Hash the same slice that is compared and interned below. Hashing
+        // from index 0 regardless of offset would miss every well-known name
+        // whenever offset != 0.
+        int hash = ElementName.bufToHash(buf, offset, length);
+        int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash);
+        if (index < 0) {
+            return new ElementName(Portability.newLocalNameFromBuffer(buf, offset, length, interner));
+        } else {
+            ElementName elementName = ElementName.ELEMENT_NAMES[index];
+            @Local String name = elementName.name;
+            // Different names can share a hash, so confirm the actual text.
+            if (!Portability.localEqualsBuffer(name, buf, offset, length)) {
+                return new ElementName(Portability.newLocalNameFromBuffer(buf,
+                        offset, length, interner));
+            }
+            return elementName;
+        }
+    }
+
+    /**
+     * This method has to return a unique integer for each well-known
+     * lower-cased element name.
+     *
+     * <p>Equivalent to {@code bufToHash(buf, 0, len)}; kept so that the
+     * code-generation helpers, which hash a whole array, keep working.
+     *
+     * @param buf the buffer holding the name, starting at index 0
+     * @param len the length of the name; must be at least 1
+     * @return the hash of the name
+     */
+    private static int bufToHash(@NoLength char[] buf, int len) {
+        return bufToHash(buf, 0, len);
+    }
+
+    /**
+     * Hashes the name stored in {@code buf[offset .. offset + len)}.
+     *
+     * <p>The hash starts from the length, then shifts in 5 bits each for the
+     * first character and for up to four characters taken backwards from the
+     * end of the name. {@code buf[offset]} is read unconditionally, so
+     * {@code len} must be at least 1.
+     *
+     * @param buf the buffer holding the name
+     * @param offset index of the first character of the name
+     * @param len the length of the name; must be at least 1
+     * @return the hash of the name
+     */
+    private static int bufToHash(@NoLength char[] buf, int offset, int len) {
+        int hash = len;
+        hash <<= 5;
+        hash += buf[offset] - 0x60;
+        int j = len;
+        for (int i = 0; i < 4 && j > 0; i++) {
+            j--;
+            hash <<= 5;
+            hash += buf[offset + j] - 0x60;
+        }
+        return hash;
+    }
+
+    /**
+     * Creates a pre-interned element name with explicit flags.
+     *
+     * @param name the local name
+     * @param camelCaseName the camel-case form of the name
+     * @param flags the dispatch group combined with flag bits
+     */
+    private ElementName(@Local String name, @Local String camelCaseName,
+            int flags) {
+        this.name = name;
+        this.camelCaseName = camelCaseName;
+        this.flags = flags;
+    }
+
+    /**
+     * Creates a custom element name: the camel-case form equals the name and
+     * the flags are {@code TreeBuilder.OTHER | CUSTOM}.
+     *
+     * @param name the local name
+     */
+    protected ElementName(@Local String name) {
+        this.name = name;
+        this.camelCaseName = name;
+        this.flags = TreeBuilder.OTHER | CUSTOM;
+    }
+
+    @Virtual void release() {
+        // No-op in Java.
+        // Implement as delete this in subclass.
+ // Be sure to release the local name + } + + @SuppressWarnings("unused") @Virtual private void destructor() { + } + + @Virtual public ElementName cloneElementName(Interner interner) { + return this; + } + + // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate + +// /** +// * @see java.lang.Object#toString() +// */ +// @Override public String toString() { +// return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")"; +// } +// +// private String decomposedFlags() { +// StringBuilder buf = new StringBuilder("TreeBuilder."); +// buf.append(treeBuilderGroupToName()); +// if ((flags & SPECIAL) != 0) { +// buf.append(" | SPECIAL"); +// } +// if ((flags & FOSTER_PARENTING) != 0) { +// buf.append(" | FOSTER_PARENTING"); +// } +// if ((flags & SCOPING) != 0) { +// buf.append(" | SCOPING"); +// } +// if ((flags & SCOPING_AS_MATHML) != 0) { +// buf.append(" | SCOPING_AS_MATHML"); +// } +// if ((flags & SCOPING_AS_SVG) != 0) { +// buf.append(" | SCOPING_AS_SVG"); +// } +// if ((flags & OPTIONAL_END_TAG) != 0) { +// buf.append(" | OPTIONAL_END_TAG"); +// } +// return buf.toString(); +// } +// +// private String constName() { +// char[] buf = new char[name.length()]; +// for (int i = 0; i < name.length(); i++) { +// char c = name.charAt(i); +// if (c == '-') { +// buf[i] = '_'; +// } else if (c >= '0' && c <= '9') { +// buf[i] = c; +// } else { +// buf[i] = (char) (c - 0x20); +// } +// } +// return new String(buf); +// } +// +// private int hash() { +// return bufToHash(name.toCharArray(), name.length()); +// } +// +// public int compareTo(ElementName other) { +// int thisHash = this.hash(); +// int otherHash = other.hash(); +// if (thisHash < otherHash) { +// return -1; +// } else if (thisHash == otherHash) { +// return 0; +// } else { +// return 1; +// } +// } +// +// private String treeBuilderGroupToName() { +// switch (getGroup()) { +// case TreeBuilder.OTHER: +// return "OTHER"; +// case TreeBuilder.A: +// return "A"; +// 
case TreeBuilder.BASE: +// return "BASE"; +// case TreeBuilder.BODY: +// return "BODY"; +// case TreeBuilder.BR: +// return "BR"; +// case TreeBuilder.BUTTON: +// return "BUTTON"; +// case TreeBuilder.CAPTION: +// return "CAPTION"; +// case TreeBuilder.COL: +// return "COL"; +// case TreeBuilder.COLGROUP: +// return "COLGROUP"; +// case TreeBuilder.FONT: +// return "FONT"; +// case TreeBuilder.FORM: +// return "FORM"; +// case TreeBuilder.FRAME: +// return "FRAME"; +// case TreeBuilder.FRAMESET: +// return "FRAMESET"; +// case TreeBuilder.IMAGE: +// return "IMAGE"; +// case TreeBuilder.INPUT: +// return "INPUT"; +// case TreeBuilder.ISINDEX: +// return "ISINDEX"; +// case TreeBuilder.LI: +// return "LI"; +// case TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND: +// return "LINK_OR_BASEFONT_OR_BGSOUND"; +// case TreeBuilder.MATH: +// return "MATH"; +// case TreeBuilder.META: +// return "META"; +// case TreeBuilder.SVG: +// return "SVG"; +// case TreeBuilder.HEAD: +// return "HEAD"; +// case TreeBuilder.HR: +// return "HR"; +// case TreeBuilder.HTML: +// return "HTML"; +// case TreeBuilder.KEYGEN: +// return "KEYGEN"; +// case TreeBuilder.NOBR: +// return "NOBR"; +// case TreeBuilder.NOFRAMES: +// return "NOFRAMES"; +// case TreeBuilder.NOSCRIPT: +// return "NOSCRIPT"; +// case TreeBuilder.OPTGROUP: +// return "OPTGROUP"; +// case TreeBuilder.OPTION: +// return "OPTION"; +// case TreeBuilder.P: +// return "P"; +// case TreeBuilder.PLAINTEXT: +// return "PLAINTEXT"; +// case TreeBuilder.SCRIPT: +// return "SCRIPT"; +// case TreeBuilder.SELECT: +// return "SELECT"; +// case TreeBuilder.STYLE: +// return "STYLE"; +// case TreeBuilder.TABLE: +// return "TABLE"; +// case TreeBuilder.TEXTAREA: +// return "TEXTAREA"; +// case TreeBuilder.TITLE: +// return "TITLE"; +// case TreeBuilder.TEMPLATE: +// return "TEMPLATE"; +// case TreeBuilder.TR: +// return "TR"; +// case TreeBuilder.XMP: +// return "XMP"; +// case TreeBuilder.TBODY_OR_THEAD_OR_TFOOT: +// return 
"TBODY_OR_THEAD_OR_TFOOT"; +// case TreeBuilder.TD_OR_TH: +// return "TD_OR_TH"; +// case TreeBuilder.DD_OR_DT: +// return "DD_OR_DT"; +// case TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: +// return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6"; +// case TreeBuilder.OBJECT: +// return "OBJECT"; +// case TreeBuilder.OUTPUT: +// return "OUTPUT"; +// case TreeBuilder.MARQUEE_OR_APPLET: +// return "MARQUEE_OR_APPLET"; +// case TreeBuilder.PRE_OR_LISTING: +// return "PRE_OR_LISTING"; +// case TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: +// return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U"; +// case TreeBuilder.UL_OR_OL_OR_DL: +// return "UL_OR_OL_OR_DL"; +// case TreeBuilder.IFRAME: +// return "IFRAME"; +// case TreeBuilder.NOEMBED: +// return "NOEMBED"; +// case TreeBuilder.EMBED: +// return "EMBED"; +// case TreeBuilder.IMG: +// return "IMG"; +// case TreeBuilder.AREA_OR_WBR: +// return "AREA_OR_WBR"; +// case TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: +// return "DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU"; +// case TreeBuilder.FIELDSET: +// return "FIELDSET"; +// case TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: +// return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY"; +// case TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR: +// return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR"; +// case TreeBuilder.RB_OR_RTC: +// return "RB_OR_RTC"; +// case TreeBuilder.RT_OR_RP: +// return "RT_OR_RP"; +// case TreeBuilder.PARAM_OR_SOURCE_OR_TRACK: +// return "PARAM_OR_SOURCE_OR_TRACK"; +// case TreeBuilder.MGLYPH_OR_MALIGNMARK: +// return "MGLYPH_OR_MALIGNMARK"; +// case TreeBuilder.MI_MO_MN_MS_MTEXT: +// return "MI_MO_MN_MS_MTEXT"; +// case TreeBuilder.ANNOTATION_XML: +// return "ANNOTATION_XML"; +// case 
TreeBuilder.FOREIGNOBJECT_OR_DESC: +// return "FOREIGNOBJECT_OR_DESC"; +// case TreeBuilder.MENUITEM: +// return "MENUITEM"; +// } +// return null; +// } +// +// /** +// * Regenerate self +// * +// * @param args +// */ +// public static void main(String[] args) { +// Arrays.sort(ELEMENT_NAMES); +// for (int i = 1; i < ELEMENT_NAMES.length; i++) { +// if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) { +// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name +// + ", " + ELEMENT_NAMES[i - 1].name); +// return; +// } +// } +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println("public static final ElementName " +// + el.constName() + " = new ElementName" + el.toString() +// + ";"); +// } +// System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {"); +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println(el.constName() + ","); +// } +// System.out.println("};"); +// System.out.println("private final static int[] ELEMENT_HASHES = {"); +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println(Integer.toString(el.hash()) + ","); +// } +// System.out.println("};"); +// } + + // START GENERATED CODE + public static final ElementName A = new ElementName("a", "a", TreeBuilder.A); + public static final ElementName B = new ElementName("b", "b", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName G = new ElementName("g", "g", TreeBuilder.OTHER); + public static final ElementName I = new ElementName("i", "i", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName P = new ElementName("p", "p", TreeBuilder.P | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName Q = new ElementName("q", "q", TreeBuilder.OTHER); + public 
static final ElementName S = new ElementName("s", "s", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName U = new ElementName("u", "u", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName BR = new ElementName("br", "br", TreeBuilder.BR | SPECIAL); + public static final ElementName CI = new ElementName("ci", "ci", TreeBuilder.OTHER); + public static final ElementName CN = new ElementName("cn", "cn", TreeBuilder.OTHER); + public static final ElementName DD = new ElementName("dd", "dd", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName DT = new ElementName("dt", "dt", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName EM = new ElementName("em", "em", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName EQ = new ElementName("eq", "eq", TreeBuilder.OTHER); + public static final ElementName FN = new ElementName("fn", "fn", TreeBuilder.OTHER); + public static final ElementName H1 = new ElementName("h1", "h1", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H2 = new ElementName("h2", "h2", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H3 = new ElementName("h3", "h3", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H4 = new ElementName("h4", "h4", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H5 = new ElementName("h5", "h5", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H6 = new ElementName("h6", "h6", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final 
ElementName GT = new ElementName("gt", "gt", TreeBuilder.OTHER); + public static final ElementName HR = new ElementName("hr", "hr", TreeBuilder.HR | SPECIAL); + public static final ElementName IN = new ElementName("in", "in", TreeBuilder.OTHER); + public static final ElementName LI = new ElementName("li", "li", TreeBuilder.LI | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName LN = new ElementName("ln", "ln", TreeBuilder.OTHER); + public static final ElementName LT = new ElementName("lt", "lt", TreeBuilder.OTHER); + public static final ElementName MI = new ElementName("mi", "mi", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MN = new ElementName("mn", "mn", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MO = new ElementName("mo", "mo", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MS = new ElementName("ms", "ms", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName OL = new ElementName("ol", "ol", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName OR = new ElementName("or", "or", TreeBuilder.OTHER); + public static final ElementName PI = new ElementName("pi", "pi", TreeBuilder.OTHER); + public static final ElementName RB = new ElementName("rb", "rb", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG); + public static final ElementName RP = new ElementName("rp", "rp", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG); + public static final ElementName RT = new ElementName("rt", "rt", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG); + public static final ElementName TD = new ElementName("td", "td", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName TH = new ElementName("th", "th", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName TR = new ElementName("tr", "tr", TreeBuilder.TR | SPECIAL | FOSTER_PARENTING | 
OPTIONAL_END_TAG); + public static final ElementName TT = new ElementName("tt", "tt", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName UL = new ElementName("ul", "ul", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName AND = new ElementName("and", "and", TreeBuilder.OTHER); + public static final ElementName ARG = new ElementName("arg", "arg", TreeBuilder.OTHER); + public static final ElementName ABS = new ElementName("abs", "abs", TreeBuilder.OTHER); + public static final ElementName BIG = new ElementName("big", "big", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName BDO = new ElementName("bdo", "bdo", TreeBuilder.OTHER); + public static final ElementName CSC = new ElementName("csc", "csc", TreeBuilder.OTHER); + public static final ElementName COL = new ElementName("col", "col", TreeBuilder.COL | SPECIAL); + public static final ElementName COS = new ElementName("cos", "cos", TreeBuilder.OTHER); + public static final ElementName COT = new ElementName("cot", "cot", TreeBuilder.OTHER); + public static final ElementName DEL = new ElementName("del", "del", TreeBuilder.OTHER); + public static final ElementName DFN = new ElementName("dfn", "dfn", TreeBuilder.OTHER); + public static final ElementName DIR = new ElementName("dir", "dir", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName DIV = new ElementName("div", "div", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName EXP = new ElementName("exp", "exp", TreeBuilder.OTHER); + public static final ElementName GCD = new ElementName("gcd", "gcd", TreeBuilder.OTHER); + public static final ElementName GEQ = new ElementName("geq", "geq", TreeBuilder.OTHER); + public static final 
ElementName IMG = new ElementName("img", "img", TreeBuilder.IMG | SPECIAL); + public static final ElementName INS = new ElementName("ins", "ins", TreeBuilder.OTHER); + public static final ElementName INT = new ElementName("int", "int", TreeBuilder.OTHER); + public static final ElementName KBD = new ElementName("kbd", "kbd", TreeBuilder.OTHER); + public static final ElementName LOG = new ElementName("log", "log", TreeBuilder.OTHER); + public static final ElementName LCM = new ElementName("lcm", "lcm", TreeBuilder.OTHER); + public static final ElementName LEQ = new ElementName("leq", "leq", TreeBuilder.OTHER); + public static final ElementName MTD = new ElementName("mtd", "mtd", TreeBuilder.OTHER); + public static final ElementName MIN = new ElementName("min", "min", TreeBuilder.OTHER); + public static final ElementName MAP = new ElementName("map", "map", TreeBuilder.OTHER); + public static final ElementName MTR = new ElementName("mtr", "mtr", TreeBuilder.OTHER); + public static final ElementName MAX = new ElementName("max", "max", TreeBuilder.OTHER); + public static final ElementName NEQ = new ElementName("neq", "neq", TreeBuilder.OTHER); + public static final ElementName NOT = new ElementName("not", "not", TreeBuilder.OTHER); + public static final ElementName NAV = new ElementName("nav", "nav", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName PRE = new ElementName("pre", "pre", TreeBuilder.PRE_OR_LISTING | SPECIAL); + public static final ElementName RTC = new ElementName("rtc", "rtc", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG); + public static final ElementName REM = new ElementName("rem", "rem", TreeBuilder.OTHER); + public static final ElementName SUB = new ElementName("sub", "sub", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SEC = new ElementName("sec", "sec", TreeBuilder.OTHER); + 
public static final ElementName SVG = new ElementName("svg", "svg", TreeBuilder.SVG); + public static final ElementName SUM = new ElementName("sum", "sum", TreeBuilder.OTHER); + public static final ElementName SIN = new ElementName("sin", "sin", TreeBuilder.OTHER); + public static final ElementName SEP = new ElementName("sep", "sep", TreeBuilder.OTHER); + public static final ElementName SUP = new ElementName("sup", "sup", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SET = new ElementName("set", "set", TreeBuilder.OTHER); + public static final ElementName TAN = new ElementName("tan", "tan", TreeBuilder.OTHER); + public static final ElementName USE = new ElementName("use", "use", TreeBuilder.OTHER); + public static final ElementName VAR = new ElementName("var", "var", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName WBR = new ElementName("wbr", "wbr", TreeBuilder.AREA_OR_WBR | SPECIAL); + public static final ElementName XMP = new ElementName("xmp", "xmp", TreeBuilder.XMP | SPECIAL); + public static final ElementName XOR = new ElementName("xor", "xor", TreeBuilder.OTHER); + public static final ElementName AREA = new ElementName("area", "area", TreeBuilder.AREA_OR_WBR | SPECIAL); + public static final ElementName ABBR = new ElementName("abbr", "abbr", TreeBuilder.OTHER); + public static final ElementName BASE = new ElementName("base", "base", TreeBuilder.BASE | SPECIAL); + public static final ElementName BVAR = new ElementName("bvar", "bvar", TreeBuilder.OTHER); + public static final ElementName BODY = new ElementName("body", "body", TreeBuilder.BODY | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName CARD = new ElementName("card", "card", TreeBuilder.OTHER); + public static final ElementName CODE = new ElementName("code", "code", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName CITE = new ElementName("cite", "cite", 
TreeBuilder.OTHER); + public static final ElementName CSCH = new ElementName("csch", "csch", TreeBuilder.OTHER); + public static final ElementName COSH = new ElementName("cosh", "cosh", TreeBuilder.OTHER); + public static final ElementName COTH = new ElementName("coth", "coth", TreeBuilder.OTHER); + public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER); + public static final ElementName DESC = new ElementName("desc", "desc", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); + public static final ElementName DIFF = new ElementName("diff", "diff", TreeBuilder.OTHER); + public static final ElementName DEFS = new ElementName("defs", "defs", TreeBuilder.OTHER); + public static final ElementName FORM = new ElementName("form", "form", TreeBuilder.FORM | SPECIAL); + public static final ElementName FONT = new ElementName("font", "font", TreeBuilder.FONT); + public static final ElementName GRAD = new ElementName("grad", "grad", TreeBuilder.OTHER); + public static final ElementName HEAD = new ElementName("head", "head", TreeBuilder.HEAD | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName HTML = new ElementName("html", "html", TreeBuilder.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName LINE = new ElementName("line", "line", TreeBuilder.OTHER); + public static final ElementName LINK = new ElementName("link", "link", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final ElementName LIST = new ElementName("list", "list", TreeBuilder.OTHER); + public static final ElementName META = new ElementName("meta", "meta", TreeBuilder.META | SPECIAL); + public static final ElementName MSUB = new ElementName("msub", "msub", TreeBuilder.OTHER); + public static final ElementName MODE = new ElementName("mode", "mode", TreeBuilder.OTHER); + public static final ElementName MATH = new ElementName("math", "math", TreeBuilder.MATH); + public static final ElementName MARK = new ElementName("mark", 
"mark", TreeBuilder.OTHER); + public static final ElementName MASK = new ElementName("mask", "mask", TreeBuilder.OTHER); + public static final ElementName MEAN = new ElementName("mean", "mean", TreeBuilder.OTHER); + public static final ElementName MAIN = new ElementName("main", "main", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName MSUP = new ElementName("msup", "msup", TreeBuilder.OTHER); + public static final ElementName MENU = new ElementName("menu", "menu", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName MROW = new ElementName("mrow", "mrow", TreeBuilder.OTHER); + public static final ElementName NONE = new ElementName("none", "none", TreeBuilder.OTHER); + public static final ElementName NOBR = new ElementName("nobr", "nobr", TreeBuilder.NOBR); + public static final ElementName NEST = new ElementName("nest", "nest", TreeBuilder.OTHER); + public static final ElementName PATH = new ElementName("path", "path", TreeBuilder.OTHER); + public static final ElementName PLUS = new ElementName("plus", "plus", TreeBuilder.OTHER); + public static final ElementName RULE = new ElementName("rule", "rule", TreeBuilder.OTHER); + public static final ElementName REAL = new ElementName("real", "real", TreeBuilder.OTHER); + public static final ElementName RELN = new ElementName("reln", "reln", TreeBuilder.OTHER); + public static final ElementName RECT = new ElementName("rect", "rect", TreeBuilder.OTHER); + public static final ElementName ROOT = new ElementName("root", "root", TreeBuilder.OTHER); + public static final ElementName RUBY = new ElementName("ruby", "ruby", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SECH = new ElementName("sech", "sech", TreeBuilder.OTHER); + public static final ElementName SINH = new ElementName("sinh", "sinh", 
TreeBuilder.OTHER); + public static final ElementName SPAN = new ElementName("span", "span", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SAMP = new ElementName("samp", "samp", TreeBuilder.OTHER); + public static final ElementName STOP = new ElementName("stop", "stop", TreeBuilder.OTHER); + public static final ElementName SDEV = new ElementName("sdev", "sdev", TreeBuilder.OTHER); + public static final ElementName TIME = new ElementName("time", "time", TreeBuilder.OTHER); + public static final ElementName TRUE = new ElementName("true", "true", TreeBuilder.OTHER); + public static final ElementName TREF = new ElementName("tref", "tref", TreeBuilder.OTHER); + public static final ElementName TANH = new ElementName("tanh", "tanh", TreeBuilder.OTHER); + public static final ElementName TEXT = new ElementName("text", "text", TreeBuilder.OTHER); + public static final ElementName VIEW = new ElementName("view", "view", TreeBuilder.OTHER); + public static final ElementName ASIDE = new ElementName("aside", "aside", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName AUDIO = new ElementName("audio", "audio", TreeBuilder.OTHER); + public static final ElementName APPLY = new ElementName("apply", "apply", TreeBuilder.OTHER); + public static final ElementName EMBED = new ElementName("embed", "embed", TreeBuilder.EMBED | SPECIAL); + public static final ElementName FRAME = new ElementName("frame", "frame", TreeBuilder.FRAME | SPECIAL); + public static final ElementName FALSE = new ElementName("false", "false", TreeBuilder.OTHER); + public static final ElementName FLOOR = new ElementName("floor", "floor", TreeBuilder.OTHER); + public static final ElementName GLYPH = new ElementName("glyph", "glyph", TreeBuilder.OTHER); + public static final ElementName HKERN = new ElementName("hkern", "hkern", 
TreeBuilder.OTHER); + public static final ElementName IMAGE = new ElementName("image", "image", TreeBuilder.IMAGE); + public static final ElementName IDENT = new ElementName("ident", "ident", TreeBuilder.OTHER); + public static final ElementName INPUT = new ElementName("input", "input", TreeBuilder.INPUT | SPECIAL); + public static final ElementName LABEL = new ElementName("label", "label", TreeBuilder.OTHER); + public static final ElementName LIMIT = new ElementName("limit", "limit", TreeBuilder.OTHER); + public static final ElementName MFRAC = new ElementName("mfrac", "mfrac", TreeBuilder.OTHER); + public static final ElementName MPATH = new ElementName("mpath", "mpath", TreeBuilder.OTHER); + public static final ElementName METER = new ElementName("meter", "meter", TreeBuilder.OTHER); + public static final ElementName MOVER = new ElementName("mover", "mover", TreeBuilder.OTHER); + public static final ElementName MINUS = new ElementName("minus", "minus", TreeBuilder.OTHER); + public static final ElementName MROOT = new ElementName("mroot", "mroot", TreeBuilder.OTHER); + public static final ElementName MSQRT = new ElementName("msqrt", "msqrt", TreeBuilder.OTHER); + public static final ElementName MTEXT = new ElementName("mtext", "mtext", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName NOTIN = new ElementName("notin", "notin", TreeBuilder.OTHER); + public static final ElementName PIECE = new ElementName("piece", "piece", TreeBuilder.OTHER); + public static final ElementName PARAM = new ElementName("param", "param", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL); + public static final ElementName POWER = new ElementName("power", "power", TreeBuilder.OTHER); + public static final ElementName REALS = new ElementName("reals", "reals", TreeBuilder.OTHER); + public static final ElementName STYLE = new ElementName("style", "style", TreeBuilder.STYLE | SPECIAL); + public static final ElementName SMALL = new ElementName("small", 
"small", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName THEAD = new ElementName("thead", "thead", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName TABLE = new ElementName("table", "table", TreeBuilder.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING); + public static final ElementName TITLE = new ElementName("title", "title", TreeBuilder.TITLE | SPECIAL | SCOPING_AS_SVG); + public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL); + public static final ElementName TSPAN = new ElementName("tspan", "tspan", TreeBuilder.OTHER); + public static final ElementName TIMES = new ElementName("times", "times", TreeBuilder.OTHER); + public static final ElementName TFOOT = new ElementName("tfoot", "tfoot", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName TBODY = new ElementName("tbody", "tbody", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName UNION = new ElementName("union", "union", TreeBuilder.OTHER); + public static final ElementName VKERN = new ElementName("vkern", "vkern", TreeBuilder.OTHER); + public static final ElementName VIDEO = new ElementName("video", "video", TreeBuilder.OTHER); + public static final ElementName ARCSEC = new ElementName("arcsec", "arcsec", TreeBuilder.OTHER); + public static final ElementName ARCCSC = new ElementName("arccsc", "arccsc", TreeBuilder.OTHER); + public static final ElementName ARCTAN = new ElementName("arctan", "arctan", TreeBuilder.OTHER); + public static final ElementName ARCSIN = new ElementName("arcsin", "arcsin", TreeBuilder.OTHER); + public static final ElementName ARCCOS = new ElementName("arccos", "arccos", TreeBuilder.OTHER); + public static final ElementName APPLET = new 
ElementName("applet", "applet", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING); + public static final ElementName ARCCOT = new ElementName("arccot", "arccot", TreeBuilder.OTHER); + public static final ElementName APPROX = new ElementName("approx", "approx", TreeBuilder.OTHER); + public static final ElementName BUTTON = new ElementName("button", "button", TreeBuilder.BUTTON | SPECIAL); + public static final ElementName CIRCLE = new ElementName("circle", "circle", TreeBuilder.OTHER); + public static final ElementName CENTER = new ElementName("center", "center", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName CURSOR = new ElementName("cursor", "cursor", TreeBuilder.OTHER); + public static final ElementName CANVAS = new ElementName("canvas", "canvas", TreeBuilder.OTHER); + public static final ElementName DIVIDE = new ElementName("divide", "divide", TreeBuilder.OTHER); + public static final ElementName DEGREE = new ElementName("degree", "degree", TreeBuilder.OTHER); + public static final ElementName DOMAIN = new ElementName("domain", "domain", TreeBuilder.OTHER); + public static final ElementName EXISTS = new ElementName("exists", "exists", TreeBuilder.OTHER); + public static final ElementName FETILE = new ElementName("fetile", "feTile", TreeBuilder.OTHER); + public static final ElementName FIGURE = new ElementName("figure", "figure", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName FORALL = new ElementName("forall", "forall", TreeBuilder.OTHER); + public static final ElementName FILTER = new ElementName("filter", "filter", TreeBuilder.OTHER); + public static final ElementName FOOTER = new ElementName("footer", "footer", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | 
SPECIAL); + public static final ElementName HGROUP = new ElementName("hgroup", "hgroup", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName HEADER = new ElementName("header", "header", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName IFRAME = new ElementName("iframe", "iframe", TreeBuilder.IFRAME | SPECIAL); + public static final ElementName KEYGEN = new ElementName("keygen", "keygen", TreeBuilder.KEYGEN); + public static final ElementName LAMBDA = new ElementName("lambda", "lambda", TreeBuilder.OTHER); + public static final ElementName LEGEND = new ElementName("legend", "legend", TreeBuilder.OTHER); + public static final ElementName MSPACE = new ElementName("mspace", "mspace", TreeBuilder.OTHER); + public static final ElementName MTABLE = new ElementName("mtable", "mtable", TreeBuilder.OTHER); + public static final ElementName MSTYLE = new ElementName("mstyle", "mstyle", TreeBuilder.OTHER); + public static final ElementName MGLYPH = new ElementName("mglyph", "mglyph", TreeBuilder.MGLYPH_OR_MALIGNMARK); + public static final ElementName MEDIAN = new ElementName("median", "median", TreeBuilder.OTHER); + public static final ElementName MUNDER = new ElementName("munder", "munder", TreeBuilder.OTHER); + public static final ElementName MARKER = new ElementName("marker", "marker", TreeBuilder.OTHER); + public static final ElementName MERROR = new ElementName("merror", "merror", TreeBuilder.OTHER); + public static final ElementName MOMENT = new ElementName("moment", "moment", TreeBuilder.OTHER); + public static final ElementName MATRIX = new ElementName("matrix", "matrix", TreeBuilder.OTHER); + public static final ElementName OPTION = new ElementName("option", "option", 
TreeBuilder.OPTION | OPTIONAL_END_TAG); + public static final ElementName OBJECT = new ElementName("object", "object", TreeBuilder.OBJECT | SPECIAL | SCOPING); + public static final ElementName OUTPUT = new ElementName("output", "output", TreeBuilder.OUTPUT); + public static final ElementName PRIMES = new ElementName("primes", "primes", TreeBuilder.OTHER); + public static final ElementName SOURCE = new ElementName("source", "source", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK); + public static final ElementName STRIKE = new ElementName("strike", "strike", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName STRONG = new ElementName("strong", "strong", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName SWITCH = new ElementName("switch", "switch", TreeBuilder.OTHER); + public static final ElementName SYMBOL = new ElementName("symbol", "symbol", TreeBuilder.OTHER); + public static final ElementName SELECT = new ElementName("select", "select", TreeBuilder.SELECT | SPECIAL); + public static final ElementName SUBSET = new ElementName("subset", "subset", TreeBuilder.OTHER); + public static final ElementName SCRIPT = new ElementName("script", "script", TreeBuilder.SCRIPT | SPECIAL); + public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER); + public static final ElementName VECTOR = new ElementName("vector", "vector", TreeBuilder.OTHER); + public static final ElementName ARTICLE = new ElementName("article", "article", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName ANIMATE = new ElementName("animate", "animate", TreeBuilder.OTHER); + public static final ElementName ARCSECH = new ElementName("arcsech", "arcsech", TreeBuilder.OTHER); + public static final 
ElementName ARCCSCH = new ElementName("arccsch", "arccsch", TreeBuilder.OTHER); + public static final ElementName ARCTANH = new ElementName("arctanh", "arctanh", TreeBuilder.OTHER); + public static final ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", TreeBuilder.OTHER); + public static final ElementName ARCCOSH = new ElementName("arccosh", "arccosh", TreeBuilder.OTHER); + public static final ElementName ARCCOTH = new ElementName("arccoth", "arccoth", TreeBuilder.OTHER); + public static final ElementName ACRONYM = new ElementName("acronym", "acronym", TreeBuilder.OTHER); + public static final ElementName ADDRESS = new ElementName("address", "address", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName BGSOUND = new ElementName("bgsound", "bgsound", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final ElementName COMPOSE = new ElementName("compose", "compose", TreeBuilder.OTHER); + public static final ElementName CEILING = new ElementName("ceiling", "ceiling", TreeBuilder.OTHER); + public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER); + public static final ElementName CAPTION = new ElementName("caption", "caption", TreeBuilder.CAPTION | SPECIAL | SCOPING); + public static final ElementName DISCARD = new ElementName("discard", "discard", TreeBuilder.OTHER); + public static final ElementName DECLARE = new ElementName("declare", "declare", TreeBuilder.OTHER); + public static final ElementName DETAILS = new ElementName("details", "details", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName ELLIPSE = new ElementName("ellipse", "ellipse", TreeBuilder.OTHER); + public static final ElementName FEFUNCA = new 
ElementName("fefunca", "feFuncA", TreeBuilder.OTHER); + public static final ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", TreeBuilder.OTHER); + public static final ElementName FEBLEND = new ElementName("feblend", "feBlend", TreeBuilder.OTHER); + public static final ElementName FEFLOOD = new ElementName("feflood", "feFlood", TreeBuilder.OTHER); + public static final ElementName FEIMAGE = new ElementName("feimage", "feImage", TreeBuilder.OTHER); + public static final ElementName FEMERGE = new ElementName("femerge", "feMerge", TreeBuilder.OTHER); + public static final ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", TreeBuilder.OTHER); + public static final ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", TreeBuilder.OTHER); + public static final ElementName HANDLER = new ElementName("handler", "handler", TreeBuilder.OTHER); + public static final ElementName INVERSE = new ElementName("inverse", "inverse", TreeBuilder.OTHER); + public static final ElementName IMPLIES = new ElementName("implies", "implies", TreeBuilder.OTHER); + public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.ISINDEX | SPECIAL); + public static final ElementName LOGBASE = new ElementName("logbase", "logbase", TreeBuilder.OTHER); + public static final ElementName LISTING = new ElementName("listing", "listing", TreeBuilder.PRE_OR_LISTING | SPECIAL); + public static final ElementName MFENCED = new ElementName("mfenced", "mfenced", TreeBuilder.OTHER); + public static final ElementName MPADDED = new ElementName("mpadded", "mpadded", TreeBuilder.OTHER); + public static final ElementName MARQUEE = new ElementName("marquee", "marquee", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING); + public static final ElementName MACTION = new ElementName("maction", "maction", TreeBuilder.OTHER); + public static final ElementName MSUBSUP = new ElementName("msubsup", "msubsup", TreeBuilder.OTHER); + public static final ElementName NOEMBED = 
new ElementName("noembed", "noembed", TreeBuilder.NOEMBED | SPECIAL); + public static final ElementName POLYGON = new ElementName("polygon", "polygon", TreeBuilder.OTHER); + public static final ElementName PATTERN = new ElementName("pattern", "pattern", TreeBuilder.OTHER); + public static final ElementName PICTURE = new ElementName("picture", "picture", TreeBuilder.OTHER); + public static final ElementName PRODUCT = new ElementName("product", "product", TreeBuilder.OTHER); + public static final ElementName SETDIFF = new ElementName("setdiff", "setdiff", TreeBuilder.OTHER); + public static final ElementName SECTION = new ElementName("section", "section", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName SUMMARY = new ElementName("summary", "summary", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName TENDSTO = new ElementName("tendsto", "tendsto", TreeBuilder.OTHER); + public static final ElementName UPLIMIT = new ElementName("uplimit", "uplimit", TreeBuilder.OTHER); + public static final ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", TreeBuilder.OTHER); + public static final ElementName BASEFONT = new ElementName("basefont", "basefont", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final ElementName CLIPPATH = new ElementName("clippath", "clipPath", TreeBuilder.OTHER); + public static final ElementName CODOMAIN = new ElementName("codomain", "codomain", TreeBuilder.OTHER); + public static final ElementName COLGROUP = new ElementName("colgroup", "colgroup", TreeBuilder.COLGROUP | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName EMPTYSET = new ElementName("emptyset", "emptyset", TreeBuilder.OTHER); + public static final ElementName 
FACTOROF = new ElementName("factorof", "factorof", TreeBuilder.OTHER); + public static final ElementName FIELDSET = new ElementName("fieldset", "fieldset", TreeBuilder.FIELDSET | SPECIAL); + public static final ElementName FRAMESET = new ElementName("frameset", "frameset", TreeBuilder.FRAMESET | SPECIAL); + public static final ElementName FEOFFSET = new ElementName("feoffset", "feOffset", TreeBuilder.OTHER); + public static final ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", TreeBuilder.OTHER); + public static final ElementName INTERVAL = new ElementName("interval", "interval", TreeBuilder.OTHER); + public static final ElementName INTEGERS = new ElementName("integers", "integers", TreeBuilder.OTHER); + public static final ElementName INFINITY = new ElementName("infinity", "infinity", TreeBuilder.OTHER); + public static final ElementName LISTENER = new ElementName("listener", "listener", TreeBuilder.OTHER); + public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER); + public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER); + public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER); + public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM); + public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER); + public static final ElementName NOFRAMES = new ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL); + public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL); + public static final ElementName OPTGROUP = new ElementName("optgroup", "optgroup", TreeBuilder.OPTGROUP | OPTIONAL_END_TAG); + public static final ElementName POLYLINE = new ElementName("polyline", "polyline", TreeBuilder.OTHER); + public static final ElementName PREFETCH = new 
ElementName("prefetch", "prefetch", TreeBuilder.OTHER); + public static final ElementName PROGRESS = new ElementName("progress", "progress", TreeBuilder.OTHER); + public static final ElementName PRSUBSET = new ElementName("prsubset", "prsubset", TreeBuilder.OTHER); + public static final ElementName QUOTIENT = new ElementName("quotient", "quotient", TreeBuilder.OTHER); + public static final ElementName SELECTOR = new ElementName("selector", "selector", TreeBuilder.OTHER); + public static final ElementName TEXTAREA = new ElementName("textarea", "textarea", TreeBuilder.TEXTAREA | SPECIAL); + public static final ElementName TEMPLATE = new ElementName("template", "template", TreeBuilder.TEMPLATE | SPECIAL | SCOPING); + public static final ElementName TEXTPATH = new ElementName("textpath", "textPath", TreeBuilder.OTHER); + public static final ElementName VARIANCE = new ElementName("variance", "variance", TreeBuilder.OTHER); + public static final ElementName ANIMATION = new ElementName("animation", "animation", TreeBuilder.OTHER); + public static final ElementName CONJUGATE = new ElementName("conjugate", "conjugate", TreeBuilder.OTHER); + public static final ElementName CONDITION = new ElementName("condition", "condition", TreeBuilder.OTHER); + public static final ElementName COMPLEXES = new ElementName("complexes", "complexes", TreeBuilder.OTHER); + public static final ElementName FONT_FACE = new ElementName("font-face", "font-face", TreeBuilder.OTHER); + public static final ElementName FACTORIAL = new ElementName("factorial", "factorial", TreeBuilder.OTHER); + public static final ElementName INTERSECT = new ElementName("intersect", "intersect", TreeBuilder.OTHER); + public static final ElementName IMAGINARY = new ElementName("imaginary", "imaginary", TreeBuilder.OTHER); + public static final ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", TreeBuilder.OTHER); + public static final ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", 
TreeBuilder.OTHER); + public static final ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", TreeBuilder.OTHER); + public static final ElementName OTHERWISE = new ElementName("otherwise", "otherwise", TreeBuilder.OTHER); + public static final ElementName PIECEWISE = new ElementName("piecewise", "piecewise", TreeBuilder.OTHER); + public static final ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", TreeBuilder.PLAINTEXT | SPECIAL); + public static final ElementName RATIONALS = new ElementName("rationals", "rationals", TreeBuilder.OTHER); + public static final ElementName SEMANTICS = new ElementName("semantics", "semantics", TreeBuilder.OTHER); + public static final ElementName TRANSPOSE = new ElementName("transpose", "transpose", TreeBuilder.OTHER); + public static final ElementName ANNOTATION = new ElementName("annotation", "annotation", TreeBuilder.OTHER); + public static final ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName DIVERGENCE = new ElementName("divergence", "divergence", TreeBuilder.OTHER); + public static final ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", TreeBuilder.OTHER); + public static final ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", TreeBuilder.OTHER); + public static final ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", TreeBuilder.OTHER); + public static final ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", TreeBuilder.MGLYPH_OR_MALIGNMARK); + public static final ElementName MUNDEROVER = new ElementName("munderover", "munderover", TreeBuilder.OTHER); + public 
static final ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", TreeBuilder.OTHER); + public static final ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", TreeBuilder.OTHER); + public static final ElementName SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", TreeBuilder.OTHER); + public static final ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", TreeBuilder.OTHER); + public static final ElementName DETERMINANT = new ElementName("determinant", "determinant", TreeBuilder.OTHER); + public static final ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", TreeBuilder.OTHER); + public static final ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", TreeBuilder.OTHER); + public static final ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", TreeBuilder.OTHER); + public static final ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", TreeBuilder.OTHER); + public static final ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", TreeBuilder.OTHER); + public static final ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", TreeBuilder.OTHER); + public static final ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", TreeBuilder.OTHER); + public static final ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", TreeBuilder.OTHER); + public static final ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", TreeBuilder.OTHER); + public static final ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", TreeBuilder.OTHER); + public static final ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", TreeBuilder.OTHER); + public static final ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", TreeBuilder.OTHER); + public static final ElementName FETURBULENCE = new 
ElementName("feturbulence", "feTurbulence", TreeBuilder.OTHER); + public static final ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", TreeBuilder.OTHER); + public static final ElementName FEDROPSHADOW = new ElementName("fedropshadow", "feDropShadow", TreeBuilder.OTHER); + public static final ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", TreeBuilder.OTHER); + public static final ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", TreeBuilder.OTHER); + public static final ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", TreeBuilder.OTHER); + public static final ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", TreeBuilder.OTHER); + public static final ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); + public static final ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", TreeBuilder.OTHER); + public static final ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", TreeBuilder.OTHER); + public static final ElementName MMULTISCRIPTS = new ElementName("mmultiscripts", "mmultiscripts", TreeBuilder.OTHER); + public static final ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", TreeBuilder.OTHER); + public static final ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", TreeBuilder.OTHER); + public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML); + public static final ElementName DEFINITION_SRC = new 
ElementName("definition-src", "definition-src", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", TreeBuilder.OTHER); + public static final ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", TreeBuilder.OTHER); + public static final ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", TreeBuilder.OTHER); + public static final ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", TreeBuilder.OTHER); + public static final ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", TreeBuilder.OTHER); + public static final ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", TreeBuilder.OTHER); + public static final ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", TreeBuilder.OTHER); + public static final ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", TreeBuilder.OTHER); + public static final ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", TreeBuilder.OTHER); + public static final ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", TreeBuilder.OTHER); + public static final ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", TreeBuilder.OTHER); + public static final ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", TreeBuilder.OTHER); + public static final ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", TreeBuilder.OTHER); + public static final ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", TreeBuilder.OTHER); + 
private final static @NoLength ElementName[] ELEMENT_NAMES = { + A, + B, + G, + I, + P, + Q, + S, + U, + BR, + CI, + CN, + DD, + DL, + DT, + EM, + EQ, + FN, + H1, + H2, + H3, + H4, + H5, + H6, + GT, + HR, + IN, + LI, + LN, + LT, + MI, + MN, + MO, + MS, + OL, + OR, + PI, + RB, + RP, + RT, + TD, + TH, + TR, + TT, + UL, + AND, + ARG, + ABS, + BIG, + BDO, + CSC, + COL, + COS, + COT, + DEL, + DFN, + DIR, + DIV, + EXP, + GCD, + GEQ, + IMG, + INS, + INT, + KBD, + LOG, + LCM, + LEQ, + MTD, + MIN, + MAP, + MTR, + MAX, + NEQ, + NOT, + NAV, + PRE, + RTC, + REM, + SUB, + SEC, + SVG, + SUM, + SIN, + SEP, + SUP, + SET, + TAN, + USE, + VAR, + WBR, + XMP, + XOR, + AREA, + ABBR, + BASE, + BVAR, + BODY, + CARD, + CODE, + CITE, + CSCH, + COSH, + COTH, + CURL, + DESC, + DIFF, + DEFS, + FORM, + FONT, + GRAD, + HEAD, + HTML, + LINE, + LINK, + LIST, + META, + MSUB, + MODE, + MATH, + MARK, + MASK, + MEAN, + MAIN, + MSUP, + MENU, + MROW, + NONE, + NOBR, + NEST, + PATH, + PLUS, + RULE, + REAL, + RELN, + RECT, + ROOT, + RUBY, + SECH, + SINH, + SPAN, + SAMP, + STOP, + SDEV, + TIME, + TRUE, + TREF, + TANH, + TEXT, + VIEW, + ASIDE, + AUDIO, + APPLY, + EMBED, + FRAME, + FALSE, + FLOOR, + GLYPH, + HKERN, + IMAGE, + IDENT, + INPUT, + LABEL, + LIMIT, + MFRAC, + MPATH, + METER, + MOVER, + MINUS, + MROOT, + MSQRT, + MTEXT, + NOTIN, + PIECE, + PARAM, + POWER, + REALS, + STYLE, + SMALL, + THEAD, + TABLE, + TITLE, + TRACK, + TSPAN, + TIMES, + TFOOT, + TBODY, + UNION, + VKERN, + VIDEO, + ARCSEC, + ARCCSC, + ARCTAN, + ARCSIN, + ARCCOS, + APPLET, + ARCCOT, + APPROX, + BUTTON, + CIRCLE, + CENTER, + CURSOR, + CANVAS, + DIVIDE, + DEGREE, + DOMAIN, + EXISTS, + FETILE, + FIGURE, + FORALL, + FILTER, + FOOTER, + HGROUP, + HEADER, + IFRAME, + KEYGEN, + LAMBDA, + LEGEND, + MSPACE, + MTABLE, + MSTYLE, + MGLYPH, + MEDIAN, + MUNDER, + MARKER, + MERROR, + MOMENT, + MATRIX, + OPTION, + OBJECT, + OUTPUT, + PRIMES, + SOURCE, + STRIKE, + STRONG, + SWITCH, + SYMBOL, + SELECT, + SUBSET, + SCRIPT, + TBREAK, + VECTOR, + 
ARTICLE, + ANIMATE, + ARCSECH, + ARCCSCH, + ARCTANH, + ARCSINH, + ARCCOSH, + ARCCOTH, + ACRONYM, + ADDRESS, + BGSOUND, + COMPOSE, + CEILING, + CSYMBOL, + CAPTION, + DISCARD, + DECLARE, + DETAILS, + ELLIPSE, + FEFUNCA, + FEFUNCB, + FEBLEND, + FEFLOOD, + FEIMAGE, + FEMERGE, + FEFUNCG, + FEFUNCR, + HANDLER, + INVERSE, + IMPLIES, + ISINDEX, + LOGBASE, + LISTING, + MFENCED, + MPADDED, + MARQUEE, + MACTION, + MSUBSUP, + NOEMBED, + POLYGON, + PATTERN, + PICTURE, + PRODUCT, + SETDIFF, + SECTION, + SUMMARY, + TENDSTO, + UPLIMIT, + ALTGLYPH, + BASEFONT, + CLIPPATH, + CODOMAIN, + COLGROUP, + EMPTYSET, + FACTOROF, + FIELDSET, + FRAMESET, + FEOFFSET, + GLYPHREF, + INTERVAL, + INTEGERS, + INFINITY, + LISTENER, + LOWLIMIT, + METADATA, + MENCLOSE, + MENUITEM, + MPHANTOM, + NOFRAMES, + NOSCRIPT, + OPTGROUP, + POLYLINE, + PREFETCH, + PROGRESS, + PRSUBSET, + QUOTIENT, + SELECTOR, + TEXTAREA, + TEMPLATE, + TEXTPATH, + VARIANCE, + ANIMATION, + CONJUGATE, + CONDITION, + COMPLEXES, + FONT_FACE, + FACTORIAL, + INTERSECT, + IMAGINARY, + LAPLACIAN, + MATRIXROW, + NOTSUBSET, + OTHERWISE, + PIECEWISE, + PLAINTEXT, + RATIONALS, + SEMANTICS, + TRANSPOSE, + ANNOTATION, + BLOCKQUOTE, + DIVERGENCE, + EULERGAMMA, + EQUIVALENT, + FIGCAPTION, + IMAGINARYI, + MALIGNMARK, + MUNDEROVER, + MLABELEDTR, + NOTANUMBER, + SOLIDCOLOR, + ALTGLYPHDEF, + DETERMINANT, + FEMERGENODE, + FECOMPOSITE, + FESPOTLIGHT, + MALIGNGROUP, + MPRESCRIPTS, + MOMENTABOUT, + NOTPRSUBSET, + PARTIALDIFF, + ALTGLYPHITEM, + ANIMATECOLOR, + DATATEMPLATE, + EXPONENTIALE, + FETURBULENCE, + FEPOINTLIGHT, + FEDROPSHADOW, + FEMORPHOLOGY, + OUTERPRODUCT, + ANIMATEMOTION, + COLOR_PROFILE, + FONT_FACE_SRC, + FONT_FACE_URI, + FOREIGNOBJECT, + FECOLORMATRIX, + MISSING_GLYPH, + MMULTISCRIPTS, + SCALARPRODUCT, + VECTORPRODUCT, + ANNOTATION_XML, + DEFINITION_SRC, + FONT_FACE_NAME, + FEGAUSSIANBLUR, + FEDISTANTLIGHT, + LINEARGRADIENT, + NATURALNUMBERS, + RADIALGRADIENT, + ANIMATETRANSFORM, + CARTESIANPRODUCT, + FONT_FACE_FORMAT, + FECONVOLVEMATRIX, 
+ FEDIFFUSELIGHTING, + FEDISPLACEMENTMAP, + FESPECULARLIGHTING, + DOMAINOFAPPLICATION, + FECOMPONENTTRANSFER, + }; + private final static int[] ELEMENT_HASHES = { + 1057, + 1090, + 1255, + 1321, + 1552, + 1585, + 1651, + 1717, + 68162, + 68899, + 69059, + 69764, + 70020, + 70276, + 71077, + 71205, + 72134, + 72232, + 72264, + 72296, + 72328, + 72360, + 72392, + 73351, + 74312, + 75209, + 78124, + 78284, + 78476, + 79149, + 79309, + 79341, + 79469, + 81295, + 81487, + 82224, + 84050, + 84498, + 84626, + 86164, + 86292, + 86612, + 86676, + 87445, + 3183041, + 3186241, + 3198017, + 3218722, + 3226754, + 3247715, + 3256803, + 3263971, + 3264995, + 3289252, + 3291332, + 3295524, + 3299620, + 3326725, + 3379303, + 3392679, + 3448233, + 3460553, + 3461577, + 3510347, + 3546604, + 3552364, + 3556524, + 3576461, + 3586349, + 3588141, + 3590797, + 3596333, + 3622062, + 3625454, + 3627054, + 3675728, + 3739282, + 3749042, + 3771059, + 3771571, + 3776211, + 3782323, + 3782963, + 3784883, + 3785395, + 3788979, + 3815476, + 3839605, + 3885110, + 3917911, + 3948984, + 3951096, + 135304769, + 135858241, + 136498210, + 136906434, + 137138658, + 137512995, + 137531875, + 137548067, + 137629283, + 137645539, + 137646563, + 137775779, + 138529956, + 138615076, + 139040932, + 140954086, + 141179366, + 141690439, + 142738600, + 143013512, + 146979116, + 147175724, + 147475756, + 147902637, + 147936877, + 148017645, + 148131885, + 148228141, + 148229165, + 148309165, + 148317229, + 148395629, + 148551853, + 148618829, + 149076462, + 149490158, + 149572782, + 151277616, + 151639440, + 153268914, + 153486514, + 153563314, + 153750706, + 153763314, + 153914034, + 154406067, + 154417459, + 154600979, + 154678323, + 154680979, + 154866835, + 155366708, + 155375188, + 155391572, + 155465780, + 155869364, + 158045494, + 168988979, + 169321621, + 169652752, + 173151309, + 174240818, + 174247297, + 174669292, + 175391532, + 176638123, + 177380397, + 177879204, + 177886734, + 180753473, + 
181020073, + 181503558, + 181686320, + 181999237, + 181999311, + 182048201, + 182074866, + 182078003, + 182083764, + 182920847, + 184716457, + 184976961, + 185145071, + 187281445, + 187872052, + 188100653, + 188875944, + 188919873, + 188920457, + 189107250, + 189203987, + 189371817, + 189414886, + 189567458, + 190266670, + 191318187, + 191337609, + 202479203, + 202493027, + 202835587, + 202843747, + 203013219, + 203036048, + 203045987, + 203177552, + 203898516, + 204648562, + 205067918, + 205078130, + 205096654, + 205689142, + 205690439, + 205988909, + 207213161, + 207794484, + 207800999, + 208023602, + 208213644, + 208213647, + 210261490, + 210310273, + 210940978, + 213325049, + 213946445, + 214055079, + 215125040, + 215134273, + 215135028, + 215237420, + 215418148, + 215553166, + 215553394, + 215563858, + 215627949, + 215754324, + 217529652, + 217713834, + 217732628, + 218731945, + 221417045, + 221424946, + 221493746, + 221515401, + 221658189, + 221908140, + 221910626, + 221921586, + 222659762, + 225001091, + 236105833, + 236113965, + 236194995, + 236195427, + 236206132, + 236206387, + 236211683, + 236212707, + 236381647, + 236571826, + 237124271, + 238210544, + 238270764, + 238435405, + 238501172, + 239224867, + 239257644, + 239710497, + 240307721, + 241208789, + 241241557, + 241318060, + 241319404, + 241343533, + 241344069, + 241405397, + 241765845, + 243864964, + 244502085, + 244946220, + 245109902, + 247647266, + 247707956, + 248648814, + 248648836, + 248682161, + 248986932, + 249058914, + 249697357, + 252132601, + 252135604, + 251841204, + 252317348, + 255007012, + 255278388, + 255641645, + 256365156, + 257566121, + 269763372, + 271202790, + 271863856, + 272049197, + 272127474, + 274339449, + 274939471, + 275388004, + 275388005, + 275388006, + 275977800, + 278267602, + 278513831, + 278712622, + 281613765, + 281683369, + 282120228, + 282250732, + 282498697, + 282508942, + 283743649, + 283787570, + 284710386, + 285391148, + 285478533, + 285854898, + 285873762, 
+ 286931113, + 288964227, + 289445441, + 289591340, + 289689648, + 291671489, + 303512884, + 305319975, + 305610036, + 305764101, + 308448294, + 308675890, + 312085683, + 312264750, + 315032867, + 316391000, + 317331042, + 317902135, + 318950711, + 319447220, + 321499182, + 322538804, + 323145200, + 337067316, + 337826293, + 339905989, + 340833697, + 341457068, + 342310196, + 345302593, + 349554733, + 349771471, + 349786245, + 350819405, + 356072847, + 370349192, + 373962798, + 375558638, + 375574835, + 376053993, + 383276530, + 383373833, + 383407586, + 384439906, + 386079012, + 404133513, + 404307343, + 407031852, + 408072233, + 409112005, + 409608425, + 409713793, + 409771500, + 419040932, + 437730612, + 439529766, + 442616365, + 442813037, + 443157674, + 443295316, + 450118444, + 450482697, + 456789668, + 459935396, + 471217869, + 474073645, + 476230702, + 476665218, + 476717289, + 483014825, + 485083298, + 489306281, + 538364390, + 540675748, + 543819186, + 543958612, + 576960820, + 577242548, + 610515252, + 642202932, + 644420819, + }; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java new file mode 100644 index 0000000000..f1749e0b36 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2009-2013 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +import java.util.HashMap; + +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class ErrorReportingTokenizer extends Tokenizer { + + /** + * Magic value for UTF-16 operations. + */ + private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00); + + /** + * The policy for non-space non-XML characters. + */ + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET; + + /** + * Keeps track of PUA warnings. + */ + private boolean alreadyWarnedAboutPrivateUseCharacters; + + /** + * The current line number in the current resource being parsed. (First line + * is 1.) Passed on as locator data. + */ + private int line; + + private int linePrev; + + /** + * The current column number in the current resource being tokenized. (First + * column is 1, counted by UTF-16 code units.) Passed on as locator data. 
+ */ + private int col; + + private int colPrev; + + private boolean nextCharOnNewLine; + + private char prev; + + private HashMap errorProfileMap = null; + + private TransitionHandler transitionHandler = null; + + private int transitionBaseOffset = 0; + + /** + * @param tokenHandler + * @param newAttributesEachTime + */ + public ErrorReportingTokenizer(TokenHandler tokenHandler, + boolean newAttributesEachTime) { + super(tokenHandler, newAttributesEachTime); + } + + /** + * @param tokenHandler + */ + public ErrorReportingTokenizer(TokenHandler tokenHandler) { + super(tokenHandler); + } + + /** + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + if (line > 0) { + return line; + } else { + return -1; + } + } + + /** + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + if (col > 0) { + return col; + } else { + return -1; + } + } + + /** + * Sets the contentNonXmlCharPolicy. + * + * @param contentNonXmlCharPolicy + * the contentNonXmlCharPolicy to set + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; + } + + /** + * Sets the errorProfile. + * + * @param errorProfile + */ + public void setErrorProfile(HashMap errorProfileMap) { + this.errorProfileMap = errorProfileMap; + } + + /** + * Reports on an event based on profile selected. 
+ * + * @param profile + * the profile this message belongs to + * @param message + * the message itself + * @throws SAXException + */ + public void note(String profile, String message) throws SAXException { + if (errorProfileMap == null) + return; + String level = errorProfileMap.get(profile); + if ("warn".equals(level)) { + warn(message); + } else if ("err".equals(level)) { + err(message); + // } else if ("info".equals(level)) { + // info(message); + } + } + + protected void startErrorReporting() throws SAXException { + line = linePrev = 0; + col = colPrev = 1; + nextCharOnNewLine = true; + prev = '\u0000'; + alreadyWarnedAboutPrivateUseCharacters = false; + transitionBaseOffset = 0; + } + + @Inline protected void silentCarriageReturn() { + nextCharOnNewLine = true; + lastCR = true; + } + + @Inline protected void silentLineFeed() { + nextCharOnNewLine = true; + } + + /** + * Returns the line. + * + * @return the line + */ + public int getLine() { + return line; + } + + /** + * Returns the col. + * + * @return the col + */ + public int getCol() { + return col; + } + + /** + * Returns the nextCharOnNewLine. + * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return nextCharOnNewLine; + } + + /** + * Flushes coalesced character tokens. 
+ * + * @param buf + * TODO + * @param pos + * TODO + * + * @throws SAXException + */ + @Override protected void flushChars(char[] buf, int pos) + throws SAXException { + if (pos > cstart) { + int currLine = line; + int currCol = col; + line = linePrev; + col = colPrev; + tokenHandler.characters(buf, cstart, pos - cstart); + line = currLine; + col = currCol; + } + cstart = 0x7fffffff; + } + + @Override protected char checkChar(@NoLength char[] buf, int pos) + throws SAXException { + linePrev = line; + colPrev = col; + if (nextCharOnNewLine) { + line++; + col = 1; + nextCharOnNewLine = false; + } else { + col++; + } + + char c = buf[pos]; + switch (c) { + case '\u0000': + err("Saw U+0000 in stream."); + case '\t': + case '\r': + case '\n': + break; + case '\u000C': + if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) { + fatal("This document is not mappable to XML 1.0 without data loss due to " + + toUPlusString(c) + + " which is not a legal XML 1.0 character."); + } else { + if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) { + c = buf[pos] = ' '; + } + warn("This document is not mappable to XML 1.0 without data loss due to " + + toUPlusString(c) + + " which is not a legal XML 1.0 character."); + } + break; + default: + if ((c & 0xFC00) == 0xDC00) { + // Got a low surrogate. 
See if prev was high + // surrogate + if ((prev & 0xFC00) == 0xD800) { + int intVal = (prev << 10) + c + SURROGATE_OFFSET; + if ((intVal & 0xFFFE) == 0xFFFE) { + err("Astral non-character."); + } + if (isAstralPrivateUse(intVal)) { + warnAboutPrivateUseChar(); + } + } + } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Forbidden code point " + toUPlusString(c) + + "."); + break; + case ALTER_INFOSET: + c = buf[pos] = '\uFFFD'; + // fall through + case ALLOW: + err("Forbidden code point " + toUPlusString(c) + + "."); + } + } else if ((c >= '\u007F') && (c <= '\u009F') + || (c >= '\uFDD0') && (c <= '\uFDEF')) { + err("Forbidden code point " + toUPlusString(c) + "."); + } else if (isPrivateUse(c)) { + warnAboutPrivateUseChar(); + } + } + prev = c; + return c; + } + + /** + * @throws SAXException + * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean, + * int) + */ + @Override protected int transition(int from, int to, boolean reconsume, + int pos) throws SAXException { + if (transitionHandler != null) { + transitionHandler.transition(from, to, reconsume, + transitionBaseOffset + pos); + } + return to; + } + + private String toUPlusString(int c) { + String hexString = Integer.toHexString(c); + switch (hexString.length()) { + case 1: + return "U+000" + hexString; + case 2: + return "U+00" + hexString; + case 3: + return "U+0" + hexString; + default: + return "U+" + hexString; + } + } + + /** + * Emits a warning about private use characters if the warning has not been + * emitted yet. + * + * @throws SAXException + */ + private void warnAboutPrivateUseChar() throws SAXException { + if (!alreadyWarnedAboutPrivateUseCharacters) { + warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)"); + alreadyWarnedAboutPrivateUseCharacters = true; + } + } + + /** + * Tells if the argument is a BMP PUA character. 
+ * + * @param c + * the UTF-16 code unit to check + * @return true if PUA character + */ + private boolean isPrivateUse(char c) { + return c >= '\uE000' && c <= '\uF8FF'; + } + + /** + * Tells if the argument is an astral PUA character. + * + * @param c + * the code point to check + * @return true if astral private use + */ + private boolean isAstralPrivateUse(int c) { + return (c >= 0xF0000 && c <= 0xFFFFD) + || (c >= 0x100000 && c <= 0x10FFFD); + } + + @Override protected void errGarbageAfterLtSlash() throws SAXException { + err("Garbage after \u201C\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped end tag."); + } + + @Override protected void errWarnLtSlashInRcdata() throws SAXException { + if (html4) { + err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + + " element \u201C" + + endTagExpectation + + "\u201D contained the string \u201C 0 || (folded >= 'a' && folded <= 'z')) + && ElementName.IFRAME != endTagExpectation) { + err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + + " element \u201C" + + endTagExpectation.name + + "\u201D contained the string \u201C\u201D in system identifier."); + } + + @Override protected void errGtInPublicId() throws SAXException { + err("\u201C>\u201D in public identifier."); + } + + @Override protected void errNamelessDoctype() throws SAXException { + err("Nameless doctype."); + } + + @Override protected void errConsecutiveHyphens() throws SAXException { + err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is."); + } + + @Override protected void errPrematureEndOfComment() throws SAXException { + err("Premature end of comment. 
Use \u201C-->\u201D to end a comment properly.");
+    }
+
+    /** Reports the "Bogus comment." error. */
+    @Override protected void errBogusComment() throws SAXException {
+        err("Bogus comment.");
+    }
+
+    /**
+     * Reports a problematic character inside an unquoted attribute value. The
+     * message depends on the character; nothing is reported for U+FFFD.
+     *
+     * @param c
+     *            the character that was seen
+     */
+    @Override protected void errUnquotedAttributeValOrNull(char c)
+            throws SAXException {
+        switch (c) {
+            case '<':
+                err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
+                return;
+            case '`':
+                err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
+                return;
+            case '\uFFFD':
+                // Deliberately silent for the replacement character.
+                return;
+            default:
+                err("\u201C"
+                        + c
+                        + "\u201D in an unquoted attribute value. Probable causes: Attributes running together or a URL query string in an unquoted attribute value.");
+                return;
+        }
+    }
+
+    /** Reports a slash that was not immediately followed by a greater-than sign. */
+    @Override protected void errSlashNotFollowedByGt() throws SAXException {
+        err("A slash was not immediately followed by \u201C>\u201D.");
+    }
+
+    /**
+     * Reports the slash-greater-than syntax on void elements; this is only an
+     * error while the {@code html4} flag is set.
+     */
+    @Override protected void errHtml4XmlVoidSyntax() throws SAXException {
+        if (html4) {
+            err("The \u201C/>\u201D syntax on void elements is not allowed. (This is an HTML4-only error.)");
+        }
+    }
+
+    /** Reports the "No space between attributes." error. */
+    @Override protected void errNoSpaceBetweenAttributes() throws SAXException {
+        err("No space between attributes.");
+    }
+
+    /**
+     * Reports a non-name character in an unquoted attribute value, but only
+     * while the {@code html4} flag is set. ASCII letters, ASCII digits, full
+     * stop, hyphen, underscore and colon do not trigger the error.
+     *
+     * @param c
+     *            the character that was seen
+     */
+    @Override protected void errHtml4NonNameInUnquotedAttribute(char c)
+            throws SAXException {
+        if (html4
+                && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+                        || (c >= '0' && c <= '9') || c == '.' || c == '-'
+                        || c == '_' || c == ':')) {
+            err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)");
+        }
+    }
+
+    /**
+     * Reports an equals sign, less-than sign or grave accent at the start of
+     * an unquoted attribute value, with a message specific to the character.
+     *
+     * @param c
+     *            the character that was seen
+     */
+    @Override protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(
+            char c) throws SAXException {
+        switch (c) {
+            case '=':
+                err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign.");
+                return;
+            case '<':
+                err("\u201C<\u201D at the start of an unquoted attribute value.
Probable cause: Missing \u201C>\u201D immediately before."); + return; + case '`': + err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote."); + return; + } + } + + @Override protected void errAttributeValueMissing() throws SAXException { + err("Attribute value missing."); + } + + @Override protected void errBadCharBeforeAttributeNameOrNull(char c) + throws SAXException { + if (c == '<') { + err("Saw \u201C<\u201D when expecting an attribute name. Probable cause: Missing \u201C>\u201D immediately before."); + } else if (c == '=') { + errEqualsSignBeforeAttributeName(); + } else if (c != '\uFFFD') { + errQuoteBeforeAttributeName(c); + } + } + + @Override protected void errEqualsSignBeforeAttributeName() + throws SAXException { + err("Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing."); + } + + @Override protected void errBadCharAfterLt(char c) throws SAXException { + err("Bad character \u201C" + + c + + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C<\u201D."); + } + + @Override protected void errLtGt() throws SAXException { + err("Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped start tag."); + } + + @Override protected void errProcessingInstruction() throws SAXException { + err("Saw \u201C\u201D missing immediately before."); + } else if (c != '\uFFFD') { + err("Quote \u201C" + + c + + "\u201D in attribute name. 
Probable cause: Matching quote missing somewhere earlier."); + } + } + + @Override protected void errExpectedPublicId() throws SAXException { + err("Expected a public identifier but the doctype ended."); + } + + @Override protected void errBogusDoctype() throws SAXException { + err("Bogus doctype."); + } + + @Override protected void maybeWarnPrivateUseAstral() throws SAXException { + if (errorHandler != null && isAstralPrivateUse(value)) { + warnAboutPrivateUseChar(); + } + } + + @Override protected void maybeWarnPrivateUse(char ch) throws SAXException { + if (errorHandler != null && isPrivateUse(ch)) { + warnAboutPrivateUseChar(); + } + } + + @Override protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) + throws SAXException { + if (attrs.getLength() != 0) { + /* + * When an end tag token is emitted with attributes, that is a parse + * error. + */ + err("End tag had attributes."); + } + } + + @Override protected void maybeErrSlashInEndTag(boolean selfClosing) + throws SAXException { + if (selfClosing && endTag) { + err("Stray \u201C/\u201D at the end of an end tag."); + } + } + + @Override protected char errNcrNonCharacter(char ch) throws SAXException { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Character reference expands to a non-character (" + + toUPlusString((char) value) + ")."); + break; + case ALTER_INFOSET: + ch = '\uFFFD'; + // fall through + case ALLOW: + err("Character reference expands to a non-character (" + + toUPlusString((char) value) + ")."); + } + return ch; + } + + /** + * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int) + */ + @Override protected void errAstralNonCharacter(int ch) throws SAXException { + err("Character reference expands to an astral non-character (" + + toUPlusString(value) + ")."); + } + + @Override protected void errNcrSurrogate() throws SAXException { + err("Character reference expands to a surrogate."); + } + + @Override protected char errNcrControlChar(char ch) throws 
SAXException { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + break; + case ALTER_INFOSET: + ch = '\uFFFD'; + // fall through + case ALLOW: + err("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + } + return ch; + } + + @Override protected void errNcrCr() throws SAXException { + err("A numeric character reference expanded to carriage return."); + } + + @Override protected void errNcrInC1Range() throws SAXException { + err("A numeric character reference expanded to the C1 controls range."); + } + + @Override protected void errEofInPublicId() throws SAXException { + err("End of file inside public identifier."); + } + + @Override protected void errEofInComment() throws SAXException { + err("End of file inside comment."); + } + + @Override protected void errEofInDoctype() throws SAXException { + err("End of file inside doctype."); + } + + @Override protected void errEofInAttributeValue() throws SAXException { + err("End of file reached when inside an attribute value. Ignoring tag."); + } + + @Override protected void errEofInAttributeName() throws SAXException { + err("End of file occurred in an attribute name. Ignoring tag."); + } + + @Override protected void errEofWithoutGt() throws SAXException { + err("Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag."); + } + + @Override protected void errEofInTagName() throws SAXException { + err("End of file seen when looking for tag name. Ignoring tag."); + } + + @Override protected void errEofInEndTag() throws SAXException { + err("End of file inside end tag. 
Ignoring tag."); + } + + @Override protected void errEofAfterLt() throws SAXException { + err("End of file after \u201C<\u201D."); + } + + @Override protected void errNcrOutOfRange() throws SAXException { + err("Character reference outside the permissible Unicode range."); + } + + @Override protected void errNcrUnassigned() throws SAXException { + err("Character reference expands to a permanently unassigned code point."); + } + + @Override protected void errDuplicateAttribute() throws SAXException { + err("Duplicate attribute \u201C" + + attributeName.getLocal(AttributeName.HTML) + "\u201D."); + } + + @Override protected void errEofInSystemId() throws SAXException { + err("End of file inside system identifier."); + } + + @Override protected void errExpectedSystemId() throws SAXException { + err("Expected a system identifier but the doctype ended."); + } + + @Override protected void errMissingSpaceBeforeDoctypeName() + throws SAXException { + err("Missing space before doctype name."); + } + + @Override protected void errHyphenHyphenBang() throws SAXException { + err("\u201C--!\u201D found in comment."); + } + + @Override protected void errNcrControlChar() throws SAXException { + err("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + } + + @Override protected void errNcrZero() throws SAXException { + err("Character reference expands to zero."); + } + + @Override protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() + throws SAXException { + err("No space between the doctype \u201CSYSTEM\u201D keyword and the quote."); + } + + @Override protected void errNoSpaceBetweenPublicAndSystemIds() + throws SAXException { + err("No space between the doctype public and system identifiers."); + } + + @Override protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() + throws SAXException { + err("No space between the doctype \u201CPUBLIC\u201D keyword and the quote."); + } + + @Override protected void 
noteAttributeWithoutValue() throws SAXException { + note("xhtml2", "Attribute without value"); + } + + @Override protected void noteUnquotedAttributeValue() throws SAXException { + note("xhtml1", "Unquoted attribute value."); + } + + /** + * Sets the transitionHandler. + * + * @param transitionHandler + * the transitionHandler to set + */ + public void setTransitionHandler(TransitionHandler transitionHandler) { + this.transitionHandler = transitionHandler; + } + + /** + * Sets an offset to be added to the position reported to + * TransitionHandler. + * + * @param offset + * the offset + */ + public void setTransitionBaseOffset(int offset) { + this.transitionBaseOffset = offset; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt new file mode 100644 index 0000000000..c389a8cac6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + /** + * compressed returnValue: + * int returnState = returnValue >> 33 + * boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0) + * int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue + */ + @SuppressWarnings("unused") private long workAroundHotSpotHugeMethodLimit( + int state, char c, int pos, @NoLength char[] buf, + boolean reconsume, int returnState, int endPos) throws SAXException { + stateloop: for (;;) { + switch (state) { + // BEGIN HOTSPOT WORKAROUND + default: + long returnStateAndPos = workAroundHotSpotHugeMethodLimit( + state, c, pos, buf, reconsume, returnState, endPos); + pos = (int)returnStateAndPos; // 5.1.3 in the Java spec + returnState = (int)(returnStateAndPos >> 33); + state = stateSave; + if ( (pos == endPos) || ( (((int)(returnStateAndPos >> 32)) & 0x1) != 0) ) { + break stateloop; + } + continue stateloop; + // END HOTSPOT WORKAROUND + default: + assert !reconsume : "Must not reconsume when returning from HotSpot workaround."; + stateSave = state; + return (((long)returnState) << 33) | pos; + } + } + assert !reconsume : "Must not reconsume when returning from HotSpot workaround."; + stateSave = state; + return (((long)returnState) << 33) | (1L << 32) | pos ; + } diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java new file mode 100644 index 0000000000..0ec25f96f0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to 
any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.IdType; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.annotation.Prefix; +import nu.validator.htmlparser.annotation.QName; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +/** + * Be careful with this class. QName is the name from HTML tokenization. + * Otherwise, please refer to the interface doc. 
+ * + * @version $Id: AttributesImpl.java 206 2008-03-20 14:09:29Z hsivonen $ + * @author hsivonen + */ +public final class HtmlAttributes implements Attributes { + + // [NOCPP[ + + private static final AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0]; + + private static final String[] EMPTY_STRINGS = new String[0]; + + // ]NOCPP] + + public static final HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes( + AttributeName.HTML); + + private int mode; + + private int length; + + private @Auto AttributeName[] names; + + private @Auto String[] values; // XXX perhaps make this @NoLength? + + // CPPONLY: private @Auto int[] lines; // XXX perhaps make this @NoLength? + + // [NOCPP[ + + private String idValue; + + private int xmlnsLength; + + private AttributeName[] xmlnsNames; + + private String[] xmlnsValues; + + // ]NOCPP] + + public HtmlAttributes(int mode) { + this.mode = mode; + this.length = 0; + /* + * The length of 5 covers 98.3% of elements + * according to Hixie, but let's round to the next power of two for + * jemalloc. 
+ */ + this.names = new AttributeName[8]; + this.values = new String[8]; + // CPPONLY: this.lines = new int[8]; + + // [NOCPP[ + + this.idValue = null; + + this.xmlnsLength = 0; + + this.xmlnsNames = HtmlAttributes.EMPTY_ATTRIBUTENAMES; + + this.xmlnsValues = HtmlAttributes.EMPTY_STRINGS; + + // ]NOCPP] + } + /* + public HtmlAttributes(HtmlAttributes other) { + this.mode = other.mode; + this.length = other.length; + this.names = new AttributeName[other.length]; + this.values = new String[other.length]; + // [NOCPP[ + this.idValue = other.idValue; + this.xmlnsLength = other.xmlnsLength; + this.xmlnsNames = new AttributeName[other.xmlnsLength]; + this.xmlnsValues = new String[other.xmlnsLength]; + // ]NOCPP] + } + */ + + void destructor() { + clear(0); + } + + /** + * Only use with a static argument + * + * @param name + * @return + */ + public int getIndex(AttributeName name) { + for (int i = 0; i < length; i++) { + if (names[i] == name) { + return i; + } + } + return -1; + } + + /** + * Only use with static argument. + * + * @see org.xml.sax.Attributes#getValue(java.lang.String) + */ + public String getValue(AttributeName name) { + int index = getIndex(name); + if (index == -1) { + return null; + } else { + return getValueNoBoundsCheck(index); + } + } + + public int getLength() { + return length; + } + + /** + * Variant of getLocalName(int index) without bounds check. + * @param index a valid attribute index + * @return the local name at index + */ + public @Local String getLocalNameNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getLocal(mode); + } + + /** + * Variant of getURI(int index) without bounds check. 
+ * @param index a valid attribute index + * @return the namespace URI at index + */ + public @NsUri String getURINoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getUri(mode); + } + + /** + * Variant of getPrefix(int index) without bounds check. + * @param index a valid attribute index + * @return the namespace prefix at index + */ + public @Prefix String getPrefixNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getPrefix(mode); + } + + /** + * Variant of getValue(int index) without bounds check. + * @param index a valid attribute index + * @return the attribute value at index + */ + public String getValueNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return values[index]; + } + + /** + * Variant of getAttributeName(int index) without bounds check. + * @param index a valid attribute index + * @return the attribute name at index + */ + public AttributeName getAttributeNameNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index]; + } + + // CPPONLY: /** + // CPPONLY: * Obtains a line number without bounds check. + // CPPONLY: * @param index a valid attribute index + // CPPONLY: * @return the line number at index or -1 if unknown + // CPPONLY: */ + // CPPONLY: public int getLineNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + // CPPONLY: return lines[index]; + // CPPONLY: } + + // [NOCPP[ + + /** + * Variant of getQName(int index) without bounds check. + * @param index a valid attribute index + * @return the QName at index + */ + public @QName String getQNameNoBoundsCheck(int index) { + return names[index].getQName(mode); + } + + /** + * Variant of getType(int index) without bounds check. 
+ * @param index a valid attribute index + * @return the attribute type at index + */ + public @IdType String getTypeNoBoundsCheck(int index) { + return (names[index] == AttributeName.ID) ? "ID" : "CDATA"; + } + + public int getIndex(String qName) { + for (int i = 0; i < length; i++) { + if (names[i].getQName(mode).equals(qName)) { + return i; + } + } + return -1; + } + + public int getIndex(String uri, String localName) { + for (int i = 0; i < length; i++) { + if (names[i].getLocal(mode).equals(localName) + && names[i].getUri(mode).equals(uri)) { + return i; + } + } + return -1; + } + + public @IdType String getType(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public @IdType String getType(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public String getValue(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + public String getValue(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + public @Local String getLocalName(int index) { + if (index < length && index >= 0) { + return names[index].getLocal(mode); + } else { + return null; + } + } + + public @QName String getQName(int index) { + if (index < length && index >= 0) { + return names[index].getQName(mode); + } else { + return null; + } + } + + public @IdType String getType(int index) { + if (index < length && index >= 0) { + return (names[index] == AttributeName.ID) ? 
"ID" : "CDATA"; + } else { + return null; + } + } + + public AttributeName getAttributeName(int index) { + if (index < length && index >= 0) { + return names[index]; + } else { + return null; + } + } + + public @NsUri String getURI(int index) { + if (index < length && index >= 0) { + return names[index].getUri(mode); + } else { + return null; + } + } + + public @Prefix String getPrefix(int index) { + if (index < length && index >= 0) { + return names[index].getPrefix(mode); + } else { + return null; + } + } + + public String getValue(int index) { + if (index < length && index >= 0) { + return values[index]; + } else { + return null; + } + } + + public String getId() { + return idValue; + } + + public int getXmlnsLength() { + return xmlnsLength; + } + + public @Local String getXmlnsLocalName(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index].getLocal(mode); + } else { + return null; + } + } + + public @NsUri String getXmlnsURI(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index].getUri(mode); + } else { + return null; + } + } + + public String getXmlnsValue(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsValues[index]; + } else { + return null; + } + } + + public int getXmlnsIndex(AttributeName name) { + for (int i = 0; i < xmlnsLength; i++) { + if (xmlnsNames[i] == name) { + return i; + } + } + return -1; + } + + public String getXmlnsValue(AttributeName name) { + int index = getXmlnsIndex(name); + if (index == -1) { + return null; + } else { + return getXmlnsValue(index); + } + } + + public AttributeName getXmlnsAttributeName(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index]; + } else { + return null; + } + } + + // ]NOCPP] + + void addAttribute(AttributeName name, String value + // [NOCPP[ + , XmlViolationPolicy xmlnsPolicy + // ]NOCPP] + // CPPONLY: , int line + ) throws SAXException { + // [NOCPP[ + if (name == AttributeName.ID) { + idValue = 
value; + } + + if (name.isXmlns()) { + if (xmlnsNames.length == xmlnsLength) { + int newLen = xmlnsLength == 0 ? 2 : xmlnsLength << 1; + AttributeName[] newNames = new AttributeName[newLen]; + System.arraycopy(xmlnsNames, 0, newNames, 0, xmlnsNames.length); + xmlnsNames = newNames; + String[] newValues = new String[newLen]; + System.arraycopy(xmlnsValues, 0, newValues, 0, xmlnsValues.length); + xmlnsValues = newValues; + } + xmlnsNames[xmlnsLength] = name; + xmlnsValues[xmlnsLength] = value; + xmlnsLength++; + switch (xmlnsPolicy) { + case FATAL: + // this is ugly + throw new SAXException("Saw an xmlns attribute."); + case ALTER_INFOSET: + return; + case ALLOW: + // fall through + } + } + + // ]NOCPP] + + if (names.length == length) { + int newLen = length << 1; // The first growth covers virtually + // 100% of elements according to + // Hixie + AttributeName[] newNames = new AttributeName[newLen]; + System.arraycopy(names, 0, newNames, 0, names.length); + names = newNames; + String[] newValues = new String[newLen]; + System.arraycopy(values, 0, newValues, 0, values.length); + values = newValues; + // CPPONLY: int[] newLines = new int[newLen]; + // CPPONLY: System.arraycopy(lines, 0, newLines, 0, lines.length); + // CPPONLY: lines = newLines; + } + names[length] = name; + values[length] = value; + // CPPONLY: lines[length] = line; + length++; + } + + void clear(int m) { + for (int i = 0; i < length; i++) { + names[i].release(); + names[i] = null; + Portability.releaseString(values[i]); + values[i] = null; + } + length = 0; + mode = m; + // [NOCPP[ + idValue = null; + for (int i = 0; i < xmlnsLength; i++) { + xmlnsNames[i] = null; + xmlnsValues[i] = null; + } + xmlnsLength = 0; + // ]NOCPP] + } + + /** + * This is used in C++ to release special isindex + * attribute values whose ownership is not transferred. 
+ */ + void releaseValue(int i) { + Portability.releaseString(values[i]); + } + + /** + * This is only used for AttributeName ownership transfer + * in the isindex case to avoid freeing custom names twice in C++. + */ + void clearWithoutReleasingContents() { + for (int i = 0; i < length; i++) { + names[i] = null; + values[i] = null; + } + length = 0; + } + + boolean contains(AttributeName name) { + for (int i = 0; i < length; i++) { + if (name.equalsAnother(names[i])) { + return true; + } + } + // [NOCPP[ + for (int i = 0; i < xmlnsLength; i++) { + if (name.equalsAnother(xmlnsNames[i])) { + return true; + } + } + // ]NOCPP] + return false; + } + + public void adjustForMath() { + mode = AttributeName.MATHML; + } + + public void adjustForSvg() { + mode = AttributeName.SVG; + } + + public HtmlAttributes cloneAttributes(Interner interner) + throws SAXException { + assert (length == 0 + // [NOCPP[ + && xmlnsLength == 0 + // ]NOCPP] + ) + || mode == 0 || mode == 3; + HtmlAttributes clone = new HtmlAttributes(0); + for (int i = 0; i < length; i++) { + clone.addAttribute(names[i].cloneAttributeName(interner), + Portability.newStringFromString(values[i]) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , lines[i] + ); + } + // [NOCPP[ + for (int i = 0; i < xmlnsLength; i++) { + clone.addAttribute(xmlnsNames[i], xmlnsValues[i], + XmlViolationPolicy.ALLOW); + } + // ]NOCPP] + return clone; // XXX!!! 
+ } + + public boolean equalsAnother(HtmlAttributes other) { + assert mode == 0 || mode == 3 : "Trying to compare attributes in foreign content."; + int otherLength = other.getLength(); + if (length != otherLength) { + return false; + } + for (int i = 0; i < length; i++) { + // Work around the limitations of C++ + boolean found = false; + // Comparing just the local names is OK, since these attribute + // holders are both supposed to belong to HTML formatting elements + @Local String ownLocal = names[i].getLocal(AttributeName.HTML); + for (int j = 0; j < otherLength; j++) { + if (ownLocal == other.names[j].getLocal(AttributeName.HTML)) { + found = true; + if (!Portability.stringEqualsString(values[i], other.values[j])) { + return false; + } + } + } + if (!found) { + return false; + } + } + return true; + } + + // [NOCPP[ + + void processNonNcNames(TreeBuilder treeBuilder, XmlViolationPolicy namePolicy) throws SAXException { + for (int i = 0; i < length; i++) { + AttributeName attName = names[i]; + if (!attName.isNcName(mode)) { + String name = attName.getLocal(mode); + switch (namePolicy) { + case ALTER_INFOSET: + names[i] = AttributeName.create(NCName.escapeName(name)); + // fall through + case ALLOW: + if (attName != AttributeName.XML_LANG) { + treeBuilder.warn("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0."); + } + break; + case FATAL: + treeBuilder.fatal("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0."); + break; + } + } + } + } + + public void merge(HtmlAttributes attributes) throws SAXException { + int len = attributes.getLength(); + for (int i = 0; i < len; i++) { + AttributeName name = attributes.getAttributeNameNoBoundsCheck(i); + if (!contains(name)) { + addAttribute(name, attributes.getValueNoBoundsCheck(i), XmlViolationPolicy.ALLOW); + } + } + } + + + // ]NOCPP] + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java 
b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java new file mode 100644 index 0000000000..7a559d9034 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.Locator; + +public class LocatorImpl implements Locator { + + private final String systemId; + + private final String publicId; + + private final int column; + + private final int line; + + public LocatorImpl(Locator locator) { + this.systemId = locator.getSystemId(); + this.publicId = locator.getPublicId(); + this.column = locator.getColumnNumber(); + this.line = locator.getLineNumber(); + } + + public final int getColumnNumber() { + return column; + } + + public final int getLineNumber() { + return line; + } + + public final String getPublicId() { + return publicId; + } + + public final String getSystemId() { + return systemId; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java new file mode 100644 index 0000000000..be9aabfe33 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java @@ -0,0 +1,854 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import java.io.IOException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.common.ByteReadable; + +import org.xml.sax.SAXException; + +public abstract class MetaScanner { + + /** + * Constant for "charset". + */ + private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' }; + + /** + * Constant for "content". + */ + private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' }; + + /** + * Constant for "http-equiv". + */ + private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q', + 'u', 'i', 'v' }; + + /** + * Constant for "content-type". + */ + private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n', + 't', '-', 't', 'y', 'p', 'e' }; + + private static final int NO = 0; + + private static final int M = 1; + + private static final int E = 2; + + private static final int T = 3; + + private static final int A = 4; + + private static final int DATA = 0; + + private static final int TAG_OPEN = 1; + + private static final int SCAN_UNTIL_GT = 2; + + private static final int TAG_NAME = 3; + + private static final int BEFORE_ATTRIBUTE_NAME = 4; + + private static final int ATTRIBUTE_NAME = 5; + + private static final int AFTER_ATTRIBUTE_NAME = 6; + + private static final int BEFORE_ATTRIBUTE_VALUE = 7; + + private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8; + + private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9; + + private static final int ATTRIBUTE_VALUE_UNQUOTED = 10; + + private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11; + + private static final int MARKUP_DECLARATION_OPEN = 13; + + 
private static final int MARKUP_DECLARATION_HYPHEN = 14; + + private static final int COMMENT_START = 15; + + private static final int COMMENT_START_DASH = 16; + + private static final int COMMENT = 17; + + private static final int COMMENT_END_DASH = 18; + + private static final int COMMENT_END = 19; + + private static final int SELF_CLOSING_START_TAG = 20; + + private static final int HTTP_EQUIV_NOT_SEEN = 0; + + private static final int HTTP_EQUIV_CONTENT_TYPE = 1; + + private static final int HTTP_EQUIV_OTHER = 2; + + /** + * The data source. + */ + protected ByteReadable readable; + + /** + * The state of the state machine that recognizes the tag name "meta". + */ + private int metaState = NO; + + /** + * The current position in recognizing the attribute name "content", or Integer.MAX_VALUE when it cannot match. + */ + private int contentIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute name "charset", or Integer.MAX_VALUE when it cannot match. + */ + private int charsetIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute name "http-equiv", or Integer.MAX_VALUE when it cannot match. + */ + private int httpEquivIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute value "content-type", or Integer.MAX_VALUE when it cannot match. + */ + private int contentTypeIndex = Integer.MAX_VALUE; + + /** + * The tokenizer state, saved when stateLoop returns. + */ + protected int stateSave = DATA; + + /** + * The currently filled length of strBuf. + */ + private int strBufLen; + + /** + * Accumulation buffer for attribute values. 
+ */ + private @Auto char[] strBuf; + + private String content; + + private String charset; + + private int httpEquivState; + + // CPPONLY: private TreeBuilder treeBuilder; + + public MetaScanner( + // CPPONLY: TreeBuilder tb + ) { + this.readable = null; + this.metaState = NO; + this.contentIndex = Integer.MAX_VALUE; + this.charsetIndex = Integer.MAX_VALUE; + this.httpEquivIndex = Integer.MAX_VALUE; + this.contentTypeIndex = Integer.MAX_VALUE; + this.stateSave = DATA; + this.strBufLen = 0; + this.strBuf = new char[36]; + this.content = null; + this.charset = null; + this.httpEquivState = HTTP_EQUIV_NOT_SEEN; + // CPPONLY: this.treeBuilder = tb; + } + + @SuppressWarnings("unused") private void destructor() { + Portability.releaseString(content); + Portability.releaseString(charset); + } + + // [NOCPP[ + + /** + * Reads a byte from the data source. + * + * -1 means end. + * @return the next byte from readable, or -1 at the end of the data + * @throws IOException if readable.readByte() throws it + */ + protected int read() throws IOException { + return readable.readByte(); + } + + // ]NOCPP] + + // WARNING When editing this, make sure the bytecode length shown by javap + // stays under 8000 bytes! + /** + * Runs the meta scanning algorithm, starting from the given tokenizer state. 
+ */ + protected final void stateLoop(int state) + throws SAXException, IOException { + int c = -1; + boolean reconsume = false; + stateloop: for (;;) { + switch (state) { + case DATA: + dataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '<': + state = MetaScanner.TAG_OPEN; + break dataloop; // FALL THROUGH continue + // stateloop; + default: + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case TAG_OPEN: + tagopenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case 'm': + case 'M': + metaState = M; + state = MetaScanner.TAG_NAME; + break tagopenloop; + // continue stateloop; + case '!': + state = MetaScanner.MARKUP_DECLARATION_OPEN; + continue stateloop; + case '?': + case '/': + state = MetaScanner.SCAN_UNTIL_GT; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { + metaState = NO; + state = MetaScanner.TAG_NAME; + break tagopenloop; + // continue stateloop; + } + state = MetaScanner.DATA; + reconsume = true; + continue stateloop; + } + } + // FALL THROUGH DON'T REORDER + case TAG_NAME: + tagnameloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + break tagnameloop; + // continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + case 'e': + case 'E': + if (metaState == M) { + metaState = E; + } else { + metaState = NO; + } + continue; + case 't': + case 'T': + if (metaState == E) { + metaState = T; + } else { + metaState = NO; + } + continue; + case 'a': + case 'A': + if (metaState == T) { + metaState = A; + } else { + metaState = NO; + } + continue; + default: + metaState = NO; + continue; + } 
+ } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_NAME: + beforeattributenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + /* + * Consume the next input character: + */ + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = DATA; + continue stateloop; + case 'c': + case 'C': + contentIndex = 0; + charsetIndex = 0; + httpEquivIndex = Integer.MAX_VALUE; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + case 'h': + case 'H': + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + httpEquivIndex = 0; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + default: + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + httpEquivIndex = Integer.MAX_VALUE; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_NAME: + attributenameloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.AFTER_ATTRIBUTE_NAME; + continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '=': + strBufLen = 0; + contentTypeIndex = 0; + state = MetaScanner.BEFORE_ATTRIBUTE_VALUE; + break attributenameloop; + // continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + if (metaState == A) { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) { + ++contentIndex; + } else { + 
contentIndex = Integer.MAX_VALUE; + } + if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) { + ++charsetIndex; + } else { + charsetIndex = Integer.MAX_VALUE; + } + if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) { + ++httpEquivIndex; + } else { + httpEquivIndex = Integer.MAX_VALUE; + } + } + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_VALUE: + beforeattributevalueloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '"': + state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break beforeattributevalueloop; + // continue stateloop; + case '\'': + state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED; + continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + handleCharInAttributeValue(c); + state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + attributevaluedoublequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '"': + handleAttributeValue(); + state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED; + break attributevaluedoublequotedloop; + // continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_ATTRIBUTE_VALUE_QUOTED: + afterattributevaluequotedloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + break afterattributevaluequotedloop; + // continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + state = 
MetaScanner.BEFORE_ATTRIBUTE_NAME; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case SELF_CLOSING_START_TAG: + c = read(); + switch (c) { + case -1: + break stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + reconsume = true; + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_UNQUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + + case '\u000C': + handleAttributeValue(); + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + continue stateloop; + case '>': + handleAttributeValue(); + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // XXX reorder point + case AFTER_ATTRIBUTE_NAME: + for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '/': + handleAttributeValue(); + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '=': + strBufLen = 0; + contentTypeIndex = 0; + state = MetaScanner.BEFORE_ATTRIBUTE_VALUE; + continue stateloop; + case '>': + handleAttributeValue(); + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + case 'c': + case 'C': + contentIndex = 0; + charsetIndex = 0; + state = MetaScanner.ATTRIBUTE_NAME; + continue stateloop; + default: + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OPEN: + markupdeclarationopenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.MARKUP_DECLARATION_HYPHEN; + break 
markupdeclarationopenloop; + // continue stateloop; + default: + state = MetaScanner.SCAN_UNTIL_GT; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case MARKUP_DECLARATION_HYPHEN: + markupdeclarationhyphenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_START; + break markupdeclarationhyphenloop; + // continue stateloop; + default: + state = MetaScanner.SCAN_UNTIL_GT; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_START: + commentstartloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_START_DASH; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + state = MetaScanner.COMMENT; + break commentstartloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT: + commentloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END_DASH; + break commentloop; + // continue stateloop; + default: + continue; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END_DASH: + commentenddashloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END; + break commentenddashloop; + // continue stateloop; + default: + state = MetaScanner.COMMENT; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END: + for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + case '-': + continue; + default: + state = MetaScanner.COMMENT; + continue stateloop; + } + } + // XXX reorder point + case COMMENT_START_DASH: + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + 
default: + state = MetaScanner.COMMENT; + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '\'': + handleAttributeValue(); + state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED; + continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // XXX reorder point + case SCAN_UNTIL_GT: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + continue; + } + } + } + } + stateSave = state; + } + + private void handleCharInAttributeValue(int c) { + if (metaState == A) { + if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) { + addToBuffer(c); + } else if (httpEquivIndex == HTTP_EQUIV.length) { + if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) { + ++contentTypeIndex; + } else { + contentTypeIndex = Integer.MAX_VALUE; + } + } + } + } + + @Inline private int toAsciiLowerCase(int c) { + if (c >= 'A' && c <= 'Z') { + return c + 0x20; + } + return c; + } + + /** + * Adds a character to the accumulation buffer, tripling its capacity when it is full. + * @param c the character to add + */ + private void addToBuffer(int c) { + if (strBufLen == strBuf.length) { + char[] newBuf = new char[strBuf.length + (strBuf.length << 1)]; + System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length); + strBuf = newBuf; + } + strBuf[strBufLen++] = (char)c; + } + + /** + * Records a completed attribute value: stores it as content or charset, or classifies http-equiv. 
+ * Does nothing unless the tag name is "meta"; only the first value of each kind is kept. + * @throws SAXException + */ + private void handleAttributeValue() throws SAXException { + if (metaState != A) { + return; + } + if (contentIndex == CONTENT.length && content == null) { + content = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , treeBuilder + ); + return; + } + if (charsetIndex == CHARSET.length && charset == null) { + charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , treeBuilder + ); + return; + } + if (httpEquivIndex == HTTP_EQUIV.length + && httpEquivState == HTTP_EQUIV_NOT_SEEN) { + httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE + : HTTP_EQUIV_OTHER; + return; + } + } + + private boolean handleTag() throws SAXException { + boolean stop = handleTagInner(); + Portability.releaseString(content); + content = null; + Portability.releaseString(charset); + charset = null; + httpEquivState = HTTP_EQUIV_NOT_SEEN; + return stop; + } + + private boolean handleTagInner() throws SAXException { + if (charset != null && tryCharset(charset)) { + return true; + } + if (content != null && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) { + String extract = TreeBuilder.extractCharsetFromContent(content + // CPPONLY: , treeBuilder + ); + if (extract == null) { + return false; + } + boolean success = tryCharset(extract); + Portability.releaseString(extract); + return success; + } + return false; + } + + /** + * Tries to switch to an encoding. 
+ * + * @param encoding + * @return true if successful + * @throws SAXException + */ + protected abstract boolean tryCharset(String encoding) throws SAXException; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java new file mode 100644 index 0000000000..940cf2e9ca --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +public final class NCName { + // [NOCPP[ + + private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; + + private static final char[] HEX_TABLE = "0123456789ABCDEF".toCharArray(); + + public static boolean isNCNameStart(char c) { + return ((c >= '\u0041' && c <= '\u005A') + || (c >= '\u0061' && c <= '\u007A') + || (c >= '\u00C0' && c <= '\u00D6') + || (c >= '\u00D8' && c <= '\u00F6') + || (c >= '\u00F8' && c <= '\u00FF') + || (c >= '\u0100' && c <= '\u0131') + || (c >= '\u0134' && c <= '\u013E') + || (c >= '\u0141' && c <= '\u0148') + || (c >= '\u014A' && c <= '\u017E') + || (c >= '\u0180' && c <= '\u01C3') + || (c >= '\u01CD' && c <= '\u01F0') + || (c >= '\u01F4' && c <= '\u01F5') + || (c >= '\u01FA' && c <= '\u0217') + || (c >= '\u0250' && c <= '\u02A8') + || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386') + || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') + || (c >= '\u038E' && c <= '\u03A1') + || (c >= '\u03A3' && c <= '\u03CE') + || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA') + || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0') + || (c >= '\u03E2' && c <= '\u03F3') + || (c >= '\u0401' && c <= '\u040C') + || (c >= '\u040E' && c <= '\u044F') + || (c >= '\u0451' && c <= '\u045C') + || (c >= '\u045E' && c <= '\u0481') + || (c >= '\u0490' && c <= '\u04C4') + || (c >= '\u04C7' && c <= '\u04C8') + || (c >= '\u04CB' && c <= '\u04CC') + || (c >= '\u04D0' && c <= '\u04EB') + || (c >= '\u04EE' && c <= '\u04F5') + || (c >= '\u04F8' && c <= '\u04F9') + || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559') + || (c >= '\u0561' && c <= '\u0586') + || (c >= '\u05D0' && c <= '\u05EA') + || (c >= '\u05F0' && c <= '\u05F2') + || (c >= '\u0621' && c <= '\u063A') + || (c >= '\u0641' && c <= '\u064A') + || (c >= '\u0671' && c <= '\u06B7') + || (c >= '\u06BA' && c <= '\u06BE') + || (c >= '\u06C0' && c <= '\u06CE') + || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5') + || (c >= '\u06E5' 
&& c <= '\u06E6') + || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D') + || (c >= '\u0958' && c <= '\u0961') + || (c >= '\u0985' && c <= '\u098C') + || (c >= '\u098F' && c <= '\u0990') + || (c >= '\u0993' && c <= '\u09A8') + || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2') + || (c >= '\u09B6' && c <= '\u09B9') + || (c >= '\u09DC' && c <= '\u09DD') + || (c >= '\u09DF' && c <= '\u09E1') + || (c >= '\u09F0' && c <= '\u09F1') + || (c >= '\u0A05' && c <= '\u0A0A') + || (c >= '\u0A0F' && c <= '\u0A10') + || (c >= '\u0A13' && c <= '\u0A28') + || (c >= '\u0A2A' && c <= '\u0A30') + || (c >= '\u0A32' && c <= '\u0A33') + || (c >= '\u0A35' && c <= '\u0A36') + || (c >= '\u0A38' && c <= '\u0A39') + || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E') + || (c >= '\u0A72' && c <= '\u0A74') + || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D') + || (c >= '\u0A8F' && c <= '\u0A91') + || (c >= '\u0A93' && c <= '\u0AA8') + || (c >= '\u0AAA' && c <= '\u0AB0') + || (c >= '\u0AB2' && c <= '\u0AB3') + || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD') + || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C') + || (c >= '\u0B0F' && c <= '\u0B10') + || (c >= '\u0B13' && c <= '\u0B28') + || (c >= '\u0B2A' && c <= '\u0B30') + || (c >= '\u0B32' && c <= '\u0B33') + || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D') + || (c >= '\u0B5C' && c <= '\u0B5D') + || (c >= '\u0B5F' && c <= '\u0B61') + || (c >= '\u0B85' && c <= '\u0B8A') + || (c >= '\u0B8E' && c <= '\u0B90') + || (c >= '\u0B92' && c <= '\u0B95') + || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C') + || (c >= '\u0B9E' && c <= '\u0B9F') + || (c >= '\u0BA3' && c <= '\u0BA4') + || (c >= '\u0BA8' && c <= '\u0BAA') + || (c >= '\u0BAE' && c <= '\u0BB5') + || (c >= '\u0BB7' && c <= '\u0BB9') + || (c >= '\u0C05' && c <= '\u0C0C') + || (c >= '\u0C0E' && c <= '\u0C10') + || (c >= '\u0C12' && c <= '\u0C28') + || (c >= '\u0C2A' && c <= '\u0C33') + || (c >= '\u0C35' && c <= '\u0C39') + || (c >= '\u0C60' && c <= '\u0C61') + || 
(c >= '\u0C85' && c <= '\u0C8C') + || (c >= '\u0C8E' && c <= '\u0C90') + || (c >= '\u0C92' && c <= '\u0CA8') + || (c >= '\u0CAA' && c <= '\u0CB3') + || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE') + || (c >= '\u0CE0' && c <= '\u0CE1') + || (c >= '\u0D05' && c <= '\u0D0C') + || (c >= '\u0D0E' && c <= '\u0D10') + || (c >= '\u0D12' && c <= '\u0D28') + || (c >= '\u0D2A' && c <= '\u0D39') + || (c >= '\u0D60' && c <= '\u0D61') + || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30') + || (c >= '\u0E32' && c <= '\u0E33') + || (c >= '\u0E40' && c <= '\u0E45') + || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84') + || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A') + || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97') + || (c >= '\u0E99' && c <= '\u0E9F') + || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5') + || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB') + || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0') + || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD') + || (c >= '\u0EC0' && c <= '\u0EC4') + || (c >= '\u0F40' && c <= '\u0F47') + || (c >= '\u0F49' && c <= '\u0F69') + || (c >= '\u10A0' && c <= '\u10C5') + || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100') + || (c >= '\u1102' && c <= '\u1103') + || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109') + || (c >= '\u110B' && c <= '\u110C') + || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C') + || (c == '\u113E') || (c == '\u1140') || (c == '\u114C') + || (c == '\u114E') || (c == '\u1150') + || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159') + || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163') + || (c == '\u1165') || (c == '\u1167') || (c == '\u1169') + || (c >= '\u116D' && c <= '\u116E') + || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175') + || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB') + || (c >= '\u11AE' && c <= '\u11AF') + || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA') + || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB') + || (c == '\u11F0') 
|| (c == '\u11F9') + || (c >= '\u1E00' && c <= '\u1E9B') + || (c >= '\u1EA0' && c <= '\u1EF9') + || (c >= '\u1F00' && c <= '\u1F15') + || (c >= '\u1F18' && c <= '\u1F1D') + || (c >= '\u1F20' && c <= '\u1F45') + || (c >= '\u1F48' && c <= '\u1F4D') + || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59') + || (c == '\u1F5B') || (c == '\u1F5D') + || (c >= '\u1F5F' && c <= '\u1F7D') + || (c >= '\u1F80' && c <= '\u1FB4') + || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE') + || (c >= '\u1FC2' && c <= '\u1FC4') + || (c >= '\u1FC6' && c <= '\u1FCC') + || (c >= '\u1FD0' && c <= '\u1FD3') + || (c >= '\u1FD6' && c <= '\u1FDB') + || (c >= '\u1FE0' && c <= '\u1FEC') + || (c >= '\u1FF2' && c <= '\u1FF4') + || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126') + || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E') + || (c >= '\u2180' && c <= '\u2182') + || (c >= '\u3041' && c <= '\u3094') + || (c >= '\u30A1' && c <= '\u30FA') + || (c >= '\u3105' && c <= '\u312C') + || (c >= '\uAC00' && c <= '\uD7A3') + || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007') + || (c >= '\u3021' && c <= '\u3029') || (c == '_')); + } + + public static boolean isNCNameTrail(char c) { + return ((c >= '\u0030' && c <= '\u0039') + || (c >= '\u0660' && c <= '\u0669') + || (c >= '\u06F0' && c <= '\u06F9') + || (c >= '\u0966' && c <= '\u096F') + || (c >= '\u09E6' && c <= '\u09EF') + || (c >= '\u0A66' && c <= '\u0A6F') + || (c >= '\u0AE6' && c <= '\u0AEF') + || (c >= '\u0B66' && c <= '\u0B6F') + || (c >= '\u0BE7' && c <= '\u0BEF') + || (c >= '\u0C66' && c <= '\u0C6F') + || (c >= '\u0CE6' && c <= '\u0CEF') + || (c >= '\u0D66' && c <= '\u0D6F') + || (c >= '\u0E50' && c <= '\u0E59') + || (c >= '\u0ED0' && c <= '\u0ED9') + || (c >= '\u0F20' && c <= '\u0F29') + || (c >= '\u0041' && c <= '\u005A') + || (c >= '\u0061' && c <= '\u007A') + || (c >= '\u00C0' && c <= '\u00D6') + || (c >= '\u00D8' && c <= '\u00F6') + || (c >= '\u00F8' && c <= '\u00FF') + || (c >= '\u0100' && c <= '\u0131') + || (c >= '\u0134' && 
c <= '\u013E') + || (c >= '\u0141' && c <= '\u0148') + || (c >= '\u014A' && c <= '\u017E') + || (c >= '\u0180' && c <= '\u01C3') + || (c >= '\u01CD' && c <= '\u01F0') + || (c >= '\u01F4' && c <= '\u01F5') + || (c >= '\u01FA' && c <= '\u0217') + || (c >= '\u0250' && c <= '\u02A8') + || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386') + || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') + || (c >= '\u038E' && c <= '\u03A1') + || (c >= '\u03A3' && c <= '\u03CE') + || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA') + || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0') + || (c >= '\u03E2' && c <= '\u03F3') + || (c >= '\u0401' && c <= '\u040C') + || (c >= '\u040E' && c <= '\u044F') + || (c >= '\u0451' && c <= '\u045C') + || (c >= '\u045E' && c <= '\u0481') + || (c >= '\u0490' && c <= '\u04C4') + || (c >= '\u04C7' && c <= '\u04C8') + || (c >= '\u04CB' && c <= '\u04CC') + || (c >= '\u04D0' && c <= '\u04EB') + || (c >= '\u04EE' && c <= '\u04F5') + || (c >= '\u04F8' && c <= '\u04F9') + || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559') + || (c >= '\u0561' && c <= '\u0586') + || (c >= '\u05D0' && c <= '\u05EA') + || (c >= '\u05F0' && c <= '\u05F2') + || (c >= '\u0621' && c <= '\u063A') + || (c >= '\u0641' && c <= '\u064A') + || (c >= '\u0671' && c <= '\u06B7') + || (c >= '\u06BA' && c <= '\u06BE') + || (c >= '\u06C0' && c <= '\u06CE') + || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5') + || (c >= '\u06E5' && c <= '\u06E6') + || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D') + || (c >= '\u0958' && c <= '\u0961') + || (c >= '\u0985' && c <= '\u098C') + || (c >= '\u098F' && c <= '\u0990') + || (c >= '\u0993' && c <= '\u09A8') + || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2') + || (c >= '\u09B6' && c <= '\u09B9') + || (c >= '\u09DC' && c <= '\u09DD') + || (c >= '\u09DF' && c <= '\u09E1') + || (c >= '\u09F0' && c <= '\u09F1') + || (c >= '\u0A05' && c <= '\u0A0A') + || (c >= '\u0A0F' && c <= '\u0A10') + || (c >= '\u0A13' && c <= '\u0A28') + || 
(c >= '\u0A2A' && c <= '\u0A30') + || (c >= '\u0A32' && c <= '\u0A33') + || (c >= '\u0A35' && c <= '\u0A36') + || (c >= '\u0A38' && c <= '\u0A39') + || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E') + || (c >= '\u0A72' && c <= '\u0A74') + || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D') + || (c >= '\u0A8F' && c <= '\u0A91') + || (c >= '\u0A93' && c <= '\u0AA8') + || (c >= '\u0AAA' && c <= '\u0AB0') + || (c >= '\u0AB2' && c <= '\u0AB3') + || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD') + || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C') + || (c >= '\u0B0F' && c <= '\u0B10') + || (c >= '\u0B13' && c <= '\u0B28') + || (c >= '\u0B2A' && c <= '\u0B30') + || (c >= '\u0B32' && c <= '\u0B33') + || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D') + || (c >= '\u0B5C' && c <= '\u0B5D') + || (c >= '\u0B5F' && c <= '\u0B61') + || (c >= '\u0B85' && c <= '\u0B8A') + || (c >= '\u0B8E' && c <= '\u0B90') + || (c >= '\u0B92' && c <= '\u0B95') + || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C') + || (c >= '\u0B9E' && c <= '\u0B9F') + || (c >= '\u0BA3' && c <= '\u0BA4') + || (c >= '\u0BA8' && c <= '\u0BAA') + || (c >= '\u0BAE' && c <= '\u0BB5') + || (c >= '\u0BB7' && c <= '\u0BB9') + || (c >= '\u0C05' && c <= '\u0C0C') + || (c >= '\u0C0E' && c <= '\u0C10') + || (c >= '\u0C12' && c <= '\u0C28') + || (c >= '\u0C2A' && c <= '\u0C33') + || (c >= '\u0C35' && c <= '\u0C39') + || (c >= '\u0C60' && c <= '\u0C61') + || (c >= '\u0C85' && c <= '\u0C8C') + || (c >= '\u0C8E' && c <= '\u0C90') + || (c >= '\u0C92' && c <= '\u0CA8') + || (c >= '\u0CAA' && c <= '\u0CB3') + || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE') + || (c >= '\u0CE0' && c <= '\u0CE1') + || (c >= '\u0D05' && c <= '\u0D0C') + || (c >= '\u0D0E' && c <= '\u0D10') + || (c >= '\u0D12' && c <= '\u0D28') + || (c >= '\u0D2A' && c <= '\u0D39') + || (c >= '\u0D60' && c <= '\u0D61') + || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30') + || (c >= '\u0E32' && c <= '\u0E33') + || (c >= '\u0E40' && c <= 
'\u0E45') + || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84') + || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A') + || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97') + || (c >= '\u0E99' && c <= '\u0E9F') + || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5') + || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB') + || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0') + || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD') + || (c >= '\u0EC0' && c <= '\u0EC4') + || (c >= '\u0F40' && c <= '\u0F47') + || (c >= '\u0F49' && c <= '\u0F69') + || (c >= '\u10A0' && c <= '\u10C5') + || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100') + || (c >= '\u1102' && c <= '\u1103') + || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109') + || (c >= '\u110B' && c <= '\u110C') + || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C') + || (c == '\u113E') || (c == '\u1140') || (c == '\u114C') + || (c == '\u114E') || (c == '\u1150') + || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159') + || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163') + || (c == '\u1165') || (c == '\u1167') || (c == '\u1169') + || (c >= '\u116D' && c <= '\u116E') + || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175') + || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB') + || (c >= '\u11AE' && c <= '\u11AF') + || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA') + || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB') + || (c == '\u11F0') || (c == '\u11F9') + || (c >= '\u1E00' && c <= '\u1E9B') + || (c >= '\u1EA0' && c <= '\u1EF9') + || (c >= '\u1F00' && c <= '\u1F15') + || (c >= '\u1F18' && c <= '\u1F1D') + || (c >= '\u1F20' && c <= '\u1F45') + || (c >= '\u1F48' && c <= '\u1F4D') + || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59') + || (c == '\u1F5B') || (c == '\u1F5D') + || (c >= '\u1F5F' && c <= '\u1F7D') + || (c >= '\u1F80' && c <= '\u1FB4') + || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE') + || (c >= '\u1FC2' && c <= '\u1FC4') + || (c >= '\u1FC6' && c <= '\u1FCC') + 
|| (c >= '\u1FD0' && c <= '\u1FD3') + || (c >= '\u1FD6' && c <= '\u1FDB') + || (c >= '\u1FE0' && c <= '\u1FEC') + || (c >= '\u1FF2' && c <= '\u1FF4') + || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126') + || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E') + || (c >= '\u2180' && c <= '\u2182') + || (c >= '\u3041' && c <= '\u3094') + || (c >= '\u30A1' && c <= '\u30FA') + || (c >= '\u3105' && c <= '\u312C') + || (c >= '\uAC00' && c <= '\uD7A3') + || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007') + || (c >= '\u3021' && c <= '\u3029') || (c == '_') || (c == '.') + || (c == '-') || (c >= '\u0300' && c <= '\u0345') + || (c >= '\u0360' && c <= '\u0361') + || (c >= '\u0483' && c <= '\u0486') + || (c >= '\u0591' && c <= '\u05A1') + || (c >= '\u05A3' && c <= '\u05B9') + || (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF') + || (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4') + || (c >= '\u064B' && c <= '\u0652') || (c == '\u0670') + || (c >= '\u06D6' && c <= '\u06DC') + || (c >= '\u06DD' && c <= '\u06DF') + || (c >= '\u06E0' && c <= '\u06E4') + || (c >= '\u06E7' && c <= '\u06E8') + || (c >= '\u06EA' && c <= '\u06ED') + || (c >= '\u0901' && c <= '\u0903') || (c == '\u093C') + || (c >= '\u093E' && c <= '\u094C') || (c == '\u094D') + || (c >= '\u0951' && c <= '\u0954') + || (c >= '\u0962' && c <= '\u0963') + || (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC') + || (c == '\u09BE') || (c == '\u09BF') + || (c >= '\u09C0' && c <= '\u09C4') + || (c >= '\u09C7' && c <= '\u09C8') + || (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7') + || (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02') + || (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F') + || (c >= '\u0A40' && c <= '\u0A42') + || (c >= '\u0A47' && c <= '\u0A48') + || (c >= '\u0A4B' && c <= '\u0A4D') + || (c >= '\u0A70' && c <= '\u0A71') + || (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC') + || (c >= '\u0ABE' && c <= '\u0AC5') + || (c >= '\u0AC7' && c <= '\u0AC9') + || (c >= '\u0ACB' && c <= 
'\u0ACD') + || (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C') + || (c >= '\u0B3E' && c <= '\u0B43') + || (c >= '\u0B47' && c <= '\u0B48') + || (c >= '\u0B4B' && c <= '\u0B4D') + || (c >= '\u0B56' && c <= '\u0B57') + || (c >= '\u0B82' && c <= '\u0B83') + || (c >= '\u0BBE' && c <= '\u0BC2') + || (c >= '\u0BC6' && c <= '\u0BC8') + || (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7') + || (c >= '\u0C01' && c <= '\u0C03') + || (c >= '\u0C3E' && c <= '\u0C44') + || (c >= '\u0C46' && c <= '\u0C48') + || (c >= '\u0C4A' && c <= '\u0C4D') + || (c >= '\u0C55' && c <= '\u0C56') + || (c >= '\u0C82' && c <= '\u0C83') + || (c >= '\u0CBE' && c <= '\u0CC4') + || (c >= '\u0CC6' && c <= '\u0CC8') + || (c >= '\u0CCA' && c <= '\u0CCD') + || (c >= '\u0CD5' && c <= '\u0CD6') + || (c >= '\u0D02' && c <= '\u0D03') + || (c >= '\u0D3E' && c <= '\u0D43') + || (c >= '\u0D46' && c <= '\u0D48') + || (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57') + || (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A') + || (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1') + || (c >= '\u0EB4' && c <= '\u0EB9') + || (c >= '\u0EBB' && c <= '\u0EBC') + || (c >= '\u0EC8' && c <= '\u0ECD') + || (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35') + || (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E') + || (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84') + || (c >= '\u0F86' && c <= '\u0F8B') + || (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97') + || (c >= '\u0F99' && c <= '\u0FAD') + || (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9') + || (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1') + || (c >= '\u302A' && c <= '\u302F') || (c == '\u3099') + || (c == '\u309A') || (c == '\u00B7') || (c == '\u02D0') + || (c == '\u02D1') || (c == '\u0387') || (c == '\u0640') + || (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005') + || (c >= '\u3031' && c <= '\u3035') + || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE')); + } + + public static boolean isNCName(String str) { + if (str 
== null) { + return false; + } else { + int len = str.length(); + switch (len) { + case 0: + return false; + case 1: + return NCName.isNCNameStart(str.charAt(0)); + default: + if (!NCName.isNCNameStart(str.charAt(0))) { + return false; + } + for (int i = 1; i < len; i++) { + if (!NCName.isNCNameTrail(str.charAt(i))) { + return false; + } + } + } + return true; + } + } + + /** + * Appends the letter U followed by six entries of HEX_TABLE to the builder, + * one entry per 4-bit group of the low 24 bits of c, most significant + * group first. + * + * @param sb the builder to append to + * @param c the value to write; only its low 24 bits are used + */ + private static void appendUHexTo(StringBuilder sb, int c) { + sb.append('U'); + for (int i = 0; i < 6; i++) { + // Bits 20..23 pick the table entry; the shift moves the next group up. + sb.append(HEX_TABLE[(c & 0xF00000) >> 20]); + c <<= 4; + } + } + + /** + * Rewrites a string so that every UTF-16 code unit that is rejected by + * isNCNameStart (at index 0) or isNCNameTrail (at any later index) is + * replaced by the escape produced by appendUHexTo. A high surrogate that + * is immediately followed by a low surrogate is always escaped as a single + * unit computed from both halves and SURROGATE_OFFSET (presumably the + * supplementary code point; the constant is declared elsewhere in this + * class). An unpaired surrogate is handled like any other single code + * unit instead of consuming the character after it. + * + * @param str the string to escape; must not be null + * @return the escaped string, interned + */ + public static String escapeName(String str) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + // Take the pair path only when a following low surrogate really exists. + // Previously a trailing high surrogate made charAt(++i) throw, and a + // high surrogate before an ordinary character swallowed that character. + if ((c & 0xFC00) == 0xD800 && i + 1 < str.length() + && (str.charAt(i + 1) & 0xFC00) == 0xDC00) { + char next = str.charAt(++i); + appendUHexTo(sb, (c << 10) + next + SURROGATE_OFFSET); + } else if (i == 0 && !isNCNameStart(c)) { + appendUHexTo(sb, c); + } else if (i != 0 && !isNCNameTrail(c)) { + appendUHexTo(sb, c); + } else { + sb.append(c); + } + } + return sb.toString().intern(); + } + // ]NOCPP] +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java new file mode 100644 index 0000000000..266a5a28ec --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java @@ -0,0 +1,944 @@ +/* + * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera + * Software ASA. + * + * You are granted a license to use, reproduce and create derivative works of + * this document. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.CharacterName; +import nu.validator.htmlparser.annotation.NoLength; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NamedCharacters { + + static final @NoLength @CharacterName String[] NAMES = { "lig", "lig;", + "P", "P;", "cute", "cute;", "reve;", "irc", "irc;", "y;", "r;", + "rave", "rave;", "pha;", "acr;", "d;", "gon;", "pf;", + "plyFunction;", "ing", "ing;", "cr;", "sign;", "ilde", "ilde;", + "ml", "ml;", "ckslash;", "rv;", "rwed;", "y;", "cause;", + "rnoullis;", "ta;", "r;", "pf;", "eve;", "cr;", "mpeq;", "cy;", + "PY", "PY;", "cute;", "p;", "pitalDifferentialD;", "yleys;", + "aron;", "edil", "edil;", "irc;", "onint;", "ot;", "dilla;", + "nterDot;", "r;", "i;", "rcleDot;", "rcleMinus;", "rclePlus;", + "rcleTimes;", "ockwiseContourIntegral;", "oseCurlyDoubleQuote;", + "oseCurlyQuote;", "lon;", "lone;", "ngruent;", "nint;", + "ntourIntegral;", "pf;", "product;", + "unterClockwiseContourIntegral;", "oss;", "cr;", "p;", "pCap;", + ";", "otrahd;", "cy;", "cy;", "cy;", "gger;", "rr;", "shv;", + "aron;", "y;", "l;", "lta;", "r;", "acriticalAcute;", + "acriticalDot;", "acriticalDoubleAcute;", "acriticalGrave;", + "acriticalTilde;", "amond;", "fferentialD;", "pf;", "t;", "tDot;", + "tEqual;", "ubleContourIntegral;", "ubleDot;", "ubleDownArrow;", + "ubleLeftArrow;", "ubleLeftRightArrow;", "ubleLeftTee;", + "ubleLongLeftArrow;", "ubleLongLeftRightArrow;", + "ubleLongRightArrow;", "ubleRightArrow;", "ubleRightTee;", + "ubleUpArrow;", "ubleUpDownArrow;", "ubleVerticalBar;", "wnArrow;", + "wnArrowBar;", "wnArrowUpArrow;", "wnBreve;", "wnLeftRightVector;", + "wnLeftTeeVector;", "wnLeftVector;", "wnLeftVectorBar;", + "wnRightTeeVector;", "wnRightVector;", "wnRightVectorBar;", + "wnTee;", "wnTeeArrow;", "wnarrow;", "cr;", "trok;", "G;", "H", + "H;", "cute", "cute;", "aron;", "irc", "irc;", "y;", "ot;", "r;", + "rave", "rave;", "ement;", "acr;", "ptySmallSquare;", + 
"ptyVerySmallSquare;", "gon;", "pf;", "silon;", "ual;", + "ualTilde;", "uilibrium;", "cr;", "im;", "a;", "ml", "ml;", + "ists;", "ponentialE;", "y;", "r;", "lledSmallSquare;", + "lledVerySmallSquare;", "pf;", "rAll;", "uriertrf;", "cr;", "cy;", + "", ";", "mma;", "mmad;", "reve;", "edil;", "irc;", "y;", "ot;", + "r;", ";", "pf;", "eaterEqual;", "eaterEqualLess;", + "eaterFullEqual;", "eaterGreater;", "eaterLess;", + "eaterSlantEqual;", "eaterTilde;", "cr;", ";", "RDcy;", "cek;", + "t;", "irc;", "r;", "lbertSpace;", "pf;", "rizontalLine;", "cr;", + "trok;", "mpDownHump;", "mpEqual;", "cy;", "lig;", "cy;", "cute", + "cute;", "irc", "irc;", "y;", "ot;", "r;", "rave", "rave;", ";", + "acr;", "aginaryI;", "plies;", "t;", "tegral;", "tersection;", + "visibleComma;", "visibleTimes;", "gon;", "pf;", "ta;", "cr;", + "ilde;", "kcy;", "ml", "ml;", "irc;", "y;", "r;", "pf;", "cr;", + "ercy;", "kcy;", "cy;", "cy;", "ppa;", "edil;", "y;", "r;", "pf;", + "cr;", "cy;", "", ";", "cute;", "mbda;", "ng;", "placetrf;", "rr;", + "aron;", "edil;", "y;", "ftAngleBracket;", "ftArrow;", + "ftArrowBar;", "ftArrowRightArrow;", "ftCeiling;", + "ftDoubleBracket;", "ftDownTeeVector;", "ftDownVector;", + "ftDownVectorBar;", "ftFloor;", "ftRightArrow;", "ftRightVector;", + "ftTee;", "ftTeeArrow;", "ftTeeVector;", "ftTriangle;", + "ftTriangleBar;", "ftTriangleEqual;", "ftUpDownVector;", + "ftUpTeeVector;", "ftUpVector;", "ftUpVectorBar;", "ftVector;", + "ftVectorBar;", "ftarrow;", "ftrightarrow;", "ssEqualGreater;", + "ssFullEqual;", "ssGreater;", "ssLess;", "ssSlantEqual;", + "ssTilde;", "r;", ";", "eftarrow;", "idot;", "ngLeftArrow;", + "ngLeftRightArrow;", "ngRightArrow;", "ngleftarrow;", + "ngleftrightarrow;", "ngrightarrow;", "pf;", "werLeftArrow;", + "werRightArrow;", "cr;", "h;", "trok;", ";", "p;", "y;", + "diumSpace;", "llintrf;", "r;", "nusPlus;", "pf;", "cr;", ";", + "cy;", "cute;", "aron;", "edil;", "y;", "gativeMediumSpace;", + "gativeThickSpace;", "gativeThinSpace;", 
"gativeVeryThinSpace;", + "stedGreaterGreater;", "stedLessLess;", "wLine;", "r;", "Break;", + "nBreakingSpace;", "pf;", "t;", "tCongruent;", "tCupCap;", + "tDoubleVerticalBar;", "tElement;", "tEqual;", "tEqualTilde;", + "tExists;", "tGreater;", "tGreaterEqual;", "tGreaterFullEqual;", + "tGreaterGreater;", "tGreaterLess;", "tGreaterSlantEqual;", + "tGreaterTilde;", "tHumpDownHump;", "tHumpEqual;", + "tLeftTriangle;", "tLeftTriangleBar;", "tLeftTriangleEqual;", + "tLess;", "tLessEqual;", "tLessGreater;", "tLessLess;", + "tLessSlantEqual;", "tLessTilde;", "tNestedGreaterGreater;", + "tNestedLessLess;", "tPrecedes;", "tPrecedesEqual;", + "tPrecedesSlantEqual;", "tReverseElement;", "tRightTriangle;", + "tRightTriangleBar;", "tRightTriangleEqual;", "tSquareSubset;", + "tSquareSubsetEqual;", "tSquareSuperset;", "tSquareSupersetEqual;", + "tSubset;", "tSubsetEqual;", "tSucceeds;", "tSucceedsEqual;", + "tSucceedsSlantEqual;", "tSucceedsTilde;", "tSuperset;", + "tSupersetEqual;", "tTilde;", "tTildeEqual;", "tTildeFullEqual;", + "tTildeTilde;", "tVerticalBar;", "cr;", "ilde", "ilde;", ";", + "lig;", "cute", "cute;", "irc", "irc;", "y;", "blac;", "r;", + "rave", "rave;", "acr;", "ega;", "icron;", "pf;", + "enCurlyDoubleQuote;", "enCurlyQuote;", ";", "cr;", "lash", + "lash;", "ilde", "ilde;", "imes;", "ml", "ml;", "erBar;", + "erBrace;", "erBracket;", "erParenthesis;", "rtialD;", "y;", "r;", + "i;", ";", "usMinus;", "incareplane;", "pf;", ";", "ecedes;", + "ecedesEqual;", "ecedesSlantEqual;", "ecedesTilde;", "ime;", + "oduct;", "oportion;", "oportional;", "cr;", "i;", "OT", "OT;", + "r;", "pf;", "cr;", "arr;", "G", "G;", "cute;", "ng;", "rr;", + "rrtl;", "aron;", "edil;", "y;", ";", "verseElement;", + "verseEquilibrium;", "verseUpEquilibrium;", "r;", "o;", + "ghtAngleBracket;", "ghtArrow;", "ghtArrowBar;", + "ghtArrowLeftArrow;", "ghtCeiling;", "ghtDoubleBracket;", + "ghtDownTeeVector;", "ghtDownVector;", "ghtDownVectorBar;", + "ghtFloor;", "ghtTee;", "ghtTeeArrow;", 
"ghtTeeVector;", + "ghtTriangle;", "ghtTriangleBar;", "ghtTriangleEqual;", + "ghtUpDownVector;", "ghtUpTeeVector;", "ghtUpVector;", + "ghtUpVectorBar;", "ghtVector;", "ghtVectorBar;", "ghtarrow;", + "pf;", "undImplies;", "ightarrow;", "cr;", "h;", "leDelayed;", + "CHcy;", "cy;", "FTcy;", "cute;", ";", "aron;", "edil;", "irc;", + "y;", "r;", "ortDownArrow;", "ortLeftArrow;", "ortRightArrow;", + "ortUpArrow;", "gma;", "allCircle;", "pf;", "rt;", "uare;", + "uareIntersection;", "uareSubset;", "uareSubsetEqual;", + "uareSuperset;", "uareSupersetEqual;", "uareUnion;", "cr;", "ar;", + "b;", "bset;", "bsetEqual;", "cceeds;", "cceedsEqual;", + "cceedsSlantEqual;", "cceedsTilde;", "chThat;", "m;", "p;", + "perset;", "persetEqual;", "pset;", "ORN", "ORN;", "ADE;", "Hcy;", + "cy;", "b;", "u;", "aron;", "edil;", "y;", "r;", "erefore;", + "eta;", "ickSpace;", "inSpace;", "lde;", "ldeEqual;", + "ldeFullEqual;", "ldeTilde;", "pf;", "ipleDot;", "cr;", "trok;", + "cute", "cute;", "rr;", "rrocir;", "rcy;", "reve;", "irc", "irc;", + "y;", "blac;", "r;", "rave", "rave;", "acr;", "derBar;", + "derBrace;", "derBracket;", "derParenthesis;", "ion;", "ionPlus;", + "gon;", "pf;", "Arrow;", "ArrowBar;", "ArrowDownArrow;", + "DownArrow;", "Equilibrium;", "Tee;", "TeeArrow;", "arrow;", + "downarrow;", "perLeftArrow;", "perRightArrow;", "si;", "silon;", + "ing;", "cr;", "ilde;", "ml", "ml;", "ash;", "ar;", "y;", "ash;", + "ashl;", "e;", "rbar;", "rt;", "rticalBar;", "rticalLine;", + "rticalSeparator;", "rticalTilde;", "ryThinSpace;", "r;", "pf;", + "cr;", "dash;", "irc;", "dge;", "r;", "pf;", "cr;", "r;", ";", + "pf;", "cr;", "cy;", "cy;", "cy;", "cute", "cute;", "irc;", "y;", + "r;", "pf;", "cr;", "ml;", "cy;", "cute;", "aron;", "y;", "ot;", + "roWidthSpace;", "ta;", "r;", "pf;", "cr;", "cute", "cute;", + "reve;", ";", "E;", "d;", "irc", "irc;", "ute", "ute;", "y;", + "lig", "lig;", ";", "r;", "rave", "rave;", "efsym;", "eph;", + "pha;", "acr;", "alg;", "p", "p;", "d;", "dand;", "dd;", 
"dslope;", + "dv;", "g;", "ge;", "gle;", "gmsd;", "gmsdaa;", "gmsdab;", + "gmsdac;", "gmsdad;", "gmsdae;", "gmsdaf;", "gmsdag;", "gmsdah;", + "grt;", "grtvb;", "grtvbd;", "gsph;", "gst;", "gzarr;", "gon;", + "pf;", ";", "E;", "acir;", "e;", "id;", "os;", "prox;", "proxeq;", + "ing", "ing;", "cr;", "t;", "ymp;", "ympeq;", "ilde", "ilde;", + "ml", "ml;", "conint;", "int;", "ot;", "ckcong;", "ckepsilon;", + "ckprime;", "cksim;", "cksimeq;", "rvee;", "rwed;", "rwedge;", + "rk;", "rktbrk;", "ong;", "y;", "quo;", "caus;", "cause;", + "mptyv;", "psi;", "rnou;", "ta;", "th;", "tween;", "r;", "gcap;", + "gcirc;", "gcup;", "godot;", "goplus;", "gotimes;", "gsqcup;", + "gstar;", "gtriangledown;", "gtriangleup;", "guplus;", "gvee;", + "gwedge;", "arow;", "acklozenge;", "acksquare;", "acktriangle;", + "acktriangledown;", "acktriangleleft;", "acktriangleright;", + "ank;", "k12;", "k14;", "k34;", "ock;", "e;", "equiv;", "ot;", + "pf;", "t;", "ttom;", "wtie;", "xDL;", "xDR;", "xDl;", "xDr;", + "xH;", "xHD;", "xHU;", "xHd;", "xHu;", "xUL;", "xUR;", "xUl;", + "xUr;", "xV;", "xVH;", "xVL;", "xVR;", "xVh;", "xVl;", "xVr;", + "xbox;", "xdL;", "xdR;", "xdl;", "xdr;", "xh;", "xhD;", "xhU;", + "xhd;", "xhu;", "xminus;", "xplus;", "xtimes;", "xuL;", "xuR;", + "xul;", "xur;", "xv;", "xvH;", "xvL;", "xvR;", "xvh;", "xvl;", + "xvr;", "rime;", "eve;", "vbar", "vbar;", "cr;", "emi;", "im;", + "ime;", "ol;", "olb;", "olhsub;", "ll;", "llet;", "mp;", "mpE;", + "mpe;", "mpeq;", "cute;", "p;", "pand;", "pbrcup;", "pcap;", + "pcup;", "pdot;", "ps;", "ret;", "ron;", "aps;", "aron;", "edil", + "edil;", "irc;", "ups;", "upssm;", "ot;", "dil", "dil;", "mptyv;", + "nt", "nt;", "nterdot;", "r;", "cy;", "eck;", "eckmark;", "i;", + "r;", "rE;", "rc;", "rceq;", "rclearrowleft;", "rclearrowright;", + "rcledR;", "rcledS;", "rcledast;", "rcledcirc;", "rcleddash;", + "re;", "rfnint;", "rmid;", "rscir;", "ubs;", "ubsuit;", "lon;", + "lone;", "loneq;", "mma;", "mmat;", "mp;", "mpfn;", "mplement;", + "mplexes;", 
"ng;", "ngdot;", "nint;", "pf;", "prod;", "py", "py;", + "pysr;", "arr;", "oss;", "cr;", "ub;", "ube;", "up;", "upe;", + "dot;", "darrl;", "darrr;", "epr;", "esc;", "larr;", "larrp;", + "p;", "pbrcap;", "pcap;", "pcup;", "pdot;", "por;", "ps;", "rarr;", + "rarrm;", "rlyeqprec;", "rlyeqsucc;", "rlyvee;", "rlywedge;", + "rren", "rren;", "rvearrowleft;", "rvearrowright;", "vee;", "wed;", + "conint;", "int;", "lcty;", "rr;", "ar;", "gger;", "leth;", "rr;", + "sh;", "shv;", "karow;", "lac;", "aron;", "y;", ";", "agger;", + "arr;", "otseq;", "g", "g;", "lta;", "mptyv;", "isht;", "r;", + "arl;", "arr;", "am;", "amond;", "amondsuit;", "ams;", "e;", + "gamma;", "sin;", "v;", "vide", "vide;", "videontimes;", "vonx;", + "cy;", "corn;", "crop;", "llar;", "pf;", "t;", "teq;", "teqdot;", + "tminus;", "tplus;", "tsquare;", "ublebarwedge;", "wnarrow;", + "wndownarrows;", "wnharpoonleft;", "wnharpoonright;", "bkarow;", + "corn;", "crop;", "cr;", "cy;", "ol;", "trok;", "dot;", "ri;", + "rif;", "arr;", "har;", "angle;", "cy;", "igrarr;", "Dot;", "ot;", + "cute", "cute;", "ster;", "aron;", "ir;", "irc", "irc;", "olon;", + "y;", "ot;", ";", "Dot;", "r;", ";", "rave", "rave;", "s;", + "sdot;", ";", "inters;", "l;", "s;", "sdot;", "acr;", "pty;", + "ptyset;", "ptyv;", "sp13;", "sp14;", "sp;", "g;", "sp;", "gon;", + "pf;", "ar;", "arsl;", "lus;", "si;", "silon;", "siv;", "circ;", + "colon;", "sim;", "slantgtr;", "slantless;", "uals;", "uest;", + "uiv;", "uivDD;", "vparsl;", "Dot;", "arr;", "cr;", "dot;", "im;", + "a;", "h", "h;", "ml", "ml;", "ro;", "cl;", "ist;", "pectation;", + "ponentiale;", "llingdotseq;", "y;", "male;", "ilig;", "lig;", + "llig;", "r;", "lig;", "lig;", "at;", "lig;", "tns;", "of;", "pf;", + "rall;", "rk;", "rkv;", "artint;", "ac12", "ac12;", "ac13;", + "ac14", "ac14;", "ac15;", "ac16;", "ac18;", "ac23;", "ac25;", + "ac34", "ac34;", "ac35;", "ac38;", "ac45;", "ac56;", "ac58;", + "ac78;", "asl;", "own;", "cr;", ";", "l;", "cute;", "mma;", + "mmad;", "p;", "reve;", 
"irc;", "y;", "ot;", ";", "l;", "q;", + "qq;", "qslant;", "s;", "scc;", "sdot;", "sdoto;", "sdotol;", + "sl;", "sles;", "r;", ";", "g;", "mel;", "cy;", ";", "E;", "a;", + "j;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", "pf;", + "ave;", "cr;", "im;", "ime;", "iml;", "", ";", "cc;", "cir;", + "dot;", "lPar;", "quest;", "rapprox;", "rarr;", "rdot;", + "reqless;", "reqqless;", "rless;", "rsim;", "ertneqq;", "nE;", + "rr;", "irsp;", "lf;", "milt;", "rdcy;", "rr;", "rrcir;", "rrw;", + "ar;", "irc;", "arts;", "artsuit;", "llip;", "rcon;", "r;", + "searow;", "swarow;", "arr;", "mtht;", "okleftarrow;", + "okrightarrow;", "pf;", "rbar;", "cr;", "lash;", "trok;", "bull;", + "phen;", "cute", "cute;", ";", "irc", "irc;", "y;", "cy;", "xcl", + "xcl;", "f;", "r;", "rave", "rave;", ";", "iint;", "int;", "nfin;", + "ota;", "lig;", "acr;", "age;", "agline;", "agpart;", "ath;", + "of;", "ped;", ";", "care;", "fin;", "fintie;", "odot;", "t;", + "tcal;", "tegers;", "tercal;", "tlarhk;", "tprod;", "cy;", "gon;", + "pf;", "ta;", "rod;", "uest", "uest;", "cr;", "in;", "inE;", + "indot;", "ins;", "insv;", "inv;", ";", "ilde;", "kcy;", "ml", + "ml;", "irc;", "y;", "r;", "ath;", "pf;", "cr;", "ercy;", "kcy;", + "ppa;", "ppav;", "edil;", "y;", "r;", "reen;", "cy;", "cy;", "pf;", + "cr;", "arr;", "rr;", "tail;", "arr;", ";", "g;", "ar;", "cute;", + "emptyv;", "gran;", "mbda;", "ng;", "ngd;", "ngle;", "p;", "quo", + "quo;", "rr;", "rrb;", "rrbfs;", "rrfs;", "rrhk;", "rrlp;", + "rrpl;", "rrsim;", "rrtl;", "t;", "tail;", "te;", "tes;", "arr;", + "brk;", "race;", "rack;", "rke;", "rksld;", "rkslu;", "aron;", + "edil;", "eil;", "ub;", "y;", "ca;", "quo;", "quor;", "rdhar;", + "rushar;", "sh;", ";", "ftarrow;", "ftarrowtail;", + "ftharpoondown;", "ftharpoonup;", "ftleftarrows;", "ftrightarrow;", + "ftrightarrows;", "ftrightharpoons;", "ftrightsquigarrow;", + "ftthreetimes;", "g;", "q;", "qq;", "qslant;", "s;", "scc;", + "sdot;", "sdoto;", "sdotor;", "sg;", "sges;", "ssapprox;", + 
"ssdot;", "sseqgtr;", "sseqqgtr;", "ssgtr;", "sssim;", "isht;", + "loor;", "r;", ";", "E;", "ard;", "aru;", "arul;", "blk;", "cy;", + ";", "arr;", "corner;", "hard;", "tri;", "idot;", "oust;", + "oustache;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", + "ang;", "arr;", "brk;", "ngleftarrow;", "ngleftrightarrow;", + "ngmapsto;", "ngrightarrow;", "oparrowleft;", "oparrowright;", + "par;", "pf;", "plus;", "times;", "wast;", "wbar;", "z;", "zenge;", + "zf;", "ar;", "arlt;", "arr;", "corner;", "har;", "hard;", "m;", + "tri;", "aquo;", "cr;", "h;", "im;", "ime;", "img;", "qb;", "quo;", + "quor;", "trok;", "", ";", "cc;", "cir;", "dot;", "hree;", "imes;", + "larr;", "quest;", "rPar;", "ri;", "rie;", "rif;", "rdshar;", + "ruhar;", "ertneqq;", "nE;", "Dot;", "cr", "cr;", "le;", "lt;", + "ltese;", "p;", "psto;", "pstodown;", "pstoleft;", "pstoup;", + "rker;", "omma;", "y;", "ash;", "asuredangle;", "r;", "o;", "cro", + "cro;", "d;", "dast;", "dcir;", "ddot", "ddot;", "nus;", "nusb;", + "nusd;", "nusdu;", "cp;", "dr;", "plus;", "dels;", "pf;", ";", + "cr;", "tpos;", ";", "ltimap;", "map;", "g;", "t;", "tv;", + "eftarrow;", "eftrightarrow;", "l;", "t;", "tv;", "ightarrow;", + "Dash;", "dash;", "bla;", "cute;", "ng;", "p;", "pE;", "pid;", + "pos;", "pprox;", "tur;", "tural;", "turals;", "sp", "sp;", "ump;", + "umpe;", "ap;", "aron;", "edil;", "ong;", "ongdot;", "up;", "y;", + "ash;", ";", "Arr;", "arhk;", "arr;", "arrow;", "dot;", "quiv;", + "sear;", "sim;", "xist;", "xists;", "r;", "E;", "e;", "eq;", + "eqq;", "eqslant;", "es;", "sim;", "t;", "tr;", "Arr;", "arr;", + "par;", ";", "s;", "sd;", "v;", "cy;", "Arr;", "E;", "arr;", "dr;", + "e;", "eftarrow;", "eftrightarrow;", "eq;", "eqq;", "eqslant;", + "es;", "ess;", "sim;", "t;", "tri;", "trie;", "id;", "pf;", "t", + "t;", "tin;", "tinE;", "tindot;", "tinva;", "tinvb;", "tinvc;", + "tni;", "tniva;", "tnivb;", "tnivc;", "ar;", "arallel;", "arsl;", + "art;", "olint;", "r;", "rcue;", "re;", "rec;", "receq;", "Arr;", + 
"arr;", "arrc;", "arrw;", "ightarrow;", "tri;", "trie;", "c;", + "ccue;", "ce;", "cr;", "hortmid;", "hortparallel;", "im;", "ime;", + "imeq;", "mid;", "par;", "qsube;", "qsupe;", "ub;", "ubE;", "ube;", + "ubset;", "ubseteq;", "ubseteqq;", "ucc;", "ucceq;", "up;", "upE;", + "upe;", "upset;", "upseteq;", "upseteqq;", "gl;", "ilde", "ilde;", + "lg;", "riangleleft;", "rianglelefteq;", "riangleright;", + "rianglerighteq;", ";", "m;", "mero;", "msp;", "Dash;", "Harr;", + "ap;", "dash;", "ge;", "gt;", "infin;", "lArr;", "le;", "lt;", + "ltrie;", "rArr;", "rtrie;", "sim;", "Arr;", "arhk;", "arr;", + "arrow;", "near;", ";", "cute", "cute;", "st;", "ir;", "irc", + "irc;", "y;", "ash;", "blac;", "iv;", "ot;", "sold;", "lig;", + "cir;", "r;", "on;", "rave", "rave;", "t;", "bar;", "m;", "nt;", + "arr;", "cir;", "cross;", "ine;", "t;", "acr;", "ega;", "icron;", + "id;", "inus;", "pf;", "ar;", "erp;", "lus;", ";", "arr;", "d;", + "der;", "derof;", "df", "df;", "dm", "dm;", "igof;", "or;", + "slope;", "v;", "cr;", "lash", "lash;", "ol;", "ilde", "ilde;", + "imes;", "imesas;", "ml", "ml;", "bar;", "r;", "ra", "ra;", + "rallel;", "rsim;", "rsl;", "rt;", "y;", "rcnt;", "riod;", "rmil;", + "rp;", "rtenk;", "r;", "i;", "iv;", "mmat;", "one;", ";", + "tchfork;", "v;", "anck;", "anckh;", "ankv;", "us;", "usacir;", + "usb;", "uscir;", "usdo;", "usdu;", "use;", "usmn", "usmn;", + "ussim;", "ustwo;", ";", "intint;", "pf;", "und", "und;", ";", + "E;", "ap;", "cue;", "e;", "ec;", "ecapprox;", "eccurlyeq;", + "eceq;", "ecnapprox;", "ecneqq;", "ecnsim;", "ecsim;", "ime;", + "imes;", "nE;", "nap;", "nsim;", "od;", "ofalar;", "ofline;", + "ofsurf;", "op;", "opto;", "sim;", "urel;", "cr;", "i;", "ncsp;", + "r;", "nt;", "pf;", "rime;", "cr;", "aternions;", "atint;", "est;", + "esteq;", "ot", "ot;", "arr;", "rr;", "tail;", "arr;", "ar;", + "ce;", "cute;", "dic;", "emptyv;", "ng;", "ngd;", "nge;", "ngle;", + "quo", "quo;", "rr;", "rrap;", "rrb;", "rrbfs;", "rrc;", "rrfs;", + "rrhk;", "rrlp;", 
"rrpl;", "rrsim;", "rrtl;", "rrw;", "tail;", + "tio;", "tionals;", "arr;", "brk;", "race;", "rack;", "rke;", + "rksld;", "rkslu;", "aron;", "edil;", "eil;", "ub;", "y;", "ca;", + "ldhar;", "quo;", "quor;", "sh;", "al;", "aline;", "alpart;", + "als;", "ct;", "g", "g;", "isht;", "loor;", "r;", "ard;", "aru;", + "arul;", "o;", "ov;", "ghtarrow;", "ghtarrowtail;", + "ghtharpoondown;", "ghtharpoonup;", "ghtleftarrows;", + "ghtleftharpoons;", "ghtrightarrows;", "ghtsquigarrow;", + "ghtthreetimes;", "ng;", "singdotseq;", "arr;", "har;", "m;", + "oust;", "oustache;", "mid;", "ang;", "arr;", "brk;", "par;", + "pf;", "plus;", "times;", "ar;", "argt;", "polint;", "arr;", + "aquo;", "cr;", "h;", "qb;", "quo;", "quor;", "hree;", "imes;", + "ri;", "rie;", "rif;", "riltri;", "luhar;", ";", "cute;", "quo;", + ";", "E;", "ap;", "aron;", "cue;", "e;", "edil;", "irc;", "nE;", + "nap;", "nsim;", "polint;", "sim;", "y;", "ot;", "otb;", "ote;", + "Arr;", "arhk;", "arr;", "arrow;", "ct", "ct;", "mi;", "swar;", + "tminus;", "tmn;", "xt;", "r;", "rown;", "arp;", "chcy;", "cy;", + "ortmid;", "ortparallel;", "y", "y;", "gma;", "gmaf;", "gmav;", + "m;", "mdot;", "me;", "meq;", "mg;", "mgE;", "ml;", "mlE;", "mne;", + "mplus;", "mrarr;", "arr;", "allsetminus;", "ashp;", "eparsl;", + "id;", "ile;", "t;", "te;", "tes;", "ftcy;", "l;", "lb;", "lbar;", + "pf;", "ades;", "adesuit;", "ar;", "cap;", "caps;", "cup;", + "cups;", "sub;", "sube;", "subset;", "subseteq;", "sup;", "supe;", + "supset;", "supseteq;", "u;", "uare;", "uarf;", "uf;", "arr;", + "cr;", "etmn;", "mile;", "tarf;", "ar;", "arf;", "raightepsilon;", + "raightphi;", "rns;", "b;", "bE;", "bdot;", "be;", "bedot;", + "bmult;", "bnE;", "bne;", "bplus;", "brarr;", "bset;", "bseteq;", + "bseteqq;", "bsetneq;", "bsetneqq;", "bsim;", "bsub;", "bsup;", + "cc;", "ccapprox;", "cccurlyeq;", "cceq;", "ccnapprox;", "ccneqq;", + "ccnsim;", "ccsim;", "m;", "ng;", "p1", "p1;", "p2", "p2;", "p3", + "p3;", "p;", "pE;", "pdot;", "pdsub;", "pe;", "pedot;", 
"phsol;", + "phsub;", "plarr;", "pmult;", "pnE;", "pne;", "pplus;", "pset;", + "pseteq;", "pseteqq;", "psetneq;", "psetneqq;", "psim;", "psub;", + "psup;", "Arr;", "arhk;", "arr;", "arrow;", "nwar;", "lig", "lig;", + "rget;", "u;", "rk;", "aron;", "edil;", "y;", "ot;", "lrec;", "r;", + "ere4;", "erefore;", "eta;", "etasym;", "etav;", "ickapprox;", + "icksim;", "insp;", "kap;", "ksim;", "orn", "orn;", "lde;", "mes", + "mes;", "mesb;", "mesbar;", "mesd;", "nt;", "ea;", "p;", "pbot;", + "pcir;", "pf;", "pfork;", "sa;", "rime;", "ade;", "iangle;", + "iangledown;", "iangleleft;", "ianglelefteq;", "iangleq;", + "iangleright;", "ianglerighteq;", "idot;", "ie;", "iminus;", + "iplus;", "isb;", "itime;", "pezium;", "cr;", "cy;", "hcy;", + "trok;", "ixt;", "oheadleftarrow;", "oheadrightarrow;", "rr;", + "ar;", "cute", "cute;", "rr;", "rcy;", "reve;", "irc", "irc;", + "y;", "arr;", "blac;", "har;", "isht;", "r;", "rave", "rave;", + "arl;", "arr;", "blk;", "corn;", "corner;", "crop;", "tri;", + "acr;", "l", "l;", "gon;", "pf;", "arrow;", "downarrow;", + "harpoonleft;", "harpoonright;", "lus;", "si;", "sih;", "silon;", + "uparrows;", "corn;", "corner;", "crop;", "ing;", "tri;", "cr;", + "dot;", "ilde;", "ri;", "rif;", "arr;", "ml", "ml;", "angle;", + "rr;", "ar;", "arv;", "ash;", "ngrt;", "repsilon;", "rkappa;", + "rnothing;", "rphi;", "rpi;", "rpropto;", "rr;", "rrho;", + "rsigma;", "rsubsetneq;", "rsubsetneqq;", "rsupsetneq;", + "rsupsetneqq;", "rtheta;", "rtriangleleft;", "rtriangleright;", + "y;", "ash;", "e;", "ebar;", "eeq;", "llip;", "rbar;", "rt;", "r;", + "tri;", "sub;", "sup;", "pf;", "rop;", "tri;", "cr;", "ubnE;", + "ubne;", "upnE;", "upne;", "igzag;", "irc;", "dbar;", "dge;", + "dgeq;", "ierp;", "r;", "pf;", ";", ";", "eath;", "cr;", "ap;", + "irc;", "up;", "tri;", "r;", "Arr;", "arr;", ";", "Arr;", "arr;", + "ap;", "is;", "dot;", "pf;", "plus;", "time;", "Arr;", "arr;", + "cr;", "qcup;", "plus;", "tri;", "ee;", "edge;", "cute", "cute;", + "cy;", "irc;", "y;", "n", 
"n;", "r;", "cy;", "pf;", "cr;", "cy;", + "ml", "ml;", "cute;", "aron;", "y;", "ot;", "etrf;", "ta;", "r;", + "cy;", "grarr;", "pf;", "cr;", "j;", "nj;", }; + + static final @NoLength char[][] VALUES = { { '\u00c6' }, { '\u00c6' }, + { '\u0026' }, { '\u0026' }, { '\u00c1' }, { '\u00c1' }, + { '\u0102' }, { '\u00c2' }, { '\u00c2' }, { '\u0410' }, + { '\ud835', '\udd04' }, { '\u00c0' }, { '\u00c0' }, { '\u0391' }, + { '\u0100' }, { '\u2a53' }, { '\u0104' }, { '\ud835', '\udd38' }, + { '\u2061' }, { '\u00c5' }, { '\u00c5' }, { '\ud835', '\udc9c' }, + { '\u2254' }, { '\u00c3' }, { '\u00c3' }, { '\u00c4' }, + { '\u00c4' }, { '\u2216' }, { '\u2ae7' }, { '\u2306' }, + { '\u0411' }, { '\u2235' }, { '\u212c' }, { '\u0392' }, + { '\ud835', '\udd05' }, { '\ud835', '\udd39' }, { '\u02d8' }, + { '\u212c' }, { '\u224e' }, { '\u0427' }, { '\u00a9' }, + { '\u00a9' }, { '\u0106' }, { '\u22d2' }, { '\u2145' }, + { '\u212d' }, { '\u010c' }, { '\u00c7' }, { '\u00c7' }, + { '\u0108' }, { '\u2230' }, { '\u010a' }, { '\u00b8' }, + { '\u00b7' }, { '\u212d' }, { '\u03a7' }, { '\u2299' }, + { '\u2296' }, { '\u2295' }, { '\u2297' }, { '\u2232' }, + { '\u201d' }, { '\u2019' }, { '\u2237' }, { '\u2a74' }, + { '\u2261' }, { '\u222f' }, { '\u222e' }, { '\u2102' }, + { '\u2210' }, { '\u2233' }, { '\u2a2f' }, { '\ud835', '\udc9e' }, + { '\u22d3' }, { '\u224d' }, { '\u2145' }, { '\u2911' }, + { '\u0402' }, { '\u0405' }, { '\u040f' }, { '\u2021' }, + { '\u21a1' }, { '\u2ae4' }, { '\u010e' }, { '\u0414' }, + { '\u2207' }, { '\u0394' }, { '\ud835', '\udd07' }, { '\u00b4' }, + { '\u02d9' }, { '\u02dd' }, { '\u0060' }, { '\u02dc' }, + { '\u22c4' }, { '\u2146' }, { '\ud835', '\udd3b' }, { '\u00a8' }, + { '\u20dc' }, { '\u2250' }, { '\u222f' }, { '\u00a8' }, + { '\u21d3' }, { '\u21d0' }, { '\u21d4' }, { '\u2ae4' }, + { '\u27f8' }, { '\u27fa' }, { '\u27f9' }, { '\u21d2' }, + { '\u22a8' }, { '\u21d1' }, { '\u21d5' }, { '\u2225' }, + { '\u2193' }, { '\u2913' }, { '\u21f5' }, { '\u0311' }, + { '\u2950' }, { 
'\u295e' }, { '\u21bd' }, { '\u2956' }, + { '\u295f' }, { '\u21c1' }, { '\u2957' }, { '\u22a4' }, + { '\u21a7' }, { '\u21d3' }, { '\ud835', '\udc9f' }, { '\u0110' }, + { '\u014a' }, { '\u00d0' }, { '\u00d0' }, { '\u00c9' }, + { '\u00c9' }, { '\u011a' }, { '\u00ca' }, { '\u00ca' }, + { '\u042d' }, { '\u0116' }, { '\ud835', '\udd08' }, { '\u00c8' }, + { '\u00c8' }, { '\u2208' }, { '\u0112' }, { '\u25fb' }, + { '\u25ab' }, { '\u0118' }, { '\ud835', '\udd3c' }, { '\u0395' }, + { '\u2a75' }, { '\u2242' }, { '\u21cc' }, { '\u2130' }, + { '\u2a73' }, { '\u0397' }, { '\u00cb' }, { '\u00cb' }, + { '\u2203' }, { '\u2147' }, { '\u0424' }, { '\ud835', '\udd09' }, + { '\u25fc' }, { '\u25aa' }, { '\ud835', '\udd3d' }, { '\u2200' }, + { '\u2131' }, { '\u2131' }, { '\u0403' }, { '\u003e' }, + { '\u003e' }, { '\u0393' }, { '\u03dc' }, { '\u011e' }, + { '\u0122' }, { '\u011c' }, { '\u0413' }, { '\u0120' }, + { '\ud835', '\udd0a' }, { '\u22d9' }, { '\ud835', '\udd3e' }, + { '\u2265' }, { '\u22db' }, { '\u2267' }, { '\u2aa2' }, + { '\u2277' }, { '\u2a7e' }, { '\u2273' }, { '\ud835', '\udca2' }, + { '\u226b' }, { '\u042a' }, { '\u02c7' }, { '\u005e' }, + { '\u0124' }, { '\u210c' }, { '\u210b' }, { '\u210d' }, + { '\u2500' }, { '\u210b' }, { '\u0126' }, { '\u224e' }, + { '\u224f' }, { '\u0415' }, { '\u0132' }, { '\u0401' }, + { '\u00cd' }, { '\u00cd' }, { '\u00ce' }, { '\u00ce' }, + { '\u0418' }, { '\u0130' }, { '\u2111' }, { '\u00cc' }, + { '\u00cc' }, { '\u2111' }, { '\u012a' }, { '\u2148' }, + { '\u21d2' }, { '\u222c' }, { '\u222b' }, { '\u22c2' }, + { '\u2063' }, { '\u2062' }, { '\u012e' }, { '\ud835', '\udd40' }, + { '\u0399' }, { '\u2110' }, { '\u0128' }, { '\u0406' }, + { '\u00cf' }, { '\u00cf' }, { '\u0134' }, { '\u0419' }, + { '\ud835', '\udd0d' }, { '\ud835', '\udd41' }, + { '\ud835', '\udca5' }, { '\u0408' }, { '\u0404' }, { '\u0425' }, + { '\u040c' }, { '\u039a' }, { '\u0136' }, { '\u041a' }, + { '\ud835', '\udd0e' }, { '\ud835', '\udd42' }, + { '\ud835', '\udca6' }, { 
'\u0409' }, { '\u003c' }, { '\u003c' }, + { '\u0139' }, { '\u039b' }, { '\u27ea' }, { '\u2112' }, + { '\u219e' }, { '\u013d' }, { '\u013b' }, { '\u041b' }, + { '\u27e8' }, { '\u2190' }, { '\u21e4' }, { '\u21c6' }, + { '\u2308' }, { '\u27e6' }, { '\u2961' }, { '\u21c3' }, + { '\u2959' }, { '\u230a' }, { '\u2194' }, { '\u294e' }, + { '\u22a3' }, { '\u21a4' }, { '\u295a' }, { '\u22b2' }, + { '\u29cf' }, { '\u22b4' }, { '\u2951' }, { '\u2960' }, + { '\u21bf' }, { '\u2958' }, { '\u21bc' }, { '\u2952' }, + { '\u21d0' }, { '\u21d4' }, { '\u22da' }, { '\u2266' }, + { '\u2276' }, { '\u2aa1' }, { '\u2a7d' }, { '\u2272' }, + { '\ud835', '\udd0f' }, { '\u22d8' }, { '\u21da' }, { '\u013f' }, + { '\u27f5' }, { '\u27f7' }, { '\u27f6' }, { '\u27f8' }, + { '\u27fa' }, { '\u27f9' }, { '\ud835', '\udd43' }, { '\u2199' }, + { '\u2198' }, { '\u2112' }, { '\u21b0' }, { '\u0141' }, + { '\u226a' }, { '\u2905' }, { '\u041c' }, { '\u205f' }, + { '\u2133' }, { '\ud835', '\udd10' }, { '\u2213' }, + { '\ud835', '\udd44' }, { '\u2133' }, { '\u039c' }, { '\u040a' }, + { '\u0143' }, { '\u0147' }, { '\u0145' }, { '\u041d' }, + { '\u200b' }, { '\u200b' }, { '\u200b' }, { '\u200b' }, + { '\u226b' }, { '\u226a' }, { '\n' }, { '\ud835', '\udd11' }, + { '\u2060' }, { '\u00a0' }, { '\u2115' }, { '\u2aec' }, + { '\u2262' }, { '\u226d' }, { '\u2226' }, { '\u2209' }, + { '\u2260' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u226f' }, + { '\u2271' }, { '\u2267', '\u0338' }, { '\u226b', '\u0338' }, + { '\u2279' }, { '\u2a7e', '\u0338' }, { '\u2275' }, + { '\u224e', '\u0338' }, { '\u224f', '\u0338' }, { '\u22ea' }, + { '\u29cf', '\u0338' }, { '\u22ec' }, { '\u226e' }, { '\u2270' }, + { '\u2278' }, { '\u226a', '\u0338' }, { '\u2a7d', '\u0338' }, + { '\u2274' }, { '\u2aa2', '\u0338' }, { '\u2aa1', '\u0338' }, + { '\u2280' }, { '\u2aaf', '\u0338' }, { '\u22e0' }, { '\u220c' }, + { '\u22eb' }, { '\u29d0', '\u0338' }, { '\u22ed' }, + { '\u228f', '\u0338' }, { '\u22e2' }, { '\u2290', '\u0338' }, + { '\u22e3' }, { 
'\u2282', '\u20d2' }, { '\u2288' }, { '\u2281' }, + { '\u2ab0', '\u0338' }, { '\u22e1' }, { '\u227f', '\u0338' }, + { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2241' }, { '\u2244' }, + { '\u2247' }, { '\u2249' }, { '\u2224' }, { '\ud835', '\udca9' }, + { '\u00d1' }, { '\u00d1' }, { '\u039d' }, { '\u0152' }, + { '\u00d3' }, { '\u00d3' }, { '\u00d4' }, { '\u00d4' }, + { '\u041e' }, { '\u0150' }, { '\ud835', '\udd12' }, { '\u00d2' }, + { '\u00d2' }, { '\u014c' }, { '\u03a9' }, { '\u039f' }, + { '\ud835', '\udd46' }, { '\u201c' }, { '\u2018' }, { '\u2a54' }, + { '\ud835', '\udcaa' }, { '\u00d8' }, { '\u00d8' }, { '\u00d5' }, + { '\u00d5' }, { '\u2a37' }, { '\u00d6' }, { '\u00d6' }, + { '\u203e' }, { '\u23de' }, { '\u23b4' }, { '\u23dc' }, + { '\u2202' }, { '\u041f' }, { '\ud835', '\udd13' }, { '\u03a6' }, + { '\u03a0' }, { '\u00b1' }, { '\u210c' }, { '\u2119' }, + { '\u2abb' }, { '\u227a' }, { '\u2aaf' }, { '\u227c' }, + { '\u227e' }, { '\u2033' }, { '\u220f' }, { '\u2237' }, + { '\u221d' }, { '\ud835', '\udcab' }, { '\u03a8' }, { '\u0022' }, + { '\u0022' }, { '\ud835', '\udd14' }, { '\u211a' }, + { '\ud835', '\udcac' }, { '\u2910' }, { '\u00ae' }, { '\u00ae' }, + { '\u0154' }, { '\u27eb' }, { '\u21a0' }, { '\u2916' }, + { '\u0158' }, { '\u0156' }, { '\u0420' }, { '\u211c' }, + { '\u220b' }, { '\u21cb' }, { '\u296f' }, { '\u211c' }, + { '\u03a1' }, { '\u27e9' }, { '\u2192' }, { '\u21e5' }, + { '\u21c4' }, { '\u2309' }, { '\u27e7' }, { '\u295d' }, + { '\u21c2' }, { '\u2955' }, { '\u230b' }, { '\u22a2' }, + { '\u21a6' }, { '\u295b' }, { '\u22b3' }, { '\u29d0' }, + { '\u22b5' }, { '\u294f' }, { '\u295c' }, { '\u21be' }, + { '\u2954' }, { '\u21c0' }, { '\u2953' }, { '\u21d2' }, + { '\u211d' }, { '\u2970' }, { '\u21db' }, { '\u211b' }, + { '\u21b1' }, { '\u29f4' }, { '\u0429' }, { '\u0428' }, + { '\u042c' }, { '\u015a' }, { '\u2abc' }, { '\u0160' }, + { '\u015e' }, { '\u015c' }, { '\u0421' }, { '\ud835', '\udd16' }, + { '\u2193' }, { '\u2190' }, { '\u2192' }, { '\u2191' }, + 
{ '\u03a3' }, { '\u2218' }, { '\ud835', '\udd4a' }, { '\u221a' }, + { '\u25a1' }, { '\u2293' }, { '\u228f' }, { '\u2291' }, + { '\u2290' }, { '\u2292' }, { '\u2294' }, { '\ud835', '\udcae' }, + { '\u22c6' }, { '\u22d0' }, { '\u22d0' }, { '\u2286' }, + { '\u227b' }, { '\u2ab0' }, { '\u227d' }, { '\u227f' }, + { '\u220b' }, { '\u2211' }, { '\u22d1' }, { '\u2283' }, + { '\u2287' }, { '\u22d1' }, { '\u00de' }, { '\u00de' }, + { '\u2122' }, { '\u040b' }, { '\u0426' }, { '\u0009' }, + { '\u03a4' }, { '\u0164' }, { '\u0162' }, { '\u0422' }, + { '\ud835', '\udd17' }, { '\u2234' }, { '\u0398' }, + { '\u205f', '\u200a' }, { '\u2009' }, { '\u223c' }, { '\u2243' }, + { '\u2245' }, { '\u2248' }, { '\ud835', '\udd4b' }, { '\u20db' }, + { '\ud835', '\udcaf' }, { '\u0166' }, { '\u00da' }, { '\u00da' }, + { '\u219f' }, { '\u2949' }, { '\u040e' }, { '\u016c' }, + { '\u00db' }, { '\u00db' }, { '\u0423' }, { '\u0170' }, + { '\ud835', '\udd18' }, { '\u00d9' }, { '\u00d9' }, { '\u016a' }, + { '\u005f' }, { '\u23df' }, { '\u23b5' }, { '\u23dd' }, + { '\u22c3' }, { '\u228e' }, { '\u0172' }, { '\ud835', '\udd4c' }, + { '\u2191' }, { '\u2912' }, { '\u21c5' }, { '\u2195' }, + { '\u296e' }, { '\u22a5' }, { '\u21a5' }, { '\u21d1' }, + { '\u21d5' }, { '\u2196' }, { '\u2197' }, { '\u03d2' }, + { '\u03a5' }, { '\u016e' }, { '\ud835', '\udcb0' }, { '\u0168' }, + { '\u00dc' }, { '\u00dc' }, { '\u22ab' }, { '\u2aeb' }, + { '\u0412' }, { '\u22a9' }, { '\u2ae6' }, { '\u22c1' }, + { '\u2016' }, { '\u2016' }, { '\u2223' }, { '\u007c' }, + { '\u2758' }, { '\u2240' }, { '\u200a' }, { '\ud835', '\udd19' }, + { '\ud835', '\udd4d' }, { '\ud835', '\udcb1' }, { '\u22aa' }, + { '\u0174' }, { '\u22c0' }, { '\ud835', '\udd1a' }, + { '\ud835', '\udd4e' }, { '\ud835', '\udcb2' }, + { '\ud835', '\udd1b' }, { '\u039e' }, { '\ud835', '\udd4f' }, + { '\ud835', '\udcb3' }, { '\u042f' }, { '\u0407' }, { '\u042e' }, + { '\u00dd' }, { '\u00dd' }, { '\u0176' }, { '\u042b' }, + { '\ud835', '\udd1c' }, { '\ud835', '\udd50' }, 
+ { '\ud835', '\udcb4' }, { '\u0178' }, { '\u0416' }, { '\u0179' }, + { '\u017d' }, { '\u0417' }, { '\u017b' }, { '\u200b' }, + { '\u0396' }, { '\u2128' }, { '\u2124' }, { '\ud835', '\udcb5' }, + { '\u00e1' }, { '\u00e1' }, { '\u0103' }, { '\u223e' }, + { '\u223e', '\u0333' }, { '\u223f' }, { '\u00e2' }, { '\u00e2' }, + { '\u00b4' }, { '\u00b4' }, { '\u0430' }, { '\u00e6' }, + { '\u00e6' }, { '\u2061' }, { '\ud835', '\udd1e' }, { '\u00e0' }, + { '\u00e0' }, { '\u2135' }, { '\u2135' }, { '\u03b1' }, + { '\u0101' }, { '\u2a3f' }, { '\u0026' }, { '\u0026' }, + { '\u2227' }, { '\u2a55' }, { '\u2a5c' }, { '\u2a58' }, + { '\u2a5a' }, { '\u2220' }, { '\u29a4' }, { '\u2220' }, + { '\u2221' }, { '\u29a8' }, { '\u29a9' }, { '\u29aa' }, + { '\u29ab' }, { '\u29ac' }, { '\u29ad' }, { '\u29ae' }, + { '\u29af' }, { '\u221f' }, { '\u22be' }, { '\u299d' }, + { '\u2222' }, { '\u00c5' }, { '\u237c' }, { '\u0105' }, + { '\ud835', '\udd52' }, { '\u2248' }, { '\u2a70' }, { '\u2a6f' }, + { '\u224a' }, { '\u224b' }, { '\'' }, { '\u2248' }, { '\u224a' }, + { '\u00e5' }, { '\u00e5' }, { '\ud835', '\udcb6' }, { '\u002a' }, + { '\u2248' }, { '\u224d' }, { '\u00e3' }, { '\u00e3' }, + { '\u00e4' }, { '\u00e4' }, { '\u2233' }, { '\u2a11' }, + { '\u2aed' }, { '\u224c' }, { '\u03f6' }, { '\u2035' }, + { '\u223d' }, { '\u22cd' }, { '\u22bd' }, { '\u2305' }, + { '\u2305' }, { '\u23b5' }, { '\u23b6' }, { '\u224c' }, + { '\u0431' }, { '\u201e' }, { '\u2235' }, { '\u2235' }, + { '\u29b0' }, { '\u03f6' }, { '\u212c' }, { '\u03b2' }, + { '\u2136' }, { '\u226c' }, { '\ud835', '\udd1f' }, { '\u22c2' }, + { '\u25ef' }, { '\u22c3' }, { '\u2a00' }, { '\u2a01' }, + { '\u2a02' }, { '\u2a06' }, { '\u2605' }, { '\u25bd' }, + { '\u25b3' }, { '\u2a04' }, { '\u22c1' }, { '\u22c0' }, + { '\u290d' }, { '\u29eb' }, { '\u25aa' }, { '\u25b4' }, + { '\u25be' }, { '\u25c2' }, { '\u25b8' }, { '\u2423' }, + { '\u2592' }, { '\u2591' }, { '\u2593' }, { '\u2588' }, + { '\u003d', '\u20e5' }, { '\u2261', '\u20e5' }, { '\u2310' }, 
+ { '\ud835', '\udd53' }, { '\u22a5' }, { '\u22a5' }, { '\u22c8' }, + { '\u2557' }, { '\u2554' }, { '\u2556' }, { '\u2553' }, + { '\u2550' }, { '\u2566' }, { '\u2569' }, { '\u2564' }, + { '\u2567' }, { '\u255d' }, { '\u255a' }, { '\u255c' }, + { '\u2559' }, { '\u2551' }, { '\u256c' }, { '\u2563' }, + { '\u2560' }, { '\u256b' }, { '\u2562' }, { '\u255f' }, + { '\u29c9' }, { '\u2555' }, { '\u2552' }, { '\u2510' }, + { '\u250c' }, { '\u2500' }, { '\u2565' }, { '\u2568' }, + { '\u252c' }, { '\u2534' }, { '\u229f' }, { '\u229e' }, + { '\u22a0' }, { '\u255b' }, { '\u2558' }, { '\u2518' }, + { '\u2514' }, { '\u2502' }, { '\u256a' }, { '\u2561' }, + { '\u255e' }, { '\u253c' }, { '\u2524' }, { '\u251c' }, + { '\u2035' }, { '\u02d8' }, { '\u00a6' }, { '\u00a6' }, + { '\ud835', '\udcb7' }, { '\u204f' }, { '\u223d' }, { '\u22cd' }, + { '\\' }, { '\u29c5' }, { '\u27c8' }, { '\u2022' }, { '\u2022' }, + { '\u224e' }, { '\u2aae' }, { '\u224f' }, { '\u224f' }, + { '\u0107' }, { '\u2229' }, { '\u2a44' }, { '\u2a49' }, + { '\u2a4b' }, { '\u2a47' }, { '\u2a40' }, { '\u2229', '\ufe00' }, + { '\u2041' }, { '\u02c7' }, { '\u2a4d' }, { '\u010d' }, + { '\u00e7' }, { '\u00e7' }, { '\u0109' }, { '\u2a4c' }, + { '\u2a50' }, { '\u010b' }, { '\u00b8' }, { '\u00b8' }, + { '\u29b2' }, { '\u00a2' }, { '\u00a2' }, { '\u00b7' }, + { '\ud835', '\udd20' }, { '\u0447' }, { '\u2713' }, { '\u2713' }, + { '\u03c7' }, { '\u25cb' }, { '\u29c3' }, { '\u02c6' }, + { '\u2257' }, { '\u21ba' }, { '\u21bb' }, { '\u00ae' }, + { '\u24c8' }, { '\u229b' }, { '\u229a' }, { '\u229d' }, + { '\u2257' }, { '\u2a10' }, { '\u2aef' }, { '\u29c2' }, + { '\u2663' }, { '\u2663' }, { '\u003a' }, { '\u2254' }, + { '\u2254' }, { '\u002c' }, { '\u0040' }, { '\u2201' }, + { '\u2218' }, { '\u2201' }, { '\u2102' }, { '\u2245' }, + { '\u2a6d' }, { '\u222e' }, { '\ud835', '\udd54' }, { '\u2210' }, + { '\u00a9' }, { '\u00a9' }, { '\u2117' }, { '\u21b5' }, + { '\u2717' }, { '\ud835', '\udcb8' }, { '\u2acf' }, { '\u2ad1' }, + { '\u2ad0' }, 
{ '\u2ad2' }, { '\u22ef' }, { '\u2938' }, + { '\u2935' }, { '\u22de' }, { '\u22df' }, { '\u21b6' }, + { '\u293d' }, { '\u222a' }, { '\u2a48' }, { '\u2a46' }, + { '\u2a4a' }, { '\u228d' }, { '\u2a45' }, { '\u222a', '\ufe00' }, + { '\u21b7' }, { '\u293c' }, { '\u22de' }, { '\u22df' }, + { '\u22ce' }, { '\u22cf' }, { '\u00a4' }, { '\u00a4' }, + { '\u21b6' }, { '\u21b7' }, { '\u22ce' }, { '\u22cf' }, + { '\u2232' }, { '\u2231' }, { '\u232d' }, { '\u21d3' }, + { '\u2965' }, { '\u2020' }, { '\u2138' }, { '\u2193' }, + { '\u2010' }, { '\u22a3' }, { '\u290f' }, { '\u02dd' }, + { '\u010f' }, { '\u0434' }, { '\u2146' }, { '\u2021' }, + { '\u21ca' }, { '\u2a77' }, { '\u00b0' }, { '\u00b0' }, + { '\u03b4' }, { '\u29b1' }, { '\u297f' }, { '\ud835', '\udd21' }, + { '\u21c3' }, { '\u21c2' }, { '\u22c4' }, { '\u22c4' }, + { '\u2666' }, { '\u2666' }, { '\u00a8' }, { '\u03dd' }, + { '\u22f2' }, { '\u00f7' }, { '\u00f7' }, { '\u00f7' }, + { '\u22c7' }, { '\u22c7' }, { '\u0452' }, { '\u231e' }, + { '\u230d' }, { '\u0024' }, { '\ud835', '\udd55' }, { '\u02d9' }, + { '\u2250' }, { '\u2251' }, { '\u2238' }, { '\u2214' }, + { '\u22a1' }, { '\u2306' }, { '\u2193' }, { '\u21ca' }, + { '\u21c3' }, { '\u21c2' }, { '\u2910' }, { '\u231f' }, + { '\u230c' }, { '\ud835', '\udcb9' }, { '\u0455' }, { '\u29f6' }, + { '\u0111' }, { '\u22f1' }, { '\u25bf' }, { '\u25be' }, + { '\u21f5' }, { '\u296f' }, { '\u29a6' }, { '\u045f' }, + { '\u27ff' }, { '\u2a77' }, { '\u2251' }, { '\u00e9' }, + { '\u00e9' }, { '\u2a6e' }, { '\u011b' }, { '\u2256' }, + { '\u00ea' }, { '\u00ea' }, { '\u2255' }, { '\u044d' }, + { '\u0117' }, { '\u2147' }, { '\u2252' }, { '\ud835', '\udd22' }, + { '\u2a9a' }, { '\u00e8' }, { '\u00e8' }, { '\u2a96' }, + { '\u2a98' }, { '\u2a99' }, { '\u23e7' }, { '\u2113' }, + { '\u2a95' }, { '\u2a97' }, { '\u0113' }, { '\u2205' }, + { '\u2205' }, { '\u2205' }, { '\u2004' }, { '\u2005' }, + { '\u2003' }, { '\u014b' }, { '\u2002' }, { '\u0119' }, + { '\ud835', '\udd56' }, { '\u22d5' }, { '\u29e3' 
}, { '\u2a71' }, + { '\u03b5' }, { '\u03b5' }, { '\u03f5' }, { '\u2256' }, + { '\u2255' }, { '\u2242' }, { '\u2a96' }, { '\u2a95' }, + { '\u003d' }, { '\u225f' }, { '\u2261' }, { '\u2a78' }, + { '\u29e5' }, { '\u2253' }, { '\u2971' }, { '\u212f' }, + { '\u2250' }, { '\u2242' }, { '\u03b7' }, { '\u00f0' }, + { '\u00f0' }, { '\u00eb' }, { '\u00eb' }, { '\u20ac' }, + { '\u0021' }, { '\u2203' }, { '\u2130' }, { '\u2147' }, + { '\u2252' }, { '\u0444' }, { '\u2640' }, { '\ufb03' }, + { '\ufb00' }, { '\ufb04' }, { '\ud835', '\udd23' }, { '\ufb01' }, + { '\u0066', '\u006a' }, { '\u266d' }, { '\ufb02' }, { '\u25b1' }, + { '\u0192' }, { '\ud835', '\udd57' }, { '\u2200' }, { '\u22d4' }, + { '\u2ad9' }, { '\u2a0d' }, { '\u00bd' }, { '\u00bd' }, + { '\u2153' }, { '\u00bc' }, { '\u00bc' }, { '\u2155' }, + { '\u2159' }, { '\u215b' }, { '\u2154' }, { '\u2156' }, + { '\u00be' }, { '\u00be' }, { '\u2157' }, { '\u215c' }, + { '\u2158' }, { '\u215a' }, { '\u215d' }, { '\u215e' }, + { '\u2044' }, { '\u2322' }, { '\ud835', '\udcbb' }, { '\u2267' }, + { '\u2a8c' }, { '\u01f5' }, { '\u03b3' }, { '\u03dd' }, + { '\u2a86' }, { '\u011f' }, { '\u011d' }, { '\u0433' }, + { '\u0121' }, { '\u2265' }, { '\u22db' }, { '\u2265' }, + { '\u2267' }, { '\u2a7e' }, { '\u2a7e' }, { '\u2aa9' }, + { '\u2a80' }, { '\u2a82' }, { '\u2a84' }, { '\u22db', '\ufe00' }, + { '\u2a94' }, { '\ud835', '\udd24' }, { '\u226b' }, { '\u22d9' }, + { '\u2137' }, { '\u0453' }, { '\u2277' }, { '\u2a92' }, + { '\u2aa5' }, { '\u2aa4' }, { '\u2269' }, { '\u2a8a' }, + { '\u2a8a' }, { '\u2a88' }, { '\u2a88' }, { '\u2269' }, + { '\u22e7' }, { '\ud835', '\udd58' }, { '\u0060' }, { '\u210a' }, + { '\u2273' }, { '\u2a8e' }, { '\u2a90' }, { '\u003e' }, + { '\u003e' }, { '\u2aa7' }, { '\u2a7a' }, { '\u22d7' }, + { '\u2995' }, { '\u2a7c' }, { '\u2a86' }, { '\u2978' }, + { '\u22d7' }, { '\u22db' }, { '\u2a8c' }, { '\u2277' }, + { '\u2273' }, { '\u2269', '\ufe00' }, { '\u2269', '\ufe00' }, + { '\u21d4' }, { '\u200a' }, { '\u00bd' }, { 
'\u210b' }, + { '\u044a' }, { '\u2194' }, { '\u2948' }, { '\u21ad' }, + { '\u210f' }, { '\u0125' }, { '\u2665' }, { '\u2665' }, + { '\u2026' }, { '\u22b9' }, { '\ud835', '\udd25' }, { '\u2925' }, + { '\u2926' }, { '\u21ff' }, { '\u223b' }, { '\u21a9' }, + { '\u21aa' }, { '\ud835', '\udd59' }, { '\u2015' }, + { '\ud835', '\udcbd' }, { '\u210f' }, { '\u0127' }, { '\u2043' }, + { '\u2010' }, { '\u00ed' }, { '\u00ed' }, { '\u2063' }, + { '\u00ee' }, { '\u00ee' }, { '\u0438' }, { '\u0435' }, + { '\u00a1' }, { '\u00a1' }, { '\u21d4' }, { '\ud835', '\udd26' }, + { '\u00ec' }, { '\u00ec' }, { '\u2148' }, { '\u2a0c' }, + { '\u222d' }, { '\u29dc' }, { '\u2129' }, { '\u0133' }, + { '\u012b' }, { '\u2111' }, { '\u2110' }, { '\u2111' }, + { '\u0131' }, { '\u22b7' }, { '\u01b5' }, { '\u2208' }, + { '\u2105' }, { '\u221e' }, { '\u29dd' }, { '\u0131' }, + { '\u222b' }, { '\u22ba' }, { '\u2124' }, { '\u22ba' }, + { '\u2a17' }, { '\u2a3c' }, { '\u0451' }, { '\u012f' }, + { '\ud835', '\udd5a' }, { '\u03b9' }, { '\u2a3c' }, { '\u00bf' }, + { '\u00bf' }, { '\ud835', '\udcbe' }, { '\u2208' }, { '\u22f9' }, + { '\u22f5' }, { '\u22f4' }, { '\u22f3' }, { '\u2208' }, + { '\u2062' }, { '\u0129' }, { '\u0456' }, { '\u00ef' }, + { '\u00ef' }, { '\u0135' }, { '\u0439' }, { '\ud835', '\udd27' }, + { '\u0237' }, { '\ud835', '\udd5b' }, { '\ud835', '\udcbf' }, + { '\u0458' }, { '\u0454' }, { '\u03ba' }, { '\u03f0' }, + { '\u0137' }, { '\u043a' }, { '\ud835', '\udd28' }, { '\u0138' }, + { '\u0445' }, { '\u045c' }, { '\ud835', '\udd5c' }, + { '\ud835', '\udcc0' }, { '\u21da' }, { '\u21d0' }, { '\u291b' }, + { '\u290e' }, { '\u2266' }, { '\u2a8b' }, { '\u2962' }, + { '\u013a' }, { '\u29b4' }, { '\u2112' }, { '\u03bb' }, + { '\u27e8' }, { '\u2991' }, { '\u27e8' }, { '\u2a85' }, + { '\u00ab' }, { '\u00ab' }, { '\u2190' }, { '\u21e4' }, + { '\u291f' }, { '\u291d' }, { '\u21a9' }, { '\u21ab' }, + { '\u2939' }, { '\u2973' }, { '\u21a2' }, { '\u2aab' }, + { '\u2919' }, { '\u2aad' }, { '\u2aad', '\ufe00' }, 
{ '\u290c' }, + { '\u2772' }, { '\u007b' }, { '\u005b' }, { '\u298b' }, + { '\u298f' }, { '\u298d' }, { '\u013e' }, { '\u013c' }, + { '\u2308' }, { '\u007b' }, { '\u043b' }, { '\u2936' }, + { '\u201c' }, { '\u201e' }, { '\u2967' }, { '\u294b' }, + { '\u21b2' }, { '\u2264' }, { '\u2190' }, { '\u21a2' }, + { '\u21bd' }, { '\u21bc' }, { '\u21c7' }, { '\u2194' }, + { '\u21c6' }, { '\u21cb' }, { '\u21ad' }, { '\u22cb' }, + { '\u22da' }, { '\u2264' }, { '\u2266' }, { '\u2a7d' }, + { '\u2a7d' }, { '\u2aa8' }, { '\u2a7f' }, { '\u2a81' }, + { '\u2a83' }, { '\u22da', '\ufe00' }, { '\u2a93' }, { '\u2a85' }, + { '\u22d6' }, { '\u22da' }, { '\u2a8b' }, { '\u2276' }, + { '\u2272' }, { '\u297c' }, { '\u230a' }, { '\ud835', '\udd29' }, + { '\u2276' }, { '\u2a91' }, { '\u21bd' }, { '\u21bc' }, + { '\u296a' }, { '\u2584' }, { '\u0459' }, { '\u226a' }, + { '\u21c7' }, { '\u231e' }, { '\u296b' }, { '\u25fa' }, + { '\u0140' }, { '\u23b0' }, { '\u23b0' }, { '\u2268' }, + { '\u2a89' }, { '\u2a89' }, { '\u2a87' }, { '\u2a87' }, + { '\u2268' }, { '\u22e6' }, { '\u27ec' }, { '\u21fd' }, + { '\u27e6' }, { '\u27f5' }, { '\u27f7' }, { '\u27fc' }, + { '\u27f6' }, { '\u21ab' }, { '\u21ac' }, { '\u2985' }, + { '\ud835', '\udd5d' }, { '\u2a2d' }, { '\u2a34' }, { '\u2217' }, + { '\u005f' }, { '\u25ca' }, { '\u25ca' }, { '\u29eb' }, + { '\u0028' }, { '\u2993' }, { '\u21c6' }, { '\u231f' }, + { '\u21cb' }, { '\u296d' }, { '\u200e' }, { '\u22bf' }, + { '\u2039' }, { '\ud835', '\udcc1' }, { '\u21b0' }, { '\u2272' }, + { '\u2a8d' }, { '\u2a8f' }, { '\u005b' }, { '\u2018' }, + { '\u201a' }, { '\u0142' }, { '\u003c' }, { '\u003c' }, + { '\u2aa6' }, { '\u2a79' }, { '\u22d6' }, { '\u22cb' }, + { '\u22c9' }, { '\u2976' }, { '\u2a7b' }, { '\u2996' }, + { '\u25c3' }, { '\u22b4' }, { '\u25c2' }, { '\u294a' }, + { '\u2966' }, { '\u2268', '\ufe00' }, { '\u2268', '\ufe00' }, + { '\u223a' }, { '\u00af' }, { '\u00af' }, { '\u2642' }, + { '\u2720' }, { '\u2720' }, { '\u21a6' }, { '\u21a6' }, + { '\u21a7' }, { 
'\u21a4' }, { '\u21a5' }, { '\u25ae' }, + { '\u2a29' }, { '\u043c' }, { '\u2014' }, { '\u2221' }, + { '\ud835', '\udd2a' }, { '\u2127' }, { '\u00b5' }, { '\u00b5' }, + { '\u2223' }, { '\u002a' }, { '\u2af0' }, { '\u00b7' }, + { '\u00b7' }, { '\u2212' }, { '\u229f' }, { '\u2238' }, + { '\u2a2a' }, { '\u2adb' }, { '\u2026' }, { '\u2213' }, + { '\u22a7' }, { '\ud835', '\udd5e' }, { '\u2213' }, + { '\ud835', '\udcc2' }, { '\u223e' }, { '\u03bc' }, { '\u22b8' }, + { '\u22b8' }, { '\u22d9', '\u0338' }, { '\u226b', '\u20d2' }, + { '\u226b', '\u0338' }, { '\u21cd' }, { '\u21ce' }, + { '\u22d8', '\u0338' }, { '\u226a', '\u20d2' }, + { '\u226a', '\u0338' }, { '\u21cf' }, { '\u22af' }, { '\u22ae' }, + { '\u2207' }, { '\u0144' }, { '\u2220', '\u20d2' }, { '\u2249' }, + { '\u2a70', '\u0338' }, { '\u224b', '\u0338' }, { '\u0149' }, + { '\u2249' }, { '\u266e' }, { '\u266e' }, { '\u2115' }, + { '\u00a0' }, { '\u00a0' }, { '\u224e', '\u0338' }, + { '\u224f', '\u0338' }, { '\u2a43' }, { '\u0148' }, { '\u0146' }, + { '\u2247' }, { '\u2a6d', '\u0338' }, { '\u2a42' }, { '\u043d' }, + { '\u2013' }, { '\u2260' }, { '\u21d7' }, { '\u2924' }, + { '\u2197' }, { '\u2197' }, { '\u2250', '\u0338' }, { '\u2262' }, + { '\u2928' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u2204' }, + { '\ud835', '\udd2b' }, { '\u2267', '\u0338' }, { '\u2271' }, + { '\u2271' }, { '\u2267', '\u0338' }, { '\u2a7e', '\u0338' }, + { '\u2a7e', '\u0338' }, { '\u2275' }, { '\u226f' }, { '\u226f' }, + { '\u21ce' }, { '\u21ae' }, { '\u2af2' }, { '\u220b' }, + { '\u22fc' }, { '\u22fa' }, { '\u220b' }, { '\u045a' }, + { '\u21cd' }, { '\u2266', '\u0338' }, { '\u219a' }, { '\u2025' }, + { '\u2270' }, { '\u219a' }, { '\u21ae' }, { '\u2270' }, + { '\u2266', '\u0338' }, { '\u2a7d', '\u0338' }, + { '\u2a7d', '\u0338' }, { '\u226e' }, { '\u2274' }, { '\u226e' }, + { '\u22ea' }, { '\u22ec' }, { '\u2224' }, { '\ud835', '\udd5f' }, + { '\u00ac' }, { '\u00ac' }, { '\u2209' }, { '\u22f9', '\u0338' }, + { '\u22f5', '\u0338' }, { 
'\u2209' }, { '\u22f7' }, { '\u22f6' }, + { '\u220c' }, { '\u220c' }, { '\u22fe' }, { '\u22fd' }, + { '\u2226' }, { '\u2226' }, { '\u2afd', '\u20e5' }, + { '\u2202', '\u0338' }, { '\u2a14' }, { '\u2280' }, { '\u22e0' }, + { '\u2aaf', '\u0338' }, { '\u2280' }, { '\u2aaf', '\u0338' }, + { '\u21cf' }, { '\u219b' }, { '\u2933', '\u0338' }, + { '\u219d', '\u0338' }, { '\u219b' }, { '\u22eb' }, { '\u22ed' }, + { '\u2281' }, { '\u22e1' }, { '\u2ab0', '\u0338' }, + { '\ud835', '\udcc3' }, { '\u2224' }, { '\u2226' }, { '\u2241' }, + { '\u2244' }, { '\u2244' }, { '\u2224' }, { '\u2226' }, + { '\u22e2' }, { '\u22e3' }, { '\u2284' }, { '\u2ac5', '\u0338' }, + { '\u2288' }, { '\u2282', '\u20d2' }, { '\u2288' }, + { '\u2ac5', '\u0338' }, { '\u2281' }, { '\u2ab0', '\u0338' }, + { '\u2285' }, { '\u2ac6', '\u0338' }, { '\u2289' }, + { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2ac6', '\u0338' }, + { '\u2279' }, { '\u00f1' }, { '\u00f1' }, { '\u2278' }, + { '\u22ea' }, { '\u22ec' }, { '\u22eb' }, { '\u22ed' }, + { '\u03bd' }, { '\u0023' }, { '\u2116' }, { '\u2007' }, + { '\u22ad' }, { '\u2904' }, { '\u224d', '\u20d2' }, { '\u22ac' }, + { '\u2265', '\u20d2' }, { '\u003e', '\u20d2' }, { '\u29de' }, + { '\u2902' }, { '\u2264', '\u20d2' }, { '\u003c', '\u20d2' }, + { '\u22b4', '\u20d2' }, { '\u2903' }, { '\u22b5', '\u20d2' }, + { '\u223c', '\u20d2' }, { '\u21d6' }, { '\u2923' }, { '\u2196' }, + { '\u2196' }, { '\u2927' }, { '\u24c8' }, { '\u00f3' }, + { '\u00f3' }, { '\u229b' }, { '\u229a' }, { '\u00f4' }, + { '\u00f4' }, { '\u043e' }, { '\u229d' }, { '\u0151' }, + { '\u2a38' }, { '\u2299' }, { '\u29bc' }, { '\u0153' }, + { '\u29bf' }, { '\ud835', '\udd2c' }, { '\u02db' }, { '\u00f2' }, + { '\u00f2' }, { '\u29c1' }, { '\u29b5' }, { '\u03a9' }, + { '\u222e' }, { '\u21ba' }, { '\u29be' }, { '\u29bb' }, + { '\u203e' }, { '\u29c0' }, { '\u014d' }, { '\u03c9' }, + { '\u03bf' }, { '\u29b6' }, { '\u2296' }, { '\ud835', '\udd60' }, + { '\u29b7' }, { '\u29b9' }, { '\u2295' }, { '\u2228' }, + { 
'\u21bb' }, { '\u2a5d' }, { '\u2134' }, { '\u2134' }, + { '\u00aa' }, { '\u00aa' }, { '\u00ba' }, { '\u00ba' }, + { '\u22b6' }, { '\u2a56' }, { '\u2a57' }, { '\u2a5b' }, + { '\u2134' }, { '\u00f8' }, { '\u00f8' }, { '\u2298' }, + { '\u00f5' }, { '\u00f5' }, { '\u2297' }, { '\u2a36' }, + { '\u00f6' }, { '\u00f6' }, { '\u233d' }, { '\u2225' }, + { '\u00b6' }, { '\u00b6' }, { '\u2225' }, { '\u2af3' }, + { '\u2afd' }, { '\u2202' }, { '\u043f' }, { '\u0025' }, + { '\u002e' }, { '\u2030' }, { '\u22a5' }, { '\u2031' }, + { '\ud835', '\udd2d' }, { '\u03c6' }, { '\u03d5' }, { '\u2133' }, + { '\u260e' }, { '\u03c0' }, { '\u22d4' }, { '\u03d6' }, + { '\u210f' }, { '\u210e' }, { '\u210f' }, { '\u002b' }, + { '\u2a23' }, { '\u229e' }, { '\u2a22' }, { '\u2214' }, + { '\u2a25' }, { '\u2a72' }, { '\u00b1' }, { '\u00b1' }, + { '\u2a26' }, { '\u2a27' }, { '\u00b1' }, { '\u2a15' }, + { '\ud835', '\udd61' }, { '\u00a3' }, { '\u00a3' }, { '\u227a' }, + { '\u2ab3' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' }, + { '\u227a' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' }, + { '\u2ab9' }, { '\u2ab5' }, { '\u22e8' }, { '\u227e' }, + { '\u2032' }, { '\u2119' }, { '\u2ab5' }, { '\u2ab9' }, + { '\u22e8' }, { '\u220f' }, { '\u232e' }, { '\u2312' }, + { '\u2313' }, { '\u221d' }, { '\u221d' }, { '\u227e' }, + { '\u22b0' }, { '\ud835', '\udcc5' }, { '\u03c8' }, { '\u2008' }, + { '\ud835', '\udd2e' }, { '\u2a0c' }, { '\ud835', '\udd62' }, + { '\u2057' }, { '\ud835', '\udcc6' }, { '\u210d' }, { '\u2a16' }, + { '\u003f' }, { '\u225f' }, { '\u0022' }, { '\u0022' }, + { '\u21db' }, { '\u21d2' }, { '\u291c' }, { '\u290f' }, + { '\u2964' }, { '\u223d', '\u0331' }, { '\u0155' }, { '\u221a' }, + { '\u29b3' }, { '\u27e9' }, { '\u2992' }, { '\u29a5' }, + { '\u27e9' }, { '\u00bb' }, { '\u00bb' }, { '\u2192' }, + { '\u2975' }, { '\u21e5' }, { '\u2920' }, { '\u2933' }, + { '\u291e' }, { '\u21aa' }, { '\u21ac' }, { '\u2945' }, + { '\u2974' }, { '\u21a3' }, { '\u219d' }, { '\u291a' }, + { '\u2236' }, { '\u211a' }, { 
'\u290d' }, { '\u2773' }, + { '\u007d' }, { '\u005d' }, { '\u298c' }, { '\u298e' }, + { '\u2990' }, { '\u0159' }, { '\u0157' }, { '\u2309' }, + { '\u007d' }, { '\u0440' }, { '\u2937' }, { '\u2969' }, + { '\u201d' }, { '\u201d' }, { '\u21b3' }, { '\u211c' }, + { '\u211b' }, { '\u211c' }, { '\u211d' }, { '\u25ad' }, + { '\u00ae' }, { '\u00ae' }, { '\u297d' }, { '\u230b' }, + { '\ud835', '\udd2f' }, { '\u21c1' }, { '\u21c0' }, { '\u296c' }, + { '\u03c1' }, { '\u03f1' }, { '\u2192' }, { '\u21a3' }, + { '\u21c1' }, { '\u21c0' }, { '\u21c4' }, { '\u21cc' }, + { '\u21c9' }, { '\u219d' }, { '\u22cc' }, { '\u02da' }, + { '\u2253' }, { '\u21c4' }, { '\u21cc' }, { '\u200f' }, + { '\u23b1' }, { '\u23b1' }, { '\u2aee' }, { '\u27ed' }, + { '\u21fe' }, { '\u27e7' }, { '\u2986' }, { '\ud835', '\udd63' }, + { '\u2a2e' }, { '\u2a35' }, { '\u0029' }, { '\u2994' }, + { '\u2a12' }, { '\u21c9' }, { '\u203a' }, { '\ud835', '\udcc7' }, + { '\u21b1' }, { '\u005d' }, { '\u2019' }, { '\u2019' }, + { '\u22cc' }, { '\u22ca' }, { '\u25b9' }, { '\u22b5' }, + { '\u25b8' }, { '\u29ce' }, { '\u2968' }, { '\u211e' }, + { '\u015b' }, { '\u201a' }, { '\u227b' }, { '\u2ab4' }, + { '\u2ab8' }, { '\u0161' }, { '\u227d' }, { '\u2ab0' }, + { '\u015f' }, { '\u015d' }, { '\u2ab6' }, { '\u2aba' }, + { '\u22e9' }, { '\u2a13' }, { '\u227f' }, { '\u0441' }, + { '\u22c5' }, { '\u22a1' }, { '\u2a66' }, { '\u21d8' }, + { '\u2925' }, { '\u2198' }, { '\u2198' }, { '\u00a7' }, + { '\u00a7' }, { '\u003b' }, { '\u2929' }, { '\u2216' }, + { '\u2216' }, { '\u2736' }, { '\ud835', '\udd30' }, { '\u2322' }, + { '\u266f' }, { '\u0449' }, { '\u0448' }, { '\u2223' }, + { '\u2225' }, { '\u00ad' }, { '\u00ad' }, { '\u03c3' }, + { '\u03c2' }, { '\u03c2' }, { '\u223c' }, { '\u2a6a' }, + { '\u2243' }, { '\u2243' }, { '\u2a9e' }, { '\u2aa0' }, + { '\u2a9d' }, { '\u2a9f' }, { '\u2246' }, { '\u2a24' }, + { '\u2972' }, { '\u2190' }, { '\u2216' }, { '\u2a33' }, + { '\u29e4' }, { '\u2223' }, { '\u2323' }, { '\u2aaa' }, + { '\u2aac' }, { 
'\u2aac', '\ufe00' }, { '\u044c' }, { '\u002f' }, + { '\u29c4' }, { '\u233f' }, { '\ud835', '\udd64' }, { '\u2660' }, + { '\u2660' }, { '\u2225' }, { '\u2293' }, { '\u2293', '\ufe00' }, + { '\u2294' }, { '\u2294', '\ufe00' }, { '\u228f' }, { '\u2291' }, + { '\u228f' }, { '\u2291' }, { '\u2290' }, { '\u2292' }, + { '\u2290' }, { '\u2292' }, { '\u25a1' }, { '\u25a1' }, + { '\u25aa' }, { '\u25aa' }, { '\u2192' }, { '\ud835', '\udcc8' }, + { '\u2216' }, { '\u2323' }, { '\u22c6' }, { '\u2606' }, + { '\u2605' }, { '\u03f5' }, { '\u03d5' }, { '\u00af' }, + { '\u2282' }, { '\u2ac5' }, { '\u2abd' }, { '\u2286' }, + { '\u2ac3' }, { '\u2ac1' }, { '\u2acb' }, { '\u228a' }, + { '\u2abf' }, { '\u2979' }, { '\u2282' }, { '\u2286' }, + { '\u2ac5' }, { '\u228a' }, { '\u2acb' }, { '\u2ac7' }, + { '\u2ad5' }, { '\u2ad3' }, { '\u227b' }, { '\u2ab8' }, + { '\u227d' }, { '\u2ab0' }, { '\u2aba' }, { '\u2ab6' }, + { '\u22e9' }, { '\u227f' }, { '\u2211' }, { '\u266a' }, + { '\u00b9' }, { '\u00b9' }, { '\u00b2' }, { '\u00b2' }, + { '\u00b3' }, { '\u00b3' }, { '\u2283' }, { '\u2ac6' }, + { '\u2abe' }, { '\u2ad8' }, { '\u2287' }, { '\u2ac4' }, + { '\u27c9' }, { '\u2ad7' }, { '\u297b' }, { '\u2ac2' }, + { '\u2acc' }, { '\u228b' }, { '\u2ac0' }, { '\u2283' }, + { '\u2287' }, { '\u2ac6' }, { '\u228b' }, { '\u2acc' }, + { '\u2ac8' }, { '\u2ad4' }, { '\u2ad6' }, { '\u21d9' }, + { '\u2926' }, { '\u2199' }, { '\u2199' }, { '\u292a' }, + { '\u00df' }, { '\u00df' }, { '\u2316' }, { '\u03c4' }, + { '\u23b4' }, { '\u0165' }, { '\u0163' }, { '\u0442' }, + { '\u20db' }, { '\u2315' }, { '\ud835', '\udd31' }, { '\u2234' }, + { '\u2234' }, { '\u03b8' }, { '\u03d1' }, { '\u03d1' }, + { '\u2248' }, { '\u223c' }, { '\u2009' }, { '\u2248' }, + { '\u223c' }, { '\u00fe' }, { '\u00fe' }, { '\u02dc' }, + { '\u00d7' }, { '\u00d7' }, { '\u22a0' }, { '\u2a31' }, + { '\u2a30' }, { '\u222d' }, { '\u2928' }, { '\u22a4' }, + { '\u2336' }, { '\u2af1' }, { '\ud835', '\udd65' }, { '\u2ada' }, + { '\u2929' }, { '\u2034' }, { 
'\u2122' }, { '\u25b5' }, + { '\u25bf' }, { '\u25c3' }, { '\u22b4' }, { '\u225c' }, + { '\u25b9' }, { '\u22b5' }, { '\u25ec' }, { '\u225c' }, + { '\u2a3a' }, { '\u2a39' }, { '\u29cd' }, { '\u2a3b' }, + { '\u23e2' }, { '\ud835', '\udcc9' }, { '\u0446' }, { '\u045b' }, + { '\u0167' }, { '\u226c' }, { '\u219e' }, { '\u21a0' }, + { '\u21d1' }, { '\u2963' }, { '\u00fa' }, { '\u00fa' }, + { '\u2191' }, { '\u045e' }, { '\u016d' }, { '\u00fb' }, + { '\u00fb' }, { '\u0443' }, { '\u21c5' }, { '\u0171' }, + { '\u296e' }, { '\u297e' }, { '\ud835', '\udd32' }, { '\u00f9' }, + { '\u00f9' }, { '\u21bf' }, { '\u21be' }, { '\u2580' }, + { '\u231c' }, { '\u231c' }, { '\u230f' }, { '\u25f8' }, + { '\u016b' }, { '\u00a8' }, { '\u00a8' }, { '\u0173' }, + { '\ud835', '\udd66' }, { '\u2191' }, { '\u2195' }, { '\u21bf' }, + { '\u21be' }, { '\u228e' }, { '\u03c5' }, { '\u03d2' }, + { '\u03c5' }, { '\u21c8' }, { '\u231d' }, { '\u231d' }, + { '\u230e' }, { '\u016f' }, { '\u25f9' }, { '\ud835', '\udcca' }, + { '\u22f0' }, { '\u0169' }, { '\u25b5' }, { '\u25b4' }, + { '\u21c8' }, { '\u00fc' }, { '\u00fc' }, { '\u29a7' }, + { '\u21d5' }, { '\u2ae8' }, { '\u2ae9' }, { '\u22a8' }, + { '\u299c' }, { '\u03f5' }, { '\u03f0' }, { '\u2205' }, + { '\u03d5' }, { '\u03d6' }, { '\u221d' }, { '\u2195' }, + { '\u03f1' }, { '\u03c2' }, { '\u228a', '\ufe00' }, + { '\u2acb', '\ufe00' }, { '\u228b', '\ufe00' }, + { '\u2acc', '\ufe00' }, { '\u03d1' }, { '\u22b2' }, { '\u22b3' }, + { '\u0432' }, { '\u22a2' }, { '\u2228' }, { '\u22bb' }, + { '\u225a' }, { '\u22ee' }, { '\u007c' }, { '\u007c' }, + { '\ud835', '\udd33' }, { '\u22b2' }, { '\u2282', '\u20d2' }, + { '\u2283', '\u20d2' }, { '\ud835', '\udd67' }, { '\u221d' }, + { '\u22b3' }, { '\ud835', '\udccb' }, { '\u2acb', '\ufe00' }, + { '\u228a', '\ufe00' }, { '\u2acc', '\ufe00' }, + { '\u228b', '\ufe00' }, { '\u299a' }, { '\u0175' }, { '\u2a5f' }, + { '\u2227' }, { '\u2259' }, { '\u2118' }, { '\ud835', '\udd34' }, + { '\ud835', '\udd68' }, { '\u2118' }, { 
'\u2240' }, { '\u2240' }, + { '\ud835', '\udccc' }, { '\u22c2' }, { '\u25ef' }, { '\u22c3' }, + { '\u25bd' }, { '\ud835', '\udd35' }, { '\u27fa' }, { '\u27f7' }, + { '\u03be' }, { '\u27f8' }, { '\u27f5' }, { '\u27fc' }, + { '\u22fb' }, { '\u2a00' }, { '\ud835', '\udd69' }, { '\u2a01' }, + { '\u2a02' }, { '\u27f9' }, { '\u27f6' }, { '\ud835', '\udccd' }, + { '\u2a06' }, { '\u2a04' }, { '\u25b3' }, { '\u22c1' }, + { '\u22c0' }, { '\u00fd' }, { '\u00fd' }, { '\u044f' }, + { '\u0177' }, { '\u044b' }, { '\u00a5' }, { '\u00a5' }, + { '\ud835', '\udd36' }, { '\u0457' }, { '\ud835', '\udd6a' }, + { '\ud835', '\udcce' }, { '\u044e' }, { '\u00ff' }, { '\u00ff' }, + { '\u017a' }, { '\u017e' }, { '\u0437' }, { '\u017c' }, + { '\u2128' }, { '\u03b6' }, { '\ud835', '\udd37' }, { '\u0436' }, + { '\u21dd' }, { '\ud835', '\udd6b' }, { '\ud835', '\udccf' }, + { '\u200d' }, { '\u200c' }, }; + + final static char[][] WINDOWS_1252 = { { '\u20AC' }, { '\u0081' }, + { '\u201A' }, { '\u0192' }, { '\u201E' }, { '\u2026' }, + { '\u2020' }, { '\u2021' }, { '\u02C6' }, { '\u2030' }, + { '\u0160' }, { '\u2039' }, { '\u0152' }, { '\u008D' }, + { '\u017D' }, { '\u008F' }, { '\u0090' }, { '\u2018' }, + { '\u2019' }, { '\u201C' }, { '\u201D' }, { '\u2022' }, + { '\u2013' }, { '\u2014' }, { '\u02DC' }, { '\u2122' }, + { '\u0161' }, { '\u203A' }, { '\u0153' }, { '\u009D' }, + { '\u017E' }, { '\u0178' } }; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java new file mode 100644 index 0000000000..311f8f77fe --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java @@ -0,0 +1,311 @@ +/* + * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera + * Software ASA. + * + * You are granted a license to use, reproduce and create derivative works of + * this document. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.NoLength; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NamedCharactersAccel { + + static final @NoLength int[][] HILO_ACCEL = { + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0, + 75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0, + 0, 135989275, 139397199, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0, + 0, 0, 0, }, + null, + { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0, + 0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0, + 0, 0, }, + { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0, + 0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, }, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 94897574, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0, + 0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0, + 60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0, + 115279583, 0, 0, 
136054812, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806, + 16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, }, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0, + 0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + 95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + null, + { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + null, + null, + null, + null, + { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826, + 12517566, 13435084, 0, 15663343, 16515320, 19988785, + 20644155, 25428355, 27197855, 0, 29163962, 31916519, + 34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994, + 46596800, 54264627, 60556184, 64750554, 68879387, 71763012, + 75826303, 77268122, 0, 81462490, 83952875, 92865919, + 96142769, 105973327, 110167691, 0, 116917984, 121833283, + 132253665, 136251421, 140707923, 0, 0, 144574620, + 145361066, }, + { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880, + 0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650, + 0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203, + 136382496, 0, 0, 0, 0, 0, }, + { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973, + 12583104, 13631694, 15139046, 15794416, 16711933, 20054322, + 20840764, 25624965, 27263392, 0, 29360574, 32244200, + 34931219, 36373033, 38601293, 39584348, 0, 40567402, + 41091698, 42205821, 46858954, 54723389, 60818335, 65143773, + 68944924, 71959625, 75957383, 77530268, 80938194, 81593564, + 84739337, 92997002, 96863680, 106235474, 110233234, 0, + 117704448, 122816325, 132515812, 136579106, 140773476, + 142149753, 143001732, 144705695, 145492139, }, + { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0, + 0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0, + 41157236, 0, 46924492, 54788932, 61080481, 65209315, 0, + 72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223, + 106563158, 0, 0, 118032133, 123012947, 132581351, + 136775717, 140839013, 0, 143067271, 0, 145557677, }, + { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 
18809088, + 20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656, + 39649885, 0, 0, 41288309, 42336901, 47448781, 55182149, + 61342629, 65274852, 69010461, 72811596, 76219528, 77726880, + 0, 0, 86967572, 93128077, 97650120, 106628699, 110560915, + 0, 118490890, 123733846, 132646888, 0, 141232230, + 142411898, 0, 144836769, 145688750, }, + { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049, + 12648641, 13762770, 15204584, 15859954, 18874656, 20250933, + 21365062, 25756041, 27328929, 28574132, 29688261, 32309741, + 34996758, 36504109, 39322200, 39715422, 39912033, 40632940, + 41353847, 42467975, 47514325, 55247691, 61473705, 65405925, + 69272606, 72877144, 76285068, 77857955, 81003732, 81659102, + 87164208, 93193614, 97715667, 106759772, 110626456, + 114296528, 118687505, 123864929, 132712425, 136906792, + 141297772, 142477438, 143132808, 144902307, 145754288, }, + { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0, + 0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0, + 42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0, + 81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0, + 137037866, 0, 0, 0, 0, 0, }, + { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0, + 29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0, + 55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429, + 93259151, 98502109, 107152994, 110888601, 0, 119015188, + 124323683, 133498858, 137234476, 0, 0, 143263881, 0, + 145819825, }, + { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0, + 20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051, + 0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213, + 0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048, + 98764256, 107218532, 111085213, 114362065, 119736089, + 125241194, 133957622, 0, 0, 0, 143329419, 144967844, + 145885362, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0, + 78382252, 0, 
81855713, 87622969, 0, 98829796, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0, + 0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659, + 49152740, 56623967, 62587834, 66061292, 69600292, 73401437, + 0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213, + 112002720, 0, 119932708, 125306744, 0, 137496623, + 141363309, 0, 143460492, 0, 0, }, + { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0, + 0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0, + 43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269, + 0, 88147263, 0, 99943925, 107873898, 112068270, 0, + 120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, }, + { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0, + 66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018, + 94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0, + 143591567, 0, 0, }, + { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123, + 12845251, 14680286, 15270121, 15925491, 19661092, 20382007, + 24969543, 26149263, 27656613, 28639669, 31392222, 32768500, + 35586591, 37225015, 39387737, 39780959, 40043107, 40698477, + 41419384, 44696233, 52495090, 57738081, 63439804, 66782202, + 69927976, 73925736, 76809359, 79824063, 81134806, 81921250, + 89785673, 94307742, 100795894, 107939439, 112330415, + 114427602, 120588074, 126158721, 134416381, 137824310, + 141559920, 142542975, 143853712, 145033381, 145950899, }, + { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336, + 0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674, + 0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763, + 94373280, 101451267, 108136048, 0, 114493139, 120784689, + 126355334, 134481924, 138414136, 
141625457, 142608512, 0, + 0, 0, }, + { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0, + 0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0, + 0, 0, 0, 0, }, + { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0, + 0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534, + 0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425, + 67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0, + 101910029, 108988019, 114034355, 0, 120850228, 127469465, + 135464965, 138741825, 141690994, 142739585, 143984788, 0, + 0, }, + { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419, + 12976325, 14745825, 15401194, 15991028, 19857709, 20447544, + 25035134, 26542483, 28377520, 28705206, 31588833, 33358333, + 35783201, 38208071, 39453274, 39846496, 40108644, 40764014, + 41484921, 45613749, 53216038, 58196852, 63898572, 68158478, + 71369793, 74253418, 77005973, 80479430, 81265879, 81986787, + 90965347, 94504353, 103679508, 109250176, 114165453, + 114558676, 121243445, 127731610, 135727124, 138807366, + 142018675, 142805123, 144115862, 145098918, 146016436, }, + { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0, + 19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0, + 38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184, + 68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0, + 104203823, 109512324, 0, 0, 121636667, 128059294, 0, + 139069511, 0, 0, 0, 0, 0, }, + { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971, + 15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0, + 31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0, + 45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0, + 80807119, 81331417, 0, 91948410, 94700963, 104465975, + 109643400, 114230991, 114951893, 121702209, 131663779, 0, + 139266123, 0, 0, 144246936, 145295527, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0, + 0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 75302012, 0, + 0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0, + 0, 0, 0, 144312474, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736, + 139331662, 0, 0, 144378011, 0, 146147509, }, + { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, }; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java new file mode 100644 index 0000000000..485684ea10 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2008-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Literal; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.Interner; + +public final class Portability { + + // Allocating methods + + /** + * Allocates a new local name object. In C++, the refcount must be set up in such a way that + * calling releaseLocal on the return value balances the refcount set by this method. + */ + public static @Local String newLocalNameFromBuffer(@NoLength char[] buf, int offset, int length, Interner interner) { + return new String(buf, offset, length).intern(); + } + + public static String newStringFromBuffer(@NoLength char[] buf, int offset, int length + // CPPONLY: , TreeBuilder treeBuilder + ) { + return new String(buf, offset, length); + } + + public static String newEmptyString() { + return ""; + } + + public static String newStringFromLiteral(@Literal String literal) { + return literal; + } + + public static String newStringFromString(String string) { + return string; + } + + // XXX get rid of this + public static char[] newCharArrayFromLocal(@Local String local) { + return local.toCharArray(); + } + + public static char[] newCharArrayFromString(String string) { + return string.toCharArray(); + } + + public static @Local String newLocalFromLocal(@Local String local, Interner interner) { + return local; + } + + // Deallocation methods + + public static void 
releaseString(String str) { + // No-op in Java + } + + // Comparison methods + + public static boolean localEqualsBuffer(@Local String local, @NoLength char[] buf, int offset, int length) { + if (local.length() != length) { + return false; + } + for (int i = 0; i < length; i++) { + if (local.charAt(i) != buf[offset + i]) { + return false; + } + } + return true; + } + + public static boolean lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(@Literal String lowerCaseLiteral, + String string) { + if (string == null) { + return false; + } + if (lowerCaseLiteral.length() > string.length()) { + return false; + } + for (int i = 0; i < lowerCaseLiteral.length(); i++) { + char c0 = lowerCaseLiteral.charAt(i); + char c1 = string.charAt(i); + if (c1 >= 'A' && c1 <= 'Z') { + c1 += 0x20; + } + if (c0 != c1) { + return false; + } + } + return true; + } + + public static boolean lowerCaseLiteralEqualsIgnoreAsciiCaseString(@Literal String lowerCaseLiteral, + String string) { + if (string == null) { + return false; + } + if (lowerCaseLiteral.length() != string.length()) { + return false; + } + for (int i = 0; i < lowerCaseLiteral.length(); i++) { + char c0 = lowerCaseLiteral.charAt(i); + char c1 = string.charAt(i); + if (c1 >= 'A' && c1 <= 'Z') { + c1 += 0x20; + } + if (c0 != c1) { + return false; + } + } + return true; + } + + public static boolean literalEqualsString(@Literal String literal, String string) { + return literal.equals(string); + } + + public static boolean stringEqualsString(String one, String other) { + return one.equals(other); + } + + public static void delete(Object o) { + + } + + public static void deleteArray(Object o) { + + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java new file mode 100644 index 0000000000..fad5f43db5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java @@ -0,0 
+1,136 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +public class PushedLocation { + private final int line; + + private final int linePrev; + + private final int col; + + private final int colPrev; + + private final boolean nextCharOnNewLine; + + private final String publicId; + + private final String systemId; + + private final PushedLocation next; + + /** + * @param line + * @param linePrev + * @param col + * @param colPrev + * @param nextCharOnNewLine + * @param publicId + * @param systemId + * @param next + */ + public PushedLocation(int line, int linePrev, int col, int colPrev, + boolean nextCharOnNewLine, String publicId, String systemId, + PushedLocation next) { + this.line = line; + this.linePrev = linePrev; + this.col = col; + this.colPrev = colPrev; + this.nextCharOnNewLine = nextCharOnNewLine; + this.publicId = publicId; + this.systemId = systemId; + this.next = next; + } + + /** + * Returns the line. + * + * @return the line + */ + public int getLine() { + return line; + } + + /** + * Returns the linePrev. + * + * @return the linePrev + */ + public int getLinePrev() { + return linePrev; + } + + /** + * Returns the col. + * + * @return the col + */ + public int getCol() { + return col; + } + + /** + * Returns the colPrev. + * + * @return the colPrev + */ + public int getColPrev() { + return colPrev; + } + + /** + * Returns the nextCharOnNewLine. + * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return nextCharOnNewLine; + } + + /** + * Returns the publicId. + * + * @return the publicId + */ + public String getPublicId() { + return publicId; + } + + /** + * Returns the systemId. + * + * @return the systemId + */ + public String getSystemId() { + return systemId; + } + + /** + * Returns the next. 
+ * + * @return the next + */ + public PushedLocation getNext() { + return next; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java new file mode 100644 index 0000000000..9aeaba0be0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NsUri; + +final class StackNode { + final int flags; + + final @Local String name; + + final @Local String popName; + + final @NsUri String ns; + + final T node; + + // Only used on the list of formatting elements + HtmlAttributes attributes; + + private int refcount = 1; + + // [NOCPP[ + + private final TaintableLocatorImpl locator; + + public TaintableLocatorImpl getLocator() { + return locator; + } + + // ]NOCPP] + + @Inline public int getFlags() { + return flags; + } + + public int getGroup() { + return flags & ElementName.GROUP_MASK; + } + + public boolean isScoping() { + return (flags & ElementName.SCOPING) != 0; + } + + public boolean isSpecial() { + return (flags & ElementName.SPECIAL) != 0; + } + + public boolean isFosterParenting() { + return (flags & ElementName.FOSTER_PARENTING) != 0; + } + + public boolean isHtmlIntegrationPoint() { + return (flags & ElementName.HTML_INTEGRATION_POINT) != 0; + } + + // [NOCPP[ + + public boolean isOptionalEndTag() { + return (flags & ElementName.OPTIONAL_END_TAG) != 0; + } + + // ]NOCPP] + + /** + * Constructor for copying. This doesn't take another StackNode + * because in C++ the caller is responsible for reobtaining the local names + * from another interner. + * + * @param flags + * @param ns + * @param name + * @param node + * @param popName + * @param attributes + */ + StackNode(int flags, @NsUri String ns, @Local String name, T node, + @Local String popName, HtmlAttributes attributes + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = flags; + this.name = name; + this.popName = popName; + this.ns = ns; + this.node = node; + this.attributes = attributes; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Short hand for well-known HTML elements.
+ * + * @param elementName + * @param node + */ + StackNode(ElementName elementName, T node + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = elementName.name; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = null; + this.refcount = 1; + assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for HTML formatting elements. + * + * @param elementName + * @param node + * @param attributes + */ + StackNode(ElementName elementName, T node, HtmlAttributes attributes + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = elementName.name; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = attributes; + this.refcount = 1; + assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * The common-case HTML constructor. + * + * @param elementName + * @param node + * @param popName + */ + StackNode(ElementName elementName, T node, @Local String popName + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for SVG elements. Note that the order of the arguments is + * what distinguishes this from the HTML constructor. This is ugly, but + * AFAICT the least disruptive way to make this work with Java's generics + * and without unnecessary branches. 
:-( + * + * @param elementName + * @param popName + * @param node + */ + StackNode(ElementName elementName, @Local String popName, T node + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = prepareSvgFlags(elementName.getFlags()); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/2000/svg"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for MathML. + * + * @param elementName + * @param node + * @param popName + * @param markAsIntegrationPoint + */ + StackNode(ElementName elementName, T node, @Local String popName, + boolean markAsIntegrationPoint + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = prepareMathFlags(elementName.getFlags(), + markAsIntegrationPoint); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/1998/Math/MathML"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + private static int prepareSvgFlags(int flags) { + flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING + | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); + if ((flags & ElementName.SCOPING_AS_SVG) != 0) { + flags |= (ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT); + } + return flags; + } + + private static int prepareMathFlags(int flags, + boolean markAsIntegrationPoint) { + flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING + | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); + if ((flags & ElementName.SCOPING_AS_MATHML) != 0) { + flags |= (ElementName.SCOPING | ElementName.SPECIAL); + } + if (markAsIntegrationPoint) { + flags |= ElementName.HTML_INTEGRATION_POINT; + } + return flags; + } + + @SuppressWarnings("unused") private void destructor() { + Portability.delete(attributes); + } + + public void dropAttributes() { + 
attributes = null; + } + + // [NOCPP[ + /** + * @see java.lang.Object#toString() + */ + @Override public @Local String toString() { + return name; + } + + // ]NOCPP] + + public void retain() { + refcount++; + } + + public void release() { + refcount--; + if (refcount == 0) { + Portability.delete(this); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java new file mode 100644 index 0000000000..ff89e04430 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Auto; + + +public class StateSnapshot implements TreeBuilderState { + + private final @Auto StackNode[] stack; + + private final @Auto StackNode[] listOfActiveFormattingElements; + + private final @Auto int[] templateModeStack; + + private final T formPointer; + + private final T headPointer; + + private final T deepTreeSurrogateParent; + + private final int mode; + + private final int originalMode; + + private final boolean framesetOk; + + private final boolean needToDropLF; + + private final boolean quirks; + + /** + * @param stack + * @param listOfActiveFormattingElements + * @param templateModeStack + * @param formPointer + * @param headPointer + * @param deepTreeSurrogateParent + * @param mode + * @param originalMode + * @param framesetOk + * @param needToDropLF + * @param quirks + */ + StateSnapshot(StackNode[] stack, + StackNode[] listOfActiveFormattingElements, int[] templateModeStack, T formPointer, + T headPointer, T deepTreeSurrogateParent, int mode, int originalMode, + boolean framesetOk, boolean needToDropLF, boolean quirks) { + this.stack = stack; + this.listOfActiveFormattingElements = listOfActiveFormattingElements; + this.templateModeStack = templateModeStack; + this.formPointer = formPointer; + this.headPointer = headPointer; + this.deepTreeSurrogateParent = deepTreeSurrogateParent; + this.mode = mode; + this.originalMode = originalMode; + this.framesetOk = framesetOk; + this.needToDropLF = needToDropLF; + this.quirks = quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack() + */ + public StackNode[] getStack() { + return stack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack() + */ + public int[] getTemplateModeStack() { + return templateModeStack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements() + */ + public StackNode[] 
getListOfActiveFormattingElements() { + return listOfActiveFormattingElements; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer() + */ + public T getFormPointer() { + return formPointer; + } + + /** + * Returns the headPointer. + * + * @return the headPointer + */ + public T getHeadPointer() { + return headPointer; + } + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent() { + return deepTreeSurrogateParent; + } + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode() { + return mode; + } + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode() { + return originalMode; + } + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk() { + return framesetOk; + } + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF() { + return needToDropLF; + } + + /** + * Returns the quirks. 
+ * + * @return the quirks + */ + public boolean isQuirks() { + return quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength() + */ + public int getListOfActiveFormattingElementsLength() { + return listOfActiveFormattingElements.length; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength() + */ + public int getStackLength() { + return stack.length; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength() + */ + public int getTemplateModeStackLength() { + return templateModeStack.length; + } + + @SuppressWarnings("unused") private void destructor() { + for (int i = 0; i < stack.length; i++) { + stack[i].release(); + } + for (int i = 0; i < listOfActiveFormattingElements.length; i++) { + if (listOfActiveFormattingElements[i] != null) { + listOfActiveFormattingElements[i].release(); + } + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java new file mode 100644 index 0000000000..37cdb75d34 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.Locator; + +public class TaintableLocatorImpl extends LocatorImpl { + + private boolean tainted; + + public TaintableLocatorImpl(Locator locator) { + super(locator); + this.tainted = false; + } + + public void markTainted() { + tainted = true; + } + + public boolean isTainted() { + return tainted; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java new file mode 100644 index 0000000000..d9eaafeb3e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -0,0 +1,7067 @@ +/* + * Copyright (c) 2005-2007 Henri Sivonen + * Copyright (c) 2007-2015 Mozilla Foundation + * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla + * Foundation, and Opera Software ASA. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * The comments following this one that use the same comment syntax as this + * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007 + * amended as of June 18 2008 and May 31 2010. + * That document came with this statement: + * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and + * Opera Software ASA. You are granted a license to use, reproduce and + * create derivative works of this document." 
+ */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.CharacterName; +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.EncodingDeclarationHandler; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +/** + * An implementation of + * https://html.spec.whatwg.org/multipage/syntax.html#tokenization + * + * This class implements the Locator interface. This is not an + * incidental implementation detail: Users of this class are encouraged to make + * use of the Locator nature. + * + * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer + * can be configured to treat these conditions as fatal or to coerce the infoset + * to something that XML 1.0 allows. 
+ * + * @version $Id$ + * @author hsivonen + */ +public class Tokenizer implements Locator { + + private static final int DATA_AND_RCDATA_MASK = ~1; + + public static final int DATA = 0; + + public static final int RCDATA = 1; + + public static final int SCRIPT_DATA = 2; + + public static final int RAWTEXT = 3; + + public static final int SCRIPT_DATA_ESCAPED = 4; + + public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5; + + public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6; + + public static final int ATTRIBUTE_VALUE_UNQUOTED = 7; + + public static final int PLAINTEXT = 8; + + public static final int TAG_OPEN = 9; + + public static final int CLOSE_TAG_OPEN = 10; + + public static final int TAG_NAME = 11; + + public static final int BEFORE_ATTRIBUTE_NAME = 12; + + public static final int ATTRIBUTE_NAME = 13; + + public static final int AFTER_ATTRIBUTE_NAME = 14; + + public static final int BEFORE_ATTRIBUTE_VALUE = 15; + + public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16; + + public static final int BOGUS_COMMENT = 17; + + public static final int MARKUP_DECLARATION_OPEN = 18; + + public static final int DOCTYPE = 19; + + public static final int BEFORE_DOCTYPE_NAME = 20; + + public static final int DOCTYPE_NAME = 21; + + public static final int AFTER_DOCTYPE_NAME = 22; + + public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23; + + public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24; + + public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25; + + public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26; + + public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27; + + public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28; + + public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29; + + public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30; + + public static final int BOGUS_DOCTYPE = 31; + + public static final int COMMENT_START = 32; + + public static final int 
COMMENT_START_DASH = 33; + + public static final int COMMENT = 34; + + public static final int COMMENT_END_DASH = 35; + + public static final int COMMENT_END = 36; + + public static final int COMMENT_END_BANG = 37; + + public static final int NON_DATA_END_TAG_NAME = 38; + + public static final int MARKUP_DECLARATION_HYPHEN = 39; + + public static final int MARKUP_DECLARATION_OCTYPE = 40; + + public static final int DOCTYPE_UBLIC = 41; + + public static final int DOCTYPE_YSTEM = 42; + + public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43; + + public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44; + + public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45; + + public static final int CONSUME_CHARACTER_REFERENCE = 46; + + public static final int CONSUME_NCR = 47; + + public static final int CHARACTER_REFERENCE_TAIL = 48; + + public static final int HEX_NCR_LOOP = 49; + + public static final int DECIMAL_NRC_LOOP = 50; + + public static final int HANDLE_NCR_VALUE = 51; + + public static final int HANDLE_NCR_VALUE_RECONSUME = 52; + + public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53; + + public static final int SELF_CLOSING_START_TAG = 54; + + public static final int CDATA_START = 55; + + public static final int CDATA_SECTION = 56; + + public static final int CDATA_RSQB = 57; + + public static final int CDATA_RSQB_RSQB = 58; + + public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59; + + public static final int SCRIPT_DATA_ESCAPE_START = 60; + + public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61; + + public static final int SCRIPT_DATA_ESCAPED_DASH = 62; + + public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63; + + public static final int BOGUS_COMMENT_HYPHEN = 64; + + public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65; + + public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 
68; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72; + + public static final int PROCESSING_INSTRUCTION = 73; + + public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74; + + /** + * Magic value for UTF-16 operations. + */ + private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10)); + + /** + * UTF-16 code unit array containing less than and greater than for emitting + * those characters on certain parse errors. + */ + private static final @NoLength char[] LT_GT = { '<', '>' }; + + /** + * UTF-16 code unit array containing less than and solidus for emitting + * those characters on certain parse errors. + */ + private static final @NoLength char[] LT_SOLIDUS = { '<', '/' }; + + /** + * UTF-16 code unit array containing ]] for emitting those characters on + * state transitions. + */ + private static final @NoLength char[] RSQB_RSQB = { ']', ']' }; + + /** + * Array version of U+FFFD. + */ + private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + + // [NOCPP[ + + /** + * Array version of space. + */ + private static final @NoLength char[] SPACE = { ' ' }; + + // ]NOCPP] + + /** + * Array version of line feed. 
+ */ + private static final @NoLength char[] LF = { '\n' }; + + /** + * "CDATA[" as char[] + */ + private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T', + 'A', '[' }; + + /** + * "octype" as char[] + */ + private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p', + 'e' }; + + /** + * "ublic" as char[] + */ + private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' }; + + /** + * "ystem" as char[] + */ + private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' }; + + private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' }; + + private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' }; + + private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' }; + + private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't', + 'e', 'x', 't' }; + + private static final char[] XMP_ARR = { 'x', 'm', 'p' }; + + private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r', + 'e', 'a' }; + + private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' }; + + private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e', + 'd' }; + + private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i', + 'p', 't' }; + + private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm', + 'e', 's' }; + + /** + * The token handler. + */ + protected final TokenHandler tokenHandler; + + protected EncodingDeclarationHandler encodingDeclarationHandler; + + // [NOCPP[ + + /** + * The error handler. + */ + protected ErrorHandler errorHandler; + + // ]NOCPP] + + /** + * Whether the previous char read was CR. 
+ */ + protected boolean lastCR; + + protected int stateSave; + + private int returnStateSave; + + protected int index; + + private boolean forceQuirks; + + private char additional; + + private int entCol; + + private int firstCharKey; + + private int lo; + + private int hi; + + private int candidate; + + private int charRefBufMark; + + protected int value; + + private boolean seenDigits; + + protected int cstart; + + /** + * The SAX public id for the resource being tokenized. (Only passed back + * as part of locator data.) + */ + private String publicId; + + /** + * The SAX system id for the resource being tokenized. (Only passed back + * as part of locator data.) + */ + private String systemId; + + /** + * Buffer for bufferable things other than those that fit the description + * of charRefBuf. + */ + private @Auto char[] strBuf; + + /** + * Number of significant chars in strBuf. + */ + private int strBufLen; + + /** + * Buffer for characters that might form a character reference but may + * end up not forming one. + */ + private final @Auto char[] charRefBuf; + + /** + * Number of significant chars in charRefBuf. + */ + private int charRefBufLen; + + /** + * Buffer for expanding NCRs falling into the Basic Multilingual Plane. + */ + private final @Auto char[] bmpChar; + + /** + * Buffer for expanding astral NCRs. + */ + private final @Auto char[] astralChar; + + /** + * The element whose end tag closes the current CDATA or RCDATA element. + */ + protected ElementName endTagExpectation = null; + + private char[] endTagExpectationAsArray; // not @Auto! + + /** + * true if tokenizing an end tag + */ + protected boolean endTag; + + /** + * The current tag token name. + */ + private ElementName tagName = null; + + /** + * The current attribute name. + */ + protected AttributeName attributeName = null; + + // [NOCPP[ + + /** + * Whether comment tokens are emitted. 
+ */ + private boolean wantsComments = false; + + /** + * true when HTML4-specific additional errors are requested. + */ + protected boolean html4; + + /** + * Whether the stream is past the first 1024 bytes. + */ + private boolean metaBoundaryPassed; + + // ]NOCPP] + + /** + * The name of the current doctype token. + */ + private @Local String doctypeName; + + /** + * The public id of the current doctype token. + */ + private String publicIdentifier; + + /** + * The system id of the current doctype token. + */ + private String systemIdentifier; + + /** + * The attribute holder. + */ + private HtmlAttributes attributes; + + // [NOCPP[ + + /** + * The policy for vertical tab and form feed. + */ + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET; + + /** + * The policy for comments. + */ + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; + + private boolean html4ModeCompatibleWithXhtml1Schemata; + + private int mappingLangToXmlLang; + + // ]NOCPP] + + private final boolean newAttributesEachTime; + + private boolean shouldSuspend; + + protected boolean confident; + + private int line; + + /* + * The line number of the current attribute. First set to the line of the + * attribute name and if there is a value, set to the line the value + * started on. + */ + // CPPONLY: private int attributeLine; + + private Interner interner; + + // CPPONLY: private boolean viewingXmlSource; + + // [NOCPP[ + + protected LocatorImpl ampersandLocation; + + public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) { + this.tokenHandler = tokenHandler; + this.encodingDeclarationHandler = null; + this.newAttributesEachTime = newAttributesEachTime; + // &CounterClockwiseContourIntegral; is the longest valid char ref and + // the semicolon never gets appended to the buffer. 
+ this.charRefBuf = new char[32]; + this.bmpChar = new char[1]; + this.astralChar = new char[2]; + this.tagName = null; + this.attributeName = null; + this.doctypeName = null; + this.publicIdentifier = null; + this.systemIdentifier = null; + this.attributes = null; + } + + // ]NOCPP] + + /** + * The constructor. + * + * @param tokenHandler + * the handler for receiving tokens + */ + public Tokenizer(TokenHandler tokenHandler + // CPPONLY: , boolean viewingXmlSource + ) { + this.tokenHandler = tokenHandler; + this.encodingDeclarationHandler = null; + // [NOCPP[ + this.newAttributesEachTime = false; + // ]NOCPP] + // &CounterClockwiseContourIntegral; is the longest valid char ref and + // the semicolon never gets appended to the buffer. + this.charRefBuf = new char[32]; + this.bmpChar = new char[1]; + this.astralChar = new char[2]; + this.tagName = null; + this.attributeName = null; + this.doctypeName = null; + this.publicIdentifier = null; + this.systemIdentifier = null; + // [NOCPP[ + this.attributes = null; + // ]NOCPP] + // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null; + // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder(); + // CPPONLY: this.viewingXmlSource = viewingXmlSource; + } + + public void setInterner(Interner interner) { + this.interner = interner; + } + + public void initLocation(String newPublicId, String newSystemId) { + this.systemId = newSystemId; + this.publicId = newPublicId; + + } + + // CPPONLY: boolean isViewingXmlSource() { + // CPPONLY: return viewingXmlSource; + // CPPONLY: } + + // [NOCPP[ + + /** + * Returns the mappingLangToXmlLang. + * + * @return the mappingLangToXmlLang + */ + public boolean isMappingLangToXmlLang() { + return mappingLangToXmlLang == AttributeName.HTML_LANG; + } + + /** + * Sets the mappingLangToXmlLang. 
+ * + * @param mappingLangToXmlLang + * the mappingLangToXmlLang to set + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG + : AttributeName.HTML; + } + + /** + * Sets the error handler. + * + * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; + } + + public ErrorHandler getErrorHandler() { + return this.errorHandler; + } + + /** + * Sets the commentPolicy. + * + * @param commentPolicy + * the commentPolicy to set + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + this.commentPolicy = commentPolicy; + } + + /** + * Sets the contentNonXmlCharPolicy. + * + * @param contentNonXmlCharPolicy + * the contentNonXmlCharPolicy to set + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) { + throw new IllegalArgumentException( + "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW."); + } + } + + /** + * Sets the contentSpacePolicy. + * + * @param contentSpacePolicy + * the contentSpacePolicy to set + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + } + + /** + * Sets the xmlnsPolicy. + * + * @param xmlnsPolicy + * the xmlnsPolicy to set + */ + public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + if (xmlnsPolicy == XmlViolationPolicy.FATAL) { + throw new IllegalArgumentException("Can't use FATAL here."); + } + this.xmlnsPolicy = xmlnsPolicy; + } + + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + } + + /** + * Sets the html4ModeCompatibleWithXhtml1Schemata. 
+ * + * @param html4ModeCompatibleWithXhtml1Schemata + * the html4ModeCompatibleWithXhtml1Schemata to set + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; + } + + // ]NOCPP] + + // For the token handler to call + /** + * Sets the tokenizer state and the associated element name. This should + * only ever be used to put the tokenizer into one of the states that have + * a special end tag expectation. + * + * @param specialTokenizerState + * the tokenizer state to set + * @param endTagExpectation + * the expected end tag for transitioning back to normal + */ + public void setStateAndEndTagExpectation(int specialTokenizerState, + @Local String endTagExpectation) { + this.stateSave = specialTokenizerState; + if (specialTokenizerState == Tokenizer.DATA) { + return; + } + @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation); + this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0, + asArray.length, interner); + endTagExpectationToArray(); + } + + /** + * Sets the tokenizer state and the associated element name. This should + * only ever be used to put the tokenizer into one of the states that have + * a special end tag expectation. 
+ * + * @param specialTokenizerState + * the tokenizer state to set + * @param endTagExpectation + * the expected end tag for transitioning back to normal + */ + public void setStateAndEndTagExpectation(int specialTokenizerState, + ElementName endTagExpectation) { + this.stateSave = specialTokenizerState; + this.endTagExpectation = endTagExpectation; + endTagExpectationToArray(); + } + + private void endTagExpectationToArray() { + switch (endTagExpectation.getGroup()) { + case TreeBuilder.TITLE: + endTagExpectationAsArray = TITLE_ARR; + return; + case TreeBuilder.SCRIPT: + endTagExpectationAsArray = SCRIPT_ARR; + return; + case TreeBuilder.STYLE: + endTagExpectationAsArray = STYLE_ARR; + return; + case TreeBuilder.PLAINTEXT: + endTagExpectationAsArray = PLAINTEXT_ARR; + return; + case TreeBuilder.XMP: + endTagExpectationAsArray = XMP_ARR; + return; + case TreeBuilder.TEXTAREA: + endTagExpectationAsArray = TEXTAREA_ARR; + return; + case TreeBuilder.IFRAME: + endTagExpectationAsArray = IFRAME_ARR; + return; + case TreeBuilder.NOEMBED: + endTagExpectationAsArray = NOEMBED_ARR; + return; + case TreeBuilder.NOSCRIPT: + endTagExpectationAsArray = NOSCRIPT_ARR; + return; + case TreeBuilder.NOFRAMES: + endTagExpectationAsArray = NOFRAMES_ARR; + return; + default: + assert false: "Bad end tag expectation."; + return; + } + } + + /** + * For C++ use only. + */ + public void setLineNumber(int line) { + // CPPONLY: this.attributeLine = line; // XXX is this needed? 
+ this.line = line; + } + + // start Locator impl + + /** + * @see org.xml.sax.Locator#getLineNumber() + */ + @Inline public int getLineNumber() { + return line; + } + + // [NOCPP[ + + /** + * @see org.xml.sax.Locator#getColumnNumber() + */ + @Inline public int getColumnNumber() { + return -1; + } + + /** + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + return publicId; + } + + /** + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + return systemId; + } + + // end Locator impl + + // end public API + + public void notifyAboutMetaBoundary() { + metaBoundaryPassed = true; + } + + void turnOnAdditionalHtml4Errors() { + html4 = true; + } + + // ]NOCPP] + + HtmlAttributes emptyAttributes() { + // [NOCPP[ + if (newAttributesEachTime) { + return new HtmlAttributes(mappingLangToXmlLang); + } else { + // ]NOCPP] + return HtmlAttributes.EMPTY_ATTRIBUTES; + // [NOCPP[ + } + // ]NOCPP] + } + + @Inline private void appendCharRefBuf(char c) { + // CPPONLY: assert charRefBufLen < charRefBuf.length: + // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!"; + charRefBuf[charRefBufLen++] = c; + } + + private void emitOrAppendCharRefBuf(int returnState) throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendCharRefBufToStrBuf(); + } else { + if (charRefBufLen > 0) { + tokenHandler.characters(charRefBuf, 0, charRefBufLen); + charRefBufLen = 0; + } + } + } + + @Inline private void clearStrBufAfterUse() { + strBufLen = 0; + } + + @Inline private void clearStrBufBeforeUse() { + assert strBufLen == 0: "strBufLen not reset after previous use!"; + strBufLen = 0; // no-op in the absence of bugs + } + + @Inline private void clearStrBufAfterOneHyphen() { + assert strBufLen == 1: "strBufLen length not one!"; + assert strBuf[0] == '-': "strBuf does not start with a hyphen!"; + strBufLen = 0; + } + + /** + * Appends to the buffer. 
+ * + * @param c + * the UTF-16 code unit to append + */ + @Inline private void appendStrBuf(char c) { + // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient."; + // CPPONLY: if (strBufLen == strBuf.length) { + // CPPONLY: if (!EnsureBufferSpace(1)) { + // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure"; + // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not + // CPPONLY: } + strBuf[strBufLen++] = c; + } + + /** + * The buffer as a String. Currently only used for error reporting. + * + *

+ * C++ memory note: The return value must be released. + * + * @return the buffer as a string + */ + protected String strBufToString() { + String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , tokenHandler + ); + clearStrBufAfterUse(); + return str; + } + + /** + * Stores the buffer as a local name in doctypeName and clears the + * buffer. The stored name is released in emitDoctypeToken(). + * + * Nothing is returned; the result is kept in the doctypeName field. + */ + private void strBufToDoctypeName() { + doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen, + interner); + clearStrBufAfterUse(); + } + + /** + * Emits the buffer as character tokens. + * + * @throws SAXException + * if the token handler threw + */ + private void emitStrBuf() throws SAXException { + if (strBufLen > 0) { + tokenHandler.characters(strBuf, 0, strBufLen); + clearStrBufAfterUse(); + } + } + + @Inline private void appendSecondHyphenToBogusComment() throws SAXException { + // [NOCPP[ + switch (commentPolicy) { + case ALTER_INFOSET: + appendStrBuf(' '); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + // ]NOCPP] + appendStrBuf('-'); + // [NOCPP[ + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + break; + } + // ]NOCPP] + } + + // [NOCPP[ + private void maybeAppendSpaceToBogusComment() throws SAXException { + switch (commentPolicy) { + case ALTER_INFOSET: + appendStrBuf(' '); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); + break; + } + } + + // ]NOCPP] + + @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c) + throws SAXException { + errConsecutiveHyphens(); + // [NOCPP[ + switch (commentPolicy) { + case ALTER_INFOSET: + strBufLen--; + // WARNING!!! 
This expands the worst case of the buffer length + // given the length of input! + appendStrBuf(' '); + appendStrBuf('-'); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + // ]NOCPP] + appendStrBuf(c); + // [NOCPP[ + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + break; + } + // ]NOCPP] + } + + private void appendStrBuf(@NoLength char[] buffer, int offset, int length) { + int newLen = strBufLen + length; + // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient."; + // CPPONLY: if (strBuf.length < newLen) { + // CPPONLY: if (!EnsureBufferSpace(length)) { + // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure"; + // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not + // CPPONLY: } + System.arraycopy(buffer, offset, strBuf, strBufLen, length); + strBufLen = newLen; + } + + /** + * Append the contents of the char reference buffer to the main one. + */ + @Inline private void appendCharRefBufToStrBuf() { + appendStrBuf(charRefBuf, 0, charRefBufLen); + charRefBufLen = 0; + } + + /** + * Emits the current comment token. + * + * @param pos + * TODO + * + * @throws SAXException + */ + private void emitComment(int provisionalHyphens, int pos) + throws SAXException { + // [NOCPP[ + if (wantsComments) { + // ]NOCPP] + tokenHandler.comment(strBuf, 0, strBufLen + - provisionalHyphens); + // [NOCPP[ + } + // ]NOCPP] + clearStrBufAfterUse(); + cstart = pos + 1; + } + + /** + * Flushes coalesced character tokens. 
+ * + * @param buf + * the buffer the pending characters are read from + * @param pos + * the index just past the run to flush (the run starts at cstart) + * + * @throws SAXException + */ + protected void flushChars(@NoLength char[] buf, int pos) + throws SAXException { + if (pos > cstart) { + tokenHandler.characters(buf, cstart, pos - cstart); + } + cstart = Integer.MAX_VALUE; + } + + /** + * Reports a condition that would make the infoset incompatible with XML + * 1.0 as fatal. + * + * @param message + * the message + * @throws SAXException + * @throws SAXParseException + */ + public void fatal(String message) throws SAXException { + SAXParseException spe = new SAXParseException(message, this); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + /** + * Reports a Parse Error. + * + * @param message + * the message + * @throws SAXException + */ + public void err(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + errorHandler.error(spe); + } + + public void errTreeBuilder(String message) throws SAXException { + ErrorHandler eh = null; + if (tokenHandler instanceof TreeBuilder) { + TreeBuilder treeBuilder = (TreeBuilder) tokenHandler; + eh = treeBuilder.getErrorHandler(); + } + if (eh == null) { + eh = errorHandler; + } + if (eh == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + eh.error(spe); + } + + /** + * Reports a warning + * + * @param message + * the message + * @throws SAXException + */ + public void warn(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + errorHandler.warning(spe); + } + + private void strBufToElementNameString() { + tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen, + interner); + clearStrBufAfterUse(); + } + + private int emitCurrentTagToken(boolean selfClosing, int pos) + throws SAXException { + cstart = pos + 1; + maybeErrSlashInEndTag(selfClosing); + 
stateSave = Tokenizer.DATA; + HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES + : attributes); + if (endTag) { + /* + * When an end tag token is emitted, the content model flag must be + * switched to the PCDATA state. + */ + maybeErrAttributesOnEndTag(attrs); + // CPPONLY: if (!viewingXmlSource) { + tokenHandler.endTag(tagName); + // CPPONLY: } + // CPPONLY: if (newAttributesEachTime) { + // CPPONLY: Portability.delete(attributes); + // CPPONLY: attributes = null; + // CPPONLY: } + } else { + // CPPONLY: if (viewingXmlSource) { + // CPPONLY: assert newAttributesEachTime; + // CPPONLY: Portability.delete(attributes); + // CPPONLY: attributes = null; + // CPPONLY: } else { + tokenHandler.startTag(tagName, attrs, selfClosing); + // CPPONLY: } + } + tagName.release(); + tagName = null; + if (newAttributesEachTime) { + attributes = null; + } else { + attributes.clear(mappingLangToXmlLang); + } + /* + * The token handler may have called setStateAndEndTagExpectation + * and changed stateSave since the start of this method. + */ + return stateSave; + } + + private void attributeNameComplete() throws SAXException { + attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen + // [NOCPP[ + , namePolicy != XmlViolationPolicy.ALLOW + // ]NOCPP] + , interner); + clearStrBufAfterUse(); + + if (attributes == null) { + attributes = new HtmlAttributes(mappingLangToXmlLang); + } + + /* + * When the user agent leaves the attribute name state (and before + * emitting the tag token, if appropriate), the complete attribute's + * name must be compared to the other attributes on the same token; if + * there is already an attribute on the token with the exact same name, + * then this is a parse error and the new attribute must be dropped, + * along with the value that gets associated with it (if any). 
+ */ + if (attributes.contains(attributeName)) { + errDuplicateAttribute(); + attributeName.release(); + attributeName = null; + } + } + + private void addAttributeWithoutValue() throws SAXException { + noteAttributeWithoutValue(); + + // [NOCPP[ + if (metaBoundaryPassed && AttributeName.CHARSET == attributeName + && ElementName.META == tagName) { + err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes."); + } + // ]NOCPP] + if (attributeName != null) { + // [NOCPP[ + if (html4) { + if (attributeName.isBoolean()) { + if (html4ModeCompatibleWithXhtml1Schemata) { + attributes.addAttribute(attributeName, + attributeName.getLocal(AttributeName.HTML), + xmlnsPolicy); + } else { + attributes.addAttribute(attributeName, "", xmlnsPolicy); + } + } else { + if (AttributeName.BORDER != attributeName) { + err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)"); + attributes.addAttribute(attributeName, "", xmlnsPolicy); + } + } + } else { + if (AttributeName.SRC == attributeName + || AttributeName.HREF == attributeName) { + warn("Attribute \u201C" + + attributeName.getLocal(AttributeName.HTML) + + "\u201D without an explicit value seen. 
The attribute may be dropped by IE7."); + } + // ]NOCPP] + attributes.addAttribute(attributeName, + Portability.newEmptyString() + // [NOCPP[ + , xmlnsPolicy + // ]NOCPP] + // CPPONLY: , attributeLine + ); + // [NOCPP[ + } + // ]NOCPP] + attributeName = null; // attributeName has been adopted by the + // |attributes| object + } else { + clearStrBufAfterUse(); + } + } + + private void addAttributeWithValue() throws SAXException { + // [NOCPP[ + if (metaBoundaryPassed && ElementName.META == tagName + && AttributeName.CHARSET == attributeName) { + err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes."); + } + // ]NOCPP] + if (attributeName != null) { + String val = strBufToString(); // Ownership transferred to + // HtmlAttributes + // CPPONLY: if (mViewSource) { + // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val); + // CPPONLY: } + // [NOCPP[ + if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata + && attributeName.isCaseFolded()) { + val = newAsciiLowerCaseStringFromString(val); + } + // ]NOCPP] + attributes.addAttribute(attributeName, val + // [NOCPP[ + , xmlnsPolicy + // ]NOCPP] + // CPPONLY: , attributeLine + ); + attributeName = null; // attributeName has been adopted by the + // |attributes| object + } else { + // We have a duplicate attribute. Explicitly discard its value. 
+ clearStrBufAfterUse(); + } + } + + // [NOCPP[ + + private static String newAsciiLowerCaseStringFromString(String str) { + if (str == null) { + return null; + } + char[] buf = new char[str.length()]; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + buf[i] = c; + } + return new String(buf); + } + + protected void startErrorReporting() throws SAXException { + + } + + // ]NOCPP] + + public void start() throws SAXException { + initializeWithoutStarting(); + tokenHandler.startTokenization(this); + // [NOCPP[ + startErrorReporting(); + // ]NOCPP] + } + + public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException { + int state = stateSave; + int returnState = returnStateSave; + char c = '\u0000'; + shouldSuspend = false; + lastCR = false; + + int start = buffer.getStart(); + int end = buffer.getEnd(); + + // In C++, the caller of tokenizeBuffer needs to do this explicitly. + // [NOCPP[ + ensureBufferSpace(end - start); + // ]NOCPP] + + /** + * The index of the last char read from buf. + */ + int pos = start - 1; + + /** + * The index of the first char in buf that is + * part of a coalesced run of character tokens or + * Integer.MAX_VALUE if there is not a current run being + * coalesced. + */ + switch (state) { + case DATA: + case RCDATA: + case SCRIPT_DATA: + case PLAINTEXT: + case RAWTEXT: + case CDATA_SECTION: + case SCRIPT_DATA_ESCAPED: + case SCRIPT_DATA_ESCAPE_START: + case SCRIPT_DATA_ESCAPE_START_DASH: + case SCRIPT_DATA_ESCAPED_DASH: + case SCRIPT_DATA_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_START: + case SCRIPT_DATA_DOUBLE_ESCAPED: + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_END: + cstart = start; + break; + default: + cstart = Integer.MAX_VALUE; + break; + } + + /** + * The number of chars in buf that have + * meaning. 
(The rest of the array is garbage and should not be + * examined.) + */ + // CPPONLY: if (mViewSource) { + // CPPONLY: mViewSource.SetBuffer(buffer); + // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1); + // CPPONLY: } else { + // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: } + // [NOCPP[ + pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, + end); + // ]NOCPP] + if (pos == end) { + // exiting due to end of buffer + buffer.setStart(pos); + } else { + buffer.setStart(pos + 1); + } + return lastCR; + } + + // [NOCPP[ + private void ensureBufferSpace(int inputLength) throws SAXException { + // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. + // Adding to the general worst case instead of only the + // TreeBuilder-exposed worst case to avoid re-introducing a bug when + // unifying the tokenizer and tree builder buffers in the future. + int worstCase = strBufLen + inputLength + charRefBufLen + 2; + tokenHandler.ensureBufferSpace(worstCase); + if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) { + // When altering infoset, if the comment contents are consecutive + // hyphens, each hyphen generates a space, too. These buffer + // contents never get emitted as characters() to the tokenHandler, + // which is why this calculation happens after the call to + // ensureBufferSpace on tokenHandler. + worstCase *= 2; + } + if (strBuf == null) { + // Add an arbitrary small value to avoid immediate reallocation + // once there are a few characters in the buffer. + strBuf = new char[worstCase + 128]; + } else if (worstCase > strBuf.length) { + // HotSpot reportedly allocates memory with 8-byte accuracy, so + // there's no point in trying to do math here to avoid slop. 
+ // Maybe we should add some small constant to worstCase here + // but not doing that without profiling. In C++ with jemalloc, + // the corresponding method should do math to round up here + // to avoid slop. + char[] newBuf = new char[worstCase]; + System.arraycopy(strBuf, 0, newBuf, 0, strBufLen); + strBuf = newBuf; + } + } + // ]NOCPP] + + @SuppressWarnings("unused") private int stateLoop(int state, char c, + int pos, @NoLength char[] buf, boolean reconsume, int returnState, + int endPos) throws SAXException { + /* + * Idioms used in this code: + * + * + * Consuming the next input character + * + * To consume the next input character, the code does this: if (++pos == + * endPos) { break stateloop; } c = checkChar(buf, pos); + * + * + * Staying in a state + * + * When there's a state that the tokenizer may stay in over multiple + * input characters, the state has a wrapper |for(;;)| loop and staying + * in the state continues the loop. + * + * + * Switching to another state + * + * To switch to another state, the code sets the state variable to the + * magic number of the new state. Then it either continues stateloop or + * breaks out of the state's own wrapper loop if the target state is + * right after the current state in source order. (This is a partial + * workaround for Java's lack of goto.) + * + * + * Reconsume support + * + * The spec sometimes says that an input character is reconsumed in + * another state. If a state can ever be entered so that an input + * character can be reconsumed in it, the state's code starts with an + * |if (reconsume)| that sets reconsume to false and skips over the + * normal code for consuming a new character. + * + * To reconsume the current character in another state, the code sets + * |reconsume| to true and then switches to the other state. + * + * + * Emitting character tokens + * + * This method emits character tokens lazily. 
Whenever a new range of + * character tokens starts, the field cstart must be set to the start + * index of the range. The flushChars() method must be called at the end + * of a range to flush it. + * + * + * U+0000 handling + * + * The various states have to handle the replacement of U+0000 with + * U+FFFD. However, if U+0000 would be reconsumed in another state, the + * replacement doesn't need to happen, because it's handled by the + * reconsuming state. + * + * + * LF handling + * + * Every state needs to increment the line number upon LF unless the LF + * gets reconsumed by another state which increments the line number. + * + * + * CR handling + * + * Every state needs to handle CR unless the CR gets reconsumed and is + * handled by the reconsuming state. The CR needs to be handled as if it + * were an LF, the lastCR field must be set to true and then this + * method must return. The IO driver will then swallow the next + * character if it is an LF to coalesce CRLF. + */ + stateloop: for (;;) { + switch (state) { + case DATA: + dataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in data state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the tag + * open state.
+ */ + flushChars(buf, pos); + + state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); + break dataloop; // FALL THROUGH continue + // stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the input character as a + * character token. + * + * Stay in the data state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case TAG_OPEN: + tagopenloop: for (;;) { + /* + * The behavior of this state depends on the content + * model flag. + */ + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the content model flag is set to the PCDATA state + * Consume the next input character: + */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to U+005A + * LATIN CAPITAL LETTER Z Create a new start tag + * token, + */ + endTag = false; + /* + * set its tag name to the lowercase version of the + * input character (add 0x0020 to the character's + * code point), + */ + clearStrBufBeforeUse(); + appendStrBuf((char) (c + 0x20)); + /* then switch to the tag name state. */ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + /* + * (Don't emit the token yet; further details will + * be filled in before it is emitted.) + */ + break tagopenloop; + // continue stateloop; + } else if (c >= 'a' && c <= 'z') { + /* + * U+0061 LATIN SMALL LETTER A through to U+007A + * LATIN SMALL LETTER Z Create a new start tag + * token, + */ + endTag = false; + /* + * set its tag name to the input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* then switch to the tag name state. */ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + /* + * (Don't emit the token yet; further details will + * be filled in before it is emitted.) 
+ */ + break tagopenloop; + // continue stateloop; + } + switch (c) { + case '!': + /* + * U+0021 EXCLAMATION MARK (!) Switch to the + * markup declaration open state. + */ + state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the close tag + * open state. + */ + state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos); + continue stateloop; + case '?': + // CPPONLY: if (viewingXmlSource) { + // CPPONLY: state = transition(state, + // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION, + // CPPONLY: reconsume, + // CPPONLY: pos); + // CPPONLY: continue stateloop; + // CPPONLY: } + /* + * U+003F QUESTION MARK (?) Parse error. + */ + errProcessingInstruction(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errLtGt(); + /* + * Emit a U+003C LESS-THAN SIGN character token + * and a U+003E GREATER-THAN SIGN character + * token. + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 2); + /* Switch to the data state. */ + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + /* + * Anything else Parse error. + */ + errBadCharAfterLt(c); + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. 
+ */ + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + } + } + // FALL THROUGH DON'T REORDER + case TAG_NAME: + tagnameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + strBufToElementNameString(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + strBufToElementNameString(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break tagnameloop; + // continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + strBufToElementNameString(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + strBufToElementNameString(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the current input + * character (add 0x0020 to the character's + * code point) to the current tag token's + * tag name. + */ + c += 0x20; + } + /* + * Anything else Append the current input + * character to the current tag token's tag + * name. + */ + appendStrBuf(c); + /* + * Stay in the tag name state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_NAME: + beforeattributenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before attribute name state. + */ + continue; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + case '=': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS + * SIGN (=) Parse error. + */ + errBadCharBeforeAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + /* + * Anything else Start a new attribute in the + * current tag token. + */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Set that + * attribute's name to the lowercase version + * of the current input character (add + * 0x0020 to the character's code point) + */ + c += 0x20; + } + // CPPONLY: attributeLine = line; + /* + * Set that attribute's name to the current + * input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * and its value to the empty string. 
+ */ + // Will do later. + /* + * Switch to the attribute name state. + */ + state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); + break beforeattributenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_NAME: + attributenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + attributeNameComplete(); + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the after attribute name state. + */ + attributeNameComplete(); + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + attributeNameComplete(); + addAttributeWithoutValue(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '=': + /* + * U+003D EQUALS SIGN (=) Switch to the before + * attribute value state. + */ + attributeNameComplete(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); + break attributenameloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + attributeNameComplete(); + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) Parse error. 
+ */ + errQuoteOrLtInAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the current input + * character (add 0x0020 to the character's + * code point) to the current attribute's + * name. + */ + c += 0x20; + } + /* + * Anything else Append the current input + * character to the current attribute's name. + */ + appendStrBuf(c); + /* + * Stay in the attribute name state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_VALUE: + beforeattributevalueloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before attribute value state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the + * attribute value (double-quoted) state. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos); + break beforeattributevalueloop; + // continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the attribute + * value (unquoted) state and reconsume this + * input character. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + reconsume = true; + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + noteUnquotedAttributeValue(); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the attribute + * value (single-quoted) state. 
+ */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errAttributeValueMissing(); + /* + * Emit the current tag token. + */ + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '<': + case '=': + case '`': + /* + * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN + * (=) U+0060 GRAVE ACCENT (`) + */ + errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + // [NOCPP[ + errHtml4NonNameInUnquotedAttribute(c); + // ]NOCPP] + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * Switch to the attribute value (unquoted) + * state. + */ + + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + noteUnquotedAttributeValue(); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + attributevaluedoublequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * attribute value (quoted) state. 
+ */ + addAttributeWithValue(); + + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); + break attributevaluedoublequotedloop; + // continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * additional allowed character being U+0022 + * QUOTATION MARK ("). + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\"'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (double-quoted) + * state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_ATTRIBUTE_VALUE_QUOTED: + afterattributevaluequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. 
+ */ + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + break afterattributevaluequotedloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + default: + /* + * Anything else Parse error. + */ + errNoSpaceBetweenAttributes(); + /* + * Reconsume the character in the before + * attribute name state. + */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case SELF_CLOSING_START_TAG: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Set the self-closing + * flag of the current tag token. Emit the current + * tag token. + */ + // [NOCPP[ + errHtml4XmlVoidSyntax(); + // ]NOCPP] + state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + default: + /* Anything else Parse error. */ + errSlashNotFollowedByGt(); + /* + * Reconsume the character in the before attribute + * name state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_UNQUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + addAttributeWithValue(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + addAttributeWithValue(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * additional allowed character being U+003E + * GREATER-THAN SIGN (>) + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('>'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + addAttributeWithValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '<': + case '\"': + case '\'': + case '=': + case '`': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS + * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error. 
+ */ + errUnquotedAttributeValOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + // fall through + default: + // [NOCPP] + errHtml4NonNameInUnquotedAttribute(c); + // ]NOCPP] + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (unquoted) state. + */ + continue; + } + } + // XXX reorder point + case AFTER_ATTRIBUTE_NAME: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after attribute name state. + */ + continue; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + addAttributeWithoutValue(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '=': + /* + * U+003D EQUALS SIGN (=) Switch to the before + * attribute value state. + */ + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + errQuoteOrLtInAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + addAttributeWithoutValue(); + /* + * Anything else Start a new attribute in the + * current tag token. 
+ */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Set that + * attribute's name to the lowercase version + * of the current input character (add + * 0x0020 to the character's code point) + */ + c += 0x20; + } + /* + * Set that attribute's name to the current + * input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * and its value to the empty string. + */ + // Will do later. + /* + * Switch to the attribute name state. + */ + state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OPEN: + markupdeclarationopenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the next two characters are both U+002D + * HYPHEN-MINUS characters (-), consume those two + * characters, create a comment token whose data is the + * empty string, and switch to the comment start state. + * + * Otherwise, if the next seven characters are an ASCII + * case-insensitive match for the word "DOCTYPE", then + * consume those characters and switch to the DOCTYPE + * state. + * + * Otherwise, if the insertion mode is + * "in foreign content" and the current node is not an + * element in the HTML namespace and the next seven + * characters are a case-sensitive match for the string + * "[CDATA[" (the five uppercase letters "CDATA" with a + * U+005B LEFT SQUARE BRACKET character before and + * after), then consume those characters and switch to + * the CDATA section state. + * + * Otherwise, it is a parse error. Switch to the bogus + * comment state. The next character that is consumed, + * if any, is the first character that will be in the + * comment.
+ */ + switch (c) { + case '-': + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos); + break markupdeclarationopenloop; + // continue stateloop; + case 'd': + case 'D': + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos); + continue stateloop; + case '[': + if (tokenHandler.cdataSectionAllowed()) { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = transition(state, Tokenizer.CDATA_START, reconsume, pos); + continue stateloop; + } + // else fall through + default: + errBogusComment(); + clearStrBufBeforeUse(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case MARKUP_DECLARATION_HYPHEN: + markupdeclarationhyphenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '\u0000': + break stateloop; + case '-': + clearStrBufAfterOneHyphen(); + state = transition(state, Tokenizer.COMMENT_START, reconsume, pos); + break markupdeclarationhyphenloop; + // continue stateloop; + default: + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_START: + commentstartloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment start state + * + * + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * start dash state. + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errPrematureEndOfComment(); + /* Emit the comment token. 
*/ + emitComment(0, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break commentstartloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the input character to + * the comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break commentstartloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT: + commentloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment state Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * end dash state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); + break commentloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the input character to + * the comment token's data. + */ + appendStrBuf(c); + /* + * Stay in the comment state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END_DASH: + commentenddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end dash state Consume the next input + * character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * end state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); + break commentenddashloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append a U+002D HYPHEN-MINUS + * (-) character and the input character to the + * comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END: + commentendloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end state Consume the next input + * character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the comment + * token. + */ + emitComment(2, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + /* U+002D HYPHEN-MINUS (-) Parse error. */ + /* + * Append a U+002D HYPHEN-MINUS (-) character to + * the comment token's data. + */ + adjustDoubleHyphenAndAppendToStrBufAndErr(c); + /* + * Stay in the comment end state.
+ */ + continue; + case '\r': + adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + adjustDoubleHyphenAndAppendToStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '!': + errHyphenHyphenBang(); + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Append two U+002D HYPHEN-MINUS (-) characters + * and the input character to the comment + * token's data. + */ + adjustDoubleHyphenAndAppendToStrBufAndErr(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case COMMENT_END_BANG: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end bang state + * + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the comment + * token. + */ + emitComment(3, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + /* + * Append two U+002D HYPHEN-MINUS (-) characters + * and a U+0021 EXCLAMATION MARK (!) character + * to the comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment end dash state. + */ + state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append two U+002D HYPHEN-MINUS + * (-) characters, a U+0021 EXCLAMATION MARK (!) + * character, and the input character to the + * comment token's data. Switch to the comment + * state. 
+ */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case COMMENT_START_DASH: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment start dash state + * + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment end + * state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); + continue stateloop; + case '>': + errPrematureEndOfComment(); + /* Emit the comment token. */ + emitComment(1, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Append a U+002D HYPHEN-MINUS character (-) and + * the current input character to the comment + * token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. 
+ */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + // XXX reorder point + case CDATA_START: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // CDATA_LSQB.length + if (c == Tokenizer.CDATA_LSQB[index]) { + appendStrBuf(c); + } else { + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + clearStrBufAfterUse(); + cstart = pos; // start coalescing + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + break; // FALL THROUGH continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_SECTION: + cdatasectionloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case ']': + flushChars(buf, pos); + state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos); + break cdatasectionloop; // FALL THROUGH + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + default: + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_RSQB: + cdatarsqb: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case ']': + state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos); + break cdatarsqb; + default: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, + 1); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_RSQB_RSQB: + cdatarsqbrsqb: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch 
(c) { + case ']': + // Saw a third ]. Emit one ] (logically the + // first one) and stay in this state to + // remember that the last two characters seen + // have been ]]. + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); + continue; + case '>': + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + attributevaluesinglequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * attribute value (quoted) state. + */ + addAttributeWithValue(); + + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); + continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * + additional allowed character being U+0027 + * APOSTROPHE ('). + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\''); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + break attributevaluesinglequotedloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (double-quoted) + * state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case CONSUME_CHARACTER_REFERENCE: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (c == '\u0000') { + break stateloop; + } + /* + * Unlike the definition is the spec, this state does not + * return a value and never requires the caller to + * backtrack. This state takes care of emitting characters + * or appending to the current attribute value. It also + * takes care of that in the case when consuming the + * character reference fails. + */ + /* + * This section defines how to consume a character + * reference. This definition is used when parsing character + * references in text and in attributes. + * + * The behavior depends on the identity of the next + * character (the one immediately after the U+0026 AMPERSAND + * character): + */ + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': // we'll reconsume! + case '\u000C': + case '<': + case '&': + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + case '#': + /* + * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER + * SIGN. + */ + appendCharRefBuf('#'); + state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos); + continue stateloop; + default: + if (c == additional) { + emitOrAppendCharRefBuf(returnState); + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + if (c >= 'a' && c <= 'z') { + firstCharKey = c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + firstCharKey = c - 'A'; + } else { + // No match + /* + * If no match can be made, then this is a parse + * error. 
+ */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + // Didn't fail yet + appendCharRefBuf(c); + state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos); + // FALL THROUGH continue stateloop; + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CHARACTER_REFERENCE_HILO_LOOKUP: + { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (c == '\u0000') { + break stateloop; + } + /* + * The data structure is as follows: + * + * HILO_ACCEL is a two-dimensional int array whose major + * index corresponds to the second character of the + * character reference (code point as index) and the + * minor index corresponds to the first character of the + * character reference (packed so that A-Z runs from 0 + * to 25 and a-z runs from 26 to 51). This layout makes + * it easier to use the sparseness of the data structure + * to omit parts of it: The second dimension of the + * table is null when no character reference starts with + * the character corresponding to that row. + * + * The int value HILO_ACCEL (by these indices) is zero + * if there exists no character reference starting with + * that two-letter prefix. Otherwise, the value is an + * int that packs two shorts so that the higher short is + * the index of the highest character reference name + * with that prefix in NAMES and the lower short + * corresponds to the index of the lowest character + * reference name with that prefix. (It happens that the + * first two character reference names share their + * prefix so the packed int cannot be 0 by packing the + * two shorts.) + * + * NAMES is an array of byte arrays where each byte + * array encodes the name of a character reference as + * ASCII. The names omit the first two letters of the + * name.
(Since storing the first two letters would be + * redundant with the data contained in HILO_ACCEL.) The + * entries are lexically sorted. + * + * For a given index in NAMES, the same index in VALUES + * contains the corresponding expansion as an array of + * two UTF-16 code units (either the character and + * U+0000 or a surrogate pair). + */ + int hilo = 0; + if (c <= 'z') { + @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c]; + if (row != null) { + hilo = row[firstCharKey]; + } + } + if (hilo == 0) { + /* + * If no match can be made, then this is a parse + * error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + // Didn't fail yet + appendCharRefBuf(c); + lo = hilo & 0xFFFF; + hi = hilo >> 16; + entCol = -1; + candidate = -1; + charRefBufMark = 0; + state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos); + // FALL THROUGH continue stateloop; + } + case CHARACTER_REFERENCE_TAIL: + outer: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (c == '\u0000') { + break stateloop; + } + entCol++; + /* + * Consume the maximum number of characters possible, + * with the consumed characters matching one of the + * identifiers in the first column of the named + * character references table (in a case-sensitive + * manner).
+ */ + loloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > NamedCharacters.NAMES[lo].length()) { + break outer; + } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) { + lo++; + } else { + break loloop; + } + } + + hiloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[hi].length()) { + break hiloop; + } + if (entCol > NamedCharacters.NAMES[hi].length()) { + break outer; + } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) { + hi--; + } else { + break hiloop; + } + } + + if (c == ';') { + // If we see a semicolon, there cannot be a + // longer match. Break the loop. However, before + // breaking, take the longest match so far as the + // candidate, if we are just about to complete a + // match. + if (entCol + 1 == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + } + break outer; + } + + if (hi < lo) { + break outer; + } + appendCharRefBuf(c); + continue; + } + + if (candidate == -1) { + // reconsume deals with CR, LF or nul + /* + * If no match can be made, then this is a parse error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + // c can't be CR, LF or nul if we got here + @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; + if (candidateName.length() == 0 + || candidateName.charAt(candidateName.length() - 1) != ';') { + /* + * If the last character matched is not a U+003B + * SEMICOLON (;), there is a parse error. 
+ */ + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + /* + * If the entity is being consumed as part of an + * attribute, and the last character matched is + * not a U+003B SEMICOLON (;), + */ + char ch; + if (charRefBufMark == charRefBufLen) { + ch = c; + } else { + ch = charRefBuf[charRefBufMark]; + } + if (ch == '=' || (ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z')) { + /* + * and the next character is either a U+003D + * EQUALS SIGN character (=) or in the range + * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, + * U+0041 LATIN CAPITAL LETTER A to U+005A + * LATIN CAPITAL LETTER Z, or U+0061 LATIN + * SMALL LETTER A to U+007A LATIN SMALL + * LETTER Z, then, for historical reasons, + * all the characters that were matched + * after the U+0026 AMPERSAND (&) must be + * unconsumed, and nothing is returned. + */ + errNoNamedCharacterMatch(); + appendCharRefBufToStrBuf(); + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } else { + errNotSemicolonTerminated(); + } + } + + /* + * Otherwise, return a character token for the character + * corresponding to the entity name (as given by the + * second column of the named character references + * table). + */ + // CPPONLY: completedNamedCharacterReference(); + @Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; + if ( + // [NOCPP[ + val.length == 1 + // ]NOCPP] + // CPPONLY: val[1] == 0 + ) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + // this is so complicated! 
+ if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + // charRefBufLen will be zeroed below! + + // Check if we broke out early with c being the last + // character that matched as opposed to being the + // first one that didn't match. In the case of an + // early break, the next run on text should start + // *after* the current character and the current + // character shouldn't be reconsumed. + boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); + charRefBufLen = 0; + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = earlyBreak ? pos + 1 : pos; + } + reconsume = !earlyBreak; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + /* + * If the markup contains I'm &notit; I tell you, the + * entity is parsed as "not", as in, I'm ¬it; I tell + * you. But if the markup was I'm &notin; I tell you, + * the entity would be parsed as "notin;", resulting in + * I'm ∉ I tell you. + */ + } + // XXX reorder point + case CONSUME_NCR: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + value = 0; + seenDigits = false; + /* + * The behavior further depends on the character after the + * U+0023 NUMBER SIGN: + */ + switch (c) { + case 'x': + case 'X': + + /* + * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL + * LETTER X Consume the X. + * + * Follow the steps below, but using the range of + * characters U+0030 DIGIT ZERO through to U+0039 + * DIGIT NINE, U+0061 LATIN SMALL LETTER A through + * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN + * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL + * LETTER F (in other words, 0-9, A-F, a-f). + * + * When it comes to interpreting the number, + * interpret it as a hexadecimal number.
+ */ + appendCharRefBuf(c); + state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos); + continue stateloop; + default: + /* + * Anything else Follow the steps below, but using + * the range of characters U+0030 DIGIT ZERO through + * to U+0039 DIGIT NINE (i.e. just 0-9). + * + * When it comes to interpreting the number, + * interpret it as a decimal number. + */ + reconsume = true; + state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos); + // FALL THROUGH continue stateloop; + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case DECIMAL_NRC_LOOP: + decimalloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume as many characters as match the range of + * characters given above. + */ + assert value >= 0: "value must not become negative."; + if (c >= '0' && c <= '9') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + // FALL THROUGH continue stateloop; + break decimalloop; + } else { + errNoDigitsInNCR(); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } else { + /* + * If no characters match the range, then don't + * consume any characters (and unconsume the U+0023 + * NUMBER SIGN character and, if appropriate, the X + * character). This is a parse error; nothing is + * returned. + * + * Otherwise, if the next character is a U+003B + * SEMICOLON, consume that too. If it isn't, there + * is a parse error. 
+ */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + errCharRefLacksSemicolon(); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + // FALL THROUGH continue stateloop; + break decimalloop; + } + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case HANDLE_NCR_VALUE: + // WARNING previous state sets reconsume + // We are not going to emit the contents of charRefBuf. + charRefBufLen = 0; + // XXX inline this case if the method size can take it + handleNcrValue(returnState); + state = transition(state, returnState, reconsume, pos); + continue stateloop; + // XXX reorder point + case HEX_NCR_LOOP: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume as many characters as match the range of + * characters given above. 
+ */ + assert value >= 0: "value must not become negative."; + if (c >= '0' && c <= '9') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } + continue; + } else if (c >= 'A' && c <= 'F') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } + continue; + } else if (c >= 'a' && c <= 'f') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + continue stateloop; + } else { + errNoDigitsInNCR(); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } else { + /* + * If no characters match the range, then don't + * consume any characters (and unconsume the U+0023 + * NUMBER SIGN character and, if appropriate, the X + * character). This is a parse error; nothing is + * returned. + * + * Otherwise, if the next character is a U+003B + * SEMICOLON, consume that too. If it isn't, there + * is a parse error. 
+ */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + errCharRefLacksSemicolon(); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + continue stateloop; + } + } + } + // XXX reorder point + case PLAINTEXT: + plaintextloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '\u0000': + emitPlaintextReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * RAWTEXT state. + */ + continue; + } + } + // XXX reorder point + case CLOSE_TAG_OPEN: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Otherwise, if the content model flag is set to the PCDATA + * state, or if the next few characters do match that tag + * name, consume the next input character: + */ + switch (c) { + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errLtSlashGt(); + /* + * Switch to the data state. + */ + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + silentCarriageReturn(); + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf('\n'); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. 
+ */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (c >= 'a' && c <= 'z') { + /* + * U+0061 LATIN SMALL LETTER A through to U+007A + * LATIN SMALL LETTER Z Create a new end tag + * token, + */ + endTag = true; + /* + * set its tag name to the input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * then switch to the tag name state. (Don't + * emit the token yet; further details will be + * filled in before it is emitted.) + */ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + continue stateloop; + } else { + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case RCDATA: + rcdataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in RCDATA state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RCDATA less-than sign state. 
+ */ + flushChars(buf, pos); + + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Emit the current input character as a + * character token. Stay in the RCDATA state. + */ + continue; + } + } + // XXX reorder point + case RAWTEXT: + rawtextloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RAWTEXT less-than sign state. + */ + flushChars(buf, pos); + + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + break rawtextloop; + // FALL THRU continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Emit the current input character as a + * character token. Stay in the RAWTEXT state. + */ + continue; + } + } + // XXX fallthru don't reorder + case RAWTEXT_RCDATA_LESS_THAN_SIGN: + rawtextrcdatalessthansignloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data end tag open state. 
+ */ + index = 0; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + break rawtextrcdatalessthansignloop; + // FALL THRU continue stateloop; + default: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN + * character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. + */ + cstart = pos; + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + // XXX fall thru. don't reorder. + case NON_DATA_END_TAG_NAME: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * ASSERT! when entering this state, set index to 0 and + * call clearStrBufBeforeUse() assert (contentModelElement != + * null); Let's implement the above without lookahead. + * strBuf is the 'temporary buffer'. + */ + if (index < endTagExpectationAsArray.length) { + char e = endTagExpectationAsArray[index]; + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != e) { + // [NOCPP[ + errHtml4LtSlashInRcdata(folded); + // ]NOCPP] + tokenHandler.characters(Tokenizer.LT_SOLIDUS, + 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + appendStrBuf(c); + index++; + continue; + } else { + endTag = true; + // XXX replace contentModelElement with different + // type + tagName = endTagExpectation; + switch (c) { + case '\r': + silentCarriageReturn(); + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE + * FEED (LF) U+000C FORM FEED (FF) U+0020 + * SPACE If the current end tag token is an + * appropriate end tag token, then switch to + * the before 
attribute name state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) If the current end tag + * token is an appropriate end tag token, + * then switch to the self-closing start tag + * state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) If the + * current end tag token is an appropriate + * end tag token, then emit the current tag + * token and switch to the data state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + continue stateloop; + default: + /* + * Emit a U+003C LESS-THAN SIGN character + * token, a U+002F SOLIDUS character token, + * a character token for each of the + * characters in the temporary buffer (in + * the order they were added to the buffer), + * and reconsume the current input character + * in the RAWTEXT state. + */ + // [NOCPP[ + errWarnLtSlashInRcdata(); + // ]NOCPP] + tokenHandler.characters( + Tokenizer.LT_SOLIDUS, 0, 2); + emitStrBuf(); + if (c == '\u0000') { + emitReplacementCharacter(buf, pos); + } else { + cstart = pos; // don't drop the + // character + } + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + } + // XXX reorder point + // BEGIN HOTSPOT WORKAROUND + case BOGUS_COMMENT: + boguscommentloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume every character up to and including the first + * U+003E GREATER-THAN SIGN character (>) or the end of + * the file (EOF), whichever comes first. 
Emit a comment + * token whose data is the concatenation of all the + * characters starting from and including the character + * that caused the state machine to switch into the + * bogus comment state, up to and including the + * character immediately before the last consumed + * character (i.e. up to the character just before the + * U+003E or EOF character). (If the comment was started + * by the end of the file (EOF), the token is empty.) + * + * Switch to the data state. + * + * If the end of the file was reached, reconsume the EOF + * character. + */ + switch (c) { + case '>': + emitComment(0, pos); + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos); + break boguscommentloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + appendStrBuf(c); + continue; + } + } + // FALLTHRU DON'T REORDER + case BOGUS_COMMENT_HYPHEN: + boguscommenthyphenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '>': + // [NOCPP[ + maybeAppendSpaceToBogusComment(); + // ]NOCPP] + emitComment(0, pos); + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + appendSecondHyphenToBogusComment(); + continue boguscommenthyphenloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case SCRIPT_DATA: + scriptdataloop: for 
(;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data less-than sign state. + */ + flushChars(buf, pos); + returnState = state; + state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos); + break scriptdataloop; // FALL THRU continue + // stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_LESS_THAN_SIGN: + scriptdatalessthansignloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data end tag open state. + */ + index = 0; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + continue stateloop; + case '!': + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos); + break scriptdatalessthansignloop; // FALL THRU + // continue + // stateloop; + default: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN + * character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. 
+ */ + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPE_START: + scriptdataescapestartloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escape start dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos); + break scriptdataescapestartloop; // FALL THRU + // continue + // stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data state. + */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPE_START_DASH: + scriptdataescapestartdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); + break scriptdataescapestartdashloop; + // continue stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_DASH_DASH: + scriptdataescapeddashdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Stay in the + * script data escaped dash dash state. + */ + continue; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. + */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit a U+003E + * GREATER-THAN SIGN character token. Switch to + * the script data state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break scriptdataescapeddashdashloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data escaped state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break scriptdataescapeddashdashloop; + // continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED: + scriptdataescapedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos); + break scriptdataescapedloop; // FALL THRU + // continue + // stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. + */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data escaped state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_DASH: + scriptdataescapeddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. 
+ */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + break scriptdataescapeddashloop; + // continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + scriptdataescapedlessthanloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data escaped end tag open state. + */ + index = 0; + clearStrBufBeforeUse(); + returnState = Tokenizer.SCRIPT_DATA_ESCAPED; + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + continue stateloop; + case 'S': + case 's': + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Emit a U+003C + * LESS-THAN SIGN character token and the + * current input character as a character token. + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + index = 1; + /* + * Set the temporary buffer to the empty string. + * Append the lowercase version of the current + * input character (add 0x0020 to the + * character's code point) to the temporary + * buffer. Switch to the script data double + * escape start state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos); + break scriptdataescapedlessthanloop; + // continue stateloop; + default: + /* + * Anything else Emit a U+003C LESS-THAN SIGN + * character token and reconsume the current + * input character in the script data escaped + * state. + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPE_START: + scriptdatadoubleescapestartloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + assert index > 0; + if (index < 6) { // SCRIPT_ARR.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.SCRIPT_ARR[index]) { + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + index++; + continue; + } + switch (c) { + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + case '/': + case '>': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN + * (>) Emit the current input character as a + * character token. If the temporary buffer is + * the string "script", then switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break scriptdatadoubleescapestartloop; + // continue stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data escaped state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED: + scriptdatadoubleescapedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data double escaped dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos); + break scriptdatadoubleescapedloop; // FALL THRU + // continue + // stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data double escaped state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + scriptdatadoubleescapeddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data double escaped dash dash state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos); + break scriptdatadoubleescapeddashloop; + // continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + scriptdatadoubleescapeddashdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Stay in the + * script data double escaped dash dash state. + */ + continue; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + break scriptdatadoubleescapeddashdashloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit a U+003E + * GREATER-THAN SIGN character token. Switch to + * the script data state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + scriptdatadoubleescapedlessthanloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS + * character token. Set the temporary buffer to + * the empty string. Switch to the script data + * double escape end state. + */ + index = 0; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos); + break scriptdatadoubleescapedlessthanloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data double escaped + * state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPE_END: + scriptdatadoubleescapeendloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // SCRIPT_ARR.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.SCRIPT_ARR[index]) { + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + index++; + continue; + } + switch (c) { + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + case '/': + case '>': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN + * (>) Emit the current input character as a + * character token. If the temporary buffer is + * the string "script", then switch to the + * script data escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + default: + /* + * Reconsume the current input character in the + * script data double escaped state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OCTYPE: + markupdeclarationdoctypeloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // OCTYPE.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded == Tokenizer.OCTYPE[index]) { + appendStrBuf(c); + } else { + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + reconsume = true; + state = transition(state, Tokenizer.DOCTYPE, reconsume, pos); + break markupdeclarationdoctypeloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE: + doctypeloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + initDoctypeFields(); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE name state. + */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break doctypeloop; + // continue stateloop; + default: + /* + * Anything else Parse error. + */ + errMissingSpaceBeforeDoctypeName(); + /* + * Reconsume the current character in the before + * DOCTYPE name state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break doctypeloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_NAME: + beforedoctypenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE name state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errNamelessDoctype(); + /* + * Create a new DOCTYPE token. Set its + * force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Create a + * new DOCTYPE token. Set the token's name + * to the lowercase version of the input + * character (add 0x0020 to the character's + * code point). + */ + c += 0x20; + } + /* Anything else Create a new DOCTYPE token. */ + /* + * Set the token's name name to the current + * input character. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * Switch to the DOCTYPE name state. 
+ */ + state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos); + break beforedoctypenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_NAME: + doctypenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + strBufToDoctypeName(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the after DOCTYPE name state. + */ + strBufToDoctypeName(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); + break doctypenameloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + strBufToDoctypeName(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the input character (add + * 0x0020 to the character's code point) to the + * current DOCTYPE token's name. + */ + if (c >= 'A' && c <= 'Z') { + c += 0x0020; + } + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * name. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE name state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_NAME: + afterdoctypenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after DOCTYPE name state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case 'p': + case 'P': + index = 0; + state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos); + break afterdoctypenameloop; + // continue stateloop; + case 's': + case 'S': + index = 0; + state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos); + continue stateloop; + default: + /* + * Otherwise, this is the parse error. + */ + bogusDoctype(); + + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_UBLIC: + doctypeublicloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the six characters starting from the current input + * character are an ASCII case-insensitive match for the + * word "PUBLIC", then consume those characters and + * switch to the before DOCTYPE public identifier state. 
+ */ + if (index < 5) { // UBLIC.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.UBLIC[index]) { + bogusDoctype(); + // forceQuirks = true; + reconsume = true; + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + reconsume = true; + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos); + break doctypeublicloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_PUBLIC_KEYWORD: + afterdoctypepublickeywordloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE public + * identifier state. + */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break afterdoctypepublickeywordloop; + // FALL THROUGH continue stateloop + case '"': + /* + * U+0022 QUOTATION MARK (") Parse Error. + */ + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + /* + * Set the DOCTYPE token's public identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse Error. 
+ */ + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + /* + * Set the DOCTYPE token's public identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + beforedoctypepublicidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE public identifier + * state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's public identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (double-quoted) state. 
+ */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + break beforedoctypepublicidentifierloop; + // continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * public identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + doctypepublicidentifierdoublequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * DOCTYPE public identifier state. + */ + publicIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break doctypepublicidentifierdoublequotedloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errGtInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. 
+ */ + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * public identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE public identifier + * (double-quoted) state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + afterdoctypepublicidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the between DOCTYPE public and + * system identifiers state. + */ + state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); + break afterdoctypepublicidentifierloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '"': + /* + * U+0022 QUOTATION MARK (") Parse error. 
+ */ + errNoSpaceBetweenPublicAndSystemIds(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse error. + */ + errNoSpaceBetweenPublicAndSystemIds(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + betweendoctypepublicandsystemidentifiersloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the between DOCTYPE public and system + * identifiers state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. 
+ */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's system identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + break betweendoctypepublicandsystemidentifiersloop; + // continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * system identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + doctypesystemidentifierdoublequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * DOCTYPE system identifier state. + */ + systemIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errGtInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. 
+ */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * system identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE system identifier + * (double-quoted) state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + afterdoctypesystemidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after DOCTYPE system identifier state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + /* + * Switch to the bogus DOCTYPE state. (This does + * not set the DOCTYPE token's force-quirks flag + * to on.) + */ + bogusDoctypeWithoutQuirks(); + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + break afterdoctypesystemidentifierloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BOGUS_DOCTYPE: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit that + * DOCTYPE token. 
+ */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + default: + /* + * Anything else Stay in the bogus DOCTYPE + * state. + */ + continue; + } + } + // XXX reorder point + case DOCTYPE_YSTEM: + doctypeystemloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Otherwise, if the six characters starting from the + * current input character are an ASCII case-insensitive + * match for the word "SYSTEM", then consume those + * characters and switch to the before DOCTYPE system + * identifier state. + */ + if (index < 5) { // YSTEM.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.YSTEM[index]) { + bogusDoctype(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + index++; + continue stateloop; + } else { + reconsume = true; + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos); + break doctypeystemloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_SYSTEM_KEYWORD: + afterdoctypesystemkeywordloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE system + * identifier state.
+ */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + break afterdoctypesystemkeywordloop; + // FALL THROUGH continue stateloop + case '"': + /* + * U+0022 QUOTATION MARK (") Parse Error. + */ + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse Error. + */ + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. NOTE(review): reported through errExpectedPublicId() although this state follows the SYSTEM keyword; BEFORE_DOCTYPE_SYSTEM_IDENTIFIER calls errExpectedSystemId() for the same input - confirm which hook is intended. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state.
+ */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + beforedoctypesystemidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE system identifier + * state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's system identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * system identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + break beforedoctypesystemidentifierloop; + // continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. 
+ */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * DOCTYPE system identifier state. + */ + systemIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + errGtInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * system identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE system identifier + * (single-quoted) state. + */ + continue; + } + } + // XXX reorder point + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * DOCTYPE public identifier state. + */ + publicIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + errGtInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token.
+ */ + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * public identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE public identifier + * (single-quoted) state. + */ + continue; + } + } + // XXX reorder point + case PROCESSING_INSTRUCTION: + processinginstructionloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '?': + state = transition( + state, + Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK, + reconsume, pos); + break processinginstructionloop; + // continue stateloop; + default: + continue; + } + } + case PROCESSING_INSTRUCTION_QUESTION_MARK: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '>': + state = transition(state, Tokenizer.DATA, + reconsume, pos); + continue stateloop; + default: + state = transition(state, + Tokenizer.PROCESSING_INSTRUCTION, + reconsume, pos); + continue stateloop; + } + // END HOTSPOT WORKAROUND + } + } + flushChars(buf, pos); + /* + * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; } + */ + // Save locals + stateSave = state; + returnStateSave = returnState; + return pos; + } + + // HOTSPOT WORKAROUND INSERTION POINT + + // [NOCPP[ + /** + * State-change hook: the state loop assigns the returned value to its current state. + * This implementation ignores from, reconsume and pos and returns to unchanged. + * NOTE(review): being protected, it is presumably an override point for subclasses - confirm. + * + * @param from the state being left + * @param to the state being entered + * @param reconsume the caller's reconsume flag + * @param pos the caller's buffer position + * @return the state to continue in; always to in this class + */ + protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException { + return to; + } + + // ]NOCPP] + /** + * Resets the fields that describe the DOCTYPE token being built: clears strBuf, + * sets doctypeName to the empty string, releases and nulls any held system or + * public identifier, and turns the force-quirks flag off. + */ + private void initDoctypeFields() { + // Discard the characters "DOCTYPE" accumulated as a potential bogus + // comment into strBuf.
+ clearStrBufAfterUse(); + doctypeName = ""; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + forceQuirks = false; + } + /** + * Counts a carriage return as a line break (silentCarriageReturn) and passes + * '\n' to adjustDoubleHyphenAndAppendToStrBufAndErr. + */ + @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() + throws SAXException { + silentCarriageReturn(); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); + } + /** + * Counts a line feed as a line break (silentLineFeed) and passes '\n' to + * adjustDoubleHyphenAndAppendToStrBufAndErr. + */ + @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed() + throws SAXException { + silentLineFeed(); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); + } + /** + * Counts the line break and appends '\n' to strBuf. + */ + @Inline private void appendStrBufLineFeed() { + silentLineFeed(); + appendStrBuf('\n'); + } + /** + * Counts the line break, records that a CR was seen, and appends '\n' (not '\r') + * to strBuf. + */ + @Inline private void appendStrBufCarriageReturn() { + silentCarriageReturn(); + appendStrBuf('\n'); + } + /** + * Increments the line counter and sets lastCR; emits nothing. + */ + @Inline protected void silentCarriageReturn() { + ++line; + lastCR = true; + } + /** + * Increments the line counter; emits nothing. + */ + @Inline protected void silentLineFeed() { + ++line; + } + /** + * Handles a CR found at pos: counts the line break, calls flushChars(buf, pos), + * sends the first char of Tokenizer.LF to the token handler and sets cstart to + * Integer.MAX_VALUE. + */ + private void emitCarriageReturn(@NoLength char[] buf, int pos) + throws SAXException { + silentCarriageReturn(); + flushChars(buf, pos); + tokenHandler.characters(Tokenizer.LF, 0, 1); + cstart = Integer.MAX_VALUE; + } + /** + * Calls flushChars(buf, pos), asks the token handler for a zero-originating + * replacement character and sets cstart to pos + 1. + */ + private void emitReplacementCharacter(@NoLength char[] buf, int pos) + throws SAXException { + flushChars(buf, pos); + tokenHandler.zeroOriginatingReplacementCharacter(); + cstart = pos + 1; + } + /** + * Calls flushChars(buf, pos), sends the first char of REPLACEMENT_CHARACTER to + * the token handler as ordinary characters and sets cstart to pos + 1. + */ + private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos) + throws SAXException { + flushChars(buf, pos); + tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1); + cstart = pos + 1; + } + /** + * Stores add in the additional field and, in the NOCPP-marked part, records a + * new LocatorImpl for this tokenizer in ampersandLocation. + */ + private void setAdditionalAndRememberAmpersandLocation(char add) { + additional = add; + // [NOCPP[ + ampersandLocation = new LocatorImpl(this); + // ]NOCPP] + } + /** + * Reports a bogus DOCTYPE error and turns the force-quirks flag on. + */ + private void bogusDoctype() throws SAXException { + errBogusDoctype(); + forceQuirks = true; + } + /** + * Reports a bogus DOCTYPE error and turns the force-quirks flag off. + */ + private void bogusDoctypeWithoutQuirks() throws SAXException { + errBogusDoctype(); + forceQuirks = 
false; + } + /** + * Outputs the character(s) for the numeric character reference whose code point + * is held in the value field. Values 0x80-0x9F are mapped through + * NamedCharacters.WINDOWS_1252; zero, surrogates and values above 0x10FFFF are + * replaced with Tokenizer.REPLACEMENT_CHARACTER; values above 0xFFFF are written + * as two chars into astralChar. + * + * @param returnState passed on to emitOrAppendOne / emitOrAppendTwo, which use it + * to choose between appending to strBuf and calling the token handler + */ + private void handleNcrValue(int returnState) throws SAXException { + /* + * If one or more characters match the range, then take them all and + * interpret the string of characters as a number (either hexadecimal or + * decimal as appropriate). + */ + if (value <= 0xFFFF) { + if (value >= 0x80 && value <= 0x9f) { + /* + * If that number is one of the numbers in the first column of + * the following table, then this is a parse error. + */ + errNcrInC1Range(); + /* + * Find the row with that number in the first column, and return + * a character token for the Unicode character given in the + * second column of that row. + */ + @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80]; + emitOrAppendOne(val, returnState); + // [NOCPP[ + } else if (value == 0xC + && contentSpacePolicy != XmlViolationPolicy.ALLOW) { + if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) { + emitOrAppendOne(Tokenizer.SPACE, returnState); + } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) { + fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space."); + } + // ]NOCPP] + } else if (value == 0x0) { + errNcrZero(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } else if ((value & 0xF800) == 0xD800) { + errNcrSurrogate(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } else { + /* + * Otherwise, return a character token for the Unicode character + * whose code point is that number.
+ */ + char ch = (char) value; + // [NOCPP[ + if (value == 0x0D) { + errNcrCr(); + } else if ((value <= 0x0008) || (value == 0x000B) + || (value >= 0x000E && value <= 0x001F)) { + ch = errNcrControlChar(ch); + } else if (value >= 0xFDD0 && value <= 0xFDEF) { + errNcrUnassigned(); + } else if ((value & 0xFFFE) == 0xFFFE) { + ch = errNcrNonCharacter(ch); + } else if (value >= 0x007F && value <= 0x009F) { /* 0x80-0x9F never get here (the first branch of this method handles them), so in effect this matches only 0x7F */ + errNcrControlChar(); + } else { + maybeWarnPrivateUse(ch); + } + // ]NOCPP] + bmpChar[0] = ch; + emitOrAppendOne(bmpChar, returnState); + } + } else if (value <= 0x10FFFF) { + // [NOCPP[ + maybeWarnPrivateUseAstral(); + if ((value & 0xFFFE) == 0xFFFE) { + errAstralNonCharacter(value); + } + // ]NOCPP] + astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10)); + astralChar[1] = (char) (0xDC00 + (value & 0x3FF)); + emitOrAppendTwo(astralChar, returnState); + } else { + errNcrOutOfRange(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } + } + /** + * Handles end of input. Starting from the saved state (stateSave / + * returnStateSave), it reports the end-of-file error belonging to that state, + * emits whatever was pending there (a less-than sign, a comment, a DOCTYPE token + * or a character reference) and finally calls tokenHandler.eof(). + * Character-reference states set state to returnState and loop again so that the + * return state gets its own end-of-file handling. + */ + public void eof() throws SAXException { + int state = stateSave; + int returnState = returnStateSave; + + eofloop: for (;;) { + switch (state) { + case SCRIPT_DATA_LESS_THAN_SIGN: + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the data + * state. + */ + break eofloop; + case TAG_OPEN: + /* + * The behavior of this state depends on the content model + * flag. + */ + /* + * Anything else Parse error. + */ + errEofAfterLt(); + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the data + * state.
+ */ + break eofloop; + case RAWTEXT_RCDATA_LESS_THAN_SIGN: + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the RCDATA + * state. + */ + break eofloop; + case NON_DATA_END_TAG_NAME: + /* + * Emit a U+003C LESS-THAN SIGN character token, a U+002F + * SOLIDUS character token, + */ + tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); + /* + * a character token for each of the characters in the + * temporary buffer (in the order they were added to the + * buffer), + */ + emitStrBuf(); + /* + * and reconsume the current input character in the RCDATA + * state. + */ + break eofloop; + case CLOSE_TAG_OPEN: + /* EOF Parse error. */ + errEofAfterLt(); + /* + * Emit a U+003C LESS-THAN SIGN character token and a U+002F + * SOLIDUS character token. + */ + tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case TAG_NAME: + /* + * EOF Parse error. + */ + errEofInTagName(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case BEFORE_ATTRIBUTE_NAME: + case AFTER_ATTRIBUTE_VALUE_QUOTED: + case SELF_CLOSING_START_TAG: + /* EOF Parse error. */ + errEofWithoutGt(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case ATTRIBUTE_NAME: + /* + * EOF Parse error. + */ + errEofInAttributeName(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_ATTRIBUTE_NAME: + case BEFORE_ATTRIBUTE_VALUE: + /* EOF Parse error. */ + errEofWithoutGt(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + case ATTRIBUTE_VALUE_UNQUOTED: + /* EOF Parse error. */ + errEofInAttributeValue(); + /* + * Reconsume the EOF character in the data state.
+ */ + break eofloop; + case BOGUS_COMMENT: + emitComment(0, 0); + break eofloop; + case BOGUS_COMMENT_HYPHEN: + // [NOCPP[ + maybeAppendSpaceToBogusComment(); + // ]NOCPP] + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_OPEN: + errBogusComment(); + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_HYPHEN: + errBogusComment(); + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_OCTYPE: + if (index < 6) { + errBogusComment(); + emitComment(0, 0); + } else { + /* EOF Parse error. */ + errEofInDoctype(); + /* + * Create a new DOCTYPE token. Set its force-quirks flag + * to on. + */ + doctypeName = ""; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + } + break eofloop; + case COMMENT_START: + case COMMENT: + /* + * EOF Parse error. + */ + errEofInComment(); + /* Emit the comment token. */ + emitComment(0, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END: + errEofInComment(); + /* Emit the comment token. */ + emitComment(2, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END_DASH: + case COMMENT_START_DASH: + errEofInComment(); + /* Emit the comment token. */ + emitComment(1, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END_BANG: + errEofInComment(); + /* Emit the comment token. */ + emitComment(3, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE: + case BEFORE_DOCTYPE_NAME: + errEofInDoctype(); + /* + * Create a new DOCTYPE token. Set its force-quirks flag to + * on.
+ */ + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_NAME: + errEofInDoctype(); + strBufToDoctypeName(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_UBLIC: + case DOCTYPE_YSTEM: + case AFTER_DOCTYPE_NAME: + case AFTER_DOCTYPE_PUBLIC_KEYWORD: + case AFTER_DOCTYPE_SYSTEM_KEYWORD: + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + /* EOF Parse error. */ + errEofInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + publicIdentifier = strBufToString(); + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + /* EOF Parse error. */ + errEofInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token.
+ */ + systemIdentifier = strBufToString(); + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case BOGUS_DOCTYPE: + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case CONSUME_CHARACTER_REFERENCE: + /* + * Unlike the definition in the spec, this state does not + * return a value and never requires the caller to + * backtrack. This state takes care of emitting characters + * or appending to the current attribute value. It also + * takes care of that in the case when consuming the entity + * fails. + */ + /* + * This section defines how to consume an entity. This + * definition is used when parsing entities in text and in + * attributes. + * + * The behavior depends on the identity of the next + * character (the one immediately after the U+0026 AMPERSAND + * character): + */ + + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + case CHARACTER_REFERENCE_HILO_LOOKUP: + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + case CHARACTER_REFERENCE_TAIL: + outer: for (;;) { + char c = '\u0000'; + entCol++; + /* + * Consume the maximum number of characters possible, + * with the consumed characters matching one of the + * identifiers in the first column of the named + * character references table (in a case-sensitive + * manner).
+ */ + hiloop: for (;;) { + if (hi == -1) { + break hiloop; + } + if (entCol == NamedCharacters.NAMES[hi].length()) { + break hiloop; + } + if (entCol > NamedCharacters.NAMES[hi].length()) { + break outer; + } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) { + hi--; + } else { + break hiloop; + } + } + + loloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > NamedCharacters.NAMES[lo].length()) { + break outer; + } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) { + lo++; + } else { + break loloop; + } + } + if (hi < lo) { + break outer; + } + continue; + } + + if (candidate == -1) { + /* + * If no match can be made, then this is a parse error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue eofloop; + } else { + @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; + if (candidateName.length() == 0 + || candidateName.charAt(candidateName.length() - 1) != ';') { + /* + * If the last character matched is not a U+003B + * SEMICOLON (;), there is a parse error.
+ */ + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + /* + * If the entity is being consumed as part of an + * attribute, and the last character matched is + * not a U+003B SEMICOLON (;), + */ + char ch; + if (charRefBufMark == charRefBufLen) { + ch = '\u0000'; + } else { + ch = charRefBuf[charRefBufMark]; + } + if ((ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z')) { + /* + * and the next character is in the range + * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, + * U+0041 LATIN CAPITAL LETTER A to U+005A + * LATIN CAPITAL LETTER Z, or U+0061 LATIN + * SMALL LETTER A to U+007A LATIN SMALL + * LETTER Z, then, for historical reasons, + * all the characters that were matched + * after the U+0026 AMPERSAND (&) must be + * unconsumed, and nothing is returned. + */ + errNoNamedCharacterMatch(); + appendCharRefBufToStrBuf(); + state = returnState; + continue eofloop; + } + } + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } else { + errNotSemicolonTerminated(); + } + } + + /* + * Otherwise, return a character token for the character + * corresponding to the entity name (as given by the + * second column of the named character references + * table). + */ + @Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; + if ( + // [NOCPP[ + val.length == 1 + // ]NOCPP] + // CPPONLY: val[1] == 0 + ) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + // this is so complicated! + if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + charRefBufLen = 0; + state = returnState; + continue eofloop; + /* + * If the markup contains I'm &notit; I tell you, the + * entity is parsed as "not", as in, I'm ¬it; I tell + * you.
But if the markup was I'm ∉ I tell you, + * the entity would be parsed as "notin;", resulting in + * I'm ∉ I tell you. + */ + } + case CONSUME_NCR: + case DECIMAL_NRC_LOOP: + case HEX_NCR_LOOP: + /* + * If no characters match the range, then don't consume any + * characters (and unconsume the U+0023 NUMBER SIGN + * character and, if appropriate, the X character). This is + * a parse error; nothing is returned. + * + * Otherwise, if the next character is a U+003B SEMICOLON, + * consume that too. If it isn't, there is a parse error. + */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + } else { + errCharRefLacksSemicolon(); + } + // WARNING previous state sets reconsume + handleNcrValue(returnState); + state = returnState; + continue; + case CDATA_RSQB: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); + break eofloop; + case CDATA_RSQB_RSQB: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); + break eofloop; + case DATA: + default: + break eofloop; + } + } + // case DATA: + /* + * EOF Emit an end-of-file token. + */ + tokenHandler.eof(); + return; + } + /** + * Emits the DOCTYPE token: sets cstart to pos + 1, passes doctypeName, + * publicIdentifier, systemIdentifier and forceQuirks to tokenHandler.doctype, + * then nulls the name and releases and nulls both identifiers. + * + * @param pos buffer position used to compute the new cstart + */ + private void emitDoctypeToken(int pos) throws SAXException { + cstart = pos + 1; + tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier, + forceQuirks); + // It is OK and sufficient to release these here, since + // there's no way out of the doctype states than through paths + // that call this method.
+ doctypeName = null; + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + + @Inline protected char checkChar(@NoLength char[] buf, int pos) + throws SAXException { + return buf[pos]; + } + + public boolean internalEncodingDeclaration(String internalCharset) + throws SAXException { + if (encodingDeclarationHandler != null) { + return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset); + } + return false; + } + + /** + * @param val + * @throws SAXException + */ + private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) + throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(val[0]); + appendStrBuf(val[1]); + } else { + tokenHandler.characters(val, 0, 2); + } + } + + private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) + throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(val[0]); + } else { + tokenHandler.characters(val, 0, 1); + } + } + + public void end() throws SAXException { + strBuf = null; + doctypeName = null; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + if (tagName != null) { + tagName.release(); + tagName = null; + } + if (attributeName != null) { + attributeName.release(); + attributeName = null; + } + tokenHandler.endTokenization(); + if (attributes != null) { + // [NOCPP[ + attributes = null; + // ]NOCPP] + // CPPONLY: attributes.clear(mappingLangToXmlLang); + } + } + + public void requestSuspension() { + shouldSuspend = true; + } + + // [NOCPP[ + + public void becomeConfident() { + confident = true; + } + + /** + * Returns the nextCharOnNewLine. 
+ * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return false; + } + + /** + * Returns the current value of the {@code lastCR} field. + * + * @return {@code lastCR} + */ + public boolean isPrevCR() { + return lastCR; + } + + /** + * Returns the line. In this class the method always returns + * {@code -1}. + * + * @return the line + */ + public int getLine() { + return -1; + } + + /** + * Returns the col. In this class the method always returns + * {@code -1}. + * + * @return the col + */ + public int getCol() { + return -1; + } + + // ]NOCPP] + + /** + * Tells whether the saved state is the data state. + * + * @return {@code true} when {@code stateSave} equals {@code DATA} + */ + public boolean isInDataState() { + return (stateSave == DATA); + } + + /** + * Resets the tokenizer's working fields to their initial values and + * sets {@code stateSave} to {@code DATA}. Also releases + * {@code tagName} and {@code attributeName}; {@code attributes} is + * deleted only when {@code newAttributesEachTime} is set. + */ + public void resetToDataState() { + clearStrBufAfterUse(); + charRefBufLen = 0; + stateSave = Tokenizer.DATA; + // line = 1; XXX line numbers + lastCR = false; + index = 0; + forceQuirks = false; + additional = '\u0000'; + entCol = -1; + firstCharKey = -1; + lo = 0; + hi = 0; // will always be overwritten before use anyway + candidate = -1; + charRefBufMark = 0; + value = 0; + seenDigits = false; + endTag = false; + shouldSuspend = false; + initDoctypeFields(); + if (tagName != null) { + tagName.release(); + tagName = null; + } + if (attributeName != null) { + attributeName.release(); + attributeName = null; + } + if (newAttributesEachTime) { + if (attributes != null) { + Portability.delete(attributes); + attributes = null; + } + } + } + + /** + * Copies the tokenizer state of {@code other} into this tokenizer. + * Buffer contents are copied (a larger {@code strBuf} is allocated when + * the current one is too small), scalar fields are assigned, and the + * doctype name, identifiers, tag name, attribute name and attributes + * are re-created from {@code other} after the ones currently held have + * been released. {@code shouldSuspend} is set to {@code false} rather + * than copied. + * + * NOTE(review): {@code strBuf} and {@code other.strBuf} are + * dereferenced without a null check -- confirm that callers only use + * this while both buffers are allocated. + * + * @param other + * the tokenizer to copy from + * @throws SAXException + */ + public void loadState(Tokenizer other) throws SAXException { + strBufLen = other.strBufLen; + if (strBufLen > strBuf.length) { + strBuf = new char[strBufLen]; + } + System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen); + + charRefBufLen = other.charRefBufLen; + System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen); + + stateSave = other.stateSave; + returnStateSave = other.returnStateSave; + endTagExpectation = other.endTagExpectation; + endTagExpectationAsArray = other.endTagExpectationAsArray; + // line = 1; XXX line numbers + lastCR = other.lastCR; + index = other.index; + forceQuirks = other.forceQuirks; + additional = other.additional; + entCol = other.entCol; + firstCharKey = other.firstCharKey; + lo = other.lo; + hi = other.hi; + candidate = other.candidate; +
charRefBufMark = other.charRefBufMark; + value = other.value; + seenDigits = other.seenDigits; + endTag = other.endTag; + /* The suspension request is cleared, not copied from other. */ + shouldSuspend = false; + + /* Each held name, string or attribute holder below is replaced by a copy made from other, or by null when other has none. */ + if (other.doctypeName == null) { + doctypeName = null; + } else { + doctypeName = Portability.newLocalFromLocal(other.doctypeName, + interner); + } + + Portability.releaseString(systemIdentifier); + if (other.systemIdentifier == null) { + systemIdentifier = null; + } else { + systemIdentifier = Portability.newStringFromString(other.systemIdentifier); + } + + Portability.releaseString(publicIdentifier); + if (other.publicIdentifier == null) { + publicIdentifier = null; + } else { + publicIdentifier = Portability.newStringFromString(other.publicIdentifier); + } + + if (tagName != null) { + tagName.release(); + } + if (other.tagName == null) { + tagName = null; + } else { + tagName = other.tagName.cloneElementName(interner); + } + + if (attributeName != null) { + attributeName.release(); + } + if (other.attributeName == null) { + attributeName = null; + } else { + attributeName = other.attributeName.cloneAttributeName(interner); + } + + Portability.delete(attributes); + if (other.attributes == null) { + attributes = null; + } else { + attributes = other.attributes.cloneAttributes(interner); + } + } + + /** + * Puts the tokenizer into its initial configuration: clears + * {@code confident} and {@code strBuf}, sets {@code line} to 1 and + * finishes by calling {@code resetToDataState()}. Inside the NOCPP + * markers it also clears {@code html4} and {@code metaBoundaryPassed}, + * re-reads {@code wantsComments} from the token handler and allocates a + * new {@code HtmlAttributes} when {@code newAttributesEachTime} is not + * set. + * + * @throws SAXException + */ + public void initializeWithoutStarting() throws SAXException { + confident = false; + strBuf = null; + line = 1; + // CPPONLY: attributeLine = 1; + // [NOCPP[ + html4 = false; + metaBoundaryPassed = false; + wantsComments = tokenHandler.wantsComments(); + if (!newAttributesEachTime) { + attributes = new HtmlAttributes(mappingLangToXmlLang); + } + // ]NOCPP] + resetToDataState(); + } + + /* + * The protected err, maybeErr, maybeWarn and note methods that follow + * have empty bodies in this class (two of them return their argument + * unchanged). + * NOTE(review): presumably they are hooks for a subclass that reports + * errors -- no override is visible here; confirm. + */ + protected void errGarbageAfterLtSlash() throws SAXException { + } + + protected void errLtSlashGt() throws SAXException { + } + + protected void errWarnLtSlashInRcdata() throws SAXException { + } + + protected void errHtml4LtSlashInRcdata(char folded) throws SAXException { + } + + protected void errCharRefLacksSemicolon() throws SAXException
{ + } + + /** + * Called (at least) from the end-of-file handling of the numeric + * character reference states when no digits have been seen. The body + * is empty in this class. + * + * @throws SAXException + */ + protected void errNoDigitsInNCR() throws SAXException { + } + + protected void errGtInSystemId() throws SAXException { + } + + protected void errGtInPublicId() throws SAXException { + } + + protected void errNamelessDoctype() throws SAXException { + } + + protected void errConsecutiveHyphens() throws SAXException { + } + + protected void errPrematureEndOfComment() throws SAXException { + } + + protected void errBogusComment() throws SAXException { + } + + /* The hooks taking a char receive it as an argument only; with an empty body nothing is done with it in this class. */ + protected void errUnquotedAttributeValOrNull(char c) throws SAXException { + } + + protected void errSlashNotFollowedByGt() throws SAXException { + } + + protected void errHtml4XmlVoidSyntax() throws SAXException { + } + + protected void errNoSpaceBetweenAttributes() throws SAXException { + } + + protected void errHtml4NonNameInUnquotedAttribute(char c) + throws SAXException { + } + + protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) + throws SAXException { + } + + protected void errAttributeValueMissing() throws SAXException { + } + + protected void errBadCharBeforeAttributeNameOrNull(char c) + throws SAXException { + } + + protected void errEqualsSignBeforeAttributeName() throws SAXException { + } + + protected void errBadCharAfterLt(char c) throws SAXException { + } + + protected void errLtGt() throws SAXException { + } + + protected void errProcessingInstruction() throws SAXException { + } + + protected void errUnescapedAmpersandInterpretedAsCharacterReference() + throws SAXException { + } + + protected void errNotSemicolonTerminated() throws SAXException { + } + + protected void errNoNamedCharacterMatch() throws SAXException { + } + + protected void errQuoteBeforeAttributeName(char c) throws SAXException { + } + + protected void errQuoteOrLtInAttributeNameOrNull(char c) + throws SAXException { + } + + protected void errExpectedPublicId() throws SAXException { + } + + protected void errBogusDoctype() throws SAXException { + } + + protected void maybeWarnPrivateUseAstral() throws
SAXException { + } + + protected void maybeWarnPrivateUse(char ch) throws SAXException { + } + + protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) + throws SAXException { + } + + protected void maybeErrSlashInEndTag(boolean selfClosing) + throws SAXException { + } + + /** + * Hook that, in this class, does nothing except hand its argument + * back. + * + * @param ch + * the character in question + * @return {@code ch}, unchanged + * @throws SAXException + */ + protected char errNcrNonCharacter(char ch) throws SAXException { + return ch; + } + + protected void errAstralNonCharacter(int ch) throws SAXException { + } + + protected void errNcrSurrogate() throws SAXException { + } + + /** + * Hook that, in this class, does nothing except hand its argument + * back. A no-argument overload with an empty body is declared further + * down. + * + * @param ch + * the character in question + * @return {@code ch}, unchanged + * @throws SAXException + */ + protected char errNcrControlChar(char ch) throws SAXException { + return ch; + } + + protected void errNcrCr() throws SAXException { + } + + protected void errNcrInC1Range() throws SAXException { + } + + protected void errEofInPublicId() throws SAXException { + } + + protected void errEofInComment() throws SAXException { + } + + protected void errEofInDoctype() throws SAXException { + } + + protected void errEofInAttributeValue() throws SAXException { + } + + protected void errEofInAttributeName() throws SAXException { + } + + protected void errEofWithoutGt() throws SAXException { + } + + protected void errEofInTagName() throws SAXException { + } + + protected void errEofInEndTag() throws SAXException { + } + + protected void errEofAfterLt() throws SAXException { + } + + protected void errNcrOutOfRange() throws SAXException { + } + + protected void errNcrUnassigned() throws SAXException { + } + + protected void errDuplicateAttribute() throws SAXException { + } + + protected void errEofInSystemId() throws SAXException { + } + + protected void errExpectedSystemId() throws SAXException { + } + + protected void errMissingSpaceBeforeDoctypeName() throws SAXException { + } + + protected void errHyphenHyphenBang() throws SAXException { + } + + protected void errNcrControlChar() throws SAXException { + } + + protected void errNcrZero() throws SAXException { + } + + protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() + throws SAXException { + } + + protected void
errNoSpaceBetweenPublicAndSystemIds() throws SAXException { + } + + protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() + throws SAXException { + } + + protected void noteAttributeWithoutValue() throws SAXException { + } + + protected void noteUnquotedAttributeValue() throws SAXException { + } + + /** + * Sets the encodingDeclarationHandler, the object that + * {@code internalEncodingDeclaration} forwards to. + * + * @param encodingDeclarationHandler + * the encodingDeclarationHandler to set + */ + public void setEncodingDeclarationHandler( + EncodingDeclarationHandler encodingDeclarationHandler) { + this.encodingDeclarationHandler = encodingDeclarationHandler; + } + + /** + * Deletes the {@code attributes} holder through + * {@code Portability.delete} and clears the reference. + */ + void destructor() { + // The translator will write refcount tracing stuff here + Portability.delete(attributes); + attributes = null; + } + + // [NOCPP[ + + /** + * Sets an offset to be added to the position reported to + * TransitionHandler. The body is empty in this class, so the offset is + * ignored here. + * + * @param offset the offset + */ + public void setTransitionBaseOffset(int offset) { + + } + + // ]NOCPP] + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java new file mode 100644 index 0000000000..5e83d18473 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -0,0 +1,6558 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2015 Mozilla Foundation + * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla + * Foundation, and Opera Software ASA.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * The comments following this one that use the same comment syntax as this + * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007 + * amended as of June 28 2007. + * That document came with this statement: + * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and + * Opera Software ASA. You are granted a license to use, reproduce and + * create derivative works of this document." 
+ */ + +package nu.validator.htmlparser.impl; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.IdType; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Literal; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +/** + * Abstract {@code TokenHandler} and {@code TreeBuilderState} + * implementation. Its fields hold the current insertion mode, the template + * mode stack, the element stack ({@code stack}) and the list of active + * formatting elements. + * + * NOTE(review): members of this class are declared with the type {@code T}, + * but no type parameter is visible on the declaration below; generic + * parameters appear to have been lost from this copy -- confirm against the + * original source before compiling. + */ +public abstract class TreeBuilder implements TokenHandler, + TreeBuilderState { + + /** + * Array version of U+FFFD.
+ */ + private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + + // Start dispatch groups + // Each constant below is a distinct int identifier. + // NOTE(review): the value 21 is not used in this list (HEAD = 20 is + // followed by HR = 22) -- confirm that the gap is intentional. + + final static int OTHER = 0; + + final static int A = 1; + + final static int BASE = 2; + + final static int BODY = 3; + + final static int BR = 4; + + final static int BUTTON = 5; + + final static int CAPTION = 6; + + final static int COL = 7; + + final static int COLGROUP = 8; + + final static int FORM = 9; + + final static int FRAME = 10; + + final static int FRAMESET = 11; + + final static int IMAGE = 12; + + final static int INPUT = 13; + + final static int ISINDEX = 14; + + final static int LI = 15; + + final static int LINK_OR_BASEFONT_OR_BGSOUND = 16; + + final static int MATH = 17; + + final static int META = 18; + + final static int SVG = 19; + + final static int HEAD = 20; + + final static int HR = 22; + + final static int HTML = 23; + + final static int NOBR = 24; + + final static int NOFRAMES = 25; + + final static int NOSCRIPT = 26; + + final static int OPTGROUP = 27; + + final static int OPTION = 28; + + final static int P = 29; + + final static int PLAINTEXT = 30; + + final static int SCRIPT = 31; + + final static int SELECT = 32; + + final static int STYLE = 33; + + final static int TABLE = 34; + + final static int TEXTAREA = 35; + + final static int TITLE = 36; + + final static int TR = 37; + + final static int XMP = 38; + + final static int TBODY_OR_THEAD_OR_TFOOT = 39; + + final static int TD_OR_TH = 40; + + final static int DD_OR_DT = 41; + + final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42; + + final static int MARQUEE_OR_APPLET = 43; + + final static int PRE_OR_LISTING = 44; + + final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45; + + final static int UL_OR_OL_OR_DL = 46; + + final static int IFRAME = 47; + + final static int EMBED = 48; + + final static int AREA_OR_WBR = 49; + + final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50; + + final static int
ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY = 51; + + final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52; + + final static int RB_OR_RTC = 53; + + /* NOTE(review): the value 54 is not used in this list (RB_OR_RTC = 53 is followed by 55) -- confirm that the gap is intentional. */ + final static int PARAM_OR_SOURCE_OR_TRACK = 55; + + final static int MGLYPH_OR_MALIGNMARK = 56; + + final static int MI_MO_MN_MS_MTEXT = 57; + + final static int ANNOTATION_XML = 58; + + final static int FOREIGNOBJECT_OR_DESC = 59; + + final static int NOEMBED = 60; + + final static int FIELDSET = 61; + + final static int OUTPUT = 62; + + final static int OBJECT = 63; + + final static int FONT = 64; + + final static int KEYGEN = 65; + + final static int MENUITEM = 66; + + final static int TEMPLATE = 67; + + final static int IMG = 68; + + final static int RT_OR_RP = 69; + + // start insertion modes + // These ints are values for the mode field; INITIAL and FRAMESET_OK, + // for example, are assigned to it elsewhere in this class. + + private static final int IN_ROW = 0; + + private static final int IN_TABLE_BODY = 1; + + private static final int IN_TABLE = 2; + + private static final int IN_CAPTION = 3; + + private static final int IN_CELL = 4; + + private static final int FRAMESET_OK = 5; + + private static final int IN_BODY = 6; + + private static final int IN_HEAD = 7; + + private static final int IN_HEAD_NOSCRIPT = 8; + + // no fall-through + + private static final int IN_COLUMN_GROUP = 9; + + // no fall-through + + private static final int IN_SELECT_IN_TABLE = 10; + + private static final int IN_SELECT = 11; + + // no fall-through + + private static final int AFTER_BODY = 12; + + // no fall-through + + private static final int IN_FRAMESET = 13; + + private static final int AFTER_FRAMESET = 14; + + // no fall-through + + private static final int INITIAL = 15; + + // could add fall-through + + private static final int BEFORE_HTML = 16; + + // could add fall-through + + private static final int BEFORE_HEAD = 17; + + // no fall-through + + private static final int AFTER_HEAD = 18; + + // no fall-through + + private static final int AFTER_AFTER_BODY = 19; +
+ // no fall-through + + private static final int AFTER_AFTER_FRAMESET = 20; + + // no fall-through + + private static final int TEXT = 21; + + private static final int IN_TEMPLATE = 22; + + // start charset states + + private static final int CHARSET_INITIAL = 0; + + private static final int CHARSET_C = 1; + + private static final int CHARSET_H = 2; + + private static final int CHARSET_A = 3; + + private static final int CHARSET_R = 4; + + private static final int CHARSET_S = 5; + + private static final int CHARSET_E = 6; + + private static final int CHARSET_T = 7; + + private static final int CHARSET_EQUALS = 8; + + private static final int CHARSET_SINGLE_QUOTED = 9; + + private static final int CHARSET_DOUBLE_QUOTED = 10; + + private static final int CHARSET_UNQUOTED = 11; + + // end pseudo enums + + // [NOCPP[ + + /** + * Public identifiers of the HTML 4.0 and HTML 4.01 DTDs (the plain, + * Transitional and Frameset variants of each). + */ + private final static String[] HTML4_PUBLIC_IDS = { + "-//W3C//DTD HTML 4.0 Frameset//EN", + "-//W3C//DTD HTML 4.0 Transitional//EN", + "-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN", + "-//W3C//DTD HTML 4.01 Transitional//EN", + "-//W3C//DTD HTML 4.01//EN" }; + + // ]NOCPP] + + /** + * Lower-case public identifier strings, each ending in two slashes. + * NOTE(review): presumably these are matched as prefixes of a + * lower-cased doctype public identifier to select quirks mode -- the + * lookup is not visible here; confirm. + */ + @Literal private final static String[] QUIRKY_PUBLIC_IDS = { + "+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict
level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//" }; + + /** + * Sentinel index value ({@code Integer.MAX_VALUE}). + * NOTE(review): presumably returned by element stack searches that + * find no match -- the users are not visible here; confirm. + */ + private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE; + + // [NOCPP[ + + private static final @Local String HTML_LOCAL = "html"; + + // ]NOCPP] + + /** + * The current insertion mode; starts as {@code INITIAL}. + */ + private int mode = INITIAL; + + /** + * Starts as {@code INITIAL} and is set back to {@code INITIAL} by + * {@code startTokenization}. + */ + private int originalMode = INITIAL; + + /** + * Used only when moving back to IN_BODY.
+ */ + private boolean framesetOk = true; + + /** + * The tokenizer handed to {@code startTokenization}; it is also passed + * as the locator of the exceptions built by the error and warning + * methods of this class. + */ + protected Tokenizer tokenizer; + + // [NOCPP[ + + /** + * Receiver of errors and warnings; the reporting methods that check it + * do nothing when it is {@code null}. + */ + protected ErrorHandler errorHandler; + + private DocumentModeHandler documentModeHandler; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private LocatorImpl firstCommentLocation; + + // ]NOCPP] + + private boolean scriptingEnabled = false; + + private boolean needToDropLF; + + // [NOCPP[ + + private boolean wantingComments; + + // ]NOCPP] + + /** + * Selects the fragment branch of {@code startTokenization}; set to + * {@code false} by the constructor. + */ + private boolean fragment; + + private @Local String contextName; + + private @NsUri String contextNamespace; + + private T contextNode; + + /** + * Stack of template insertion modes. + */ + private @Auto int[] templateModeStack; + + /** + * Current template mode stack pointer. + */ + private int templateModePtr = -1; + + /** + * The element stack; {@code currentPtr} indexes its most recently + * pushed node. + */ + private @Auto StackNode[] stack; + + /** + * Index of the top of {@code stack}; {@code -1} when nothing has been + * pushed. + */ + private int currentPtr = -1; + + private @Auto StackNode[] listOfActiveFormattingElements; + + private int listPtr = -1; + + private T formPointer; + + private T headPointer; + + /** + * Used to work around Gecko limitations. Not used in Java. + */ + private T deepTreeSurrogateParent; + + protected @Auto char[] charBuffer; + + protected int charBufferLen = 0; + + private boolean quirks = false; + + private boolean isSrcdocDocument = false; + + // [NOCPP[ + + private boolean reportingDoctype = true; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; + + private final Map idLocations = new HashMap(); + + private boolean html4; + + // ]NOCPP] + + /** + * Creates a tree builder with {@code fragment} set to {@code false}. + */ + protected TreeBuilder() { + fragment = false; + } + + /** + * Reports a condition that would make the infoset incompatible with XML + * 1.0 as fatal. The no-argument overload has an empty body in this + * class.
+ * + * @throws SAXException + * @throws SAXParseException + */ + protected void fatal() throws SAXException { + } + + // [NOCPP[ + + /** + * Wraps {@code e} in a {@code SAXParseException} that uses the + * tokenizer as its locator, passes it to + * {@code errorHandler.fatalError} when an error handler is set, and + * then throws it. + * + * @param e + * the exception to wrap; its message becomes the message of + * the thrown exception + * @throws SAXException + * always + */ + protected final void fatal(Exception e) throws SAXException { + SAXParseException spe = new SAXParseException(e.getMessage(), + tokenizer, e); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + /** + * Builds a {@code SAXParseException} with the message {@code s} and + * the tokenizer as its locator, passes it to + * {@code errorHandler.fatalError} when an error handler is set, and + * then throws it. + * + * @param s + * the message + * @throws SAXException + * always + */ + final void fatal(String s) throws SAXException { + SAXParseException spe = new SAXParseException(s, tokenizer); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + /** + * Reports a Parse Error. Does nothing when no error handler is set. + * + * @param message + * the message + * @throws SAXException + */ + final void err(String message) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck(message); + } + + /** + * Reports a Parse Error without checking if an error handler is present. + * The caller must make sure that {@code errorHandler} is not + * {@code null}; it is dereferenced unconditionally. + * + * @param message + * the message + * @throws SAXException + */ + final void errNoCheck(String message) throws SAXException { + SAXParseException spe = new SAXParseException(message, tokenizer); + errorHandler.error(spe); + } + + /** + * Calls {@code reportUnclosedElementNameAndLocation} for every stack + * index from {@code currentPtr} down to {@code eltPos + 1}. Does + * nothing when the stack is empty. + * + * @param eltPos + * the stack index at which reporting stops (exclusive) + * @throws SAXException + */ + private void errListUnclosedStartTags(int eltPos) throws SAXException { + if (currentPtr != -1) { + for (int i = currentPtr; i > eltPos; i--) { + reportUnclosedElementNameAndLocation(i); + } + } + } + + /** + * Reports the name and location of an unclosed element. Nothing is + * reported when the node has an optional end tag or when its locator is + * already tainted; otherwise the locator is marked tainted before the + * error is passed to {@code errorHandler}, which is dereferenced + * without a null check. + * + * @param pos + * index of the node in {@code stack}
+ * + * @throws SAXException + */ + private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException { + StackNode node = stack[pos]; + if (node.isOptionalEndTag()) { + return; + } + TaintableLocatorImpl locator = node.getLocator(); + if (locator.isTainted()) { + return; + } + locator.markTainted(); + SAXParseException spe = new SAXParseException( + "Unclosed element \u201C" + node.popName + "\u201D.", locator); + errorHandler.error(spe); + } + + /** + * Reports a warning that uses the tokenizer as its locator. Does + * nothing when no error handler is set. + * + * @param message + * the message + * @throws SAXException + */ + final void warn(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, tokenizer); + errorHandler.warning(spe); + } + + /** + * Reports a warning with an explicit locator. Does nothing when no + * error handler is set. + * + * @param message + * the message + * @param locator + * the locator passed to the reported exception + * @throws SAXException + */ + final void warn(String message, Locator locator) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, locator); + errorHandler.warning(spe); + } + + // ]NOCPP] + + @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException { + tokenizer = self; + stack = new StackNode[64]; + templateModeStack = new int[64]; + listOfActiveFormattingElements = new StackNode[64]; + needToDropLF = false; + originalMode = INITIAL; + templateModePtr = -1; + currentPtr = -1; + listPtr = -1; + formPointer = null; + headPointer = null; + deepTreeSurrogateParent = null; + // [NOCPP[ + html4 = false; + idLocations.clear(); + wantingComments = wantsComments(); + firstCommentLocation = null; + // ]NOCPP] + start(fragment); + charBufferLen = 0; + charBuffer = null; + framesetOk = true; + if (fragment) { + T elt; + if (contextNode != null) { + elt = contextNode; + } else { + elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes()); + } + // When the context node is not in the HTML namespace, contrary +
// to the spec, the first node on the stack is not set to "html" + // in the HTML namespace. Instead, it is set to a node that has + // the characteristics of the appropriate "adjusted current node". + // This way, there is no need to perform "adjusted current node" + // checks during tree construction. Instead, it's sufficient to + // just look at the current node. However, this also means that it + // is not safe to treat "html" in the HTML namespace as a sentinel + // that ends stack popping. Instead, stack popping loops that are + // meant not to pop the first element on the stack need to check + // for currentPos becoming zero. + if (contextNamespace == "http://www.w3.org/2000/svg") { + ElementName elementName = ElementName.SVG; + if ("title" == contextName || "desc" == contextName + || "foreignObject" == contextName) { + // These elements are all alike and we don't care about + // the exact name. + elementName = ElementName.FOREIGNOBJECT; + } + // This is the SVG variant of the StackNode constructor. + StackNode node = new StackNode(elementName, + elementName.camelCaseName, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, + contextName); + // The frameset-ok flag is set even though never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") { + ElementName elementName = ElementName.MATH; + if ("mi" == contextName || "mo" == contextName + || "mn" == contextName || "ms" == contextName + || "mtext" == contextName) { + // These elements are all alike and we don't care about + // the exact name. 
+ elementName = ElementName.MTEXT; + } else if ("annotation-xml" == contextName) { + elementName = ElementName.ANNOTATION_XML; + // Blink does not check the encoding attribute of the + // annotation-xml element innerHTML is being set on. + // Let's do the same at least until + // https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783 + // is resolved. + } + // This is the MathML variant of the StackNode constructor. + StackNode node = new StackNode(elementName, elt, + elementName.name, false + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, + contextName); + // The frameset-ok flag is set even though never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else { // html + StackNode node = new StackNode(ElementName.HTML, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + if ("template" == contextName) { + pushTemplateMode(IN_TEMPLATE); + } + resetTheInsertionMode(); + formPointer = getFormPointerForContext(contextNode); + if ("title" == contextName || "textarea" == contextName) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, + contextName); + } else if ("style" == contextName || "xmp" == contextName + || "iframe" == contextName || "noembed" == contextName + || "noframes" == contextName + || (scriptingEnabled && "noscript" == contextName)) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, + contextName); + } else if ("plaintext" == contextName) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, + contextName); + } else if ("script" == contextName) { + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, contextName); + } else { + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, + contextName); + } + } + contextName = 
null; + contextNode = null; + } else { + mode = INITIAL; + // If we are viewing XML source, put a foreign element permanently + // on the stack so that cdataSectionAllowed() returns true. + // CPPONLY: if (tokenizer.isViewingXmlSource()) { + // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg", + // CPPONLY: "svg", + // CPPONLY: tokenizer.emptyAttributes(), null); + // CPPONLY: StackNode node = new StackNode(ElementName.SVG, + // CPPONLY: "svg", + // CPPONLY: elt); + // CPPONLY: currentPtr++; + // CPPONLY: stack[currentPtr] = node; + // CPPONLY: } + } + } + + public final void doctype(@Local String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) throws SAXException { + needToDropLF = false; + if (!isInForeign() && mode == INITIAL) { + // [NOCPP[ + if (reportingDoctype) { + // ]NOCPP] + String emptyString = Portability.newEmptyString(); + appendDoctypeToDocument(name == null ? "" : name, + publicIdentifier == null ? emptyString + : publicIdentifier, + systemIdentifier == null ? emptyString + : systemIdentifier); + Portability.releaseString(emptyString); + // [NOCPP[ + } + switch (doctypeExpectation) { + case HTML: + // ]NOCPP] + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + errQuirkyDoctype(); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, false); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + // [NOCPP[ + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + // ]NOCPP] + errAlmostStandardsDoctype(); + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } else { + // [NOCPP[ + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ((Portability.literalEqualsString( + "-//W3C//DTD HTML 4.0//EN", publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString( + "http://www.w3.org/TR/REC-html40/strict.dtd", + systemIdentifier))) + || (Portability.literalEqualsString( + "-//W3C//DTD HTML 4.01//EN", + publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString( + "http://www.w3.org/TR/html4/strict.dtd", + systemIdentifier))) + || (Portability.literalEqualsString( + "-//W3C//DTD XHTML 1.0 Strict//EN", + publicIdentifier) && Portability.literalEqualsString( + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", + systemIdentifier)) + || (Portability.literalEqualsString( + "-//W3C//DTD XHTML 1.1//EN", + publicIdentifier) && Portability.literalEqualsString( + "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", + systemIdentifier)) + + ) { + warn("Obsolete doctype. Expected \u201C\u201D."); + } else if (!((systemIdentifier == null || Portability.literalEqualsString( + "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) { + err("Legacy doctype. Expected \u201C\u201D."); + } + // ]NOCPP] + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } + // [NOCPP[ + break; + case HTML401_STRICT: + html4 = true; + tokenizer.turnOnAdditionalHtml4Errors(); + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected \u201C\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, true); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + err("Almost standards mode doctype. 
Expected \u201C\u201D."); + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C\u201D."); + } + } else { + err("The doctype was not the HTML 4.01 Strict doctype. Expected \u201C\u201D."); + } + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } + break; + case HTML401_TRANSITIONAL: + html4 = true; + tokenizer.turnOnAdditionalHtml4Errors(); + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected \u201C\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, true); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier) + && systemIdentifier != null) { + if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C\u201D."); + } + } else { + err("The doctype was not a non-quirky HTML 4.01 Transitional doctype. Expected \u201C\u201D."); + } + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C\u201D."); + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } + break; + case AUTO: + html4 = isHtml4Doctype(publicIdentifier); + if (html4) { + tokenizer.turnOnAdditionalHtml4Errors(); + } + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected e.g. \u201C\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, html4); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C\u201D."); + } + } else { + err("Almost standards mode doctype. Expected e.g. \u201C\u201D."); + } + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, html4); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. 
Expected \u201C\u201D."); + } + } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the XHTML 1.0 specification. Expected \u201C\u201D."); + } + } else if ("//W3C//DTD XHTML 1.1//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the XHTML 1.1 specification. Expected \u201C\u201D."); + } + } else if (!((systemIdentifier == null || Portability.literalEqualsString( + "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) { + err("Unexpected doctype. Expected, e.g., \u201C\u201D."); + } + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, html4); + } + break; + case NO_DOCTYPE_ERRORS: + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, false); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } else { + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } + break; + } + // ]NOCPP] + + /* + * + * Then, switch to the root element mode of the tree construction + * stage. + */ + mode = BEFORE_HTML; + return; + } + /* + * A DOCTYPE token Parse error. + */ + errStrayDoctype(); + /* + * Ignore the token. 
+ */ + return; + } + + // [NOCPP[ + + private boolean isHtml4Doctype(String publicIdentifier) { + if (publicIdentifier != null + && (Arrays.binarySearch(TreeBuilder.HTML4_PUBLIC_IDS, + publicIdentifier) > -1)) { + return true; + } + return false; + } + + // ]NOCPP] + + public final void comment(@NoLength char[] buf, int start, int length) + throws SAXException { + needToDropLF = false; + // [NOCPP[ + if (firstCommentLocation == null) { + firstCommentLocation = new LocatorImpl(tokenizer); + } + if (!wantingComments) { + return; + } + // ]NOCPP] + if (!isInForeign()) { + switch (mode) { + case INITIAL: + case BEFORE_HTML: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + /* + * A comment token Append a Comment node to the Document + * object with the data attribute set to the data given in + * the comment token. + */ + appendCommentToDocument(buf, start, length); + return; + case AFTER_BODY: + /* + * A comment token Append a Comment node to the first + * element in the stack of open elements (the html element), + * with the data attribute set to the data given in the + * comment token. + */ + flushCharacters(); + appendComment(stack[0].node, buf, start, length); + return; + default: + break; + } + } + /* + * A comment token Append a Comment node to the current node with the + * data attribute set to the data given in the comment token. 
+ */ + flushCharacters(); + appendComment(stack[currentPtr].node, buf, start, length); + return; + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int, + * int) + */ + public final void characters(@Const @NoLength char[] buf, int start, int length) + throws SAXException { + // Note: Can't attach error messages to EOF in C++ yet + + // CPPONLY: if (tokenizer.isViewingXmlSource()) { + // CPPONLY: return; + // CPPONLY: } + if (needToDropLF) { + needToDropLF = false; + if (buf[start] == '\n') { + start++; + length--; + if (length == 0) { + return; + } + } + } + + // optimize the most common case + switch (mode) { + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + reconstructTheActiveFormattingElements(); + } + // fall through + case TEXT: + accumulateCharacters(buf, start, length); + return; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, start, length); + return; + default: + int end = start + length; + charactersloop: for (int i = start; i < end; i++) { + switch (buf[i]) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\u000C': + /* + * A character token that is one of one of U+0009 + * CHARACTER TABULATION, U+000A LINE FEED (LF), + * U+000C FORM FEED (FF), or U+0020 SPACE + */ + switch (mode) { + case INITIAL: + case BEFORE_HTML: + case BEFORE_HEAD: + /* + * Ignore the token. + */ + start = i + 1; + continue; + case IN_HEAD: + case IN_HEAD_NOSCRIPT: + case AFTER_HEAD: + case IN_COLUMN_GROUP: + case IN_FRAMESET: + case AFTER_FRAMESET: + /* + * Append the character to the current node. + */ + continue; + case FRAMESET_OK: + case IN_TEMPLATE: + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + + /* + * Reconstruct the active formatting + * elements, if any. 
+ */ + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + flushCharacters(); + reconstructTheActiveFormattingElements(); + } + /* + * Append the token's character to the + * current node. + */ + break charactersloop; + case IN_SELECT: + case IN_SELECT_IN_TABLE: + break charactersloop; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, i, 1); + start = i + 1; + continue; + case AFTER_BODY: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Reconstruct the active formatting + * elements, if any. + */ + flushCharacters(); + reconstructTheActiveFormattingElements(); + /* + * Append the token's character to the + * current node. + */ + continue; + } + default: + /* + * A character token that is not one of one of + * U+0009 CHARACTER TABULATION, U+000A LINE FEED + * (LF), U+000C FORM FEED (FF), or U+0020 SPACE + */ + switch (mode) { + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C\u201D."); + break; + case HTML: + // XXX figure out a way to report this in the Gecko View Source case + err("Non-space characters found without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_STRICT: + err("Non-space characters found without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_TRANSITIONAL: + err("Non-space characters found without seeing a doctype first. Expected \u201C\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal( + DocumentMode.QUIRKS_MODE, null, + null, false); + /* + * Then, switch to the root element mode of + * the tree construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. 
+ */ + i--; + continue; + case BEFORE_HTML: + /* + * Create an HTMLElement node with the tag + * name html, in the HTML namespace. Append + * it to the Document object. + */ + // No need to flush characters here, + // because there's nothing to flush. + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. + */ + i--; + continue; + case BEFORE_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * /Act as if a start tag token with the tag + * name "head" and no attributes had been + * seen, + */ + flushCharacters(); + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + /* + * then reprocess the current token. + * + * This will result in an empty head element + * being generated, with the current token + * being reprocessed in the "after head" + * insertion mode. + */ + i--; + continue; + case IN_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if an end tag token with the tag + * name "head" had been seen, + */ + flushCharacters(); + pop(); + mode = AFTER_HEAD; + /* + * and reprocess the current token. + */ + i--; + continue; + case IN_HEAD_NOSCRIPT: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Parse error. Act as if an end tag with + * the tag name "noscript" had been seen + */ + errNonSpaceInNoscriptInHead(); + flushCharacters(); + pop(); + mode = IN_HEAD; + /* + * and reprocess the current token. + */ + i--; + continue; + case AFTER_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if a start tag token with the tag + * name "body" and no attributes had been + * seen, + */ + flushCharacters(); + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + /* + * and then reprocess the current token. 
+ */ + i--; + continue; + case FRAMESET_OK: + framesetOk = false; + mode = IN_BODY; + i--; + continue; + case IN_TEMPLATE: + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Reconstruct the active formatting + * elements, if any. + */ + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + flushCharacters(); + reconstructTheActiveFormattingElements(); + } + /* + * Append the token's character to the + * current node. + */ + break charactersloop; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, i, 1); + start = i + 1; + continue; + case IN_COLUMN_GROUP: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if an end tag with the tag name + * "colgroup" had been seen, and then, if + * that token wasn't ignored, reprocess the + * current token. + */ + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + errNonSpaceInColgroupInFragment(); + start = i + 1; + continue; + } + flushCharacters(); + pop(); + mode = IN_TABLE; + i--; + continue; + case IN_SELECT: + case IN_SELECT_IN_TABLE: + break charactersloop; + case AFTER_BODY: + errNonSpaceAfterBody(); + fatal(); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + i--; + continue; + case IN_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceInFrameset(); + /* + * Ignore the token. + */ + start = i + 1; + continue; + case AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceAfterFrameset(); + /* + * Ignore the token. + */ + start = i + 1; + continue; + case AFTER_AFTER_BODY: + /* + * Parse error. + */ + errNonSpaceInTrailer(); + /* + * Switch back to the main mode and + * reprocess the token. 
+ */ + mode = framesetOk ? FRAMESET_OK : IN_BODY; + i--; + continue; + case AFTER_AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceInTrailer(); + /* + * Ignore the token. + */ + start = i + 1; + continue; + } + } + } + if (start < end) { + accumulateCharacters(buf, start, end - start); + } + } + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter() + */ + public void zeroOriginatingReplacementCharacter() throws SAXException { + if (mode == TEXT) { + accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1); + return; + } + if (currentPtr >= 0) { + if (isSpecialParentInForeign(stack[currentPtr])) { + return; + } + accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1); + } + } + + public final void eof() throws SAXException { + flushCharacters(); + // Note: Can't attach error messages to EOF in C++ yet + eofloop: for (;;) { + switch (mode) { + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("End of file seen without seeing a doctype first. Expected e.g. \u201C\u201D."); + break; + case HTML: + err("End of file seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_STRICT: + err("End of file seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_TRANSITIONAL: + err("End of file seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + /* + * Create an HTMLElement node with the tag name html, in the + * HTML namespace. 
Append it to the Document object. + */ + appendHtmlElementToDocumentAndPush(); + // XXX application cache manifest + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. + */ + continue; + case BEFORE_HEAD: + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + continue; + case IN_HEAD: + // [NOCPP[ + if (errorHandler != null && currentPtr > 1) { + errEofWithUnclosedElements(); + } + // ]NOCPP] + while (currentPtr > 0) { + popOnEof(); + } + mode = AFTER_HEAD; + continue; + case IN_HEAD_NOSCRIPT: + // [NOCPP[ + errEofWithUnclosedElements(); + // ]NOCPP] + while (currentPtr > 1) { + popOnEof(); + } + mode = IN_HEAD; + continue; + case AFTER_HEAD: + appendToCurrentNodeAndPushBodyElement(); + mode = IN_BODY; + continue; + case IN_TABLE_BODY: + case IN_ROW: + case IN_TABLE: + case IN_SELECT_IN_TABLE: + case IN_SELECT: + case IN_COLUMN_GROUP: + case FRAMESET_OK: + case IN_CAPTION: + case IN_CELL: + case IN_BODY: + // [NOCPP[ + // i > 0 to stop in time in the foreign fragment case. + openelementloop: for (int i = currentPtr; i > 0; i--) { + int group = stack[i].getGroup(); + switch (group) { + case DD_OR_DT: + case LI: + case P: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case BODY: + case HTML: + break; + default: + errEofWithUnclosedElements(); + break openelementloop; + } + } + // ]NOCPP] + + if (isTemplateModeStackEmpty()) { + break eofloop; + } + + // fall through to IN_TEMPLATE + case IN_TEMPLATE: + int eltPos = findLast("template"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break eofloop; + } + if (errorHandler != null) { + errUnclosedElements(eltPos, "template"); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + popTemplateMode(); + resetTheInsertionMode(); + + // Reprocess token. 
+ continue; + case TEXT: + // [NOCPP[ + if (errorHandler != null) { + errNoCheck("End of file seen when expecting text or an end tag."); + errListUnclosedStartTags(0); + } + // ]NOCPP] + // XXX mark script as already executed + if (originalMode == AFTER_HEAD) { + popOnEof(); + } + popOnEof(); + mode = originalMode; + continue; + case IN_FRAMESET: + // [NOCPP[ + if (errorHandler != null && currentPtr > 0) { + errEofWithUnclosedElements(); + } + // ]NOCPP] + break eofloop; + case AFTER_BODY: + case AFTER_FRAMESET: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + default: + // [NOCPP[ + if (currentPtr == 0) { // This silliness is here to poison + // buggy compiler optimizations in + // GWT + System.currentTimeMillis(); + } + // ]NOCPP] + break eofloop; + } + } + while (currentPtr > 0) { + popOnEof(); + } + if (!fragment) { + popOnEof(); + } + /* Stop parsing. */ + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#endTokenization() + */ + public final void endTokenization() throws SAXException { + formPointer = null; + headPointer = null; + deepTreeSurrogateParent = null; + templateModeStack = null; + if (stack != null) { + while (currentPtr > -1) { + stack[currentPtr].release(); + currentPtr--; + } + stack = null; + } + if (listOfActiveFormattingElements != null) { + while (listPtr > -1) { + if (listOfActiveFormattingElements[listPtr] != null) { + listOfActiveFormattingElements[listPtr].release(); + } + listPtr--; + } + listOfActiveFormattingElements = null; + } + // [NOCPP[ + idLocations.clear(); + // ]NOCPP] + charBuffer = null; + end(); + } + + public final void startTag(ElementName elementName, + HtmlAttributes attributes, boolean selfClosing) throws SAXException { + flushCharacters(); + + // [NOCPP[ + if (errorHandler != null) { + // ID uniqueness + @IdType String id = attributes.getId(); + if (id != null) { + LocatorImpl oldLoc = idLocations.get(id); + if (oldLoc != null) { + err("Duplicate ID \u201C" + id + "\u201D."); + 
errorHandler.warning(new SAXParseException( + "The first occurrence of ID \u201C" + id + + "\u201D was here.", oldLoc)); + } else { + idLocations.put(id, new LocatorImpl(tokenizer)); + } + } + } + // ]NOCPP] + + int eltPos; + needToDropLF = false; + starttagloop: for (;;) { + int group = elementName.getGroup(); + @Local String name = elementName.name; + if (isInForeign()) { + StackNode currentNode = stack[currentPtr]; + @NsUri String currNs = currentNode.ns; + if (!(currentNode.isHtmlIntegrationPoint() || (currNs == "http://www.w3.org/1998/Math/MathML" && ((currentNode.getGroup() == MI_MO_MN_MS_MTEXT && group != MGLYPH_OR_MALIGNMARK) || (currentNode.getGroup() == ANNOTATION_XML && group == SVG))))) { + switch (group) { + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case BODY: + case BR: + case RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR: + case DD_OR_DT: + case UL_OR_OL_OR_DL: + case EMBED: + case IMG: + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + case HEAD: + case HR: + case LI: + case META: + case NOBR: + case P: + case PRE_OR_LISTING: + case TABLE: + case FONT: + // re-check FONT to deal with the special case + if (!(group == FONT && !(attributes.contains(AttributeName.COLOR) + || attributes.contains(AttributeName.FACE) || attributes.contains(AttributeName.SIZE)))) { + errHtmlStartTagInForeignContext(name); + if (!fragment) { + while (!isSpecialParentInForeign(stack[currentPtr])) { + pop(); + } + continue starttagloop; + } // else fall thru + } + // else fall thru + default: + if ("http://www.w3.org/2000/svg" == currNs) { + attributes.adjustForSvg(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterSVG( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterSVG( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + } else { + attributes.adjustForMath(); + if (selfClosing) { + 
appendVoidElementToCurrentMayFosterMathML( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterMathML( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + } + } // switch + } // foreignObject / annotation-xml + } + switch (mode) { + case IN_TEMPLATE: + switch (group) { + case COL: + popTemplateMode(); + pushTemplateMode(IN_COLUMN_GROUP); + mode = IN_COLUMN_GROUP; + // Reprocess token. + continue; + case CAPTION: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + popTemplateMode(); + pushTemplateMode(IN_TABLE); + mode = IN_TABLE; + // Reprocess token. + continue; + case TR: + popTemplateMode(); + pushTemplateMode(IN_TABLE_BODY); + mode = IN_TABLE_BODY; + // Reprocess token. + continue; + case TD_OR_TH: + popTemplateMode(); + pushTemplateMode(IN_ROW); + mode = IN_ROW; + // Reprocess token. + continue; + case META: + checkMetaCharset(attributes); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case TITLE: + startTagTitleInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case NOFRAMES: + case STYLE: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + popTemplateMode(); + pushTemplateMode(IN_BODY); + mode = IN_BODY; + // Reprocess token. 
+ continue; + } + case IN_ROW: + switch (group) { + case TD_OR_TH: + clearStackBackTo(findLastOrRoot(TreeBuilder.TR)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_CELL; + insertMarker(); + attributes = null; // CPP + break starttagloop; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break starttagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + default: + // fall through to IN_TABLE + } + case IN_TABLE_BODY: + switch (group) { + case TR: + clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot()); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_ROW; + attributes = null; // CPP + break starttagloop; + case TD_OR_TH: + errStartTagInTableBody(name); + clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot()); + appendToCurrentNodeAndPushElement( + ElementName.TR, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_ROW; + continue; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot(); + if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } else { + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + continue; + } + default: + // fall through to IN_TABLE + } + case IN_TABLE: + intableloop: for (;;) { + switch (group) { + case CAPTION: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + insertMarker(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_CAPTION; + attributes = null; // CPP + break starttagloop; + case COLGROUP: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = 
IN_COLUMN_GROUP; + attributes = null; // CPP + break starttagloop; + case COL: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + ElementName.COLGROUP, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_COLUMN_GROUP; + continue starttagloop; + case TBODY_OR_THEAD_OR_TFOOT: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_TABLE_BODY; + attributes = null; // CPP + break starttagloop; + case TR: + case TD_OR_TH: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + ElementName.TBODY, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_TABLE_BODY; + continue starttagloop; + case TEMPLATE: + // fall through to IN_HEAD + break intableloop; + case TABLE: + errTableSeenWhileTableOpen(); + eltPos = findLastInTableScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment || isTemplateContents(); + break starttagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent("table")) { + errNoCheckUnclosedElementsOnStack(); + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue starttagloop; + case SCRIPT: + // XXX need to manage much more stuff + // here if + // supporting + // document.write() + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, elementName); + attributes = null; // CPP + break starttagloop; + case STYLE: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case INPUT: + errStartTagInTable(name); + if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "hidden", + attributes.getValue(AttributeName.TYPE))) { + break 
intableloop; + } + appendVoidElementToCurrent( + name, attributes, + formPointer); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case FORM: + if (formPointer != null || isTemplateContents()) { + errFormWhenFormOpen(); + break starttagloop; + } else { + errStartTagInTable(name); + appendVoidFormToCurrent(attributes); + attributes = null; // CPP + break starttagloop; + } + default: + errStartTagInTable(name); + // fall through to IN_BODY + break intableloop; + } + } + case IN_CAPTION: + switch (group) { + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + errStrayStartTag(name); + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break starttagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errNoCheckUnclosedElementsOnStack(); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_TABLE; + continue; + default: + // fall through to IN_BODY + } + case IN_CELL: + switch (group) { + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + eltPos = findLastInTableScopeTdTh(); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoCellToClose(); + break starttagloop; + } else { + closeTheCell(eltPos); + continue; + } + default: + // fall through to IN_BODY + } + case FRAMESET_OK: + switch (group) { + case FRAMESET: + if (mode == FRAMESET_OK) { + if (currentPtr == 0 || stack[1].getGroup() != BODY) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } else { + errFramesetStart(); + detachFromParent(stack[1].node); + while (currentPtr > 0) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_FRAMESET; + attributes = null; // CPP + break starttagloop; + } + } else { + errStrayStartTag(name); + break 
starttagloop; + } + // NOT falling through! + case PRE_OR_LISTING: + case LI: + case DD_OR_DT: + case BUTTON: + case MARQUEE_OR_APPLET: + case OBJECT: + case TABLE: + case AREA_OR_WBR: + case BR: + case EMBED: + case IMG: + case INPUT: + case KEYGEN: + case HR: + case TEXTAREA: + case XMP: + case IFRAME: + case SELECT: + if (mode == FRAMESET_OK + && !(group == INPUT && Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "hidden", + attributes.getValue(AttributeName.TYPE)))) { + framesetOk = false; + mode = IN_BODY; + } + // fall through to IN_BODY + default: + // fall through to IN_BODY + } + case IN_BODY: + inbodyloop: for (;;) { + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + case META: + case STYLE: + case SCRIPT: + case TITLE: + case TEMPLATE: + // Fall through to IN_HEAD + break inbodyloop; + case BODY: + if (currentPtr == 0 || stack[1].getGroup() != BODY || isTemplateContents()) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } + errFooSeenWhenFooOpen(name); + framesetOk = false; + if (mode == FRAMESET_OK) { + mode = IN_BODY; + } + if (addAttributesToBody(attributes)) { + attributes = null; // CPP + } + break starttagloop; + case P: + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case UL_OR_OL_OR_DL: + case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + implicitlyCloseP(); + if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) { + errHeadingWhenHeadingOpen(); + pop(); + } + appendToCurrentNodeAndPushElementMayFoster( + 
elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case FIELDSET: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + case PRE_OR_LISTING: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + needToDropLF = true; + attributes = null; // CPP + break starttagloop; + case FORM: + if (formPointer != null && !isTemplateContents()) { + errFormWhenFormOpen(); + break starttagloop; + } else { + implicitlyCloseP(); + appendToCurrentNodeAndPushFormElementMayFoster(attributes); + attributes = null; // CPP + break starttagloop; + } + case LI: + case DD_OR_DT: + eltPos = currentPtr; + for (;;) { + StackNode node = stack[eltPos]; // weak + // ref + if (node.getGroup() == group) { // LI or + // DD_OR_DT + generateImpliedEndTagsExceptFor(node.name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElementsImplied(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break; + } else if (eltPos == 0 || (node.isSpecial() + && (node.ns != "http://www.w3.org/1999/xhtml" + || (node.name != "p" + && node.name != "address" + && node.name != "div")))) { + break; + } + eltPos--; + } + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case PLAINTEXT: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + tokenizer.setStateAndEndTagExpectation( + Tokenizer.PLAINTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case A: + int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a"); + if (activeAPos != -1) { + errFooSeenWhenFooOpen(name); + StackNode activeA = listOfActiveFormattingElements[activeAPos]; + activeA.retain(); + adoptionAgencyEndTag("a"); + removeFromStack(activeA); + 
activeAPos = findInListOfActiveFormattingElements(activeA); + if (activeAPos != -1) { + removeFromListOfActiveFormattingElements(activeAPos); + } + activeA.release(); + } + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case FONT: + reconstructTheActiveFormattingElements(); + maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes); + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case NOBR: + reconstructTheActiveFormattingElements(); + if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) { + errFooSeenWhenFooOpen(name); + adoptionAgencyEndTag("nobr"); + reconstructTheActiveFormattingElements(); + } + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case BUTTON: + eltPos = findLastInScope(name); + if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) { + errFooSeenWhenFooOpen(name); + generateImpliedEndTags(); + if (errorHandler != null + && !isCurrent(name)) { + errUnclosedElementsImplied(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + continue starttagloop; + } else { + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + } + case OBJECT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + insertMarker(); + attributes = null; // CPP + break starttagloop; + case MARQUEE_OR_APPLET: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + insertMarker(); + attributes = 
null; // CPP + break starttagloop; + case TABLE: + // The only quirk. Blame Hixie and + // Acid2. + if (!quirks) { + implicitlyCloseP(); + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + mode = IN_TABLE; + attributes = null; // CPP + break starttagloop; + case BR: + case EMBED: + case AREA_OR_WBR: + reconstructTheActiveFormattingElements(); + // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK + // CPPONLY: case MENUITEM: + case PARAM_OR_SOURCE_OR_TRACK: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case HR: + implicitlyCloseP(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case IMAGE: + errImage(); + elementName = ElementName.IMG; + continue starttagloop; + case IMG: + case KEYGEN: + case INPUT: + reconstructTheActiveFormattingElements(); + appendVoidElementToCurrentMayFoster( + name, attributes, + formPointer); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case ISINDEX: + errIsindex(); + if (formPointer != null && !isTemplateContents()) { + break starttagloop; + } + implicitlyCloseP(); + HtmlAttributes formAttrs = new HtmlAttributes(0); + int actionIndex = attributes.getIndex(AttributeName.ACTION); + if (actionIndex > -1) { + formAttrs.addAttribute( + AttributeName.ACTION, + attributes.getValueNoBoundsCheck(actionIndex) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , attributes.getLineNoBoundsCheck(actionIndex) + ); + } + appendToCurrentNodeAndPushFormElementMayFoster(formAttrs); + appendVoidElementToCurrentMayFoster( + ElementName.HR, + HtmlAttributes.EMPTY_ATTRIBUTES); + appendToCurrentNodeAndPushElementMayFoster( + ElementName.LABEL, + HtmlAttributes.EMPTY_ATTRIBUTES); + int promptIndex = attributes.getIndex(AttributeName.PROMPT); + if (promptIndex > -1) { + @Auto char[] prompt = 
Portability.newCharArrayFromString(attributes.getValueNoBoundsCheck(promptIndex)); + appendCharacters(stack[currentPtr].node, + prompt, 0, prompt.length); + } else { + appendIsindexPrompt(stack[currentPtr].node); + } + HtmlAttributes inputAttributes = new HtmlAttributes( + 0); + inputAttributes.addAttribute( + AttributeName.NAME, + Portability.newStringFromLiteral("isindex") + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , tokenizer.getLineNumber() + ); + for (int i = 0; i < attributes.getLength(); i++) { + AttributeName attributeQName = attributes.getAttributeNameNoBoundsCheck(i); + if (AttributeName.NAME == attributeQName + || AttributeName.PROMPT == attributeQName) { + attributes.releaseValue(i); + } else if (AttributeName.ACTION != attributeQName) { + inputAttributes.addAttribute( + attributeQName, + attributes.getValueNoBoundsCheck(i) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , attributes.getLineNoBoundsCheck(i) + ); + } + } + attributes.clearWithoutReleasingContents(); + appendVoidElementToCurrentMayFoster( + "input", + inputAttributes, formPointer); + pop(); // label + appendVoidElementToCurrentMayFoster( + ElementName.HR, + HtmlAttributes.EMPTY_ATTRIBUTES); + pop(); // form + + if (!isTemplateContents()) { + formPointer = null; + } + + selfClosing = false; + // Portability.delete(formAttrs); + // Portability.delete(inputAttributes); + // Don't delete attributes, they are deleted + // later + break starttagloop; + case TEXTAREA: + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RCDATA, elementName); + originalMode = mode; + mode = TEXT; + needToDropLF = true; + attributes = null; // CPP + break starttagloop; + case XMP: + implicitlyCloseP(); + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + 
tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case NOSCRIPT: + if (!scriptingEnabled) { + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + } else { + // fall through + } + case NOFRAMES: + case IFRAME: + case NOEMBED: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case SELECT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + switch (mode) { + case IN_TABLE: + case IN_CAPTION: + case IN_COLUMN_GROUP: + case IN_TABLE_BODY: + case IN_ROW: + case IN_CELL: + mode = IN_SELECT_IN_TABLE; + break; + default: + mode = IN_SELECT; + break; + } + attributes = null; // CPP + break starttagloop; + case OPTGROUP: + case OPTION: + if (isCurrent("option")) { + pop(); + } + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case RB_OR_RTC: + eltPos = findLastInScope("ruby"); + if (eltPos != NOT_FOUND_ON_STACK) { + generateImpliedEndTags(); + } + if (eltPos != currentPtr) { + if (eltPos == NOT_FOUND_ON_STACK) { + errStartTagSeenWithoutRuby(name); + } else { + errUnclosedChildrenInRuby(); + } + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case RT_OR_RP: + eltPos = findLastInScope("ruby"); + if (eltPos != NOT_FOUND_ON_STACK) { + generateImpliedEndTagsExceptFor("rtc"); + } + if (eltPos != currentPtr) { + if (!isCurrent("rtc")) { + if (eltPos == NOT_FOUND_ON_STACK) { + errStartTagSeenWithoutRuby(name); + } else { + errUnclosedChildrenInRuby(); + } + } + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; 
// CPP + break starttagloop; + case MATH: + reconstructTheActiveFormattingElements(); + attributes.adjustForMath(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterMathML( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterMathML( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + case SVG: + reconstructTheActiveFormattingElements(); + attributes.adjustForSvg(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterSVG( + elementName, + attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterSVG( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + case FRAME: + case FRAMESET: + case HEAD: + errStrayStartTag(name); + break starttagloop; + case OUTPUT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + default: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + } + } + case IN_HEAD: + inheadloop: for (;;) { + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case META: + // Fall through to IN_HEAD_NOSCRIPT + break inheadloop; + case TITLE: + startTagTitleInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case NOSCRIPT: + if (scriptingEnabled) { + appendToCurrentNodeAndPushElement( + 
elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + } else { + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + mode = IN_HEAD_NOSCRIPT; + } + attributes = null; // CPP + break starttagloop; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case HEAD: + /* Parse error. */ + errFooSeenWhenFooOpen(name); + /* Ignore the token. */ + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + pop(); + mode = AFTER_HEAD; + continue starttagloop; + } + } + case IN_HEAD_NOSCRIPT: + switch (group) { + case HTML: + // XXX did Hixie really mean to omit "base" + // here? + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case META: + checkMetaCharset(attributes); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case HEAD: + errFooSeenWhenFooOpen(name); + break starttagloop; + case NOSCRIPT: + errFooSeenWhenFooOpen(name); + break starttagloop; + default: + errBadStartTagInHead(name); + pop(); + mode = IN_HEAD; + continue; + } + case 
IN_COLUMN_GROUP: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case COL: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + if (currentPtr == 0 || stack[currentPtr].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errGarbageInColgroup(); + break starttagloop; + } + pop(); + mode = IN_TABLE; + continue; + } + case IN_SELECT_IN_TABLE: + switch (group) { + case CAPTION: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + case TABLE: + errStartTagWithSelectOpen(name); + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break starttagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375 + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + default: + // fall through to IN_SELECT + } + case IN_SELECT: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case OPTION: + if (isCurrent("option")) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case OPTGROUP: + if (isCurrent("option")) { + pop(); + } + if (isCurrent("optgroup")) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case SELECT: + errStartSelectWhereEndSelectExpected(); + eltPos = findLastInTableScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + errNoSelectInTableScope(); + break starttagloop; + } else { + 
while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break starttagloop; + } + case INPUT: + case TEXTAREA: + case KEYGEN: + errStartTagWithSelectOpen(name); + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break starttagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case AFTER_BODY: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + default: + errStrayStartTag(name); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + } + case IN_FRAMESET: + switch (group) { + case FRAMESET: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case FRAME: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + default: + // fall through to AFTER_FRAMESET + } + case AFTER_FRAMESET: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case NOFRAMES: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case INITIAL: + /* + * Parse error. 
+ */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("Start tag seen without seeing a doctype first. Expected e.g. \u201C\u201D."); + break; + case HTML: + // ]NOCPP] + errStartTagWithoutDoctype(); + // [NOCPP[ + break; + case HTML401_STRICT: + err("Start tag seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_TRANSITIONAL: + err("Start tag seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + switch (group) { + case HTML: + // optimize error check and streaming SAX by + // hoisting + // "html" handling here. + if (attributes == HtmlAttributes.EMPTY_ATTRIBUTES) { + // This has the right magic side effect + // that + // it + // makes attributes in SAX Tree mutable. + appendHtmlElementToDocumentAndPush(); + } else { + appendHtmlElementToDocumentAndPush(attributes); + } + // XXX application cache should fire here + mode = BEFORE_HEAD; + attributes = null; // CPP + break starttagloop; + default: + /* + * Create an HTMLElement node with the tag name + * html, in the HTML namespace. Append it to the + * Document object. + */ + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. + */ + continue; + } + case BEFORE_HEAD: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case HEAD: + /* + * A start tag whose tag name is "head" + * + * Create an element for the token. + * + * Set the head element pointer to this new element + * node. 
+ * + * Append the new element to the current node and + * push it onto the stack of open elements. + */ + appendToCurrentNodeAndPushHeadElement(attributes); + /* + * Change the insertion mode to "in head". + */ + mode = IN_HEAD; + attributes = null; // CPP + break starttagloop; + default: + /* + * Any other start tag token + * + * Act as if a start tag token with the tag name + * "head" and no attributes had been seen, + */ + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + /* + * then reprocess the current token. + * + * This will result in an empty head element being + * generated, with the current token being + * reprocessed in the "after head" insertion mode. + */ + continue; + } + case AFTER_HEAD: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BODY: + if (attributes.getLength() == 0) { + // This has the right magic side effect + // that + // it + // makes attributes in SAX Tree mutable. 
+ appendToCurrentNodeAndPushBodyElement(); + } else { + appendToCurrentNodeAndPushBodyElement(attributes); + } + framesetOk = false; + mode = IN_BODY; + attributes = null; // CPP + break starttagloop; + case FRAMESET: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_FRAMESET; + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + StackNode headOnStack = stack[currentPtr]; + startTagTemplateInHead(elementName, attributes); + removeFromStack(headOnStack); + attributes = null; // CPP + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + pop(); // head + attributes = null; // CPP + break starttagloop; + case META: + errFooBetweenHeadAndBody(name); + checkMetaCharset(attributes); + pushHeadPointerOntoStack(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + pop(); // head + attributes = null; // CPP + break starttagloop; + case SCRIPT: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, elementName); + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case TITLE: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + 
tokenizer.setStateAndEndTagExpectation( + Tokenizer.RCDATA, elementName); + attributes = null; // CPP + break starttagloop; + case HEAD: + errStrayStartTag(name); + break starttagloop; + default: + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + continue; + } + case AFTER_AFTER_BODY: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + default: + errStrayStartTag(name); + fatal(); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + } + case AFTER_AFTER_FRAMESET: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case NOFRAMES: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case TEXT: + assert false; + break starttagloop; // Avoid infinite loop if the assertion + // fails + } + } + if (selfClosing) { + errSelfClosing(); + } + // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) { + // CPPONLY: Portability.delete(attributes); + // CPPONLY: } + } + + private void startTagTitleInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, elementName); + } + + private void startTagGenericRawText(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, elementName); + } + + private void startTagScriptInHead(ElementName elementName, HtmlAttributes attributes) 
throws SAXException { + // XXX need to manage much more stuff here if supporting document.write() + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, elementName); + } + + private void startTagTemplateInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElement(elementName, attributes); + insertMarker(); + framesetOk = false; + originalMode = mode; + mode = IN_TEMPLATE; + pushTemplateMode(IN_TEMPLATE); + } + + private boolean isTemplateContents() { + return TreeBuilder.NOT_FOUND_ON_STACK != findLast("template"); + } + + private boolean isTemplateModeStackEmpty() { + return templateModePtr == -1; + } + + private boolean isSpecialParentInForeign(StackNode stackNode) { + @NsUri String ns = stackNode.ns; + return ("http://www.w3.org/1999/xhtml" == ns) + || (stackNode.isHtmlIntegrationPoint()) + || (("http://www.w3.org/1998/Math/MathML" == ns) && (stackNode.getGroup() == MI_MO_MN_MS_MTEXT)); + } + + /** + * + *

+ * C++ memory note: The return value must be released. + * + * @return + * @throws SAXException + * @throws StopSniffingException + */ + public static String extractCharsetFromContent(String attributeValue + // CPPONLY: , TreeBuilder tb + ) { + // This is a bit ugly. Converting the string to char array in order to + // make the portability layer smaller. + int charsetState = CHARSET_INITIAL; + int start = -1; + int end = -1; + @Auto char[] buffer = Portability.newCharArrayFromString(attributeValue); + + charsetloop: for (int i = 0; i < buffer.length; i++) { + char c = buffer[i]; + switch (charsetState) { + case CHARSET_INITIAL: + switch (c) { + case 'c': + case 'C': + charsetState = CHARSET_C; + continue; + default: + continue; + } + case CHARSET_C: + switch (c) { + case 'h': + case 'H': + charsetState = CHARSET_H; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_H: + switch (c) { + case 'a': + case 'A': + charsetState = CHARSET_A; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_A: + switch (c) { + case 'r': + case 'R': + charsetState = CHARSET_R; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_R: + switch (c) { + case 's': + case 'S': + charsetState = CHARSET_S; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_S: + switch (c) { + case 'e': + case 'E': + charsetState = CHARSET_E; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_E: + switch (c) { + case 't': + case 'T': + charsetState = CHARSET_T; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_T: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + continue; + case '=': + charsetState = CHARSET_EQUALS; + continue; + default: + return null; + } + case CHARSET_EQUALS: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + 
continue; + case '\'': + start = i + 1; + charsetState = CHARSET_SINGLE_QUOTED; + continue; + case '\"': + start = i + 1; + charsetState = CHARSET_DOUBLE_QUOTED; + continue; + default: + start = i; + charsetState = CHARSET_UNQUOTED; + continue; + } + case CHARSET_SINGLE_QUOTED: + switch (c) { + case '\'': + end = i; + break charsetloop; + default: + continue; + } + case CHARSET_DOUBLE_QUOTED: + switch (c) { + case '\"': + end = i; + break charsetloop; + default: + continue; + } + case CHARSET_UNQUOTED: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + case ';': + end = i; + break charsetloop; + default: + continue; + } + } + } + String charset = null; + if (start != -1) { + if (end == -1) { + end = buffer.length; + } + charset = Portability.newStringFromBuffer(buffer, start, end + - start + // CPPONLY: , tb + ); + } + return charset; + } + + private void checkMetaCharset(HtmlAttributes attributes) + throws SAXException { + String charset = attributes.getValue(AttributeName.CHARSET); + if (charset != null) { + if (tokenizer.internalEncodingDeclaration(charset)) { + requestSuspension(); + return; + } + return; + } + if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "content-type", + attributes.getValue(AttributeName.HTTP_EQUIV))) { + return; + } + String content = attributes.getValue(AttributeName.CONTENT); + if (content != null) { + String extract = TreeBuilder.extractCharsetFromContent(content + // CPPONLY: , this + ); + // remember not to return early without releasing the string + if (extract != null) { + if (tokenizer.internalEncodingDeclaration(extract)) { + requestSuspension(); + } + } + Portability.releaseString(extract); + } + } + + public final void endTag(ElementName elementName) throws SAXException { + flushCharacters(); + needToDropLF = false; + int eltPos; + int group = elementName.getGroup(); + @Local String name = elementName.name; + endtagloop: for (;;) { + if (isInForeign()) { + if 
(stack[currentPtr].name != name) { + if (currentPtr == 0) { + errStrayEndTag(name); + } else { + errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName); + } + } + eltPos = currentPtr; + for (;;) { + if (eltPos == 0) { + assert fragment: "We can get this close to the root of the stack in foreign content only in the fragment case."; + break endtagloop; + } + if (stack[eltPos].name == name) { + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } + if (stack[--eltPos].ns == "http://www.w3.org/1999/xhtml") { + break; + } + } + } + switch (mode) { + case IN_TEMPLATE: + switch (group) { + case TEMPLATE: + // fall through to IN_HEAD + break; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_ROW: + switch (group) { + case TR: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + break endtagloop; + case TABLE: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + case TBODY_OR_THEAD_OR_TFOOT: + if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TD_OR_TH: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_TABLE + } + case IN_TABLE_BODY: + switch (group) { + case TBODY_OR_THEAD_OR_TFOOT: + eltPos = findLastOrRoot(name); + if (eltPos == 0) { + errStrayEndTag(name); + break endtagloop; + } + 
clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + break endtagloop; + case TABLE: + eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot(); + if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + continue; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_TABLE + } + case IN_TABLE: + switch (group) { + case TABLE: + eltPos = findLast("table"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break endtagloop; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + case TEMPLATE: + // fall through to IN_HEAD + break; + default: + errStrayEndTag(name); + // fall through to IN_BODY + } + case IN_CAPTION: + switch (group) { + case CAPTION: + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_TABLE; + break endtagloop; + case TABLE: + errTableClosedWhileCaptionOpen(); + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + 
clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_TABLE; + continue; + case BODY: + case COL: + case COLGROUP: + case HTML: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_BODY + } + case IN_CELL: + switch (group) { + case TD_OR_TH: + eltPos = findLastInTableScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_ROW; + break endtagloop; + case TABLE: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) { + assert name == "tbody" || name == "tfoot" || name == "thead" || fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + closeTheCell(findLastInTableScopeTdTh()); + continue; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_BODY + } + case FRAMESET_OK: + case IN_BODY: + switch (group) { + case BODY: + if (!isSecondOnStackBody()) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + assert currentPtr >= 1; + if (errorHandler != null) { + uncloseloop1: for (int i = 2; i <= currentPtr; i++) { + switch (stack[i].getGroup()) { + case DD_OR_DT: + case LI: + case OPTGROUP: + case OPTION: // is this possible? 
+ case P: + case RB_OR_RTC: + case RT_OR_RP: + case TD_OR_TH: + case TBODY_OR_THEAD_OR_TFOOT: + break; + default: + errEndWithUnclosedElements(name); + break uncloseloop1; + } + } + } + mode = AFTER_BODY; + break endtagloop; + case HTML: + if (!isSecondOnStackBody()) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + if (errorHandler != null) { + uncloseloop2: for (int i = 0; i <= currentPtr; i++) { + switch (stack[i].getGroup()) { + case DD_OR_DT: + case LI: + case P: + case RB_OR_RTC: + case RT_OR_RP: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case BODY: + case HTML: + break; + default: + errEndWithUnclosedElements(name); + break uncloseloop2; + } + } + } + mode = AFTER_BODY; + continue; + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case UL_OR_OL_OR_DL: + case PRE_OR_LISTING: + case FIELDSET: + case BUTTON: + case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case FORM: + if (!isTemplateContents()) { + if (formPointer == null) { + errStrayEndTag(name); + break endtagloop; + } + formPointer = null; + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + removeFromStack(eltPos); + break endtagloop; + } else { + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + 
errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } + case P: + eltPos = findLastInButtonScope("p"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen("p"); + // XXX Can the 'in foreign' case happen anymore? + if (isInForeign()) { + errHtmlStartTagInForeignContext(name); + // Check for currentPtr for the fragment + // case. + while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + pop(); + } + } + appendVoidElementToCurrentMayFoster( + elementName, + HtmlAttributes.EMPTY_ATTRIBUTES); + break endtagloop; + } + generateImpliedEndTagsExceptFor("p"); + assert eltPos != TreeBuilder.NOT_FOUND_ON_STACK; + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + case LI: + eltPos = findLastInListScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen(name); + } else { + generateImpliedEndTagsExceptFor(name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case DD_OR_DT: + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen(name); + } else { + generateImpliedEndTagsExceptFor(name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + eltPos = findLastInScopeHn(); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case OBJECT: + case MARQUEE_OR_APPLET: + 
eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + } + break endtagloop; + case BR: + errEndTagBr(); + if (isInForeign()) { + // XXX can this happen anymore? + errHtmlStartTagInForeignContext(name); + // Check for currentPtr for the fragment + // case. + while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + pop(); + } + } + reconstructTheActiveFormattingElements(); + appendVoidElementToCurrentMayFoster( + elementName, + HtmlAttributes.EMPTY_ATTRIBUTES); + break endtagloop; + case TEMPLATE: + // fall through to IN_HEAD; + break; + case AREA_OR_WBR: + // CPPONLY: case MENUITEM: + case PARAM_OR_SOURCE_OR_TRACK: + case EMBED: + case IMG: + case IMAGE: + case INPUT: + case KEYGEN: // XXX?? + case HR: + case ISINDEX: + case IFRAME: + case NOEMBED: // XXX??? + case NOFRAMES: // XXX?? + case SELECT: + case TABLE: + case TEXTAREA: // XXX?? 
+ errStrayEndTag(name); + break endtagloop; + case NOSCRIPT: + if (scriptingEnabled) { + errStrayEndTag(name); + break endtagloop; + } else { + // fall through + } + case A: + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case FONT: + case NOBR: + if (adoptionAgencyEndTag(name)) { + break endtagloop; + } + // else handle like any other tag + default: + if (isCurrent(name)) { + pop(); + break endtagloop; + } + + eltPos = currentPtr; + for (;;) { + StackNode node = stack[eltPos]; + if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) { + generateImpliedEndTags(); + if (errorHandler != null + && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } else if (eltPos == 0 || node.isSpecial()) { + errStrayEndTag(name); + break endtagloop; + } + eltPos--; + } + } + case IN_HEAD: + switch (group) { + case HEAD: + pop(); + mode = AFTER_HEAD; + break endtagloop; + case BR: + case HTML: + case BODY: + pop(); + mode = AFTER_HEAD; + continue; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_HEAD_NOSCRIPT: + switch (group) { + case NOSCRIPT: + pop(); + mode = IN_HEAD; + break endtagloop; + case BR: + errStrayEndTag(name); + pop(); + mode = IN_HEAD; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_COLUMN_GROUP: + switch (group) { + case COLGROUP: + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + assert fragment || isTemplateContents(); + errGarbageInColgroup(); + break endtagloop; + } + pop(); + mode = IN_TABLE; + break endtagloop; + case COL: + errStrayEndTag(name); + break endtagloop; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + assert fragment || isTemplateContents(); + errGarbageInColgroup(); 
+ break endtagloop; + } + pop(); + mode = IN_TABLE; + continue; + } + case IN_SELECT_IN_TABLE: + switch (group) { + case CAPTION: + case TABLE: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + errEndTagSeenWithSelectOpen(name); + if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) { + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break endtagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375 + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + } else { + break endtagloop; + } + default: + // fall through to IN_SELECT + } + case IN_SELECT: + switch (group) { + case OPTION: + if (isCurrent("option")) { + pop(); + break endtagloop; + } else { + errStrayEndTag(name); + break endtagloop; + } + case OPTGROUP: + if (isCurrent("option") + && "optgroup" == stack[currentPtr - 1].name) { + pop(); + } + if (isCurrent("optgroup")) { + pop(); + } else { + errStrayEndTag(name); + } + break endtagloop; + case SELECT: + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + errStrayEndTag(name); + break endtagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break endtagloop; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_BODY: + switch (group) { + case HTML: + if (fragment) { + errStrayEndTag(name); + break endtagloop; + } else { + mode = AFTER_AFTER_BODY; + break endtagloop; + } + default: + errEndTagAfterBody(); + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; + continue; + } + case IN_FRAMESET: + switch (group) { + case FRAMESET: + if (currentPtr == 0) { + assert fragment; + errStrayEndTag(name); + break endtagloop; + } + pop(); + if ((!fragment) && !isCurrent("frameset")) { + mode = AFTER_FRAMESET; + } + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_FRAMESET: + switch (group) { + case HTML: + mode = AFTER_AFTER_FRAMESET; + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("End tag seen without seeing a doctype first. Expected e.g. \u201C\u201D."); + break; + case HTML: + // ]NOCPP] + errEndTagSeenWithoutDoctype(); + // [NOCPP[ + break; + case HTML401_STRICT: + err("End tag seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case HTML401_TRANSITIONAL: + err("End tag seen without seeing a doctype first. Expected \u201C\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + switch (group) { + case HEAD: + case BR: + case HTML: + case BODY: + /* + * Create an HTMLElement node with the tag name + * html, in the HTML namespace. Append it to the + * Document object. + */ + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. 
+ */ + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case BEFORE_HEAD: + switch (group) { + case HEAD: + case BR: + case HTML: + case BODY: + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_HEAD: + switch (group) { + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + case HTML: + case BODY: + case BR: + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_AFTER_BODY: + errStrayEndTag(name); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + case AFTER_AFTER_FRAMESET: + errStrayEndTag(name); + break endtagloop; + case TEXT: + // XXX need to manage insertion point here + pop(); + if (originalMode == AFTER_HEAD) { + silentPop(); + } + mode = originalMode; + break endtagloop; + } + } // endtagloop + } + + private void endTagTemplateInHead() throws SAXException { + int eltPos = findLast("template"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag("template"); + return; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent("template")) { + errUnclosedElements(eltPos, "template"); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + popTemplateMode(); + resetTheInsertionMode(); + } + + private int findLastInTableScopeOrRootTemplateTbodyTheadTfoot() { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].getGroup() == TreeBuilder.TBODY_OR_THEAD_OR_TFOOT || + stack[i].getGroup() == TreeBuilder.TEMPLATE) { + return i; + } + } + return 0; + } + + private int findLast(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) { + return i; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInTableScope(@Local 
String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml") { + if (stack[i].name == name) { + return i; + } else if (stack[i].name == "table" || stack[i].name == "template") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + }

    /**
     * Finds the topmost HTML-namespace element called <code>name</code> that
     * is in button scope. The walk goes from the current node down to, but
     * not including, index 0, and gives up at an HTML <code>button</code> or
     * at any node reporting <code>isScoping()</code>.
     *
     * Names are compared by identity, as everywhere else in this class
     * (presumably <code>@Local</code> strings are interned -- confirm).
     *
     * @param name the local name to look for
     * @return the stack index, or <code>NOT_FOUND_ON_STACK</code>
     */
    private int findLastInButtonScope(@Local String name) {
        int pos = currentPtr;
        while (pos > 0) {
            boolean isHtml = stack[pos].ns == "http://www.w3.org/1999/xhtml";
            if (isHtml && stack[pos].name == name) {
                return pos;
            }
            if ((isHtml && stack[pos].name == "button")
                    || stack[pos].isScoping()) {
                return TreeBuilder.NOT_FOUND_ON_STACK;
            }
            pos--;
        }
        return TreeBuilder.NOT_FOUND_ON_STACK;
    }

    /**
     * Finds the topmost HTML-namespace element called <code>name</code> that
     * is in scope: the walk stops at the first non-matching node that
     * reports <code>isScoping()</code>. Index 0 is never examined.
     *
     * @param name the local name to look for
     * @return the stack index, or <code>NOT_FOUND_ON_STACK</code>
     */
    private int findLastInScope(@Local String name) {
        int pos = currentPtr;
        while (pos > 0) {
            if (stack[pos].ns == "http://www.w3.org/1999/xhtml"
                    && stack[pos].name == name) {
                return pos;
            }
            if (stack[pos].isScoping()) {
                break;
            }
            pos--;
        }
        return TreeBuilder.NOT_FOUND_ON_STACK;
    }

    /**
     * Finds the topmost HTML-namespace element called <code>name</code> that
     * is in list item scope. In addition to scoping nodes, an HTML
     * <code>ul</code> or <code>ol</code> ends the search. Index 0 is never
     * examined.
     *
     * @param name the local name to look for
     * @return the stack index, or <code>NOT_FOUND_ON_STACK</code>
     */
    private int findLastInListScope(@Local String name) {
        int pos = currentPtr;
        while (pos > 0) {
            boolean isHtml = stack[pos].ns == "http://www.w3.org/1999/xhtml";
            if (isHtml && stack[pos].name == name) {
                return pos;
            }
            boolean isListContainer = isHtml
                    && (stack[pos].name == "ul" || stack[pos].name == "ol");
            if (isListContainer || stack[pos].isScoping()) {
                return TreeBuilder.NOT_FOUND_ON_STACK;
            }
            pos--;
        }
        return TreeBuilder.NOT_FOUND_ON_STACK;
    }

    /**
     * Finds the topmost node whose group is
     * <code>H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6</code> and that is in scope.
     * Index 0 is never examined.
     *
     * @return the stack index, or <code>NOT_FOUND_ON_STACK</code>
     */
    private int findLastInScopeHn() {
        int pos = currentPtr;
        while (pos > 0) {
            if (stack[pos].getGroup() == TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
                return pos;
            }
            if (stack[pos].isScoping()) {
                break;
            }
            pos--;
        }
        return TreeBuilder.NOT_FOUND_ON_STACK;
    }

+ private void generateImpliedEndTagsExceptFor(@Local String name) + throws SAXException { + for (;;) { + StackNode node = stack[currentPtr]; + switch (node.getGroup()) { + case P: + case LI: + case
DD_OR_DT: + case OPTION: + case OPTGROUP: + case RB_OR_RTC: + case RT_OR_RP: + if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) { + return; + } + pop(); + continue; + default: + return; + } + } + } + + private void generateImpliedEndTags() throws SAXException { + for (;;) { + switch (stack[currentPtr].getGroup()) { + case P: + case LI: + case DD_OR_DT: + case OPTION: + case OPTGROUP: + case RB_OR_RTC: + case RT_OR_RP: + pop(); + continue; + default: + return; + } + } + } + + private boolean isSecondOnStackBody() { + return currentPtr >= 1 && stack[1].getGroup() == TreeBuilder.BODY; + } + + private void documentModeInternal(DocumentMode m, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + + if (isSrcdocDocument) { + // Srcdoc documents are always rendered in standards mode. + quirks = false; + if (documentModeHandler != null) { + documentModeHandler.documentMode( + DocumentMode.STANDARDS_MODE + // [NOCPP[ + , null, null, false + // ]NOCPP] + ); + } + return; + } + + quirks = (m == DocumentMode.QUIRKS_MODE); + if (documentModeHandler != null) { + documentModeHandler.documentMode( + m + // [NOCPP[ + , publicIdentifier, systemIdentifier, + html4SpecificAdditionalErrorChecks + // ]NOCPP] + ); + } + // [NOCPP[ + documentMode(m, publicIdentifier, systemIdentifier, + html4SpecificAdditionalErrorChecks); + // ]NOCPP] + } + + private boolean isAlmostStandards(String publicIdentifier, + String systemIdentifier) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd xhtml 1.0 transitional//en", publicIdentifier)) { + return true; + } + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd xhtml 1.0 frameset//en", publicIdentifier)) { + return true; + } + if (systemIdentifier != null) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) { + return true; + } + if 
(Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) { + return true; + } + } + return false; + } + + private boolean isQuirky(@Local String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) { + if (forceQuirks) { + return true; + } + if (name != HTML_LOCAL) { + return true; + } + if (publicIdentifier != null) { + for (int i = 0; i < TreeBuilder.QUIRKY_PUBLIC_IDS.length; i++) { + if (Portability.lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString( + TreeBuilder.QUIRKY_PUBLIC_IDS[i], publicIdentifier)) { + return true; + } + } + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3o//dtd w3 html strict 3.0//en//", publicIdentifier) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-/w3c/dtd html 4.0 transitional/en", + publicIdentifier) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "html", publicIdentifier)) { + return true; + } + } + if (systemIdentifier == null) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) { + return true; + } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) { + return true; + } + } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", + systemIdentifier)) { + return true; + } + return false; + } + + private void closeTheCell(int eltPos) throws SAXException { + generateImpliedEndTags(); + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElementsCell(eltPos); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_ROW; + return; + } + + private int findLastInTableScopeTdTh() { + for (int i = currentPtr; i > 0; i--) { + @Local String name = stack[i].name; + if (stack[i].ns == 
"http://www.w3.org/1999/xhtml") { + if ("td" == name || "th" == name) { + return i; + } else if (name == "table" || name == "template") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private void clearStackBackTo(int eltPos) throws SAXException { + int eltGroup = stack[eltPos].getGroup(); + while (currentPtr > eltPos) { // > not >= intentional + if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" + && stack[currentPtr].getGroup() == TEMPLATE + && (eltGroup == TABLE || eltGroup == TBODY_OR_THEAD_OR_TFOOT|| eltGroup == TR || eltPos == 0)) { + return; + } + pop(); + } + } + + private void resetTheInsertionMode() { + StackNode node; + @Local String name; + @NsUri String ns; + for (int i = currentPtr; i >= 0; i--) { + node = stack[i]; + name = node.name; + ns = node.ns; + if (i == 0) { + if (!(contextNamespace == "http://www.w3.org/1999/xhtml" && (contextName == "td" || contextName == "th"))) { + if (fragment) { + // Make sure we are parsing a fragment otherwise the context element doesn't make sense. + name = contextName; + ns = contextNamespace; + } + } else { + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; // XXX from Hixie's email + return; + } + } + if ("select" == name) { + int ancestorIndex = i; + while (ancestorIndex > 0) { + StackNode ancestor = stack[ancestorIndex--]; + if ("http://www.w3.org/1999/xhtml" == ancestor.ns) { + if ("template" == ancestor.name) { + break; + } + if ("table" == ancestor.name) { + mode = IN_SELECT_IN_TABLE; + return; + } + } + } + mode = IN_SELECT; + return; + } else if ("td" == name || "th" == name) { + mode = IN_CELL; + return; + } else if ("tr" == name) { + mode = IN_ROW; + return; + } else if ("tbody" == name || "thead" == name || "tfoot" == name) { + mode = IN_TABLE_BODY; + return; + } else if ("caption" == name) { + mode = IN_CAPTION; + return; + } else if ("colgroup" == name) { + mode = IN_COLUMN_GROUP; + return; + } else if ("table" == name) { + mode = IN_TABLE; + return; + } else if ("http://www.w3.org/1999/xhtml" != ns) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; + return; + } else if ("template" == name) { + assert templateModePtr >= 0; + mode = templateModeStack[templateModePtr]; + return; + } else if ("head" == name) { + if (name == contextName) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; // really + } else { + mode = IN_HEAD; + } + return; + } else if ("body" == name) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; + return; + } else if ("frameset" == name) { + // TODO: Fragment case. Add error reporting. + mode = IN_FRAMESET; + return; + } else if ("html" == name) { + if (headPointer == null) { + // TODO: Fragment case. Add error reporting. + mode = BEFORE_HEAD; + } else { + mode = AFTER_HEAD; + } + return; + } else if (i == 0) { + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; + return; + } + } + } + + /** + * @throws SAXException + * + */ + private void implicitlyCloseP() throws SAXException { + int eltPos = findLastInButtonScope("p"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + return; + } + generateImpliedEndTagsExceptFor("p"); + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElementsImplied(eltPos, "p"); + } + while (currentPtr >= eltPos) { + pop(); + } + } + + private boolean debugOnlyClearLastStackSlot() { + stack[currentPtr] = null; + return true; + } + + private boolean debugOnlyClearLastListSlot() { + listOfActiveFormattingElements[listPtr] = null; + return true; + } + + private void pushTemplateMode(int mode) { + templateModePtr++; + if (templateModePtr == templateModeStack.length) { + int[] newStack = new int[templateModeStack.length + 64]; + System.arraycopy(templateModeStack, 0, newStack, 0, templateModeStack.length); + templateModeStack = newStack; + } + templateModeStack[templateModePtr] = mode; + } + + @SuppressWarnings("unchecked") private void push(StackNode node) throws SAXException { + currentPtr++; + if (currentPtr == stack.length) { + StackNode[] newStack = new StackNode[stack.length + 64]; + System.arraycopy(stack, 0, newStack, 0, stack.length); + stack = newStack; + } + stack[currentPtr] = node; + elementPushed(node.ns, node.popName, node.node); + } + + @SuppressWarnings("unchecked") private void silentPush(StackNode node) throws SAXException { + currentPtr++; + if (currentPtr == stack.length) { + StackNode[] newStack = new StackNode[stack.length + 64]; + System.arraycopy(stack, 0, newStack, 0, stack.length); + stack = newStack; + } + stack[currentPtr] = node; + } + + @SuppressWarnings("unchecked") private void append(StackNode node) { + listPtr++; + if (listPtr == listOfActiveFormattingElements.length) { + StackNode[] newList = new StackNode[listOfActiveFormattingElements.length + 64]; + System.arraycopy(listOfActiveFormattingElements, 0, newList, 0, + 
listOfActiveFormattingElements.length); + listOfActiveFormattingElements = newList; + } + listOfActiveFormattingElements[listPtr] = node; + } + + @Inline private void insertMarker() { + append(null); + } + + private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() { + while (listPtr > -1) { + if (listOfActiveFormattingElements[listPtr] == null) { + --listPtr; + return; + } + listOfActiveFormattingElements[listPtr].release(); + --listPtr; + } + } + + @Inline private boolean isCurrent(@Local String name) { + return stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" && + name == stack[currentPtr].name; + } + + private void removeFromStack(int pos) throws SAXException { + if (currentPtr == pos) { + pop(); + } else { + fatal(); + stack[pos].release(); + System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); + assert debugOnlyClearLastStackSlot(); + currentPtr--; + } + } + + private void removeFromStack(StackNode node) throws SAXException { + if (stack[currentPtr] == node) { + pop(); + } else { + int pos = currentPtr - 1; + while (pos >= 0 && stack[pos] != node) { + pos--; + } + if (pos == -1) { + // dead code? + return; + } + fatal(); + node.release(); + System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); + currentPtr--; + } + } + + private void removeFromListOfActiveFormattingElements(int pos) { + assert listOfActiveFormattingElements[pos] != null; + listOfActiveFormattingElements[pos].release(); + if (pos == listPtr) { + assert debugOnlyClearLastListSlot(); + listPtr--; + return; + } + assert pos < listPtr; + System.arraycopy(listOfActiveFormattingElements, pos + 1, + listOfActiveFormattingElements, pos, listPtr - pos); + assert debugOnlyClearLastListSlot(); + listPtr--; + } + + /** + * Adoption agency algorithm. + * + * @param name subject as described in the specified algorithm. + * @return Returns true if the algorithm has completed and there is nothing remaining to + * be done. 
Returns false if the algorithm needs to "act as described in the 'any other + * end tag' entry" as described in the specified algorithm. + * @throws SAXException + */ + private boolean adoptionAgencyEndTag(@Local String name) throws SAXException { + // This check intends to ensure that for properly nested tags, closing tags will match + // against the stack instead of the listOfActiveFormattingElements. + if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" && + stack[currentPtr].name == name && + findInListOfActiveFormattingElements(stack[currentPtr]) == -1) { + // If the current element matches the name but isn't on the list of active + // formatting elements, then it is possible that the list was mangled by the Noah's Ark + // clause. In this case, we want to match the end tag against the stack instead of + // proceeding with the AAA algorithm that may match against the list of + // active formatting elements (and possibly mangle the tree in unexpected ways). + pop(); + return true; + } + + // If you crash around here, perhaps some stack node variable claimed to + // be a weak ref isn't. 
+ for (int i = 0; i < 8; ++i) { + int formattingEltListPos = listPtr; + while (formattingEltListPos > -1) { + StackNode listNode = listOfActiveFormattingElements[formattingEltListPos]; // weak ref + if (listNode == null) { + formattingEltListPos = -1; + break; + } else if (listNode.name == name) { + break; + } + formattingEltListPos--; + } + if (formattingEltListPos == -1) { + return false; + } + // this *looks* like a weak ref to the list of formatting elements + StackNode formattingElt = listOfActiveFormattingElements[formattingEltListPos]; + int formattingEltStackPos = currentPtr; + boolean inScope = true; + while (formattingEltStackPos > -1) { + StackNode node = stack[formattingEltStackPos]; // weak ref + if (node == formattingElt) { + break; + } else if (node.isScoping()) { + inScope = false; + } + formattingEltStackPos--; + } + if (formattingEltStackPos == -1) { + errNoElementToCloseButEndTagSeen(name); + removeFromListOfActiveFormattingElements(formattingEltListPos); + return true; + } + if (!inScope) { + errNoElementToCloseButEndTagSeen(name); + return true; + } + // stackPos now points to the formatting element and it is in scope + if (formattingEltStackPos != currentPtr) { + errEndTagViolatesNestingRules(name); + } + int furthestBlockPos = formattingEltStackPos + 1; + while (furthestBlockPos <= currentPtr) { + StackNode node = stack[furthestBlockPos]; // weak ref + assert furthestBlockPos > 0: "How is formattingEltStackPos + 1 not > 0?"; + if (node.isSpecial()) { + break; + } + furthestBlockPos++; + } + if (furthestBlockPos > currentPtr) { + // no furthest block + while (currentPtr >= formattingEltStackPos) { + pop(); + } + removeFromListOfActiveFormattingElements(formattingEltListPos); + return true; + } + StackNode commonAncestor = stack[formattingEltStackPos - 1]; // weak ref + StackNode furthestBlock = stack[furthestBlockPos]; // weak ref + // detachFromParent(furthestBlock.node); XXX AAA CHANGE + int bookmark = formattingEltListPos; + int nodePos = 
furthestBlockPos; + StackNode lastNode = furthestBlock; // weak ref + int j = 0; + for (;;) { + ++j; + nodePos--; + if (nodePos == formattingEltStackPos) { + break; + } + StackNode node = stack[nodePos]; // weak ref + int nodeListPos = findInListOfActiveFormattingElements(node); + + if (j > 3 && nodeListPos != -1) { + removeFromListOfActiveFormattingElements(nodeListPos); + + // Adjust the indices into the list to account + // for the removal of nodeListPos. + if (nodeListPos <= formattingEltListPos) { + formattingEltListPos--; + } + if (nodeListPos <= bookmark) { + bookmark--; + } + + // Update position to reflect removal from list. + nodeListPos = -1; + } + + if (nodeListPos == -1) { + assert formattingEltStackPos < nodePos; + assert bookmark < nodePos; + assert furthestBlockPos > nodePos; + removeFromStack(nodePos); // node is now a bad pointer in C++ + furthestBlockPos--; + continue; + } + // now node is both on stack and in the list + if (nodePos == furthestBlockPos) { + bookmark = nodeListPos + 1; + } + // if (hasChildren(node.node)) { XXX AAA CHANGE + assert node == listOfActiveFormattingElements[nodeListPos]; + assert node == stack[nodePos]; + T clone = createElement("http://www.w3.org/1999/xhtml", + node.name, node.attributes.cloneAttributes(null), commonAncestor.node); + StackNode newNode = new StackNode(node.getFlags(), node.ns, + node.name, clone, node.popName, node.attributes + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); // creation ownership goes to stack + node.dropAttributes(); // adopt ownership to newNode + stack[nodePos] = newNode; + newNode.retain(); // retain for list + listOfActiveFormattingElements[nodeListPos] = newNode; + node.release(); // release from stack + node.release(); // release from list + node = newNode; + // } XXX AAA CHANGE + detachFromParent(lastNode.node); + appendElement(lastNode.node, node.node); + lastNode = node; + } + if (commonAncestor.isFosterParenting()) { + fatal(); + detachFromParent(lastNode.node); + 
insertIntoFosterParent(lastNode.node); + } else { + detachFromParent(lastNode.node); + appendElement(lastNode.node, commonAncestor.node); + } + T clone = createElement("http://www.w3.org/1999/xhtml", + formattingElt.name, + formattingElt.attributes.cloneAttributes(null), furthestBlock.node); + StackNode formattingClone = new StackNode( + formattingElt.getFlags(), formattingElt.ns, + formattingElt.name, clone, formattingElt.popName, + formattingElt.attributes + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); // Ownership transfers to stack below + formattingElt.dropAttributes(); // transfer ownership to + // formattingClone + appendChildrenToNewParent(furthestBlock.node, clone); + appendElement(clone, furthestBlock.node); + removeFromListOfActiveFormattingElements(formattingEltListPos); + insertIntoListOfActiveFormattingElements(formattingClone, bookmark); + assert formattingEltStackPos < furthestBlockPos; + removeFromStack(formattingEltStackPos); + // furthestBlockPos is now off by one and points to the slot after + // it + insertIntoStack(formattingClone, furthestBlockPos); + } + return true; + } + + private void insertIntoStack(StackNode node, int position) + throws SAXException { + assert currentPtr + 1 < stack.length; + assert position <= currentPtr + 1; + if (position == currentPtr + 1) { + push(node); + } else { + System.arraycopy(stack, position, stack, position + 1, + (currentPtr - position) + 1); + currentPtr++; + stack[position] = node; + } + } + + private void insertIntoListOfActiveFormattingElements( + StackNode formattingClone, int bookmark) { + formattingClone.retain(); + assert listPtr + 1 < listOfActiveFormattingElements.length; + if (bookmark <= listPtr) { + System.arraycopy(listOfActiveFormattingElements, bookmark, + listOfActiveFormattingElements, bookmark + 1, + (listPtr - bookmark) + 1); + } + listPtr++; + listOfActiveFormattingElements[bookmark] = formattingClone; + } + + private int 
findInListOfActiveFormattingElements(StackNode node) { + for (int i = listPtr; i >= 0; i--) { + if (node == listOfActiveFormattingElements[i]) { + return i; + } + } + return -1; + } + + private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker( + @Local String name) { + for (int i = listPtr; i >= 0; i--) { + StackNode node = listOfActiveFormattingElements[i]; + if (node == null) { + return -1; + } else if (node.name == name) { + return i; + } + } + return -1; + } + + + private void maybeForgetEarlierDuplicateFormattingElement( + @Local String name, HtmlAttributes attributes) throws SAXException { + int candidate = -1; + int count = 0; + for (int i = listPtr; i >= 0; i--) { + StackNode node = listOfActiveFormattingElements[i]; + if (node == null) { + break; + } + if (node.name == name && node.attributes.equalsAnother(attributes)) { + candidate = i; + ++count; + } + } + if (count >= 3) { + removeFromListOfActiveFormattingElements(candidate); + } + } + + private int findLastOrRoot(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) { + return i; + } + } + return 0; + } + + private int findLastOrRoot(int group) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].getGroup() == group) { + return i; + } + } + return 0; + } + + /** + * Attempt to add attribute to the body element. 
+ * @param attributes the attributes + * @return true iff the attributes were added + * @throws SAXException + */ + private boolean addAttributesToBody(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + if (currentPtr >= 1) { + StackNode body = stack[1]; + if (body.getGroup() == TreeBuilder.BODY) { + addAttributesToElement(body.node, attributes); + return true; + } + } + return false; + } + + private void addAttributesToHtml(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + addAttributesToElement(stack[0].node, attributes); + } + + private void pushHeadPointerOntoStack() throws SAXException { + assert headPointer != null; + assert mode == AFTER_HEAD; + fatal(); + silentPush(new StackNode(ElementName.HEAD, headPointer + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + )); + } + + /** + * @throws SAXException + * + */ + private void reconstructTheActiveFormattingElements() throws SAXException { + if (listPtr == -1) { + return; + } + StackNode mostRecent = listOfActiveFormattingElements[listPtr]; + if (mostRecent == null || isInStack(mostRecent)) { + return; + } + int entryPos = listPtr; + for (;;) { + entryPos--; + if (entryPos == -1) { + break; + } + if (listOfActiveFormattingElements[entryPos] == null) { + break; + } + if (isInStack(listOfActiveFormattingElements[entryPos])) { + break; + } + } + while (entryPos < listPtr) { + entryPos++; + StackNode entry = listOfActiveFormattingElements[entryPos]; + StackNode currentNode = stack[currentPtr]; + + T clone; + if (currentNode.isFosterParenting()) { + clone = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", entry.name, + entry.attributes.cloneAttributes(null)); + } else { + clone = createElement("http://www.w3.org/1999/xhtml", entry.name, + 
entry.attributes.cloneAttributes(null), currentNode.node); + appendElement(clone, currentNode.node); + } + + StackNode entryClone = new StackNode(entry.getFlags(), + entry.ns, entry.name, clone, entry.popName, + entry.attributes + // [NOCPP[ + , entry.getLocator() + // ]NOCPP] + ); + + entry.dropAttributes(); // transfer ownership to entryClone + + push(entryClone); + // stack takes ownership of the local variable + listOfActiveFormattingElements[entryPos] = entryClone; + // overwriting the old entry on the list, so release & retain + entry.release(); + entryClone.retain(); + } + } + + private void insertIntoFosterParent(T child) throws SAXException { + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + appendElement(child, stack[templatePos].node); + return; + } + + StackNode node = stack[tablePos]; + insertFosterParentedChild(child, node.node, stack[tablePos - 1].node); + } + + private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes) throws SAXException { + return createAndInsertFosterParentedElement(ns, name, attributes, null); + } + + private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form) throws SAXException { + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + T child = createElement(ns, name, attributes, form, stack[templatePos].node); + appendElement(child, stack[templatePos].node); + return child; + } + + StackNode node = stack[tablePos]; + return createAndInsertFosterParentedElement(ns, name, attributes, form, node.node, stack[tablePos - 1].node); + } + + private boolean isInStack(StackNode node) { + for (int i = currentPtr; i >= 0; i--) { + if (stack[i] == node) { + return true; + } + } + return false; + } + + private void popTemplateMode() { + 
templateModePtr--; + } + + private void pop() throws SAXException { + StackNode node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + elementPopped(node.ns, node.popName, node.node); + node.release(); + } + + private void silentPop() throws SAXException { + StackNode node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + node.release(); + } + + private void popOnEof() throws SAXException { + StackNode node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + markMalformedIfScript(node.node); + elementPopped(node.ns, node.popName, node.node); + node.release(); + } + + // [NOCPP[ + private void checkAttributes(HtmlAttributes attributes, @NsUri String ns) + throws SAXException { + if (errorHandler != null) { + int len = attributes.getXmlnsLength(); + for (int i = 0; i < len; i++) { + AttributeName name = attributes.getXmlnsAttributeName(i); + if (name == AttributeName.XMLNS) { + if (html4) { + err("Attribute \u201Cxmlns\u201D not allowed here. 
(HTML4-only error.)"); + } else { + String xmlns = attributes.getXmlnsValue(i); + if (!ns.equals(xmlns)) { + err("Bad value \u201C" + + xmlns + + "\u201D for the attribute \u201Cxmlns\u201D (only \u201C" + + ns + "\u201D permitted here)."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0."); + break; + case FATAL: + fatal("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0."); + break; + } + } + } + } else if (ns != "http://www.w3.org/1999/xhtml" + && name == AttributeName.XMLNS_XLINK) { + String xmlns = attributes.getXmlnsValue(i); + if (!"http://www.w3.org/1999/xlink".equals(xmlns)) { + err("Bad value \u201C" + + xmlns + + "\u201D for the attribute \u201Cxmlns:link\u201D (only \u201Chttp://www.w3.org/1999/xlink\u201D permitted here)."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics."); + break; + case FATAL: + fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics."); + break; + } + } + } else { + err("Attribute \u201C" + attributes.getXmlnsLocalName(i) + + "\u201D not allowed here."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute with the local name \u201C" + + attributes.getXmlnsLocalName(i) + + "\u201D is not serializable as XML 1.0."); + break; + case FATAL: + fatal("Attribute with the local name \u201C" + + attributes.getXmlnsLocalName(i) + + "\u201D is not serializable as XML 1.0."); + break; + } + } + } + } + attributes.processNonNcNames(this, namePolicy); + } + + private String checkPopName(@Local String name) throws SAXException { + if (NCName.isNCName(name)) { + return name; + } else 
{ + switch (namePolicy) { + case ALLOW: + warn("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + return name; + case ALTER_INFOSET: + warn("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + return NCName.escapeName(name); + case FATAL: + fatal("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + } + } + return null; // keep compiler happy + } + + // ]NOCPP] + + private void appendHtmlElementToDocumentAndPush(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T elt = createHtmlElementSetAsRoot(attributes); + StackNode node = new StackNode(ElementName.HTML, + elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendHtmlElementToDocumentAndPush() throws SAXException { + appendHtmlElementToDocumentAndPush(tokenizer.emptyAttributes()); + } + + private void appendToCurrentNodeAndPushHeadElement(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", "head", attributes, currentNode); + appendElement(elt, currentNode); + headPointer = elt; + StackNode node = new StackNode(ElementName.HEAD, + elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushBodyElement(HtmlAttributes attributes) + throws SAXException { + appendToCurrentNodeAndPushElement(ElementName.BODY, + attributes); + } + + private void appendToCurrentNodeAndPushBodyElement() throws SAXException { + appendToCurrentNodeAndPushBodyElement(tokenizer.emptyAttributes()); + } + + private void appendToCurrentNodeAndPushFormElementMayFoster( + HtmlAttributes attributes) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", "form", attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", "form", attributes, current.node); + appendElement(elt, current.node); + } + + if (!isTemplateContents()) { + formPointer = elt; + } + + StackNode node = new StackNode(ElementName.FORM, + elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushFormattingElementMayFoster( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // This method can't be called for custom elements + HtmlAttributes clone = attributes.cloneAttributes(null); + // Attributes must not be read after calling createElement, because + // createElement may delete attributes in C++. 
+ T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, current.node); + appendElement(elt, current.node); + } + StackNode node = new StackNode(elementName, elt, clone + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + append(node); + node.retain(); // append doesn't retain itself + } + + private void appendToCurrentNodeAndPushElement(ElementName elementName, + HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // This method can't be called for custom elements + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, currentNode); + appendElement(elt, currentNode); + if (ElementName.TEMPLATE == elementName) { + elt = getDocumentFragmentForTemplate(elt); + } + StackNode node = new StackNode(elementName, elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName, + HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode node = new StackNode(elementName, elt, popName + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFosterMathML( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + boolean markAsHtmlIntegrationPoint = false; + if (ElementName.ANNOTATION_XML == elementName + && annotationXmlEncodingPermitsHtml(attributes)) { + markAsHtmlIntegrationPoint = true; + } + // Attributes must not be read after calling createElement(), since + // createElement may delete the object in C++. 
+ T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode node = new StackNode(elementName, elt, popName, + markAsHtmlIntegrationPoint + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + // [NOCPP[ + T getDocumentFragmentForTemplate(T template) { + return template; + } + + T getFormPointerForContext(T context) { + return null; + } + // ]NOCPP] + + private boolean annotationXmlEncodingPermitsHtml(HtmlAttributes attributes) { + String encoding = attributes.getValue(AttributeName.ENCODING); + if (encoding == null) { + return false; + } + return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "application/xhtml+xml", encoding) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "text/html", encoding); + } + + private void appendToCurrentNodeAndPushElementMayFosterSVG( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.camelCaseName; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/2000/svg"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes); + } else { + elt = createElement("http://www.w3.org/2000/svg", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode node = new StackNode(elementName, popName, elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName, + HtmlAttributes attributes, T form) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T elt; + T formOwner = form == null || fragment || isTemplateContents() ? null : form; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, + attributes, formOwner); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, + attributes, formOwner, current.node); + appendElement(elt, current.node); + } + StackNode node = new StackNode(elementName, elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendVoidElementToCurrentMayFoster( + @Local String name, HtmlAttributes attributes, T form) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T elt; + T formOwner = form == null || fragment || isTemplateContents() ? 
null : form; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", name, + attributes, formOwner); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", name, + attributes, formOwner, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1999/xhtml", name, elt); + elementPopped("http://www.w3.org/1999/xhtml", name, elt); + } + + private void appendVoidElementToCurrentMayFoster( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1999/xhtml", popName, elt); + elementPopped("http://www.w3.org/1999/xhtml", popName, elt); + } + + private void appendVoidElementToCurrentMayFosterSVG( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.camelCaseName; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/2000/svg"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes); + } else { + elt = createElement("http://www.w3.org/2000/svg", popName, attributes, current.node); + appendElement(elt, current.node); + } + 
elementPushed("http://www.w3.org/2000/svg", popName, elt); + elementPopped("http://www.w3.org/2000/svg", popName, elt); + } + + private void appendVoidElementToCurrentMayFosterMathML( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1998/Math/MathML", popName, elt); + elementPopped("http://www.w3.org/1998/Math/MathML", popName, elt); + } + + private void appendVoidElementToCurrent( + @Local String name, HtmlAttributes attributes, T form) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", name, attributes, + form == null || fragment || isTemplateContents() ? 
null : form, currentNode); + appendElement(elt, currentNode); + elementPushed("http://www.w3.org/1999/xhtml", name, elt); + elementPopped("http://www.w3.org/1999/xhtml", name, elt); + } + + private void appendVoidFormToCurrent(HtmlAttributes attributes) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", "form", + attributes, currentNode); + formPointer = elt; + // ownership transferred to form pointer + appendElement(elt, currentNode); + elementPushed("http://www.w3.org/1999/xhtml", "form", elt); + elementPopped("http://www.w3.org/1999/xhtml", "form", elt); + } + + // [NOCPP[ + + private final void accumulateCharactersForced(@Const @NoLength char[] buf, + int start, int length) throws SAXException { + System.arraycopy(buf, start, charBuffer, charBufferLen, length); + charBufferLen += length; + } + + @Override public void ensureBufferSpace(int inputLength) + throws SAXException { + // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that + // this method becomes unnecessary. + int worstCase = charBufferLen + inputLength; + if (charBuffer == null) { + // Add an arbitrary small value to avoid immediate reallocation + // once there are a few characters in the buffer. + charBuffer = new char[worstCase + 128]; + } else if (worstCase > charBuffer.length) { + // HotSpot reportedly allocates memory with 8-byte accuracy, so + // there's no point in trying to do math here to avoid slop. + // Maybe we should add some small constant to worstCase here + // but not doing that without profiling. In C++ with jemalloc, + // the corresponding method should do math to round up here + // to avoid slop. 
+ char[] newBuf = new char[worstCase]; + System.arraycopy(charBuffer, 0, newBuf, 0, charBufferLen); + charBuffer = newBuf; + } + } + + // ]NOCPP] + + protected void accumulateCharacters(@Const @NoLength char[] buf, int start, + int length) throws SAXException { + appendCharacters(stack[currentPtr].node, buf, start, length); + } + + // ------------------------------- // + + protected final void requestSuspension() { + tokenizer.requestSuspension(); + } + + protected abstract T createElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T intendedParent) throws SAXException; + + /** + * Creates an element for which a form owner may be supplied. This default + * implementation does not use form: it delegates to the + * four-argument createElement, forwarding the namespace it was given + * (callers in this class also pass the SVG and MathML namespaces here). + * Subclasses may override this method to make use of form. + * + * @param ns the namespace URI of the element to create + * @param name the local name of the element + * @param attributes the attributes of the element + * @param form the form owner passed by the caller; may be null + * @param intendedParent the node passed by the caller as the intended parent + * @return the element returned by the four-argument createElement + * @throws SAXException if the four-argument createElement throws + */ + protected T createElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form, T intendedParent) throws SAXException { + return createElement(ns, name, attributes, intendedParent); + } + + protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes) + throws SAXException; + + protected abstract void detachFromParent(T element) throws SAXException; + + protected abstract boolean hasChildren(T element) throws SAXException; + + protected abstract void appendElement(T child, T newParent) + throws SAXException; + + protected abstract void appendChildrenToNewParent(T oldParent, T newParent) + throws SAXException; + + protected abstract void insertFosterParentedChild(T child, T table, + T stackParent) throws SAXException; + + // We don't generate CPP code for this method because it is not used in generated CPP + // code. Instead, the form owner version of this method is called with a null form owner.
+ // [NOCPP[ + + protected abstract T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T table, T stackParent) throws SAXException; + + // ]NOCPP] + + protected T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form, T table, T stackParent) throws SAXException { + return createAndInsertFosterParentedElement(ns, name, attributes, table, stackParent); + }; + + protected abstract void insertFosterParentedCharacters( + @NoLength char[] buf, int start, int length, T table, T stackParent) + throws SAXException; + + protected abstract void appendCharacters(T parent, @NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void appendIsindexPrompt(T parent) throws SAXException; + + protected abstract void appendComment(T parent, @NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void appendCommentToDocument(@NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void addAttributesToElement(T element, + HtmlAttributes attributes) throws SAXException; + + protected void markMalformedIfScript(T elt) throws SAXException { + + } + + protected void start(boolean fragmentMode) throws SAXException { + + } + + protected void end() throws SAXException { + + } + + protected void appendDoctypeToDocument(@Local String name, + String publicIdentifier, String systemIdentifier) + throws SAXException { + + } + + protected void elementPushed(@NsUri String ns, @Local String name, T node) + throws SAXException { + + } + + protected void elementPopped(@NsUri String ns, @Local String name, T node) + throws SAXException { + + } + + // [NOCPP[ + + protected void documentMode(DocumentMode m, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + + } + + /** + * @see 
nu.validator.htmlparser.common.TokenHandler#wantsComments() + */ + public boolean wantsComments() { + return wantingComments; + } + + public void setIgnoringComments(boolean ignoreComments) { + wantingComments = !ignoreComments; + } + + /** + * Sets the errorHandler. + * + * @param errorHandler + * the errorHandler to set + */ + public final void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + } + + /** + * Returns the errorHandler. + * + * @return the errorHandler + */ + public ErrorHandler getErrorHandler() { + return errorHandler; + } + + /** + * The argument MUST be an interned string or null. + * + * @param context + */ + public final void setFragmentContext(@Local String context) { + this.contextName = context; + this.contextNamespace = "http://www.w3.org/1999/xhtml"; + this.contextNode = null; + this.fragment = (contextName != null); + this.quirks = false; + } + + // ]NOCPP] + + /** + * @see nu.validator.htmlparser.common.TokenHandler#cdataSectionAllowed() + */ + @Inline public boolean cdataSectionAllowed() throws SAXException { + return isInForeign(); + } + + private boolean isInForeign() { + return currentPtr >= 0 + && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml"; + } + + private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() { + if (currentPtr < 0) { + return false; + } + return !isSpecialParentInForeign(stack[currentPtr]); + } + + /** + * The argument MUST be an interned string or null. + * + * @param context + */ + public final void setFragmentContext(@Local String context, + @NsUri String ns, T node, boolean quirks) { + // [NOCPP[ + if (!((context == null && ns == null) + || "http://www.w3.org/1999/xhtml" == ns + || "http://www.w3.org/2000/svg" == ns || "http://www.w3.org/1998/Math/MathML" == ns)) { + throw new IllegalArgumentException( + "The namespace must be the HTML, SVG or MathML namespace (or null when the local name is null). 
Got: " + + ns); + } + // ]NOCPP] + this.contextName = context; + this.contextNamespace = ns; + this.contextNode = node; + this.fragment = (contextName != null); + this.quirks = quirks; + } + + protected final T currentNode() { + return stack[currentPtr].node; + } + + /** + * Returns the scriptingEnabled. + * + * @return the scriptingEnabled + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets the scriptingEnabled. + * + * @param scriptingEnabled + * the scriptingEnabled to set + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + } + + public void setIsSrcdocDocument(boolean isSrcdocDocument) { + this.isSrcdocDocument = isSrcdocDocument; + } + + // [NOCPP[ + + /** + * Sets the doctypeExpectation. + * + * @param doctypeExpectation + * the doctypeExpectation to set + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + } + + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + } + + /** + * Sets the documentModeHandler. + * + * @param documentModeHandler + * the documentModeHandler to set + */ + public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { + this.documentModeHandler = documentModeHandler; + } + + /** + * Sets the reportingDoctype. + * + * @param reportingDoctype + * the reportingDoctype to set + */ + public void setReportingDoctype(boolean reportingDoctype) { + this.reportingDoctype = reportingDoctype; + } + + // ]NOCPP] + + /** + * Flushes the pending characters. Public for document.write use cases only. 
+ * @throws SAXException + */ + public final void flushCharacters() throws SAXException { + if (charBufferLen > 0) { + if ((mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW) + && charBufferContainsNonWhitespace()) { + errNonSpaceInTable(); + reconstructTheActiveFormattingElements(); + if (!stack[currentPtr].isFosterParenting()) { + // reconstructing gave us a new current node + appendCharacters(currentNode(), charBuffer, 0, + charBufferLen); + charBufferLen = 0; + return; + } + + // Still foster parenting: choose the target from the stack positions of the last table and the last template. + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + // The template entry is not below the table entry, so the text is appended to the template entry's node. + appendCharacters(stack[templatePos].node, charBuffer, 0, charBufferLen); + charBufferLen = 0; + return; + } + + // tablePos > templatePos here; assuming findLastOrRoot never returns a negative index, tablePos >= 1 and stack[tablePos - 1] exists. + StackNode tableElt = stack[tablePos]; + insertFosterParentedCharacters(charBuffer, 0, charBufferLen, + tableElt.node, stack[tablePos - 1].node); + charBufferLen = 0; + return; + } + // Ordinary case: append the buffered text to the current node and empty the buffer. + appendCharacters(currentNode(), charBuffer, 0, charBufferLen); + charBufferLen = 0; + } + } + + /** + * Checks the buffered characters (indices 0 to charBufferLen - 1) for + * anything other than space, tab, line feed, carriage return or form feed. + * + * @return true if such a character is present, false otherwise + */ + private boolean charBufferContainsNonWhitespace() { + for (int i = 0; i < charBufferLen; i++) { + switch (charBuffer[i]) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\u000C': + continue; + default: + return true; + } + } + return false; + } + + /** + * Creates a comparable snapshot of the tree builder state. Snapshot + * creation is only supported immediately after a script end tag has been + * processed. In C++ the caller is responsible for calling + * delete on the returned object. + * + * @return a snapshot.
+ * @throws SAXException + */ + @SuppressWarnings("unchecked") public TreeBuilderState newSnapshot() + throws SAXException { + StackNode[] listCopy = new StackNode[listPtr + 1]; + for (int i = 0; i < listCopy.length; i++) { + StackNode node = listOfActiveFormattingElements[i]; + if (node != null) { + StackNode newNode = new StackNode(node.getFlags(), node.ns, + node.name, node.node, node.popName, + node.attributes.cloneAttributes(null) + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + listCopy[i] = newNode; + } else { + listCopy[i] = null; + } + } + StackNode[] stackCopy = new StackNode[currentPtr + 1]; + for (int i = 0; i < stackCopy.length; i++) { + StackNode node = stack[i]; + int listIndex = findInListOfActiveFormattingElements(node); + if (listIndex == -1) { + StackNode newNode = new StackNode(node.getFlags(), node.ns, + node.name, node.node, node.popName, + null + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + stackCopy[i] = newNode; + } else { + stackCopy[i] = listCopy[listIndex]; + stackCopy[i].retain(); + } + } + int[] templateModeStackCopy = new int[templateModePtr + 1]; + System.arraycopy(templateModeStack, 0, templateModeStackCopy, 0, + templateModeStackCopy.length); + return new StateSnapshot(stackCopy, listCopy, templateModeStackCopy, formPointer, + headPointer, deepTreeSurrogateParent, mode, originalMode, framesetOk, + needToDropLF, quirks); + } + + public boolean snapshotMatches(TreeBuilderState snapshot) { + StackNode[] stackCopy = snapshot.getStack(); + int stackLen = snapshot.getStackLength(); + StackNode[] listCopy = snapshot.getListOfActiveFormattingElements(); + int listLen = snapshot.getListOfActiveFormattingElementsLength(); + int[] templateModeStackCopy = snapshot.getTemplateModeStack(); + int templateModeStackLen = snapshot.getTemplateModeStackLength(); + + if (stackLen != currentPtr + 1 + || listLen != listPtr + 1 + || templateModeStackLen != templateModePtr + 1 + || formPointer != snapshot.getFormPointer() + || headPointer != 
snapshot.getHeadPointer() + || deepTreeSurrogateParent != snapshot.getDeepTreeSurrogateParent() + || mode != snapshot.getMode() + || originalMode != snapshot.getOriginalMode() + || framesetOk != snapshot.isFramesetOk() + || needToDropLF != snapshot.isNeedToDropLF() + || quirks != snapshot.isQuirks()) { // maybe just assert quirks + return false; + } + for (int i = listLen - 1; i >= 0; i--) { + if (listCopy[i] == null + && listOfActiveFormattingElements[i] == null) { + continue; + } else if (listCopy[i] == null + || listOfActiveFormattingElements[i] == null) { + return false; + } + if (listCopy[i].node != listOfActiveFormattingElements[i].node) { + return false; // it's possible that this condition is overly + // strict + } + } + for (int i = stackLen - 1; i >= 0; i--) { + if (stackCopy[i].node != stack[i].node) { + return false; + } + } + for (int i = templateModeStackLen - 1; i >=0; i--) { + if (templateModeStackCopy[i] != templateModeStack[i]) { + return false; + } + } + return true; + } + + @SuppressWarnings("unchecked") public void loadState( + TreeBuilderState snapshot, Interner interner) + throws SAXException { + StackNode[] stackCopy = snapshot.getStack(); + int stackLen = snapshot.getStackLength(); + StackNode[] listCopy = snapshot.getListOfActiveFormattingElements(); + int listLen = snapshot.getListOfActiveFormattingElementsLength(); + int[] templateModeStackCopy = snapshot.getTemplateModeStack(); + int templateModeStackLen = snapshot.getTemplateModeStackLength(); + + for (int i = 0; i <= listPtr; i++) { + if (listOfActiveFormattingElements[i] != null) { + listOfActiveFormattingElements[i].release(); + } + } + if (listOfActiveFormattingElements.length < listLen) { + listOfActiveFormattingElements = new StackNode[listLen]; + } + listPtr = listLen - 1; + + for (int i = 0; i <= currentPtr; i++) { + stack[i].release(); + } + if (stack.length < stackLen) { + stack = new StackNode[stackLen]; + } + currentPtr = stackLen - 1; + + if (templateModeStack.length < 
templateModeStackLen) { + templateModeStack = new int[templateModeStackLen]; + } + templateModePtr = templateModeStackLen - 1; + + for (int i = 0; i < listLen; i++) { + StackNode node = listCopy[i]; + if (node != null) { + StackNode newNode = new StackNode(node.getFlags(), node.ns, + Portability.newLocalFromLocal(node.name, interner), node.node, + Portability.newLocalFromLocal(node.popName, interner), + node.attributes.cloneAttributes(null) + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + listOfActiveFormattingElements[i] = newNode; + } else { + listOfActiveFormattingElements[i] = null; + } + } + for (int i = 0; i < stackLen; i++) { + StackNode node = stackCopy[i]; + int listIndex = findInArray(node, listCopy); + if (listIndex == -1) { + StackNode newNode = new StackNode(node.getFlags(), node.ns, + Portability.newLocalFromLocal(node.name, interner), node.node, + Portability.newLocalFromLocal(node.popName, interner), + null + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + stack[i] = newNode; + } else { + stack[i] = listOfActiveFormattingElements[listIndex]; + stack[i].retain(); + } + } + System.arraycopy(templateModeStackCopy, 0, templateModeStack, 0, templateModeStackLen); + formPointer = snapshot.getFormPointer(); + headPointer = snapshot.getHeadPointer(); + deepTreeSurrogateParent = snapshot.getDeepTreeSurrogateParent(); + mode = snapshot.getMode(); + originalMode = snapshot.getOriginalMode(); + framesetOk = snapshot.isFramesetOk(); + needToDropLF = snapshot.isNeedToDropLF(); + quirks = snapshot.isQuirks(); + } + + private int findInArray(StackNode node, StackNode[] arr) { + for (int i = listPtr; i >= 0; i--) { + if (node == arr[i]) { + return i; + } + } + return -1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer() + */ + public T getFormPointer() { + return formPointer; + } + + /** + * Returns the headPointer. 
+ * + * @return the headPointer + */ + public T getHeadPointer() { + return headPointer; + } + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent() { + return deepTreeSurrogateParent; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements() + */ + public StackNode[] getListOfActiveFormattingElements() { + return listOfActiveFormattingElements; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack() + */ + public StackNode[] getStack() { + return stack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack() + */ + public int[] getTemplateModeStack() { + return templateModeStack; + } + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode() { + return mode; + } + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode() { + return originalMode; + } + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk() { + return framesetOk; + } + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF() { + return needToDropLF; + } + + /** + * Returns the quirks. + * + * @return the quirks + */ + public boolean isQuirks() { + return quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength() + */ + public int getListOfActiveFormattingElementsLength() { + return listPtr + 1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength() + */ + public int getStackLength() { + return currentPtr + 1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength() + */ + public int getTemplateModeStackLength() { + return templateModePtr + 1; + } + + /** + * Reports a stray start tag. 
+ * @param name the name of the stray tag + * + * @throws SAXException + */ + private void errStrayStartTag(@Local String name) throws SAXException { + err("Stray start tag \u201C" + name + "\u201D."); + } + + /** + * Reports a stray end tag. + * @param name the name of the stray tag + * + * @throws SAXException + */ + private void errStrayEndTag(@Local String name) throws SAXException { + err("Stray end tag \u201C" + name + "\u201D."); + } + + /** + * Reports a state when elements expected to be closed were not. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. + * @param name the name of the end tag + * + * @throws SAXException + */ + private void errUnclosedElements(int eltPos, @Local String name) throws SAXException { + errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + /** + * Reports a state when elements expected to be closed ahead of an implied + * end tag but were not. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. + * @param name the name of the end tag + * + * @throws SAXException + */ + private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException { + errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + /** + * Reports a state when elements expected to be closed ahead of an implied + * table cell close. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. 
+ * @throws SAXException + */ + private void errUnclosedElementsCell(int eltPos) throws SAXException { + errNoCheck("A table cell was implicitly closed, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + private void errStrayDoctype() throws SAXException { + err("Stray doctype."); + } + + private void errAlmostStandardsDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Almost standards mode doctype. Expected \u201C\u201D."); + } + } + + private void errQuirkyDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Quirky doctype. Expected \u201C\u201D."); + } + } + + private void errNonSpaceInTrailer() throws SAXException { + err("Non-space character in page trailer."); + } + + private void errNonSpaceAfterFrameset() throws SAXException { + err("Non-space after \u201Cframeset\u201D."); + } + + private void errNonSpaceInFrameset() throws SAXException { + err("Non-space in \u201Cframeset\u201D."); + } + + private void errNonSpaceAfterBody() throws SAXException { + err("Non-space character after body."); + } + + private void errNonSpaceInColgroupInFragment() throws SAXException { + err("Non-space in \u201Ccolgroup\u201D when parsing fragment."); + } + + private void errNonSpaceInNoscriptInHead() throws SAXException { + err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D."); + } + + private void errFooBetweenHeadAndBody(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D."); + } + + private void errStartTagWithoutDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Start tag seen without seeing a doctype first. 
Expected \u201C\u201D."); + } + } + + private void errNoSelectInTableScope() throws SAXException { + err("No \u201Cselect\u201D in table scope."); + } + + private void errStartSelectWhereEndSelectExpected() throws SAXException { + err("\u201Cselect\u201D start tag where end tag expected."); + } + + private void errStartTagWithSelectOpen(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + + "\u201D start tag with \u201Cselect\u201D open."); + } + + private void errBadStartTagInHead(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Bad start tag in \u201C" + name + + "\u201D in \u201Chead\u201D."); + } + + private void errImage() throws SAXException { + err("Saw a start tag \u201Cimage\u201D."); + } + + private void errIsindex() throws SAXException { + err("\u201Cisindex\u201D seen."); + } + + private void errFooSeenWhenFooOpen(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open."); + } + + private void errHeadingWhenHeadingOpen() throws SAXException { + err("Heading cannot be a child of another heading."); + } + + private void errFramesetStart() throws SAXException { + err("\u201Cframeset\u201D start tag seen."); + } + + private void errNoCellToClose() throws SAXException { + err("No cell to close."); + } + + private void errStartTagInTable(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Start tag \u201C" + name + + "\u201D seen in \u201Ctable\u201D."); + } + + private void errFormWhenFormOpen() throws SAXException { + err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. 
Ignoring the tag."); + } + + private void errTableSeenWhileTableOpen() throws SAXException { + err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open."); + } + + private void errStartTagInTableBody(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + "\u201D start tag in table body."); + } + + private void errEndTagSeenWithoutDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("End tag seen without seeing a doctype first. Expected \u201C\u201D."); + } + } + + private void errEndTagAfterBody() throws SAXException { + err("Saw an end tag after \u201Cbody\u201D had been closed."); + } + + private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + + "\u201D end tag with \u201Cselect\u201D open."); + } + + private void errGarbageInColgroup() throws SAXException { + err("Garbage in \u201Ccolgroup\u201D fragment."); + } + + private void errEndTagBr() throws SAXException { + err("End tag \u201Cbr\u201D."); + } + + private void errNoElementToCloseButEndTagSeen(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C" + + name + "\u201D end tag seen."); + } + + private void errHtmlStartTagInForeignContext(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("HTML start tag \u201C" + name + + "\u201D in a foreign namespace context."); + } + + private void errTableClosedWhileCaptionOpen() throws SAXException { + err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open."); + } + + private void errNoTableRowToClose() throws SAXException { + err("No table row to close."); + } + + private void errNonSpaceInTable() throws SAXException { + err("Misplaced non-space characters insided a table."); + } + + private void 
errUnclosedChildrenInRuby() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Unclosed children in \u201Cruby\u201D."); + } + + private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Start tag \u201C" + + name + + "\u201D seen without a \u201Cruby\u201D element being open."); + } + + private void errSelfClosing() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag."); + } + + private void errNoCheckUnclosedElementsOnStack() throws SAXException { + errNoCheck("Unclosed elements on stack."); + } + + private void errEndTagDidNotMatchCurrentOpenElement(@Local String name, + @Local String currOpenName) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag \u201C" + + name + + "\u201D did not match the name of the current open element (\u201C" + + currOpenName + "\u201D)."); + } + + private void errEndTagViolatesNestingRules(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules."); + } + + private void errEofWithUnclosedElements() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End of file seen and there were open elements."); + // just report all remaining unclosed elements + errListUnclosedStartTags(0); + } + + /** + * Reports arriving at/near end of document with unclosed elements remaining. 
+ * + * @param name + * the name of the end tag that was seen + * @throws SAXException + */ + private void errEndWithUnclosedElements(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag for \u201C" + + name + + "\u201D seen, but there were unclosed elements."); + // just report all remaining unclosed elements + errListUnclosedStartTags(0); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java new file mode 100644 index 0000000000..c4e2d4afba --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +/** + * Interface for exposing the state of the HTML5 tree builder so that the + * interface can be implemented by the tree builder itself and by snapshots. + * + * @version $Id$ + * @author hsivonen + */ +public interface TreeBuilderState { + + /** + * Returns the stack. + * + * @return the stack + */ + public StackNode[] getStack(); + + /** + * Returns the listOfActiveFormattingElements. + * + * @return the listOfActiveFormattingElements + */ + public StackNode[] getListOfActiveFormattingElements(); + + /** + * Returns the stack of template insertion modes. + * + * @return the stack of template insertion modes + */ + public int[] getTemplateModeStack(); + + /** + * Returns the formPointer. + * + * @return the formPointer + */ + public T getFormPointer(); + + /** + * Returns the headPointer. + * + * @return the headPointer + */ + public T getHeadPointer(); + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent(); + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode(); + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode(); + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk(); + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF(); + + /** + * Returns the quirks. + * + * @return the quirks + */ + public boolean isQuirks(); + + /** + * Return the length of the stack. + * @return the length of the stack. + */ + public int getStackLength(); + + /** + * Return the length of the list of active formatting elements. + * @return the length of the list of active formatting elements. + */ + public int getListOfActiveFormattingElementsLength(); + + /** + * Return the length of the stack of template insertion modes. 
+ * + * @return the length of the stack of template insertion modes. + */ + int getTemplateModeStackLength(); +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java new file mode 100644 index 0000000000..ec79185ec2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.NoLength; + +/** + * A UTF-16 buffer that knows the start and end indices of its unconsumed + * content. + * + * @version $Id$ + * @author hsivonen + */ +public final class UTF16Buffer { + + /** + * The backing store of the buffer. 
May be larger than the logical content + * of this UTF16Buffer. + */ + private final @NoLength char[] buffer; + + /** + * The index of the first unconsumed character in the backing buffer. + */ + private int start; + + /** + * The index of the slot immediately after the last character in the backing + * buffer that is part of the logical content of this + * UTF16Buffer. + */ + private int end; + + //[NOCPP[ + + /** + * Constructor for wrapping an existing UTF-16 code unit array. + * + * @param buffer + * the backing buffer + * @param start + * the index of the first character to consume + * @param end + * the index immediately after the last character to consume + */ + public UTF16Buffer(@NoLength char[] buffer, int start, int end) { + this.buffer = buffer; + this.start = start; + this.end = end; + } + + // ]NOCPP] + + /** + * Returns the start index. + * + * @return the start index + */ + public int getStart() { + return start; + } + + /** + * Sets the start index. + * + * @param start + * the start index + */ + public void setStart(int start) { + this.start = start; + } + + /** + * Returns the backing buffer. + * + * @return the backing buffer + */ + public @NoLength char[] getBuffer() { + return buffer; + } + + /** + * Returns the end index. + * + * @return the end index + */ + public int getEnd() { + return end; + } + + /** + * Checks if the buffer has data left. + * + * @return true if there's data left + */ + public boolean hasMore() { + return start < end; + } + + /** + * Returns end - start. + * + * @return end - start + */ + public int getLength() { + return end - start; + } + + /** + * Adjusts the start index to skip over the first character if it is a line + * feed and the previous character was a carriage return. + * + * @param lastWasCR + * whether the previous character was a carriage return + */ + public void adjust(boolean lastWasCR) { + if (lastWasCR && buffer[start] == '\n') { + start++; + } + } + + /** + * Sets the end index. 
+ * + * @param end + * the end index + */ + public void setEnd(int end) { + this.end = end; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html new file mode 100644 index 0000000000..6d029a13e5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html @@ -0,0 +1,30 @@ + + +Package Overview + + + +

This package contains the bulk of parser internals. Only implementors of +additional tree builders or token handlers should look here.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java new file mode 100644 index 0000000000..42d7a837f5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; + +import nu.validator.htmlparser.common.ByteReadable; + +/** + * The BOM sniffing part of the HTML5 encoding sniffing algorithm. 
+ * + * @version $Id$ + * @author hsivonen + */ +public final class BomSniffer { + + private final ByteReadable source; + + /** + * @param source + */ + public BomSniffer(final ByteReadable source) { + this.source = source; + } + + Encoding sniff() throws IOException { + int b = source.readByte(); + if (b == 0xEF) { // UTF-8 + b = source.readByte(); + if (b == 0xBB) { + b = source.readByte(); + if (b == 0xBF) { + return Encoding.UTF8; + } else { + return null; + } + } else { + return null; + } + } else if (b == 0xFF) { // little-endian + b = source.readByte(); + if (b == 0xFE) { + return Encoding.UTF16LE; + } else { + return null; + } + } else if (b == 0xFE) { // big-endian UTF-16 + b = source.readByte(); + if (b == 0xFF) { + return Encoding.UTF16BE; + } else { + return null; + } + } else { + return null; + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java new file mode 100644 index 0000000000..1a2d49746b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +public enum Confidence { + TENTATIVE, CERTAIN +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java new file mode 100644 index 0000000000..f0b0cc55d6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Driver.java @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2005, 2006, 2007 Henri Sivonen + * Copyright (c) 2007-2013 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.nio.charset.UnsupportedCharsetException; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.EncodingDeclarationHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.extra.NormalizationChecker; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.UTF16Buffer; +import nu.validator.htmlparser.rewindable.RewindableInputStream; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class Driver implements EncodingDeclarationHandler { + + /** + * The input UTF-16 code unit stream. If a byte stream was given, this + * object is an instance of HtmlInputStreamReader. + */ + private Reader reader; + + /** + * The reference to the rewindable byte stream. null if + * prohibited or no longer needed. + */ + private RewindableInputStream rewindableInputStream; + + private boolean swallowBom; + + private Encoding characterEncoding; + + private boolean allowRewinding = true; + + private Heuristics heuristics = Heuristics.NONE; + + private final Tokenizer tokenizer; + + private Confidence confidence; + + /** + * Used for NFC checking if non-null, source code capture, + * etc. 
+ */ + private CharacterHandler[] characterHandlers = new CharacterHandler[0]; + + public Driver(Tokenizer tokenizer) { + this.tokenizer = tokenizer; + tokenizer.setEncodingDeclarationHandler(this); + } + + /** + * Returns the allowRewinding. + * + * @return the allowRewinding + */ + public boolean isAllowRewinding() { + return allowRewinding; + } + + /** + * Sets the allowRewinding. + * + * @param allowRewinding + * the allowRewinding to set + */ + public void setAllowRewinding(boolean allowRewinding) { + this.allowRewinding = allowRewinding; + } + + /** + * Turns NFC checking on or off. + * + * @param enable + * true if checking on + */ + public void setCheckingNormalization(boolean enable) { + if (enable) { + if (isCheckingNormalization()) { + return; + } else { + NormalizationChecker normalizationChecker = new NormalizationChecker(tokenizer); + normalizationChecker.setErrorHandler(tokenizer.getErrorHandler()); + + } + } else { + if (isCheckingNormalization()) { + CharacterHandler[] newHandlers = new CharacterHandler[characterHandlers.length - 1]; + boolean skipped = false; + int j = 0; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (!(!skipped && (ch instanceof NormalizationChecker))) { + newHandlers[j] = ch; + j++; + } + } + characterHandlers = newHandlers; + } else { + return; + } + } + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + if (characterHandler == null) { + throw new IllegalArgumentException("Null argument."); + } + CharacterHandler[] newHandlers = new CharacterHandler[characterHandlers.length + 1]; + System.arraycopy(characterHandlers, 0, newHandlers, 0, + characterHandlers.length); + newHandlers[characterHandlers.length] = characterHandler; + characterHandlers = newHandlers; + } + + /** + * Query if checking normalization. 
+ * + * @return true if checking on + */ + public boolean isCheckingNormalization() { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (ch instanceof NormalizationChecker) { + return true; + } + } + return false; + } + + /** + * Runs the tokenization. This is the main entry point. + * + * @param is + * the input source + * @throws SAXException + * on fatal error (if configured to treat XML violations as + * fatal) or if the token handler threw + * @throws IOException + * if the stream threw + */ + public void tokenize(InputSource is) throws SAXException, IOException { + if (is == null) { + throw new IllegalArgumentException("InputSource was null."); + } + tokenizer.start(); + confidence = Confidence.TENTATIVE; + swallowBom = true; + rewindableInputStream = null; + tokenizer.initLocation(is.getPublicId(), is.getSystemId()); + this.reader = is.getCharacterStream(); + this.characterEncoding = encodingFromExternalDeclaration(is.getEncoding()); + if (this.reader == null) { + InputStream inputStream = is.getByteStream(); + if (inputStream == null) { + throw new SAXException("Both streams in InputSource were null."); + } + if (this.characterEncoding == null) { + if (allowRewinding) { + inputStream = rewindableInputStream = new RewindableInputStream( + inputStream); + } + this.reader = new HtmlInputStreamReader(inputStream, + tokenizer.getErrorHandler(), tokenizer, this, heuristics); + } else { + if (this.characterEncoding != Encoding.UTF8) { + warnWithoutLocation("Legacy encoding \u201C" + + this.characterEncoding.getCanonName() + + "\u201D used. 
Documents should use UTF-8."); + } + becomeConfident(); + this.reader = new HtmlInputStreamReader(inputStream, + tokenizer.getErrorHandler(), tokenizer, this, this.characterEncoding); + } + } else { + becomeConfident(); + } + Throwable t = null; + try { + for (;;) { + try { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.start(); + } + runStates(); + break; + } catch (ReparseException e) { + if (rewindableInputStream == null) { + tokenizer.fatal("Changing encoding at this point would need non-streamable behavior."); + } else { + rewindableInputStream.rewind(); + becomeConfident(); + this.reader = new HtmlInputStreamReader( + rewindableInputStream, tokenizer.getErrorHandler(), tokenizer, + this, this.characterEncoding); + } + continue; + } + } + } catch (Throwable tr) { + t = tr; + } finally { + try { + tokenizer.end(); + characterEncoding = null; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.end(); + } + reader.close(); + reader = null; + rewindableInputStream = null; + } catch (Throwable tr) { + if (t == null) { + t = tr; + } // else drop the later throwable + } + if (t != null) { + if (t instanceof IOException) { + throw (IOException) t; + } else if (t instanceof SAXException) { + throw (SAXException) t; + } else if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } else if (t instanceof Error) { + throw (Error) t; + } else { + // impossible + throw new RuntimeException(t); + } + } + } + } + + void dontSwallowBom() { + swallowBom = false; + } + + private void runStates() throws SAXException, IOException { + char[] buffer = new char[2048]; + UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0); + boolean lastWasCR = false; + int len = -1; + if ((len = reader.read(buffer)) != -1) { + assert len > 0; + int streamOffset = 0; + int offset = 0; + int length = len; + if (swallowBom) { + if (buffer[0] == '\uFEFF') { + streamOffset = -1; + 
offset = 1; + length--; + } + } + if (length > 0) { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.characters(buffer, offset, length); + } + tokenizer.setTransitionBaseOffset(streamOffset); + bufr.setStart(offset); + bufr.setEnd(offset + length); + while (bufr.hasMore()) { + bufr.adjust(lastWasCR); + lastWasCR = false; + if (bufr.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(bufr); + } + } + } + streamOffset = length; + while ((len = reader.read(buffer)) != -1) { + assert len > 0; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.characters(buffer, 0, len); + } + tokenizer.setTransitionBaseOffset(streamOffset); + bufr.setStart(0); + bufr.setEnd(len); + while (bufr.hasMore()) { + bufr.adjust(lastWasCR); + lastWasCR = false; + if (bufr.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(bufr); + } + } + streamOffset += len; + } + } + tokenizer.eof(); + } + + public void setEncoding(Encoding encoding, Confidence confidence) { + this.characterEncoding = encoding; + if (confidence == Confidence.CERTAIN) { + becomeConfident(); + } + } + + public boolean internalEncodingDeclaration(String internalCharset) + throws SAXException { + try { + internalCharset = Encoding.toAsciiLowerCase(internalCharset); + Encoding cs; + if ("utf-16".equals(internalCharset) + || "utf-16be".equals(internalCharset) + || "utf-16le".equals(internalCharset)) { + tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C" + + internalCharset + + "\u201D which is not an ASCII superset. 
Continuing as if the encoding had been \u201Cutf-8\u201D."); + cs = Encoding.UTF8; + internalCharset = "utf-8"; + } else { + cs = Encoding.forName(internalCharset); + } + Encoding actual = cs.getActualHtmlEncoding(); + if (actual == null) { + actual = cs; + } + if (!actual.isAsciiSuperset()) { + tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C" + + internalCharset + + "\u201D which is not an ASCII superset. Not changing the encoding."); + return false; + } + if (characterEncoding == null) { + // Reader case + return true; + } + if (characterEncoding == actual) { + becomeConfident(); + return true; + } + if (confidence == Confidence.CERTAIN && actual != characterEncoding) { + tokenizer.errTreeBuilder("Internal encoding declaration \u201C" + + internalCharset + + "\u201D disagrees with the actual encoding of the document (\u201C" + + characterEncoding.getCanonName() + "\u201D)."); + } else { + Encoding newEnc = whineAboutEncodingAndReturnActual( + internalCharset, cs); + tokenizer.errTreeBuilder("Changing character encoding \u201C" + + internalCharset + "\u201D and reparsing."); + characterEncoding = newEnc; + throw new ReparseException(); + } + return true; + } catch (UnsupportedCharsetException e) { + tokenizer.errTreeBuilder("Internal encoding declaration named an unsupported chararacter encoding \u201C" + + internalCharset + "\u201D."); + return false; + } + } + + /** + * + */ + private void becomeConfident() { + if (rewindableInputStream != null) { + rewindableInputStream.willNotRewind(); + } + confidence = Confidence.CERTAIN; + tokenizer.becomeConfident(); + } + + /** + * Sets the encoding sniffing heuristics. 
+ * + * @param heuristics + * the heuristics to set + */ + public void setHeuristics(Heuristics heuristics) { + this.heuristics = heuristics; + } + + /** + * Reports a warning without line/col + * + * @param message + * the message + * @throws SAXException + */ + protected void warnWithoutLocation(String message) throws SAXException { + ErrorHandler errorHandler = tokenizer.getErrorHandler(); + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, null, + tokenizer.getSystemId(), -1, -1); + errorHandler.warning(spe); + } + + /** + * Initializes a decoder from external decl. + */ + protected Encoding encodingFromExternalDeclaration(String encoding) + throws SAXException { + if (encoding == null) { + return null; + } + encoding = Encoding.toAsciiLowerCase(encoding); + try { + Encoding cs = Encoding.forName(encoding); + if ("utf-16".equals(cs.getCanonName()) + || "utf-32".equals(cs.getCanonName())) { + swallowBom = false; + } + return whineAboutEncodingAndReturnActual(encoding, cs); + } catch (UnsupportedCharsetException e) { + tokenizer.err("Unsupported character encoding name: \u201C" + encoding + + "\u201D. Will sniff."); + swallowBom = true; + } + return null; // keep the compiler happy + } + + /** + * @param encoding + * @param cs + * @return + * @throws SAXException + */ + protected Encoding whineAboutEncodingAndReturnActual(String encoding, + Encoding cs) throws SAXException { + String canonName = cs.getCanonName(); + if (!cs.isRegistered()) { + if (encoding.startsWith("x-")) { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding. (Charmod C022)"); + } else { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. 
(Charmod C023)"); + } + } else if (!canonName.equals(encoding)) { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C" + + canonName + "\u201D. (Charmod C024)"); + } + if (cs.isShouldNot()) { + tokenizer.warn("Authors should not use the character encoding \u201C" + + encoding + + "\u201D. It is recommended to use \u201CUTF-8\u201D."); + } else if (cs.isLikelyEbcdic()) { + tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D."); + } else if (cs.isObscure()) { + tokenizer.warn("The character encoding \u201C" + + encoding + + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D."); + } + Encoding actual = cs.getActualHtmlEncoding(); + if (actual == null) { + return cs; + } else { + tokenizer.warn("Using \u201C" + actual.getCanonName() + + "\u201D instead of the declared encoding \u201C" + + encoding + "\u201D."); + return actual; + } + } + + private class ReparseException extends SAXException { + + } + + void notifyAboutMetaBoundary() { + tokenizer.notifyAboutMetaBoundary(); + } + + /** + * @param commentPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + tokenizer.setCommentPolicy(commentPolicy); + } + + /** + * @param contentNonXmlCharPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + } + + /** + * @param contentSpacePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void 
setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + tokenizer.setContentSpacePolicy(contentSpacePolicy); + } + + /** + * @param eh + * @see nu.validator.htmlparser.impl.Tokenizer#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler eh) { + tokenizer.setErrorHandler(eh); + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (ch instanceof NormalizationChecker) { + NormalizationChecker nc = (NormalizationChecker) ch; + nc.setErrorHandler(eh); + } + } + } + + public void setTransitionHandler(TransitionHandler transitionHandler) { + if (tokenizer instanceof ErrorReportingTokenizer) { + ErrorReportingTokenizer ert = (ErrorReportingTokenizer) tokenizer; + ert.setTransitionHandler(transitionHandler); + } else if (transitionHandler != null) { + throw new IllegalStateException("Attempt to set a transition handler on a plain tokenizer."); + } + } + + /** + * @param html4ModeCompatibleWithXhtml1Schemata + * @see nu.validator.htmlparser.impl.Tokenizer#setHtml4ModeCompatibleWithXhtml1Schemata(boolean) + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + } + + /** + * @param mappingLangToXmlLang + * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang); + } + + /** + * @param namePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + tokenizer.setNamePolicy(namePolicy); + } + + /** + * @param xmlnsPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void 
setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + tokenizer.setXmlnsPolicy(xmlnsPolicy); + } + + public String getCharacterEncoding() throws SAXException { + return characterEncoding.getCanonName(); + } + + public Locator getDocumentLocator() { + return tokenizer; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java new file mode 100644 index 0000000000..3bbc606fab --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java @@ -0,0 +1,395 @@ +/* + * Copyright (c) 2006 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderMalfunctionError; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + +public class Encoding { + + public static final Encoding UTF8; + + public static final Encoding UTF16; + + public static final Encoding UTF16LE; + + public static final Encoding UTF16BE; + + public static final Encoding WINDOWS1252; + + private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" }; + + private static String[] BANNED = { "bocu1", "cesu8", "compoundtext", + "iscii91", "macarabic", "maccentraleurroman", "maccroatian", + "maccyrillic", "macdevanagari", "macfarsi", "macgreek", + "macgujarati", "macgurmukhi", "machebrew", "macicelandic", + "macroman", "macromanian", "macthai", "macturkish", "macukranian", + "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname", + "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom", + "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian", + "xutf32oppositeendian", "xutf32platformendian" }; + + private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp", + "euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591", + "iso885913", "iso885915", "iso88592", "iso88593", "iso88594", + "iso88595", "iso88596", "iso88597", "iso88598", "iso88599", + "koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be", + "utf16le", "utf8", "windows1250", "windows1251", "windows1252", + "windows1253", "windows1254", "windows1255", "windows1256", + "windows1257", "windows1258" }; + + private static Map 
encodingByCookedName = new HashMap(); + + private final String canonName; + + private final Charset charset; + + private final boolean asciiSuperset; + + private final boolean obscure; + + private final boolean shouldNot; + + private final boolean likelyEbcdic; + + private Encoding actualHtmlEncoding = null; + + static { + byte[] testBuf = new byte[0x7F]; + for (int i = 0; i < 0x7F; i++) { + if (isAsciiSupersetnessSensitive(i)) { + testBuf[i] = (byte) i; + } else { + testBuf[i] = (byte) 0x20; + } + } + + Set encodings = new HashSet(); + + SortedMap charsets = Charset.availableCharsets(); + for (Map.Entry entry : charsets.entrySet()) { + Charset cs = entry.getValue(); + String name = toNameKey(cs.name()); + String canonName = toAsciiLowerCase(cs.name()); + if (!isBanned(name)) { + name = name.intern(); + boolean asciiSuperset = asciiMapsToBasicLatin(testBuf, cs); + Encoding enc = new Encoding(canonName.intern(), cs, + asciiSuperset, isObscure(name), isShouldNot(name), + isLikelyEbcdic(name, asciiSuperset)); + encodings.add(enc); + Set aliases = cs.aliases(); + for (String alias : aliases) { + encodingByCookedName.put(toNameKey(alias).intern(), enc); + } + } + } + // Overwrite possible overlapping aliases with the real things--just in + // case + for (Encoding encoding : encodings) { + encodingByCookedName.put(toNameKey(encoding.getCanonName()), + encoding); + } + UTF8 = forName("utf-8"); + UTF16 = forName("utf-16"); + UTF16BE = forName("utf-16be"); + UTF16LE = forName("utf-16le"); + WINDOWS1252 = forName("windows-1252"); + try { + forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("iso-8859-11").actualHtmlEncoding = forName("windows-874"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("x-iso-8859-11").actualHtmlEncoding = 
forName("windows-874"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("tis-620").actualHtmlEncoding = forName("windows-874"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("gb_2312-80").actualHtmlEncoding = forName("gbk"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("gb2312").actualHtmlEncoding = forName("gbk"); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("x-x-big5", forName("big5")); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("euc-kr", forName("windows-949")); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("ks_c_5601-1987", forName("windows-949")); + } catch (UnsupportedCharsetException e) { + } + } + + private static boolean isAsciiSupersetnessSensitive(int c) { + return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0x22) + || (c >= 0x26 && c <= 0x27) || (c >= 0x2C && c <= 0x3F) + || (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A); + } + + private static boolean isObscure(String lowerCasePreferredIanaName) { + return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1); + } + + private static boolean isBanned(String lowerCasePreferredIanaName) { + if (lowerCasePreferredIanaName.startsWith("xibm")) { + return true; + } + return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1); + } + + private static boolean isShouldNot(String lowerCasePreferredIanaName) { + return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1); + } + + /** + * @param testBuf + * @param cs + */ + private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) { + CharsetDecoder dec = cs.newDecoder(); + dec.onMalformedInput(CodingErrorAction.REPORT); + dec.onUnmappableCharacter(CodingErrorAction.REPORT); + Reader r = new InputStreamReader(new ByteArrayInputStream(testBuf), dec); + try { + for (int i = 0; i < 0x7F; i++) { + if (isAsciiSupersetnessSensitive(i)) { 
+ if (r.read() != i) { + return false; + } + } else { + if (r.read() != 0x20) { + return false; + } + } + } + } catch (IOException e) { + return false; + } catch (Exception e) { + return false; + } catch (CoderMalfunctionError e) { + return false; + } + + return true; + } + + private static boolean isLikelyEbcdic(String canonName, + boolean asciiSuperset) { + if (!asciiSuperset) { + return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm")); + } else { + return false; + } + } + + public static Encoding forName(String name) { + Encoding rv = encodingByCookedName.get(toNameKey(name)); + if (rv == null) { + throw new UnsupportedCharsetException(name); + } else { + return rv; + } + } + + public static String toNameKey(String str) { + if (str == null) { + return null; + } + int j = 0; + char[] buf = new char[str.length()]; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (!((c >= '\t' && c <= '\r') || (c >= '\u0020' && c <= '\u002F') + || (c >= '\u003A' && c <= '\u0040') + || (c >= '\u005B' && c <= '\u0060') || (c >= '\u007B' && c <= '\u007E'))) { + buf[j] = c; + j++; + } + } + return new String(buf, 0, j); + } + + public static String toAsciiLowerCase(String str) { + if (str == null) { + return null; + } + char[] buf = new char[str.length()]; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + buf[i] = c; + } + return new String(buf); + } + + /** + * @param canonName + * @param charset + * @param asciiSuperset + * @param obscure + * @param shouldNot + * @param likelyEbcdic + */ + private Encoding(final String canonName, final Charset charset, + final boolean asciiSuperset, final boolean obscure, + final boolean shouldNot, final boolean likelyEbcdic) { + this.canonName = canonName; + this.charset = charset; + this.asciiSuperset = asciiSuperset; + this.obscure = obscure; + this.shouldNot = 
shouldNot; + this.likelyEbcdic = likelyEbcdic; + } + + /** + * Returns the asciiSuperset. + * + * @return the asciiSuperset + */ + public boolean isAsciiSuperset() { + return asciiSuperset; + } + + /** + * Returns the canonName. + * + * @return the canonName + */ + public String getCanonName() { + return canonName; + } + + /** + * Returns the likelyEbcdic. + * + * @return the likelyEbcdic + */ + public boolean isLikelyEbcdic() { + return likelyEbcdic; + } + + /** + * Returns the obscure. + * + * @return the obscure + */ + public boolean isObscure() { + return obscure; + } + + /** + * Returns the shouldNot. + * + * @return the shouldNot + */ + public boolean isShouldNot() { + return shouldNot; + } + + public boolean isRegistered() { + return !canonName.startsWith("x-"); + } + + /** + * @return + * @see java.nio.charset.Charset#canEncode() + */ + public boolean canEncode() { + return charset.canEncode(); + } + + /** + * @return + * @see java.nio.charset.Charset#newDecoder() + */ + public CharsetDecoder newDecoder() { + return charset.newDecoder(); + } + + /** + * @return + * @see java.nio.charset.Charset#newEncoder() + */ + public CharsetEncoder newEncoder() { + return charset.newEncoder(); + } + + /** + * Returns the actualHtmlEncoding. 
+ * + * @return the actualHtmlEncoding + */ + public Encoding getActualHtmlEncoding() { + return actualHtmlEncoding; + } + + public static void main(String[] args) { + for (Map.Entry entry : encodingByCookedName.entrySet()) { + String name = entry.getKey(); + Encoding enc = entry.getValue(); + System.out.printf( + "%21s: canon %21s, obs %5s, reg %5s, asc %5s, ebc %5s\n", + name, enc.getCanonName(), enc.isObscure(), + enc.isRegistered(), enc.isAsciiSuperset(), + enc.isLikelyEbcdic()); + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java new file mode 100644 index 0000000000..413f0d9e9b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2013 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.htmlparser.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import nu.validator.htmlparser.common.ByteReadable;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.extra.ChardetSniffer;
import nu.validator.htmlparser.extra.IcuDetectorSniffer;
import nu.validator.htmlparser.impl.Tokenizer;

import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Be very careful with this class. It is not a general-purpose subclass of
 * {@code Reader}. Instead, it is the minimal implementation that does what
 * {@code Tokenizer} needs while being an instance of {@code Reader}: only
 * {@link #read(char[])} and {@link #close()} are supported; the other
 * {@code read} overloads throw {@code UnsupportedOperationException}.
 *
 * While in the sniffing state the instance also serves raw bytes to the
 * encoding sniffers through {@link #readByte()}, and it acts as the
 * {@code Locator} for the decoding errors it reports.
 *
 * The only reason why this is a public class is that it needs to be visible to
 * test code in another package.
 *
 * @version $Id$
 * @author hsivonen
 */
public final class HtmlInputStreamReader extends Reader implements
        ByteReadable, Locator {

    // Upper bound on the number of bytes buffered for sniffing.
    private static final int SNIFFING_LIMIT = 1024;

    private final InputStream inputStream;

    private final ErrorHandler errorHandler;

    private final Tokenizer tokenizer;

    private final Driver driver;

    private CharsetDecoder decoder = null;

    // True while sniffers may call readByte(); read(char[]) asserts it is false.
    private boolean sniffing = true;

    // Number of valid bytes in byteArray during sniffing.
    private int limit = 0;

    // Next index readByte() returns during sniffing.
    private int position = 0;

    // Bytes consumed by the decoder so far (including skipped bad bytes).
    private int bytesRead = 0;

    private boolean eofSeen = false;

    private boolean shouldReadBytes = false;

    // Set once bytesRead plus the undecoded remainder reaches SNIFFING_LIMIT.
    private boolean charsetBoundaryPassed = false;

    private final byte[] byteArray = new byte[4096]; // Length must be >=

    // SNIFFING_LIMIT

    private final ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray);

    // Deferred driver.notifyAboutMetaBoundary(); delivered at the start of the
    // next read(char[]) call.
    private boolean needToNotifyTokenizer = false;

    // Set when EOF was seen and the final decode pass has been started.
    private boolean flushing = false;

    private int line = -1;

    private int col = -1;

    // Index into the current char array up to which line/col were computed.
    private int lineColPos;

    // A U+FFFD that did not fit into the previous output buffer.
    private boolean hasPendingReplacementCharacter = false;

    private boolean nextCharOnNewLine;

    private boolean prevWasCR;

    /**
     * Creates a reader that determines the encoding by sniffing: first a BOM,
     * then a meta declaration, then (depending on {@code heuristics}) the
     * chardet and ICU detectors, and finally windows-1252 as the fallback.
     * The result is reported to the driver, with certain confidence for a
     * BOM and tentative confidence otherwise.
     *
     * NOTE(review): for a BOM that is not UTF-8 the driver is told
     * {@code Encoding.UTF16} while the decoder is created from the sniffed
     * encoding -- presumably intentional; confirm against BomSniffer.
     *
     * @param inputStream
     *            the byte stream to decode
     * @param errorHandler
     *            receives warnings and errors; may be {@code null}
     * @param tokenizer
     *            source of location information; may be {@code null}
     * @param driver
     *            notified about the chosen encoding; may be {@code null}
     * @param heuristics
     *            which detectors to run when nothing was declared
     * @throws IOException
     *             if reading failed or the error handler threw
     * @throws SAXException
     */
    public HtmlInputStreamReader(InputStream inputStream,
            ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
            Heuristics heuristics) throws SAXException, IOException {
        this.inputStream = inputStream;
        this.errorHandler = errorHandler;
        this.tokenizer = tokenizer;
        this.driver = driver;
        this.sniffing = true;
        Encoding encoding = (new BomSniffer(this)).sniff();
        if (encoding == null) {
            // No BOM: rewind the sniffing cursor and look for a meta.
            position = 0;
            encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
            boolean declared = true;
            if (encoding == null) {
                declared = false;
            } else if (encoding != Encoding.UTF8) {
                warn("Legacy encoding \u201C"
                        + encoding.getCanonName()
                        + "\u201D used. Documents should use UTF-8.");
            }
            if (encoding == null
                    && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
                encoding = (new ChardetSniffer(byteArray, limit)).sniff();
            }
            if (encoding == null
                    && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) {
                position = 0;
                encoding = (new IcuDetectorSniffer(this)).sniff();
            }
            sniffing = false;
            if (encoding == null) {
                encoding = Encoding.WINDOWS1252;
            }
            if (!declared) {
                err("The character encoding was not declared. Proceeding using \u201C" + encoding.getCanonName() + "\u201D.");
            }
            if (driver != null) {
                driver.setEncoding(encoding, Confidence.TENTATIVE);
            }
        } else {
            if (encoding == Encoding.UTF8) {
                if (driver != null) {
                    driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN);
                }
            } else {
                warn("Legacy encoding \u201C"
                        + encoding.getCanonName()
                        + "\u201D used. Documents should use UTF-8.");
                if (driver != null) {
                    driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN);
                }
            }
        }
        this.decoder = encoding.newDecoder();
        sniffing = false;
        position = 0;
        bytesRead = 0;
        // Expose the bytes buffered during sniffing to the decoder.
        byteBuffer.position(position);
        byteBuffer.limit(limit);
        initDecoder();
    }

    /**
     * Configures the decoder to report malformed and unmappable input instead
     * of replacing it; read(char[]) turns those reports into errors plus
     * U+FFFD.
     */
    private void initDecoder() {
        this.decoder.onMalformedInput(CodingErrorAction.REPORT);
        this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Creates a reader for an encoding that is already known; no sniffing is
     * performed and the first read(char[]) call starts by reading bytes.
     *
     * @param inputStream
     *            the byte stream to decode
     * @param errorHandler
     *            receives warnings and errors; may be {@code null}
     * @param tokenizer
     *            source of location information; may be {@code null}
     * @param driver
     *            notified about the meta boundary; may be {@code null}
     * @param encoding
     *            the encoding to decode with
     */
    public HtmlInputStreamReader(InputStream inputStream,
            ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
            Encoding encoding) throws SAXException, IOException {
        this.inputStream = inputStream;
        this.errorHandler = errorHandler;
        this.tokenizer = tokenizer;
        this.driver = driver;
        this.decoder = encoding.newDecoder();
        this.sniffing = false;
        position = 0;
        bytesRead = 0;
        byteBuffer.position(0);
        byteBuffer.limit(0);
        shouldReadBytes = true;
        initDecoder();
    }

    /** Closes the underlying input stream. */
    @Override public void close() throws IOException {
        inputStream.close();
    }

    /**
     * Decodes bytes into {@code charArray}, starting at index 0. Byte
     * sequences the decoder rejects are reported through {@code err} and
     * replaced by U+FFFD.
     *
     * @param charArray
     *            the output buffer; must hold at least two chars
     * @return the number of chars written, or -1 when the input is exhausted
     * @throws IOException
     *             if the stream or the error handler threw
     */
    @Override public int read(char[] charArray) throws IOException {
        lineColPos = 0;
        assert !sniffing;
        assert charArray.length >= 2;
        if (needToNotifyTokenizer) {
            if (driver != null) {
                driver.notifyAboutMetaBoundary();
            }
            needToNotifyTokenizer = false;
        }
        CharBuffer charBuffer = CharBuffer.wrap(charArray);
        charBuffer.limit(charArray.length);
        charBuffer.position(0);
        if (flushing) {
            decoder.flush(charBuffer);
            // return -1 if zero
            int cPos = charBuffer.position();
            return cPos == 0 ? -1 : cPos;
        }
        if (hasPendingReplacementCharacter) {
            charBuffer.put('\uFFFD');
            hasPendingReplacementCharacter = false;
        }
        for (;;) {
            if (shouldReadBytes) {
                int oldLimit = byteBuffer.limit();
                int readLen;
                // Until the sniffing boundary has been passed, never buffer
                // beyond SNIFFING_LIMIT bytes.
                if (charsetBoundaryPassed) {
                    readLen = byteArray.length - oldLimit;
                } else {
                    readLen = SNIFFING_LIMIT - oldLimit;
                }
                int num = inputStream.read(byteArray, oldLimit, readLen);
                if (num == -1) {
                    eofSeen = true;
                    inputStream.close();
                } else {
                    byteBuffer.position(0);
                    byteBuffer.limit(oldLimit + num);
                }
                shouldReadBytes = false;
            }
            boolean finalDecode = false;
            for (;;) {
                int oldBytePos = byteBuffer.position();
                CoderResult cr = decoder.decode(byteBuffer, charBuffer,
                        finalDecode);
                bytesRead += byteBuffer.position() - oldBytePos;
                if (cr == CoderResult.OVERFLOW) {
                    // Decoder will remember surrogates
                    return charBuffer.position();
                } else if (cr == CoderResult.UNDERFLOW) {
                    int remaining = byteBuffer.remaining();
                    if (!charsetBoundaryPassed) {
                        if (bytesRead + remaining >= SNIFFING_LIMIT) {
                            needToNotifyTokenizer = true;
                            charsetBoundaryPassed = true;
                        }
                    }

                    // XXX what happens if the entire byte buffer consists of
                    // a pathologically long malformed sequence?

                    // If the buffer was not fully consumed, there may be an
                    // incomplete byte sequence that needs to seed the next
                    // buffer.
                    if (remaining > 0) {
                        System.arraycopy(byteArray, byteBuffer.position(),
                                byteArray, 0, remaining);
                    }
                    byteBuffer.position(0);
                    byteBuffer.limit(remaining);
                    if (flushing) {
                        // The final decode was successful. Not sure if this
                        // ever happens.
                        // Let's get out in any case.
                        int cPos = charBuffer.position();
                        return cPos == 0 ? -1 : cPos;
                    } else if (eofSeen) {
                        // If there's something left, it isn't something that
                        // would be consumed in the middle of the stream.
                        // Rerun the loop once in the final mode.
                        shouldReadBytes = false;
                        finalDecode = true;
                        flushing = true;
                        continue;
                    } else {
                        // The usual stuff. Want more bytes next time.
                        shouldReadBytes = true;
                        int cPos = charBuffer.position();
                        if (cPos == 0) {
                            // No output. Read more bytes right away
                            break;
                        }
                        return cPos;
                    }
                } else {
                    // The result is in error. No need to test.
                    // Skip the offending bytes, collecting them as hex for
                    // the error message.
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < cr.length(); i++) {
                        if (i > 0) {
                            sb.append(", ");
                        }
                        sb.append('\u201C');
                        sb.append(Integer.toHexString(byteBuffer.get() & 0xFF));
                        bytesRead++;
                        sb.append('\u201D');
                    }
                    if (charBuffer.hasRemaining()) {
                        charBuffer.put('\uFFFD');
                    } else {
                        // Output is full; emit the replacement at the start
                        // of the next call instead.
                        hasPendingReplacementCharacter = true;
                    }
                    calculateLineAndCol(charBuffer);
                    if (cr.isMalformed()) {
                        err("Malformed byte sequence: " + sb + ".");
                    } else if (cr.isUnmappable()) {
                        err("Unmappable byte sequence: " + sb + ".");
                    } else {
                        throw new RuntimeException(
                                "CoderResult was none of overflow, underflow, malformed or unmappable.");
                    }
                    if (finalDecode) {
                        // These were the last bytes of input. Return without
                        // relooping.
                        // return -1 if zero
                        int cPos = charBuffer.position();
                        return cPos == 0 ? -1 : cPos;
                    }
                }
            }
        }
    }

    /**
     * Advances the {@code line}/{@code col} fields over the chars written to
     * {@code charBuffer} since the last call, starting from the tokenizer's
     * position on the first call within a read. Does nothing when there is no
     * tokenizer.
     */
    private void calculateLineAndCol(CharBuffer charBuffer) {
        if (tokenizer != null) {
            if (lineColPos == 0) {
                line = tokenizer.getLine();
                col = tokenizer.getCol();
                nextCharOnNewLine = tokenizer.isNextCharOnNewLine();
                prevWasCR = tokenizer.isPrevCR();
            }

            char[] charArray = charBuffer.array();
            int i = lineColPos;
            while (i < charBuffer.position()) {
                char c;
                if (nextCharOnNewLine) {
                    line++;
                    col = 1;
                    nextCharOnNewLine = false;
                } else {
                    col++;
                }

                c = charArray[i];
                switch (c) {
                    case '\r':
                        nextCharOnNewLine = true;
                        prevWasCR = true;
                        break;
                    case '\n':
                        if (prevWasCR) {
                            col--;
                        } else {
                            nextCharOnNewLine = true;
                        }
                        break;
                }
                i++;
            }
            lineColPos = i;
        }
    }

    /**
     * Returns the next raw byte for an encoding sniffer, buffering what is
     * read so that decoding can later start from the first byte.
     *
     * @return the byte as 0-255, or -1 at end of stream or once
     *         {@code SNIFFING_LIMIT} bytes have been served
     * @throws IllegalStateException
     *             if called when not in the sniffing state
     */
    public int readByte() throws IOException {
        if (!sniffing) {
            throw new IllegalStateException(
                    "readByte() called when not in the sniffing state.");
        }
        if (position == SNIFFING_LIMIT) {
            return -1;
        } else if (position < limit) {
            return byteArray[position++] & 0xFF;
        } else {
            int num = inputStream.read(byteArray, limit, SNIFFING_LIMIT - limit);
            if (num == -1) {
                return -1;
            } else {
                limit += num;
                return byteArray[position++] & 0xFF;
            }
        }
    }

    /**
     * Scratch program: decodes two hard-coded byte arrays with a strict UTF-8
     * decoder into a one-char buffer and prints the coder results.
     */
    public static void main(String[] args) {
        CharsetDecoder dec = Charset.forName("UTF-8").newDecoder();
        dec.onMalformedInput(CodingErrorAction.REPORT);
        dec.onUnmappableCharacter(CodingErrorAction.REPORT);
        byte[] bytes = { (byte) 0xF0, (byte) 0x9D, (byte) 0x80, (byte) 0x80 };
        byte[] bytes2 = { (byte) 0xB8, (byte) 0x80, 0x63, 0x64, 0x65 };
        ByteBuffer byteBuf = ByteBuffer.wrap(bytes);
        ByteBuffer byteBuf2 = ByteBuffer.wrap(bytes2);
        char[] chars = new char[1];
        CharBuffer charBuf = CharBuffer.wrap(chars);

        CoderResult cr = dec.decode(byteBuf, charBuf, false);
        System.out.println(cr);
        System.out.println(byteBuf);
        // byteBuf.get();
        cr = dec.decode(byteBuf2, charBuf, false);
        System.out.println(cr);
        System.out.println(byteBuf2);

    }

    /** @return the column of the last decoding error, or -1 without a tokenizer */
    public int getColumnNumber() {
        if (tokenizer != null) {
            return col;
        }
        return -1;
    }

    /** @return the line of the last decoding error, or -1 without a tokenizer */
    public int getLineNumber() {
        if (tokenizer != null) {
            return line;
        }
        return -1;
    }

    /** @return the tokenizer's public id, or {@code null} without a tokenizer */
    public String getPublicId() {
        if (tokenizer != null) {
            return tokenizer.getPublicId();
        }
        return null;
    }

    /** @return the tokenizer's system id, or {@code null} without a tokenizer */
    public String getSystemId() {
        if (tokenizer != null) {
            return tokenizer.getSystemId();
        }
        return null;
    }

    /**
     * Reports an error to the error handler, if there is one, using this
     * reader as the locator.
     *
     * @param message
     *            the error message
     * @throws IOException
     *             wrapping the SAXException if the error handler threw
     */
    private void err(String message) throws IOException {
        // TODO remove wrapping when changing read() to take a CharBuffer
        try {
            if (errorHandler != null) {
                SAXParseException spe = new SAXParseException(message, this);
                errorHandler.error(spe);
            }
        } catch (SAXException e) {
            throw (IOException) new IOException(e.getMessage()).initCause(e);
        }
    }

    /**
     * Reports a warning to the error handler, if there is one, using this
     * reader as the locator.
     *
     * @param message
     *            the warning message
     * @throws IOException
     *             wrapping the SAXException if the error handler threw
     */
    private void warn(String message) throws IOException {
        // TODO remove wrapping when changing read() to take a CharBuffer
        try {
            if (errorHandler != null) {
                SAXParseException spe = new SAXParseException(message, this);
                errorHandler.warning(spe);
            }
        } catch (SAXException e) {
            throw (IOException) new IOException(e.getMessage()).initCause(e);
        }
    }

    /** @return the charset of the decoder currently in use */
    public Charset getCharset() {
        return decoder.charset();
    }

    /**
     * Not supported.
     *
     * @see java.io.Reader#read()
     */
    @Override public int read() throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported.
     *
     * @see java.io.Reader#read(char[], int, int)
     */
    @Override public int read(char[] cbuf, int off, int len) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported.
     *
     * @see java.io.Reader#read(java.nio.CharBuffer)
     */
    @Override public int read(CharBuffer target) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Replaces the decoder with a fresh, strict decoder for {@code newEnc}.
     * Buffered bytes and counters are left as they are.
     */
    public void switchEncoding(Encoding newEnc) {
        this.decoder = newEnc.newDecoder();
        initDecoder();
    }
}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java
b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java new file mode 100644 index 0000000000..baa04e44fb --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+package nu.validator.htmlparser.io;
+
+import java.io.IOException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import nu.validator.htmlparser.common.ByteReadable;
+import nu.validator.htmlparser.impl.MetaScanner;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/**
+ * A MetaScanner that reads bytes from a ByteReadable, keeps its own line and
+ * column count for error reporting, and validates the encoding labels handed
+ * to tryCharset.
+ */
+public class MetaSniffer extends MetaScanner implements Locator {
+
+    private Encoding characterEncoding = null;
+
+    private final ErrorHandler errorHandler;
+
+    private final Locator locator;
+
+    private int line = 1;
+
+    private int col = 0;
+
+    private boolean prevWasCR = false;
+
+    /**
+     * @param eh receives errors and warnings; may be null
+     * @param locator supplies the public and system ids; may be null
+     */
+    public MetaSniffer(ErrorHandler eh, Locator locator) {
+        this.characterEncoding = null;
+        this.locator = locator;
+        this.errorHandler = eh;
+    }
+
+    /**
+     * Reads one byte and updates the line/column position.
+     *
+     * @return the byte read; -1 means end
+     * @throws IOException if the underlying read fails
+     */
+    protected int read() throws IOException {
+        int b = readable.readByte();
+        // [NOCPP[
+        if (b == '\r') {
+            line++;
+            col = 0;
+            prevWasCR = true;
+        } else if (b == '\n') {
+            // An LF right after a CR belongs to the same line break.
+            if (!prevWasCR) {
+                line++;
+                col = 0;
+            }
+            prevWasCR = false;
+        } else {
+            col++;
+            prevWasCR = false;
+        }
+        // ]NOCPP]
+        return b;
+    }
+
+    /**
+     * Runs the scanner state loop over the given input.
+     *
+     * @param readable the byte source
+     * @return the encoding accepted by tryCharset, or null if none was
+     * @throws SAXException if the error handler throws
+     * @throws IOException if reading fails
+     */
+    public Encoding sniff(ByteReadable readable) throws SAXException, IOException {
+        this.readable = readable;
+        stateLoop(stateSave);
+        return characterEncoding;
+    }
+
+
+    /**
+     * Sends an error to the error handler, if there is one.
+     *
+     * @param message the error message
+     * @throws SAXException if the handler throws
+     */
+    private void err(String message) throws SAXException {
+        if (errorHandler == null) {
+            return;
+        }
+        errorHandler.error(new SAXParseException(message, this));
+    }
+
+    /**
+     * Sends a warning to the error handler, if there is one.
+     *
+     * @param message the warning message
+     * @throws SAXException if the handler throws
+     */
+    private void warn(String message) throws SAXException {
+        if (errorHandler == null) {
+            return;
+        }
+        errorHandler.warning(new SAXParseException(message, this));
+    }
+
+    public int getColumnNumber() {
+        return col;
+    }
+
+    public int getLineNumber() {
+        return line;
+    }
+
+    public String getPublicId() {
+        return locator == null ? null : locator.getPublicId();
+    }
+
+    public String getSystemId() {
+        return locator == null ? null : locator.getSystemId();
+    }
+
+    /**
+     * Checks a declared encoding label and, when it is usable, records the
+     * encoding to be returned by sniff.
+     *
+     * @param encoding the label as declared; compared in ASCII lower case
+     * @return true if an encoding was recorded, false to keep sniffing
+     * @throws SAXException if the error handler throws
+     */
+    protected boolean tryCharset(String encoding) throws SAXException {
+        encoding = Encoding.toAsciiLowerCase(encoding);
+        try {
+            // XXX spec says only UTF-16
+            boolean utf16Or32 = "utf-16".equals(encoding)
+                    || "utf-16be".equals(encoding)
+                    || "utf-16le".equals(encoding)
+                    || "utf-32".equals(encoding)
+                    || "utf-32be".equals(encoding)
+                    || "utf-32le".equals(encoding);
+            if (utf16Or32) {
+                this.characterEncoding = Encoding.UTF8;
+                err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
+                return true;
+            }
+
+            Encoding cs = Encoding.forName(encoding);
+            String canonName = cs.getCanonName();
+            if (!cs.isAsciiSuperset()) {
+                err("The encoding \u201C" + encoding + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration. Continuing the sniffing algorithm.");
+                return false;
+            }
+
+            if (cs.isRegistered()) {
+                if (!canonName.equals(encoding)) {
+                    err("The encoding \u201C" + encoding + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C" + canonName + "\u201D. (Charmod C024)");
+                }
+            } else if (encoding.startsWith("x-")) {
+                err("The encoding \u201C" + encoding + "\u201D is not an IANA-registered encoding. (Charmod C022)");
+            } else {
+                err("The encoding \u201C" + encoding + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
+            }
+
+            if (cs.isShouldNot()) {
+                warn("Authors should not use the character encoding \u201C" + encoding + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
+            } else if (cs.isObscure()) {
+                warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
+            }
+
+            Encoding actual = cs.getActualHtmlEncoding();
+            if (actual != null) {
+                warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");
+                this.characterEncoding = actual;
+            } else {
+                this.characterEncoding = cs;
+            }
+            return true;
+        } catch (UnsupportedCharsetException e) {
+            err("Unsupported character encoding name: \u201C" + encoding + "\u201D. Will continue sniffing.");
+            return false;
+        }
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
new file mode 100644
index 0000000000..47a3d5eb0a
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Thai Open Source Software Center Ltd nor + * the names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+/**
+ * Something whose consumption can be restarted from the beginning.
+ * RewindableInputStream in this package is an implementation.
+ */
+public interface Rewindable {
+    /**
+     * Declares that rewind() will not be called any more.
+     * (RewindableInputStream stops saving data after this call and its
+     * rewind() then throws IllegalStateException.)
+     */
+    void willNotRewind();
+
+    /**
+     * Restarts consumption from the beginning.
+     */
+    void rewind();
+
+    /**
+     * @return whether rewind() may still be called
+     */
+    boolean canRewind();
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
new file mode 100644
index 0000000000..3a1cc1b91f
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the Thai Open Source Software Center Ltd nor
+ * the names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package nu.validator.htmlparser.rewindable;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An InputStream wrapper that, until willNotRewind() is called, saves every
+ * byte it reads from the wrapped stream in a linked chain of blocks so that
+ * rewind() can replay them from the start.
+ */
+public class RewindableInputStream extends InputStream implements Rewindable {
+    /**
+     * One link of the chain of saved bytes; buf[0..used) holds data.
+     */
+    static class Block {
+        Block next;
+
+        final byte[] buf;
+
+        int used = 0;
+
+        static final int MIN_SIZE = 1024;
+
+        // Allocates at least MIN_SIZE bytes, more if minSize is larger.
+        Block(int minSize) {
+            buf = new byte[Math.max(MIN_SIZE, minSize)];
+        }
+
+        Block() {
+            this(0);
+        }
+
+        // The caller must make sure there is room for the byte.
+        void append(byte b) {
+            buf[used++] = b;
+        }
+
+        // The caller must make sure there is room for len bytes.
+        void append(byte[] b, int off, int len) {
+            System.arraycopy(b, off, buf, used, len);
+            used += len;
+        }
+    }
+
+    // First block of saved data; null when nothing is saved.
+    private Block head;
+
+    /**
+     * If curBlockAvail > 0, then there are curBlockAvail bytes available to be
+     * returned starting at curBlockPos in curBlock.buf.
+ */
+    private int curBlockAvail;
+
+    // Block currently being replayed after a rewind, if any.
+    private Block curBlock;
+
+    private int curBlockPos;
+
+    // Block that newly read bytes are appended to while saving.
+    private Block lastBlock;
+
+    /**
+     * true unless willNotRewind has been called
+     */
+    private boolean saving = true;
+
+    private final InputStream in;
+
+    // Set when close() was called while still saving; the real close is
+    // deferred until willNotRewind().
+    private boolean pretendClosed = false;
+
+    /**
+     * true if we have got an EOF from the underlying InputStream
+     */
+    private boolean eof;
+
+    /**
+     * @param in the stream to wrap
+     * @throws NullPointerException if in is null
+     */
+    public RewindableInputStream(InputStream in) {
+        if (in == null)
+            throw new NullPointerException();
+        this.in = in;
+    }
+
+    /**
+     * While still saving, only abandons the replay position and remembers
+     * that close was requested, leaving the wrapped stream open. Otherwise
+     * drops all state and closes the wrapped stream.
+     */
+    public void close() throws IOException {
+        if (saving) {
+            curBlockAvail = 0;
+            curBlock = null;
+            pretendClosed = true;
+        } else {
+            head = null;
+            curBlock = null;
+            lastBlock = null;
+            saving = false;
+            curBlockAvail = 0;
+            in.close();
+        }
+    }
+
+    /**
+     * Moves the read position back to the first saved byte and cancels a
+     * pending pretend-close.
+     *
+     * @throws IllegalStateException if willNotRewind() has been called
+     */
+    public void rewind() {
+        if (!saving)
+            throw new IllegalStateException("rewind() after willNotRewind()");
+        pretendClosed = false;
+        if (head == null)
+            return;
+        curBlock = head;
+        curBlockPos = 0;
+        curBlockAvail = curBlock.used;
+    }
+
+    public boolean canRewind() {
+        return saving;
+    }
+
+    /**
+     * Stops saving and releases the references to the saved chain. If
+     * close() was requested earlier, the wrapped stream is closed now.
+     */
+    public void willNotRewind() {
+        saving = false;
+        head = null;
+        lastBlock = null;
+        if (pretendClosed) {
+            pretendClosed = false;
+            try {
+                in.close();
+            } catch (IOException e) {
+                // Ignored: this method cannot throw IOException and the
+                // stream is being abandoned anyway.
+            }
+        }
+    }
+
+    /**
+     * Returns the next byte, from the replay position if there is replay
+     * data left, otherwise from the wrapped stream (saving it when saving).
+     */
+    public int read() throws IOException {
+        if (curBlockAvail > 0) {
+            int c = curBlock.buf[curBlockPos++] & 0xFF;
+            --curBlockAvail;
+            if (curBlockAvail == 0) {
+                // Step to the next saved block, if any.
+                curBlock = curBlock.next;
+                if (curBlock != null) {
+                    curBlockPos = 0;
+                    curBlockAvail = curBlock.used;
+                }
+            }
+            return c;
+        }
+        int c = in.read();
+        if (saving && c != -1) {
+            if (lastBlock == null)
+                lastBlock = head = new Block();
+            else if (lastBlock.used == lastBlock.buf.length)
+                lastBlock = lastBlock.next = new Block();
+            lastBlock.append((byte) c);
+        }
+        return c;
+    }
+
+    /**
+     * Fills b first from replay data and then, if room remains, with one
+     * read from the wrapped stream; bytes read from the wrapped stream are
+     * saved when saving.
+     *
+     * @return the number of bytes stored, or -1 at end of stream
+     */
+    public int read(byte b[], int off, int len) throws IOException {
+        // Nothing to replay and nothing to save: plain pass-through.
+        if (curBlockAvail == 0 && !saving)
+            return in.read(b, off, len);
+        if (b == null)
+            throw new NullPointerException();
+        if (len < 0)
+            throw new IndexOutOfBoundsException();
+        int nRead = 0;
+        if (curBlockAvail != 0) {
+            // Copy replay data byte by byte, walking the block chain.
+            for (;;) {
+                if (len == 0)
+                    return nRead;
+                b[off++] = curBlock.buf[curBlockPos++];
+                --len;
+                nRead++;
+                --curBlockAvail;
+                if (curBlockAvail == 0) {
+                    curBlock = curBlock.next;
+                    if (curBlock == null)
+                        break;
+                    curBlockAvail = curBlock.used;
+                    curBlockPos = 0;
+                }
+            }
+        }
+        if (len == 0)
+            return nRead;
+        if (eof)
+            return nRead > 0 ? nRead : -1;
+        try {
+            int n = in.read(b, off, len);
+            if (n < 0) {
+                eof = true;
+                return nRead > 0 ? nRead : -1;
+            }
+            nRead += n;
+            if (saving) {
+                if (lastBlock == null)
+                    lastBlock = head = new Block(n);
+                else if (lastBlock.buf.length - lastBlock.used < n) {
+                    // Not enough room: top up the current block, then put
+                    // the remainder into a new block.
+                    if (lastBlock.used != lastBlock.buf.length) {
+                        int free = lastBlock.buf.length - lastBlock.used;
+                        lastBlock.append(b, off, free);
+                        off += free;
+                        n -= free;
+                    }
+                    lastBlock = lastBlock.next = new Block(n);
+                }
+                lastBlock.append(b, off, n);
+            }
+        } catch (IOException e) {
+            // Rethrown only when no replay bytes were delivered; otherwise
+            // those bytes are returned and the stream is treated as ended.
+            eof = true;
+            if (nRead == 0)
+                throw e;
+        }
+        return nRead;
+    }
+
+    /**
+     * @return the replay bytes not yet returned plus what the wrapped
+     *         stream reports as available
+     */
+    public int available() throws IOException {
+        if (curBlockAvail == 0)
+            return in.available();
+        int n = curBlockAvail;
+        for (Block b = curBlock.next; b != null; b = b.next)
+            n += b.used;
+        return n + in.available();
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
new file mode 100644
index 0000000000..714053e706
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java
@@ -0,0 +1,1097 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2007-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish,
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; +import java.util.HashMap; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.TreeBuilder; +import nu.validator.htmlparser.io.Driver; +import nu.validator.saxtree.Document; +import nu.validator.saxtree.DocumentFragment; +import nu.validator.saxtree.TreeParser; + +import org.xml.sax.ContentHandler; +import org.xml.sax.DTDHandler; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import 
org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; +import org.xml.sax.XMLReader; +import org.xml.sax.ext.LexicalHandler; +import org.xml.sax.helpers.DefaultHandler; + +/** + * This class implements an HTML5 parser that exposes data through the SAX2 + * interface. + * + *

By default, when using the constructor without arguments, this parser + * treats XML 1.0 infoset violations as fatal. This corresponds to FATAL as + * the general XML violation policy. To make the parser coerce XML + * 1.0-incompatible infosets into XML 1.0-compatible infosets instead, set the + * general XML violation policy to ALTER_INFOSET. To make the parser support + * non-conforming HTML fully per the HTML 5 spec while on the other hand + * potentially violating the SAX2 API contract, set the general XML violation + * policy to ALLOW. + * + *

By default, this parser doesn't do true streaming but buffers everything + * first. The parser can be made truly streaming by calling + * setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL). This + * has the consequence that errors that require non-streamable recovery are + * treated as fatal. + * + *

By default, in order to make the parse events emulate the parse events
+ * for a DTDless XML document, the parser does not report the doctype through
+ * LexicalHandler. Doctype reporting through
+ * LexicalHandler can be turned on by calling
+ * setReportingDoctype(true).
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class HtmlParser implements XMLReader {
+
+    // Created by lazyInit(); null means the next parse must (re)build
+    // the driver and tree builder from the current configuration.
+    private Driver driver = null;
+
+    private TreeBuilder treeBuilder = null;
+
+    private SAXStreamer saxStreamer = null; // work around javac bug
+
+    private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug
+
+    private ContentHandler contentHandler = null;
+
+    private LexicalHandler lexicalHandler = null;
+
+    private DTDHandler dtdHandler = null;
+
+    private EntityResolver entityResolver = null;
+
+    private ErrorHandler errorHandler = null;
+
+    private DocumentModeHandler documentModeHandler = null;
+
+    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
+
+    private boolean checkingNormalization = false;
+
+    private boolean scriptingEnabled = false;
+
+    // Element type restored: lazyInit() iterates this list as
+    // CharacterHandler, which a raw List does not allow.
+    private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
+
+    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
+
+    private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
+
+    private boolean html4ModeCompatibleWithXhtml1Schemata = false;
+
+    private boolean mappingLangToXmlLang = false;
+
+    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
+
+    private boolean reportingDoctype = true;
+
+    private ErrorHandler treeBuilderErrorHandler = null;
+
+    private Heuristics heuristics = Heuristics.NONE;
+
+    // NOTE(review): raw type; the key/value types are not visible from
+    // this part of the file -- confirm and parameterize.
+    private HashMap errorProfileMap = null;
+
+    private TransitionHandler transitionHandler = null;
+
+    /**
+     * Instantiates the parser with a fatal XML violation policy.
+     *
+     */
+    public HtmlParser() {
+        this(XmlViolationPolicy.FATAL);
+    }
+
+    /**
+     * Instantiates the parser with a specific XML violation policy.
+     * @param xmlPolicy the policy
+     */
+    public HtmlParser(XmlViolationPolicy xmlPolicy) {
+        setXmlPolicy(xmlPolicy);
+    }
+
+    /**
+     * Creates the tokenizer for a new driver. The plain Tokenizer is used
+     * only when there is no error handler, no transition handler and
+     * non-XML characters in content are allowed; otherwise an
+     * ErrorReportingTokenizer configured with the error profile is used.
+     *
+     * @param handler the token handler to feed
+     * @param newAttributesEachTime passed through to the tokenizer
+     * @return the tokenizer
+     */
+    private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
+        if (errorHandler == null && transitionHandler == null &&
+                contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
+            return new Tokenizer(handler, newAttributesEachTime);
+        }
+        ErrorReportingTokenizer tokenizer =
+            new ErrorReportingTokenizer(handler, newAttributesEachTime);
+        tokenizer.setErrorProfile(errorProfileMap);
+        return tokenizer;
+    }
+
+    /**
+     * This class wraps different tree builders depending on configuration. This
+     * method does the work of hiding this from the user of the class.
+     *
+     * When there is no driver, a SAXTreeBuilder is used if the
+     * streamability violation policy is ALLOW and a SAXStreamer otherwise,
+     * and the current configuration is copied to the new driver and tree
+     * builder.
+     */
+    private void lazyInit() {
+        if (driver == null) {
+            if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) {
+                this.saxTreeBuilder = new SAXTreeBuilder();
+                this.treeBuilder = this.saxTreeBuilder;
+                this.saxStreamer = null;
+                this.driver = new Driver(newTokenizer(treeBuilder, true));
+            } else {
+                this.saxStreamer = new SAXStreamer();
+                this.treeBuilder = this.saxStreamer;
+                this.saxTreeBuilder = null;
+                this.driver = new Driver(newTokenizer(treeBuilder, false));
+            }
+            this.driver.setErrorHandler(errorHandler);
+            this.driver.setTransitionHandler(transitionHandler);
+            this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
+            this.driver.setCheckingNormalization(checkingNormalization);
+            this.driver.setCommentPolicy(commentPolicy);
+            this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
+            this.driver.setContentSpacePolicy(contentSpacePolicy);
+            this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+            this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+            this.driver.setXmlnsPolicy(xmlnsPolicy);
+            this.driver.setHeuristics(heuristics);
+            for (CharacterHandler characterHandler : characterHandlers) {
+                this.driver.addCharacterHandler(characterHandler);
+            }
+            this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
+            this.treeBuilder.setDocumentModeHandler(documentModeHandler);
+            this.treeBuilder.setIgnoringComments(lexicalHandler == null);
+            this.treeBuilder.setScriptingEnabled(scriptingEnabled);
+            this.treeBuilder.setReportingDoctype(reportingDoctype);
+            this.treeBuilder.setNamePolicy(namePolicy);
+            if (saxStreamer != null) {
+                saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
+                        : contentHandler);
+                saxStreamer.setLexicalHandler(lexicalHandler);
+                driver.setAllowRewinding(false);
+            }
+        }
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#getContentHandler()
+     */
+    public ContentHandler getContentHandler() {
+        return contentHandler;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#getDTDHandler()
+     */
+    public DTDHandler getDTDHandler() {
+        return dtdHandler;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#getEntityResolver()
+     */
+    public EntityResolver getEntityResolver() {
+        return entityResolver;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#getErrorHandler()
+     */
+    public ErrorHandler getErrorHandler() {
+        return errorHandler;
+    }
+
+    /**
+     * Exposes the configuration of the emulated XML parser as well as
+     * boolean-valued configuration without using non-XMLReader
+     * getters directly.
+     *
+     *

+ *
http://xml.org/sax/features/external-general-entities
+ *
false
+ *
http://xml.org/sax/features/external-parameter-entities
+ *
false
+ *
http://xml.org/sax/features/is-standalone
+ *
true
+ *
http://xml.org/sax/features/lexical-handler/parameter-entities
+ *
false
+ *
http://xml.org/sax/features/namespaces
+ *
true
+ *
http://xml.org/sax/features/namespace-prefixes
+ *
false
+ *
http://xml.org/sax/features/resolve-dtd-uris
+ *
true
+ *
http://xml.org/sax/features/string-interning
+ *
true
+ *
http://xml.org/sax/features/unicode-normalization-checking
+ *
isCheckingNormalization
+ *
http://xml.org/sax/features/use-attributes2
+ *
false
+ *
http://xml.org/sax/features/use-locator2
+ *
false
+ *
http://xml.org/sax/features/use-entity-resolver2
+ *
false
+ *
http://xml.org/sax/features/validation
+ *
false
+ *
http://xml.org/sax/features/xmlns-uris
+ *
false
+ *
http://xml.org/sax/features/xml-1.1
+ *
false
+ *
http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata
+ *
isHtml4ModeCompatibleWithXhtml1Schemata
+ *
http://validator.nu/features/mapping-lang-to-xml-lang
+ *
isMappingLangToXmlLang
+ *
http://validator.nu/features/scripting-enabled
+ *
isScriptingEnabled
+ *
+     *
+     * @param name
+     *            feature URI string
+     * @return a value per the list above
+     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
+     */
+    public boolean getFeature(String name) throws SAXNotRecognizedException,
+            SAXNotSupportedException {
+        // Fixed answers for the features of the emulated XML parser.
+        if ("http://xml.org/sax/features/external-general-entities".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/is-standalone".equals(name)) {
+            return true;
+        }
+        if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/namespaces".equals(name)) {
+            return true;
+        }
+        if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
+            return true; // default value--applicable scenario never happens
+        }
+        if ("http://xml.org/sax/features/string-interning".equals(name)) {
+            return true;
+        }
+        if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+            // the checks aren't really per XML 1.1
+            return isCheckingNormalization();
+        }
+        if ("http://xml.org/sax/features/use-attributes2".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/use-locator2".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/validation".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/xmlns-uris".equals(name)) {
+            return false;
+        }
+        if ("http://xml.org/sax/features/xml-1.1".equals(name)) {
+            return false;
+        }
+        // Parser-specific features backed by this object's own getters.
+        if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+            return isHtml4ModeCompatibleWithXhtml1Schemata();
+        }
+        if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+            return isMappingLangToXmlLang();
+        }
+        if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+            return isScriptingEnabled();
+        }
+        throw new SAXNotRecognizedException();
+    }
+
+    /**
+     * Allows XMLReader-level access to non-boolean valued
+     * getters.
+     *
+     *

+ * The properties are mapped as follows: + * + *

+ *
http://xml.org/sax/properties/document-xml-version
+ *
"1.0"
+ *
http://xml.org/sax/properties/lexical-handler
+ *
getLexicalHandler
+ *
http://validator.nu/properties/content-space-policy
+ *
getContentSpacePolicy
+ *
http://validator.nu/properties/content-non-xml-char-policy
+ *
getContentNonXmlCharPolicy
+ *
http://validator.nu/properties/comment-policy
+ *
getCommentPolicy
+ *
http://validator.nu/properties/xmlns-policy
+ *
getXmlnsPolicy
+ *
http://validator.nu/properties/name-policy
+ *
getNamePolicy
+ *
http://validator.nu/properties/streamability-violation-policy
+ *
getStreamabilityViolationPolicy
+ *
http://validator.nu/properties/document-mode-handler
+ *
getDocumentModeHandler
+ *
http://validator.nu/properties/doctype-expectation
+ *
getDoctypeExpectation
+ *
http://xml.org/sax/features/unicode-normalization-checking
+ *
+     *
+     * @param name
+     *            property URI string
+     * @return a value per the list above
+     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
+     */
+    public Object getProperty(String name) throws SAXNotRecognizedException,
+            SAXNotSupportedException {
+        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not suppert DeclHandler.");
+        }
+        if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+            return "1.0"; // Emulating an XML 1.1 parser is not supported.
+        }
+        if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not walk the DOM.");
+        }
+        if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+            return getLexicalHandler();
+        }
+        if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not expose the source as a string.");
+        }
+        if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+            return getContentSpacePolicy();
+        }
+        if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+            return getContentNonXmlCharPolicy();
+        }
+        if ("http://validator.nu/properties/comment-policy".equals(name)) {
+            return getCommentPolicy();
+        }
+        if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+            return getXmlnsPolicy();
+        }
+        if ("http://validator.nu/properties/name-policy".equals(name)) {
+            return getNamePolicy();
+        }
+        if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+            return getStreamabilityViolationPolicy();
+        }
+        if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+            return getDocumentModeHandler();
+        }
+        if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+            return getDoctypeExpectation();
+        }
+        if ("http://validator.nu/properties/xml-policy".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "Cannot get a convenience setter.");
+        }
+        if ("http://validator.nu/properties/heuristics".equals(name)) {
+            return getHeuristics();
+        }
+        throw new SAXNotRecognizedException();
+    }
+
+    /**
+     * Parses a whole document. When the tree-building mode is in use, the
+     * document built so far is replayed to the handlers afterwards, also
+     * when tokenization ended with an exception.
+     *
+     * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
+     */
+    public void parse(InputSource input) throws IOException, SAXException {
+        lazyInit();
+        try {
+            treeBuilder.setFragmentContext(null);
+            tokenize(input);
+        } finally {
+            Document tree = (saxTreeBuilder == null) ? null
+                    : saxTreeBuilder.getDocument();
+            if (tree != null) {
+                new TreeParser(contentHandler, lexicalHandler).parse(tree);
+            }
+        }
+    }
+
+    /**
+     * Parses a fragment with HTML context.
+     *
+     * @param input the input to parse
+     * @param context the name of the context element (HTML namespace assumed)
+     * @throws IOException
+     * @throws SAXException
+     */
+    public void parseFragment(InputSource input, String context)
+            throws IOException, SAXException {
+        lazyInit();
+        try {
+            treeBuilder.setFragmentContext(context.intern());
+            tokenize(input);
+        } finally {
+            // Only the tree-building mode has a fragment to replay.
+            if (saxTreeBuilder != null) {
+                DocumentFragment built = saxTreeBuilder.getDocumentFragment();
+                TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
+                replayer.parse(built);
+            }
+        }
+    }
+
+    /**
+     * Parses a fragment.
+     *
+     * @param input the input to parse
+     * @param contextLocal the local name of the context element
+     * @param contextNamespace the namespace of the context element
+     * @throws IOException
+     * @throws SAXException
+     */
+    public void parseFragment(InputSource input, String contextLocal, String contextNamespace)
+            throws IOException, SAXException {
+        lazyInit();
+        try {
+            treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false);
+            tokenize(input);
+        } finally {
+            if (saxTreeBuilder != null) {
+                DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
+                // Same guard as parse(): this runs in finally, so a missing
+                // fragment (presumably possible when tokenize() failed early)
+                // must not raise a second exception that hides the first.
+                if (fragment != null) {
+                    new TreeParser(contentHandler, lexicalHandler).parse(fragment);
+                }
+            }
+        }
+    }
+
+    /**
+     * Makes sure the input source has a stream to read and hands it to the
+     * driver. A source that carries neither a byte stream nor a character
+     * stream is resolved through the entity resolver (if one is set) and
+     * otherwise opened from its system identifier.
+     *
+     * @param is the input source; must not be <code>null</code>
+     * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
+     *             or has no stream and no system identifier
+     * @throws SAXException
+     * @throws IOException
+     * @throws MalformedURLException
+     */
+    private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
+        if (is == null) {
+            throw new IllegalArgumentException("Null input.");
+        }
+        if (is.getByteStream() == null && is.getCharacterStream() == null) {
+            String systemId = is.getSystemId();
+            if (systemId == null) {
+                throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
+            }
+            if (entityResolver != null) {
+                // resolveEntity() may return null to request default handling;
+                // keep the original source in that case instead of failing.
+                InputSource resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
+                if (resolved != null) {
+                    is = resolved;
+                }
+            }
+            // Open the URL only when there is still no stream at all. The
+            // previous "||" discarded a resolved source that supplied just
+            // one of the two stream kinds.
+            if (is.getByteStream() == null && is.getCharacterStream() == null) {
+                is = new InputSource();
+                is.setSystemId(systemId);
+                is.setByteStream(new URL(systemId).openStream());
+            }
+        }
+        driver.tokenize(is);
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#parse(java.lang.String)
+     */
+    public void parse(String systemId) throws IOException, SAXException {
+        parse(new InputSource(systemId));
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
+     */
+    public void setContentHandler(ContentHandler handler) {
+        contentHandler = handler;
+        if (saxStreamer != null) {
+            // The streamer always gets a non-null handler.
+            saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
+                    : contentHandler);
+        }
+    }
+
+    /**
+     * Sets the lexical handler.
+     * @param handler the handler.
+     */
+    public void setLexicalHandler(LexicalHandler handler) {
+        lexicalHandler = handler;
+        if (treeBuilder != null) {
+            // Without a lexical handler the tree builder ignores comments.
+            treeBuilder.setIgnoringComments(handler == null);
+            if (saxStreamer != null) {
+                saxStreamer.setLexicalHandler(handler);
+            }
+        }
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
+     */
+    public void setDTDHandler(DTDHandler handler) {
+        dtdHandler = handler;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+     */
+    public void setEntityResolver(EntityResolver resolver) {
+        entityResolver = resolver;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+     */
+    public void setErrorHandler(ErrorHandler handler) {
+        errorHandler = handler;
+        treeBuilderErrorHandler = handler;
+        driver = null;
+    }
+
+    public void setTransitionHandler(TransitionHandler handler) {
+        transitionHandler = handler;
+        driver = null;
+    }
+
+    /**
+     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+     * @deprecated For Validator.nu internal use
+     */
+    public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) {
+        treeBuilderErrorHandler = handler;
+        if (driver != null) {
+            treeBuilder.setErrorHandler(handler);
+        }
+    }
+
+    /**
+     * Sets a boolean feature without having to use non-XMLReader
+     * setters directly.
+     *
+     *

+     * The supported features are:
+     *
+     * <dl>
+     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
+     * <dd><code>setCheckingNormalization</code></dd>
+     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
+     * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
+     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
+     * <dd><code>setMappingLangToXmlLang</code></dd>
+     * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
+     * <dd><code>setScriptingEnabled</code></dd>
+     * </dl>
+     *
+     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
+     */
+    public void setFeature(String name, boolean value)
+            throws SAXNotRecognizedException, SAXNotSupportedException {
+        if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
+            setCheckingNormalization(value);
+        } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
+            setHtml4ModeCompatibleWithXhtml1Schemata(value);
+        } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
+            setMappingLangToXmlLang(value);
+        } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
+            setScriptingEnabled(value);
+        } else if ("http://xml.org/sax/features/is-standalone".equals(name)
+                || "http://xml.org/sax/features/namespaces".equals(name)
+                || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
+                || "http://xml.org/sax/features/string-interning".equals(name)) {
+            // These features are fixed at true; setting true again is a no-op.
+            if (!value) {
+                throw new SAXNotSupportedException("Cannot set " + name + ".");
+            }
+        } else if ("http://xml.org/sax/features/external-general-entities".equals(name)
+                || "http://xml.org/sax/features/external-parameter-entities".equals(name)
+                || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
+                || "http://xml.org/sax/features/namespace-prefixes".equals(name)
+                || "http://xml.org/sax/features/use-attributes2".equals(name)
+                || "http://xml.org/sax/features/use-locator2".equals(name)
+                || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
+                || "http://xml.org/sax/features/validation".equals(name)
+                || "http://xml.org/sax/features/xmlns-uris".equals(name)
+                || "http://xml.org/sax/features/xml-1.1".equals(name)) {
+            // These features are fixed at false; setting false again is a no-op.
+            if (value) {
+                throw new SAXNotSupportedException("Cannot set " + name + ".");
+            }
+        } else {
+            // Carry the rejected URI so callers can see what was not recognized.
+            throw new SAXNotRecognizedException(name);
+        }
+    }
+
+    /**
+     * Sets a non-boolean property without having to use non-XMLReader
+     * setters directly.
+     *
+     *
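+     * <dl>
+     * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
+     * <dd><code>setLexicalHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
+     * <dd><code>setContentSpacePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
+     * <dd><code>setContentNonXmlCharPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
+     * <dd><code>setCommentPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
+     * <dd><code>setXmlnsPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/name-policy</code></dt>
+     * <dd><code>setNamePolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
+     * <dd><code>setStreamabilityViolationPolicy</code></dd>
+     * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
+     * <dd><code>setDocumentModeHandler</code></dd>
+     * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
+     * <dd><code>setDoctypeExpectation</code></dd>
+     * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
+     * <dd><code>setXmlPolicy</code></dd>
+     * </dl>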
+     *
+     * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
+     *      java.lang.Object)
+     */
+    public void setProperty(String name, Object value)
+            throws SAXNotRecognizedException, SAXNotSupportedException {
+        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "This parser does not support DeclHandler.");
+        } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
+            throw new SAXNotSupportedException(
+                    "Can't set document-xml-version.");
+        } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
+            throw new SAXNotSupportedException("Can't set dom-node.");
+        } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
+            setLexicalHandler((LexicalHandler) value);
+        } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
+            throw new SAXNotSupportedException("Can't set xml-string.");
+        } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
+            setContentSpacePolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
+            setContentNonXmlCharPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
+            setCommentPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
+            setXmlnsPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/name-policy".equals(name)) {
+            setNamePolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
+            setStreamabilityViolationPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
+            setDocumentModeHandler((DocumentModeHandler) value);
+        } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
+            setDoctypeExpectation((DoctypeExpectation) value);
+        } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
+            setXmlPolicy((XmlViolationPolicy) value);
+        } else if ("http://validator.nu/properties/heuristics".equals(name)) {
+            setHeuristics((Heuristics) value);
+        } else {
+            // Carry the rejected URI so callers can see what was not recognized.
+            throw new SAXNotRecognizedException(name);
+        }
+    }
+
+    /**
+     * Indicates whether NFC normalization of source is being checked.
+     * @return true if NFC normalization of source is being checked.
+     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+     */
+    public boolean isCheckingNormalization() {
+        return checkingNormalization;
+    }
+
+    /**
+     * Toggles the checking of the NFC normalization of source.
+     * @param enable true to check normalization
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+     */
+    public void setCheckingNormalization(boolean enable) {
+        this.checkingNormalization = enable;
+        if (driver != null) {
+            driver.setCheckingNormalization(checkingNormalization);
+        }
+    }
+
+    /**
+     * Sets the policy for consecutive hyphens in comments.
+     * @param commentPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+        this.commentPolicy = commentPolicy;
+        if (driver != null) {
+            driver.setCommentPolicy(commentPolicy);
+        }
+    }
+
+    /**
+     * Sets the policy for non-XML characters except white space.
+     * @param contentNonXmlCharPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentNonXmlCharPolicy(
+            XmlViolationPolicy contentNonXmlCharPolicy) {
+        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+        // Unlike the sibling setters, this one drops the driver rather than
+        // updating it in place.
+        driver = null;
+    }
+
+    /**
+     * Sets the policy for non-XML white space.
+ * @param contentSpacePolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + if (driver != null) { + driver.setContentSpacePolicy(contentSpacePolicy); + } + } + + /** + * Whether the parser considers scripting to be enabled for noscript treatment. + * + * @return true if enabled + * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. + * @param scriptingEnabled true to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + if (treeBuilder != null) { + treeBuilder.setScriptingEnabled(scriptingEnabled); + } + } + + /** + * Returns the doctype expectation. + * + * @return the doctypeExpectation + */ + public DoctypeExpectation getDoctypeExpectation() { + return doctypeExpectation; + } + + /** + * Sets the doctype expectation. + * + * @param doctypeExpectation + * the doctypeExpectation to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + if (treeBuilder != null) { + treeBuilder.setDoctypeExpectation(doctypeExpectation); + } + } + + /** + * Returns the document mode handler. + * + * @return the documentModeHandler + */ + public DocumentModeHandler getDocumentModeHandler() { + return documentModeHandler; + } + + /** + * Sets the document mode handler. 
+     *
+     * @param documentModeHandler
+     *            the documentModeHandler to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+     */
+    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+        this.documentModeHandler = documentModeHandler;
+    }
+
+    /**
+     * Returns the streamabilityViolationPolicy.
+     *
+     * @return the streamabilityViolationPolicy
+     */
+    public XmlViolationPolicy getStreamabilityViolationPolicy() {
+        return streamabilityViolationPolicy;
+    }
+
+    /**
+     * Sets the streamabilityViolationPolicy.
+     *
+     * @param streamabilityViolationPolicy
+     *            the streamabilityViolationPolicy to set
+     */
+    public void setStreamabilityViolationPolicy(
+            XmlViolationPolicy streamabilityViolationPolicy) {
+        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+        driver = null;
+    }
+
+    /**
+     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     * @param html4ModeCompatibleWithXhtml1Schemata
+     */
+    public void setHtml4ModeCompatibleWithXhtml1Schemata(
+            boolean html4ModeCompatibleWithXhtml1Schemata) {
+        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+        if (driver != null) {
+            driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+        }
+    }
+
+    /**
+     * Returns the Locator during parse.
+     * @return the Locator, or <code>null</code> when no driver exists
+     *         (several setters on this class reset the driver to
+     *         <code>null</code>)
+     */
+    public Locator getDocumentLocator() {
+        // Guard against the driver having been dropped by a setter.
+        return driver == null ? null : driver.getDocumentLocator();
+    }
+
+    /**
+     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     *
+     * @return the html4ModeCompatibleWithXhtml1Schemata
+     */
+    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+        return html4ModeCompatibleWithXhtml1Schemata;
+    }
+
+    /**
+     * Whether lang is mapped to xml:lang.
+     * @param mappingLangToXmlLang
+     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+     */
+    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+        this.mappingLangToXmlLang = mappingLangToXmlLang;
+        if (driver != null) {
+            driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+        }
+    }
+
+    /**
+     * Whether lang is mapped to xml:lang.
+     *
+     * @return the mappingLangToXmlLang
+     */
+    public boolean isMappingLangToXmlLang() {
+        return mappingLangToXmlLang;
+    }
+
+    /**
+     * Whether the xmlns attribute on the root element is
+     * passed through. (FATAL not allowed.)
+     * @param xmlnsPolicy the policy; must not be FATAL
+     * @throws IllegalArgumentException if xmlnsPolicy is FATAL
+     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+            throw new IllegalArgumentException("Can't use FATAL here.");
+        }
+        this.xmlnsPolicy = xmlnsPolicy;
+        if (driver != null) {
+            driver.setXmlnsPolicy(xmlnsPolicy);
+        }
+    }
+
+    /**
+     * Returns the xmlnsPolicy.
+     *
+     * @return the xmlnsPolicy
+     */
+    public XmlViolationPolicy getXmlnsPolicy() {
+        return xmlnsPolicy;
+    }
+
+    /**
+     * Returns the lexicalHandler.
+     *
+     * @return the lexicalHandler
+     */
+    public LexicalHandler getLexicalHandler() {
+        return lexicalHandler;
+    }
+
+    /**
+     * Returns the commentPolicy.
+     *
+     * @return the commentPolicy
+     */
+    public XmlViolationPolicy getCommentPolicy() {
+        return commentPolicy;
+    }
+
+    /**
+     * Returns the contentNonXmlCharPolicy.
+     *
+     * @return the contentNonXmlCharPolicy
+     */
+    public XmlViolationPolicy getContentNonXmlCharPolicy() {
+        return contentNonXmlCharPolicy;
+    }
+
+    /**
+     * Returns the contentSpacePolicy.
+     *
+     * @return the contentSpacePolicy
+     */
+    public XmlViolationPolicy getContentSpacePolicy() {
+        return contentSpacePolicy;
+    }
+
+    /**
+     * Sets whether the doctype is reported; the value is stored here and
+     * forwarded to the tree builder when one exists.
+     * @param reportingDoctype
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+     */
+    public void setReportingDoctype(boolean reportingDoctype) {
+        this.reportingDoctype = reportingDoctype;
+        if (treeBuilder != null) {
+            treeBuilder.setReportingDoctype(reportingDoctype);
+        }
+    }
+
+    /**
+     * Returns the reportingDoctype.
+     *
+     * @return the reportingDoctype
+     */
+    public boolean isReportingDoctype() {
+        return reportingDoctype;
+    }
+
+    /**
+     * Stores the error profile map on this parser. This setter only assigns
+     * the field; it does not touch an existing driver.
+     * @param errorProfileMap the error profile map to store
+     * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set)
+     */
+    public void setErrorProfile(HashMap errorProfileMap) {
+        this.errorProfileMap = errorProfileMap;
+    }
+
+    /**
+     * The policy for non-NCName element and attribute names.
+     * @param namePolicy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setNamePolicy(XmlViolationPolicy namePolicy) {
+        this.namePolicy = namePolicy;
+        if (driver != null) {
+            driver.setNamePolicy(namePolicy);
+            treeBuilder.setNamePolicy(namePolicy);
+        }
+    }
+
+    /**
+     * Sets the encoding sniffing heuristics.
+     *
+     * @param heuristics the heuristics to set
+     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+     */
+    public void setHeuristics(Heuristics heuristics) {
+        this.heuristics = heuristics;
+        if (driver != null) {
+            driver.setHeuristics(heuristics);
+        }
+    }
+
+    public Heuristics getHeuristics() {
+        return this.heuristics;
+    }
+
+    /**
+     * This is a catch-all convenience method for setting name, xmlns, content space,
+     * content non-XML char and comment policies in one go. This does not affect the
+     * streamability policy or doctype reporting.
+ * + * @param xmlPolicy + */ + public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { + setNamePolicy(xmlPolicy); + setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); + setContentSpacePolicy(xmlPolicy); + setContentNonXmlCharPolicy(xmlPolicy); + setCommentPolicy(xmlPolicy); + } + + /** + * The policy for non-NCName element and attribute names. + * + * @return the namePolicy + */ + public XmlViolationPolicy getNamePolicy() { + return namePolicy; + } + + /** + * Does nothing. + * @deprecated + */ + public void setBogusXmlnsPolicy( + XmlViolationPolicy bogusXmlnsPolicy) { + } + + /** + * Returns XmlViolationPolicy.ALTER_INFOSET. + * @deprecated + * @return XmlViolationPolicy.ALTER_INFOSET + */ + public XmlViolationPolicy getBogusXmlnsPolicy() { + return XmlViolationPolicy.ALTER_INFOSET; + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + this.characterHandlers.add(characterHandler); + if (driver != null) { + driver.addCharacterHandler(characterHandler); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java new file mode 100644 index 0000000000..3312398d55 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.Arrays;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Writes SAX content and lexical events out as HTML text. Elements in the
+ * XHTML, SVG and MathML namespaces are written; elements in any other
+ * namespace are counted and skipped. Text and attribute values are escaped,
+ * except text inside the elements listed in NON_ESCAPING.
+ */
+public class HtmlSerializer implements ContentHandler, LexicalHandler {
+
+    // Both arrays are searched with Arrays.binarySearch and must stay sorted.
+    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
+            "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
+            "input", "keygen", "link", "meta", "param", "source", "track",
+            "wbr" };
+
+    private static final String[] NON_ESCAPING = { "iframe", "noembed",
+            "noframes", "noscript", "plaintext", "script", "style", "xmp" };
+
+    private static Writer wrap(OutputStream out) {
+        try {
+            return new OutputStreamWriter(out, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    // Depth of elements whose end tag must not be written (void elements
+    // and elements in unsupported namespaces).
+    private int ignoreLevel = 0;
+
+    // Depth inside a NON_ESCAPING element; text is written raw while > 0.
+    private int escapeLevel = 0;
+
+    private final Writer writer;
+
+    public HtmlSerializer(OutputStream out) {
+        this(wrap(out));
+    }
+
+    public HtmlSerializer(Writer out) {
+        this.writer = out;
+    }
+
+    /**
+     * Writes text, escaping &lt;, &gt;, &amp; and U+00A0 unless the text is
+     * inside a NON_ESCAPING element.
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            if (escapeLevel > 0) {
+                writer.write(ch, start, length);
+            } else {
+                for (int i = start; i < start + length; i++) {
+                    char c = ch[i];
+                    switch (c) {
+                        case '<':
+                            writer.write("&lt;");
+                            break;
+                        case '>':
+                            writer.write("&gt;");
+                            break;
+                        case '&':
+                            writer.write("&amp;");
+                            break;
+                        case '\u00A0':
+                            writer.write("&nbsp;");
+                            break;
+                        default:
+                            writer.write(c);
+                            break;
+                    }
+                }
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Flushes and closes the underlying writer. */
+    public void endDocument() throws SAXException {
+        try {
+            writer.flush();
+            writer.close();
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /** Writes the end tag unless the element was ignored or is void. */
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel--;
+        }
+        if (ignoreLevel > 0) {
+            ignoreLevel--;
+        } else {
+            try {
+                writer.write('<');
+                writer.write('/');
+                writer.write(localName);
+                writer.write('>');
+            } catch (IOException e) {
+                throw new SAXException(e);
+            }
+        }
+    }
+
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        characters(ch, start, length);
+    }
+
+    public void processingInstruction(String target, String data)
+            throws SAXException {
+    }
+
+    public void setDocumentLocator(Locator locator) {
+    }
+
+    /** Writes the HTML doctype line. */
+    public void startDocument() throws SAXException {
+        try {
+            writer.write("<!DOCTYPE html>\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes a start tag with its attributes. Attributes in no namespace are
+     * written as-is, XLink attributes get an xlink: prefix on non-XHTML
+     * elements, and XML-namespace attributes get an xml: prefix on non-XHTML
+     * elements (on XHTML elements only lang is kept, unprefixed). All other
+     * attributes are dropped.
+     */
+    public void startElement(String uri, String localName, String qName,
+            Attributes atts) throws SAXException {
+        if (escapeLevel > 0) {
+            escapeLevel++;
+        }
+        boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
+        if (ignoreLevel > 0
+                || !(xhtml || "http://www.w3.org/2000/svg".equals(uri) || "http://www.w3.org/1998/Math/MathML".equals(uri))) {
+            ignoreLevel++;
+            return;
+        }
+        try {
+            writer.write('<');
+            writer.write(localName);
+            for (int i = 0; i < atts.getLength(); i++) {
+                String attUri = atts.getURI(i);
+                String attLocal = atts.getLocalName(i);
+                if (attUri.length() == 0) {
+                    writer.write(' ');
+                } else if (!xhtml
+                        && "http://www.w3.org/1999/xlink".equals(attUri)) {
+                    writer.write(" xlink:");
+                } else if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
+                    if (xhtml) {
+                        if ("lang".equals(attLocal)) {
+                            writer.write(' ');
+                        } else {
+                            continue;
+                        }
+                    } else {
+                        writer.write(" xml:");
+                    }
+                } else {
+                    continue;
+                }
+                writer.write(atts.getLocalName(i));
+                writer.write('=');
+                writer.write('"');
+                String val = atts.getValue(i);
+                for (int j = 0; j < val.length(); j++) {
+                    char c = val.charAt(j);
+                    switch (c) {
+                        case '"':
+                            writer.write("&quot;");
+                            break;
+                        case '&':
+                            writer.write("&amp;");
+                            break;
+                        case '\u00A0':
+                            writer.write("&nbsp;");
+                            break;
+                        default:
+                            writer.write(c);
+                            break;
+                    }
+                }
+                writer.write('"');
+            }
+            writer.write('>');
+            if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
+                // Counted as ignored so endElement() writes no end tag.
+                ignoreLevel++;
+                return;
+            }
+            if ("pre".equals(localName) || "textarea".equals(localName)
+                    || "listing".equals(localName)) {
+                writer.write('\n');
+            }
+            if (escapeLevel == 0
+                    && Arrays.binarySearch(NON_ESCAPING, localName) > -1) {
+                escapeLevel = 1;
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    /**
+     * Writes a comment, unless it occurs inside an ignored element or inside
+     * a NON_ESCAPING element.
+     */
+    public void comment(char[] ch, int start, int length) throws SAXException {
+        if (ignoreLevel > 0 || escapeLevel > 0) {
+            return;
+        }
+        try {
+            writer.write("<!--");
+            writer.write(ch, start, length);
+            writer.write("-->");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+
+    public void endCDATA() throws SAXException {
+    }
+
+    public void endDTD() throws SAXException {
+    }
+
+    public void endEntity(String name) throws SAXException {
+    }
+
+    public void startCDATA() throws SAXException {
+    }
+
+    public void startDTD(String name, String publicId, String systemId)
+            throws SAXException {
+    }
+
+    public void startEntity(String name) throws SAXException {
+    }
+
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+    }
+
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    public void skippedEntity(String name) throws SAXException {
+    }
+
+}
diff --git
a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java new file mode 100644 index 0000000000..33e98dbe88 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.common.XmlViolationPolicy; + +/** + * This subclass of HtmlParser simply provides a no-argument + * constructor that calls the constructor of the superclass with the + * ALTER_INFOSET policy. This is convenient when another Java + * component wants an implementation of XMLReader with a + * no-argument constructor and infoset coercion is the wanted behavior. 
+ * + * @version $Id$ + * @author hsivonen + */ +public class InfosetCoercingHtmlParser extends HtmlParser { + + /** + * A constructor that passes ALTER_INFOSET to the superclass' + * constructor. + */ + public InfosetCoercingHtmlParser() { + super(XmlViolationPolicy.ALTER_INFOSET); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java new file mode 100644 index 0000000000..b6cb2f8729 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.sax; + +import java.io.OutputStream; +import java.io.Writer; + +import nu.validator.htmlparser.impl.NCName; + +import org.xml.sax.SAXException; + +public class NameCheckingXmlSerializer extends XmlSerializer { + + public NameCheckingXmlSerializer(OutputStream out) { + super(out); + } + + public NameCheckingXmlSerializer(Writer out) { + super(out); + } + + /** + * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName() + */ + @Override protected void checkNCName(String name) throws SAXException { + if (!NCName.isNCName(name)) { + throw new SAXException("Not an XML 1.0 4th ed. NCName: " + name); + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java new file mode 100644 index 0000000000..07ff5da4a5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.TreeBuilder; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.ext.LexicalHandler; + +class SAXStreamer extends TreeBuilder{ + + private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray(); + + private ContentHandler contentHandler = null; + private LexicalHandler lexicalHandler = null; + + SAXStreamer() { + super(); + } + + @Override + protected void addAttributesToElement(Attributes element, HtmlAttributes attributes) throws SAXException { + Attributes existingAttrs = element; + for (int i = 0; i < attributes.getLength(); i++) { + String qName = attributes.getQNameNoBoundsCheck(i); + if (existingAttrs.getIndex(qName) < 0) { + fatal(); + } + } + } + + @Override + protected void appendCharacters(Attributes parent, char[] buf, int start, int length) throws SAXException { + contentHandler.characters(buf, start, length); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object) + */ + @Override protected void appendIsindexPrompt(Attributes parent) + throws SAXException { + contentHandler.characters(ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length); + } + + @Override + protected void appendChildrenToNewParent(Attributes oldParent, Attributes newParent) throws SAXException { + fatal(); + } + + @Override + protected void appendComment(Attributes parent, char[] buf, int start, int length) throws SAXException { + if (lexicalHandler != null) { + lexicalHandler.comment(buf, 
start, length); + } + } + + @Override + protected void appendCommentToDocument(char[] buf, int start, int length) + throws SAXException { + if (lexicalHandler != null) { + lexicalHandler.comment(buf, start, length); + } + } + + @Override + protected Attributes createElement(String ns, String name, HtmlAttributes attributes, Attributes intendedParent) throws SAXException { + return attributes; + } + + @Override + protected Attributes createHtmlElementSetAsRoot(HtmlAttributes attributes) throws SAXException { + return attributes; + } + + @Override + protected void detachFromParent(Attributes element) throws SAXException { + fatal(); + } + + @Override + protected void appendElement(Attributes child, Attributes newParent) throws SAXException { + } + + @Override + protected boolean hasChildren(Attributes element) throws SAXException { + return false; + } + + public void setContentHandler(ContentHandler handler) { + contentHandler = handler; + } + + public void setLexicalHandler(LexicalHandler handler) { + lexicalHandler = handler; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String) + */ + @Override + protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) throws SAXException { + if (lexicalHandler != null) { + lexicalHandler.startDTD(name, publicIdentifier, systemIdentifier); + lexicalHandler.endDTD(); + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(String, java.lang.String, java.lang.Object) + */ + @Override + protected void elementPopped(String ns, String name, Attributes node) throws SAXException { + contentHandler.endElement(ns, name, name); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPushed(String, java.lang.String, java.lang.Object) + */ + @Override + protected void elementPushed(String ns, String name, Attributes node) throws SAXException { + contentHandler.startElement(ns, 
name, name, node); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#end() + */ + @Override + protected void end() throws SAXException { + contentHandler.endDocument(); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override + protected void start(boolean fragment) throws SAXException { + contentHandler.setDocumentLocator(tokenizer); + if (!fragment) { + contentHandler.startDocument(); + } + } + + protected void fatal() throws SAXException { + SAXParseException spe = new SAXParseException( + "Cannot recover after last error. Any further errors will be ignored.", + tokenizer); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + @Override + protected Attributes createAndInsertFosterParentedElement(String ns, String name, + HtmlAttributes attributes, Attributes table, Attributes stackParent) throws SAXException { + fatal(); + throw new RuntimeException("Unreachable"); + } + + @Override protected void insertFosterParentedCharacters(char[] buf, + int start, int length, Attributes table, Attributes stackParent) + throws SAXException { + fatal(); + } + + @Override protected void insertFosterParentedChild(Attributes child, + Attributes table, Attributes stackParent) throws SAXException { + fatal(); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java new file mode 100644 index 0000000000..ef51d2a51f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.TreeBuilder; +import nu.validator.saxtree.Characters; +import nu.validator.saxtree.Comment; +import nu.validator.saxtree.DTD; +import nu.validator.saxtree.Document; +import nu.validator.saxtree.DocumentFragment; +import nu.validator.saxtree.Element; +import nu.validator.saxtree.Node; +import nu.validator.saxtree.ParentNode; + +import org.xml.sax.SAXException; + +class SAXTreeBuilder extends TreeBuilder { + + private static final char[] ISINDEX_PROMPT = "This is a searchable index. 
Enter search keywords: ".toCharArray(); + + private Document document; + + private Node cachedTable = null; + + private Node cachedTablePreviousSibling = null; + + SAXTreeBuilder() { + super(); + } + + @Override + protected void appendComment(Element parent, char[] buf, int start, int length) { + parent.appendChild(new Comment(tokenizer, buf, start, length)); + } + + @Override + protected void appendCommentToDocument(char[] buf, int start, int length) { + document.appendChild(new Comment(tokenizer, buf, start, length)); + } + + @Override + protected void appendCharacters(Element parent, char[] buf, int start, int length) { + parent.appendChild(new Characters(tokenizer, buf, start, length)); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object) + */ + @Override protected void appendIsindexPrompt(Element parent) + throws SAXException { + parent.appendChild(new Characters(tokenizer, ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length)); + } + + @Override + protected boolean hasChildren(Element element) { + return element.getFirstChild() != null; + } + + @Override + protected void appendElement(Element child, Element newParent) { + newParent.appendChild(child); + } + + @Override + protected Element createHtmlElementSetAsRoot(HtmlAttributes attributes) { + Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null); + document.appendChild(newElt); + return newElt; + } + + @Override + protected void addAttributesToElement(Element element, HtmlAttributes attributes) throws SAXException { + HtmlAttributes existingAttrs = (HtmlAttributes) element.getAttributes(); + existingAttrs.merge(attributes); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String) + */ + @Override + protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) { + DTD dtd = new DTD(tokenizer, 
name, publicIdentifier, systemIdentifier); + dtd.setEndLocator(tokenizer); + document.appendChild(dtd); + } + + /** + * Returns the document. + * + * @return the document + */ + Document getDocument() { + Document rv = document; + document = null; + return rv; + } + + DocumentFragment getDocumentFragment() { + DocumentFragment rv = new DocumentFragment(); + rv.appendChildren(document.getFirstChild()); + document = null; + return rv; + } + + /** + * @throws SAXException + * @see nu.validator.htmlparser.impl.TreeBuilder#end() + */ + @Override + protected void end() throws SAXException { + document.setEndLocator(tokenizer); + cachedTable = null; + cachedTablePreviousSibling = null; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override + protected void start(boolean fragment) { + document = new Document(tokenizer); + cachedTable = null; + cachedTablePreviousSibling = null; + } + + @Override + protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException { + newParent.appendChildren(oldParent); + } + + @Override + protected Element createElement(String ns, String name, HtmlAttributes attributes, + Element intendedParent) throws SAXException { + return new Element(tokenizer, ns, name, name, attributes, true, null); + } + + @Override + protected Element createAndInsertFosterParentedElement(String ns, String name, + HtmlAttributes attributes, Element table, Element stackParent) throws SAXException { + ParentNode parent = table.getParentNode(); + Element child = createElement(ns, name, attributes, parent != null ? 
(Element) parent : stackParent); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + + return child; + } + + @Override protected void insertFosterParentedCharacters(char[] buf, + int start, int length, Element table, Element stackParent) throws SAXException { + Node child = new Characters(tokenizer, buf, start, length); + ParentNode parent = table.getParentNode(); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + } + + @Override protected void insertFosterParentedChild(Element child, + Element table, Element stackParent) throws SAXException { + ParentNode parent = table.getParentNode(); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + } + + private Node previousSibling(Node table) { + if (table == cachedTable) { + return cachedTablePreviousSibling; + } else { + cachedTable = table; + return (cachedTablePreviousSibling = table.getPreviousSibling()); + } + } + + @Override protected void detachFromParent(Element element) + throws SAXException { + element.detach(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java new file mode 100644 index 0000000000..5dccf5d3a8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Map; +import java.util.Set; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +public class XmlSerializer implements ContentHandler, LexicalHandler { + + private final class PrefixMapping { + public final String uri; + + public final String prefix; + + /** + * @param uri + * @param prefix + */ + public PrefixMapping(String uri, String prefix) { + this.uri = uri; + this.prefix = prefix; + } + + /** + * @see java.lang.Object#equals(java.lang.Object) + */ + 
@Override public final boolean equals(Object obj) { + if (obj instanceof PrefixMapping) { + PrefixMapping other = (PrefixMapping) obj; + return this.prefix.equals(other.prefix); + } else { + return false; + } + } + + /** + * @see java.lang.Object#hashCode() + */ + @Override public final int hashCode() { + return prefix.hashCode(); + } + + } + + private final class StackNode { + public final String uri; + + public final String prefix; + + public final String qName; + + public final Set mappings = new HashSet(); + + /** + * @param uri + * @param qName + */ + public StackNode(String uri, String qName, String prefix) { + this.uri = uri; + this.qName = qName; + this.prefix = prefix; + } + } + + private final static Map WELL_KNOWN_ATTRIBUTE_PREFIXES = new HashMap(); + + static { + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("adobe:ns:meta/", "x"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd", + "sodipodi"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/AdobeIllustrator/10.0/", "i"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/Extensibility/1.0/", "x"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/illustrator/1.0/", "illustrator"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/", + "photoshop"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", + "tiff"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/", + "xapG"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/", + "xapMM"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/rights/", "xapRights"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim"); 
+ WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/", + "xapTPg"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://purl.org/dc/elements/1.1/", + "dc"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd", + "sodipodi"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://w3.org/1999/xlink", "xlink"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.carto.net/attrib/", + "attrib"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://www.iki.fi/pav/software/textext/", "textext"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://www.inkscape.org/namespaces/inkscape", "inkscape"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://www.justsystem.co.jp/hanako13/svg", "jsh"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink", + "xlink"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put( + "http://www.w3.org/2001/XMLSchema-instance", "xsi"); + WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink", + "xlink"); + } + + private final static Map WELL_KNOWN_ELEMENT_PREFIXES = new HashMap(); + + static { + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.w3.org/1999/XSL/Transform", + "xsl"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.org/dc/elements/1.1/", + "dc"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://www.w3.org/2001/XMLSchema-instance", "xsi"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.ascc.net/xml/schematron", + "sch"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.oclc.org/dsdl/schematron", + "sch"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://www.inkscape.org/namespaces/inkscape", "inkscape"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + 
"http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd", + "sodipodi"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/AdobeIllustrator/10.0/", "i"); + WELL_KNOWN_ELEMENT_PREFIXES.put("adobe:ns:meta/", "x"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", "tiff"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://creativecommons.org/ns#", "cc"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd", + "sodipodi"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/", "Iptc4xmpCore"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/exif/1.0/", "exif"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/Extensibility/1.0/", "x"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/illustrator/1.0/", + "illustrator"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdfx/1.3/", "pdfx"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/", + "photoshop"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/Variables/1.0/", + "v"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/", + "xapG"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/img/", + "xapGImg"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/", + "xapMM"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/rights/", + "xapRights"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/sType/Font#", "stFnt"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef"); + 
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/", + "xapTPg"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://product.corel.com/CGS/11/cddns/", "odm"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://web.resource.org/cc/", "cc"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://www.freesoftware.fsf.org/bkchem/cdml", "cdml"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.opengis.net/gml", "gml"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.svgmaker.com/svgns", + "svgmaker"); + WELL_KNOWN_ELEMENT_PREFIXES.put( + "http://www.w3.org/2000/01/rdf-schema#", "rdfs"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://xmlns.com/foaf/0.1/", "foaf"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.xml-cml.org/schema/stmml", + "stm"); + WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.iupac.org/foo/ichi", "ichi"); + } + + private final static Writer wrap(OutputStream out) { + Charset charset = Charset.forName("utf-8"); + CharsetEncoder encoder = charset.newEncoder(); + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + try { + encoder.replaceWith("\uFFFD".getBytes("utf-8")); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + return new OutputStreamWriter(out, encoder); + } + + // grows from head + private final LinkedList stack = new LinkedList(); + + private final Writer writer; + + public XmlSerializer(OutputStream out) { + this(wrap(out)); + } + + public XmlSerializer(Writer out) { + this.writer = out; + } + + protected void checkNCName(String name) throws SAXException { + + } + + private final void push(String uri, String local, String prefix) { + stack.addFirst(new StackNode(uri, local, prefix)); + } + + private final String pop() { + String rv = stack.removeFirst().qName; + stack.getFirst().mappings.clear(); + return rv; + } + + private final String lookupPrefixAttribute(String ns) { 
+ if ("http://www.w3.org/XML/1998/namespace".equals(ns)) { + return "xml"; + } + Set hidden = new HashSet(); + for (StackNode node : stack) { + for (PrefixMapping mapping : node.mappings) { + if (mapping.prefix.length() != 0 && mapping.uri.equals(ns) + && !hidden.contains(mapping.prefix)) { + return mapping.prefix; + } + hidden.add(mapping.prefix); + } + } + return null; + } + + private final String lookupUri(String prefix) { + for (StackNode node : stack) { + for (PrefixMapping mapping : node.mappings) { + if (mapping.prefix.equals(prefix)) { + return mapping.uri; + } + } + } + return null; + } + + private final boolean xmlNsQname(String name) { + if (name == null) { + return false; + } else if ("xmlns".equals(name)) { + return true; + } else if (name.startsWith("xmlns:")) { + return true; + } else { + return false; + } + } + + private final void writeAttributeValue(String val) throws IOException { + boolean prevWasSpace = true; + int last = val.length() - 1; + for (int i = 0; i <= last; i++) { + char c = val.charAt(i); + switch (c) { + case '<': + writer.write("<"); + prevWasSpace = false; + break; + case '>': + writer.write(">"); + prevWasSpace = false; + break; + case '&': + writer.write("&"); + prevWasSpace = false; + break; + case '"': + writer.write("""); + prevWasSpace = false; + break; + case '\r': + writer.write(" "); + prevWasSpace = false; + break; + case '\t': + writer.write(" "); + prevWasSpace = false; + break; + case '\n': + writer.write(" "); + prevWasSpace = false; + break; + case ' ': + if (prevWasSpace || i == last) { + writer.write(" "); + prevWasSpace = false; + } else { + writer.write(' '); + prevWasSpace = true; + } + break; + case '\uFFFE': + writer.write('\uFFFD'); + prevWasSpace = false; + break; + case '\uFFFF': + writer.write('\uFFFD'); + prevWasSpace = false; + break; + default: + if (c < ' ') { + writer.write('\uFFFD'); + } else { + writer.write(c); + } + prevWasSpace = false; + break; + } + } + } + + private final void 
generatePrefix(String uri) throws SAXException { + int counter = 0; + String candidate = WELL_KNOWN_ATTRIBUTE_PREFIXES.get(uri); + if (candidate == null) { + candidate = "p" + (counter++); + } + while (lookupUri(candidate) != null) { + candidate = "p" + (counter++); + } + startPrefixMappingPrivate(candidate, uri); + } + + public final void characters(char[] ch, int start, int length) + throws SAXException { + try { + for (int i = start; i < start + length; i++) { + char c = ch[i]; + switch (c) { + case '<': + writer.write("<"); + break; + case '>': + writer.write(">"); + break; + case '&': + writer.write("&"); + break; + case '\r': + writer.write(" "); + break; + case '\t': + writer.write('\t'); + break; + case '\n': + writer.write('\n'); + break; + case '\uFFFE': + writer.write('\uFFFD'); + break; + case '\uFFFF': + writer.write('\uFFFD'); + break; + default: + if (c < ' ') { + writer.write('\uFFFD'); + } else { + writer.write(c); + } + break; + } + } + } catch (IOException e) { + throw new SAXException(e); + } + } + + public final void endDocument() throws SAXException { + try { + stack.clear(); + writer.flush(); + writer.close(); + } catch (IOException e) { + throw new SAXException(e); + } + } + + public final void endElement(String uri, String localName, String qName) + throws SAXException { + try { + writer.write('<'); + writer.write('/'); + writer.write(pop()); + writer.write('>'); + } catch (IOException e) { + throw new SAXException(e); + } + } + + public final void ignorableWhitespace(char[] ch, int start, int length) + throws SAXException { + characters(ch, start, length); + } + + public final void processingInstruction(String target, String data) + throws SAXException { + try { + checkNCName(target); + writer.write("': + if (prevWasQuestionmark) { + writer.write(" >"); + } else { + writer.write('>'); + } + prevWasQuestionmark = false; + break; + case '\t': + writer.write('\t'); + prevWasQuestionmark = false; + break; + case '\r': + case '\n': + 
writer.write('\n'); + prevWasQuestionmark = false; + break; + case '\uFFFE': + writer.write('\uFFFD'); + prevWasQuestionmark = false; + break; + case '\uFFFF': + writer.write('\uFFFD'); + prevWasQuestionmark = false; + break; + default: + if (c < ' ') { + writer.write('\uFFFD'); + } else { + writer.write(c); + } + prevWasQuestionmark = false; + break; + } + } + writer.write("?>"); + } catch (IOException e) { + throw new SAXException(e); + } + } + + public final void setDocumentLocator(Locator locator) { + } + + public final void startDocument() throws SAXException { + try { + writer.write("\n"); + } catch (IOException e) { + throw new SAXException(e); + } + stack.clear(); + push(null, null, null); + } + + public final void startElement(String uri, String localName, String q, + Attributes atts) throws SAXException { + checkNCName(localName); + String prefix; + String qName; + if (uri.length() == 0) { + prefix = ""; + qName = localName; + // generate xmlns + startPrefixMappingPrivate(prefix, uri); + } else { + prefix = WELL_KNOWN_ELEMENT_PREFIXES.get(uri); + if (prefix == null) { + prefix = ""; + } + String lookup = lookupUri(prefix); + if (lookup != null && !lookup.equals(uri)) { + prefix = ""; + } + startPrefixMappingPrivate(prefix, uri); + if (prefix.length() == 0) { + qName = localName; + } else { + qName = prefix + ':' + localName; + } + } + + int attLen = atts.getLength(); + for (int i = 0; i < attLen; i++) { + String attUri = atts.getURI(i); + if (attUri.length() == 0 + || "http://www.w3.org/XML/1998/namespace".equals(attUri) + || "http://www.w3.org/2000/xmlns/".equals(attUri) + || atts.getLocalName(i).length() == 0 + || xmlNsQname(atts.getQName(i))) { + continue; + } + if (lookupPrefixAttribute(attUri) == null) { + generatePrefix(attUri); + } + } + + try { + writer.write('<'); + writer.write(qName); + for (PrefixMapping mapping : stack.getFirst().mappings) { + writer.write(' '); + if (mapping.prefix.length() == 0) { + writer.write("xmlns"); + } else { + 
writer.write("xmlns:"); + writer.write(mapping.prefix); + } + writer.write('='); + writer.write('"'); + writeAttributeValue(mapping.uri); + writer.write('"'); + } + + for (int i = 0; i < attLen; i++) { + String attUri = atts.getURI(i); + if ("http://www.w3.org/XML/1998/namespace".equals(attUri) + || "http://www.w3.org/2000/xmlns/".equals(attUri) + || atts.getLocalName(i).length() == 0 + || xmlNsQname(atts.getQName(i))) { + continue; + } + writer.write(' '); + if (attUri.length() != 0) { + writer.write(lookupPrefixAttribute(attUri)); + writer.write(':'); + } + String attLocal = atts.getLocalName(i); + checkNCName(attLocal); + writer.write(attLocal); + writer.write('='); + writer.write('"'); + writeAttributeValue(atts.getValue(i)); + writer.write('"'); + } + writer.write('>'); + } catch (IOException e) { + throw new SAXException(e); + } + push(uri, qName, prefix); + } + + public final void comment(char[] ch, int start, int length) throws SAXException { + try { + boolean prevWasHyphen = false; + writer.write(""); + } catch (IOException e) { + throw new SAXException(e); + } + } + + public final void endCDATA() throws SAXException { + } + + public final void endDTD() throws SAXException { + } + + public final void endEntity(String name) throws SAXException { + } + + public final void startCDATA() throws SAXException { + } + + public final void startDTD(String name, String publicId, String systemId) + throws SAXException { + } + + public final void startEntity(String name) throws SAXException { + } + + public final void startPrefixMapping(String prefix, String uri) + throws SAXException { + if (prefix.length() == 0 || uri.equals(lookupUri(prefix))) { + return; + } + if (uri.equals(lookupUri(prefix))) { + return; + } + if ("http://www.w3.org/XML/1998/namespace".equals(uri)) { + if ("xml".equals(prefix)) { + return; + } else { + throw new SAXException("Attempt to declare a reserved NS uri."); + } + } + if ("http://www.w3.org/2000/xmlns/".equals(uri)) { + throw new 
SAXException("Attempt to declare a reserved NS uri."); + } + if (uri.length() == 0 && prefix.length() != 0) { + throw new SAXException("Can bind a prefix to no namespace."); + } + checkNCName(prefix); + Set theSet = stack.getFirst().mappings; + PrefixMapping mapping = new PrefixMapping(uri, prefix); + if (theSet.contains(mapping)) { + throw new SAXException( + "Attempt to map one prefix to two URIs on one element."); + } + theSet.add(mapping); + } + + public final void startPrefixMappingPrivate(String prefix, String uri) + throws SAXException { + if (uri.equals(lookupUri(prefix))) { + return; + } + stack.getFirst().mappings.add(new PrefixMapping(uri, prefix)); + } + + public final void endPrefixMapping(String prefix) throws SAXException { + } + + public final void skippedEntity(String name) throws SAXException { + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html new file mode 100644 index 0000000000..60532962fd --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/package.html @@ -0,0 +1,29 @@ + + +Package Overview + + + +

This package provides an HTML5 parser that exposes the document through the SAX API.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java new file mode 100644 index 0000000000..6dcff56001 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.xom.Element; + +/** + * Interface for elements that have an associated form pointer. + * + * @version $Id$ + * @author hsivonen + */ +public interface FormPointer { + + /** + * Returns the form. + * + * @return the form + */ + public abstract Element getForm(); + + /** + * Sets the form. 
+ * + * @param form the form to set + */ + public abstract void setForm(Element form); + +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java new file mode 100644 index 0000000000..2e2e18df79 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.xom.Element; + +/** + * Element with an associated form. + * + * @version $Id$ + * @author hsivonen + */ +public class FormPtrElement extends Element implements FormPointer { + + private Element form = null; + + /** + * Copy constructor (FormPointer-aware). 
+ * @param elt + */ + public FormPtrElement(Element elt) { + super(elt); + if (elt instanceof FormPointer) { + FormPointer other = (FormPointer) elt; + this.setForm(other.getForm()); + } + } + + /** + * Null form. + * + * @param name + * @param uri + */ + public FormPtrElement(String name, String uri) { + super(name, uri); + } + + /** + * Full constructor. + * + * @param name + * @param uri + * @param form + */ + public FormPtrElement(String name, String uri, Element form) { + super(name, uri); + this.form = form; + } + + /** + * Gets the form. + * @see nu.validator.htmlparser.xom.FormPointer#getForm() + */ + public Element getForm() { + return form; + } + + /** + * Sets the form. + * @see nu.validator.htmlparser.xom.FormPointer#setForm(nu.xom.Element) + */ + public void setForm(Element form) { + this.form = form; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java new file mode 100644 index 0000000000..845ea15cf8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.io.Driver; +import nu.xom.Builder; +import nu.xom.Document; +import nu.xom.Nodes; +import nu.xom.ParsingException; +import nu.xom.ValidityException; + +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +/** + * This class implements an HTML5 parser that exposes data through the XOM + * interface. + * + *

By default, when using the constructor without arguments, + * this parser treats XML 1.0 infoset violations as fatal. This + * corresponds to FATAL as the general XML violation policy. It is + * possible to coerce XML 1.0-incompatible infosets into XML 1.0-compatible + * infosets by setting the general XML violation policy to ALTER_INFOSET. + * + *

The doctype is not represented in the tree. + * + *

The document mode is represented via the Mode + * interface on the Document node if the node implements + * that interface (depends on the used node factory). + * + *

The form pointer is stored if the node factory supports storing it. + * + *

This package has its own node factory class because the official + * XOM node factory may return multiple nodes instead of one confusing + * the assumptions of the DOM-oriented HTML5 parsing algorithm. + * + * @version $Id$ + * @author hsivonen + */ +public class HtmlBuilder extends Builder { + + private Driver driver; + + private final XOMTreeBuilder treeBuilder; + + private final SimpleNodeFactory simpleNodeFactory; + + private EntityResolver entityResolver; + + private ErrorHandler errorHandler = null; + + private DocumentModeHandler documentModeHandler = null; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private boolean checkingNormalization = false; + + private boolean scriptingEnabled = false; + + private final List characterHandlers = new LinkedList(); + + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; + + private boolean html4ModeCompatibleWithXhtml1Schemata = false; + + private boolean mappingLangToXmlLang = false; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; + + private boolean reportingDoctype = true; + + private ErrorHandler treeBuilderErrorHandler = null; + + private Heuristics heuristics = Heuristics.NONE; + + private TransitionHandler transitionHandler = null; + + /** + * Constructor with default node factory and fatal XML violation policy. + */ + public HtmlBuilder() { + this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL); + } + + /** + * Constructor with given node factory and fatal XML violation policy. 
+ * @param nodeFactory the factory + */ + public HtmlBuilder(SimpleNodeFactory nodeFactory) { + this(nodeFactory, XmlViolationPolicy.FATAL); + } + + /** + * Constructor with default node factory and given XML violation policy. + * @param xmlPolicy the policy + */ + public HtmlBuilder(XmlViolationPolicy xmlPolicy) { + this(new SimpleNodeFactory(), xmlPolicy); + } + + /** + * Constructor with given node factory and given XML violation policy. + * @param nodeFactory the factory + * @param xmlPolicy the policy + */ + public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) { + super(); + this.simpleNodeFactory = nodeFactory; + this.treeBuilder = new XOMTreeBuilder(nodeFactory); + this.driver = null; + setXmlPolicy(xmlPolicy); + } + + private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) { + if (errorHandler == null && transitionHandler == null + && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { + return new Tokenizer(handler, newAttributesEachTime); + } else { + return new ErrorReportingTokenizer(handler, newAttributesEachTime); + } + } + + /** + * This class wraps different tree builders depending on configuration. This + * method does the work of hiding this from the user of the class. 
+ */ + private void lazyInit() { + if (driver == null) { + this.driver = new Driver(newTokenizer(treeBuilder, false)); + this.driver.setErrorHandler(errorHandler); + this.driver.setTransitionHandler(transitionHandler); + this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); + this.driver.setCheckingNormalization(checkingNormalization); + this.driver.setCommentPolicy(commentPolicy); + this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + this.driver.setContentSpacePolicy(contentSpacePolicy); + this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); + this.driver.setXmlnsPolicy(xmlnsPolicy); + this.driver.setHeuristics(heuristics); + for (CharacterHandler characterHandler : characterHandlers) { + this.driver.addCharacterHandler(characterHandler); + } + this.treeBuilder.setDoctypeExpectation(doctypeExpectation); + this.treeBuilder.setDocumentModeHandler(documentModeHandler); + this.treeBuilder.setScriptingEnabled(scriptingEnabled); + this.treeBuilder.setReportingDoctype(reportingDoctype); + this.treeBuilder.setNamePolicy(namePolicy); + } + } + + + private void tokenize(InputSource is) throws ParsingException, IOException, + MalformedURLException { + try { + if (is == null) { + throw new IllegalArgumentException("Null input."); + } + if (is.getByteStream() == null && is.getCharacterStream() == null) { + String systemId = is.getSystemId(); + if (systemId == null) { + throw new IllegalArgumentException( + "No byte stream, no character stream nor URI."); + } + if (entityResolver != null) { + is = entityResolver.resolveEntity(is.getPublicId(), + systemId); + } + if (is.getByteStream() == null + || is.getCharacterStream() == null) { + is = new InputSource(); + is.setSystemId(systemId); + is.setByteStream(new URL(systemId).openStream()); + } + } + driver.tokenize(is); + } catch (SAXParseException e) { + throw new ParsingException(e.getMessage(), 
e.getSystemId(), e.getLineNumber(), + e.getColumnNumber(), e); + } catch (SAXException e) { + throw new ParsingException(e.getMessage(), e); + } + } + + /** + * Parse from SAX InputSource. + * @param is the InputSource + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Document build(InputSource is) throws ParsingException, IOException { + lazyInit(); + treeBuilder.setFragmentContext(null); + tokenize(is); + return treeBuilder.getDocument(); + } + + /** + * Parse a fragment from SAX InputSource assuming an HTML + * context. + * @param is the InputSource + * @param context the name of the context element (HTML namespace assumed) + * @return the fragment + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Nodes buildFragment(InputSource is, String context) + throws IOException, ParsingException { + lazyInit(); + treeBuilder.setFragmentContext(context.intern()); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Parse a fragment from SAX InputSource. + * @param is the InputSource + * @param contextLocal the local name of the context element + * @parem contextNamespace the namespace of the context element + * @return the fragment + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace) + throws IOException, ParsingException { + lazyInit(); + treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Parse from File. 
+     * @param file the file
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.io.File)
+     */
+    @Override
+    public Document build(File file) throws ParsingException,
+            ValidityException, IOException {
+        // The stream is opened here, so it is released here even when
+        // parsing fails part-way through.
+        InputStream stream = new FileInputStream(file);
+        try {
+            return build(stream, file.toURI().toASCIIString());
+        } finally {
+            stream.close();
+        }
+    }
+
+    /**
+     * Parse from <code>InputStream</code>.
+     * @param stream the stream
+     * @param uri the base URI
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
+     */
+    @Override
+    public Document build(InputStream stream, String uri)
+            throws ParsingException, ValidityException, IOException {
+        InputSource is = new InputSource(stream);
+        is.setSystemId(uri);
+        return build(is);
+    }
+
+    /**
+     * Parse from <code>InputStream</code>.
+     * @param stream the stream
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.io.InputStream)
+     */
+    @Override
+    public Document build(InputStream stream) throws ParsingException,
+            ValidityException, IOException {
+        return build(new InputSource(stream));
+    }
+
+    /**
+     * Parse from <code>Reader</code>.
+     * @param stream the reader
+     * @param uri the base URI
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
+     */
+    @Override
+    public Document build(Reader stream, String uri) throws ParsingException,
+            ValidityException, IOException {
+        InputSource is = new InputSource(stream);
+        is.setSystemId(uri);
+        return build(is);
+    }
+
+    /**
+     * Parse from <code>Reader</code>.
+     * @param stream the reader
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.io.Reader)
+     */
+    @Override
+    public Document build(Reader stream) throws ParsingException,
+            ValidityException, IOException {
+        return build(new InputSource(stream));
+    }
+
+    /**
+     * Parse from <code>String</code>.
+     * @param content the HTML source as string
+     * @param uri the base URI
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
+     */
+    @Override
+    public Document build(String content, String uri) throws ParsingException,
+            ValidityException, IOException {
+        return build(new StringReader(content), uri);
+    }
+
+    /**
+     * Parse from URI.
+     * @param uri the URI of the document
+     * @return the document
+     * @throws ParsingException in case of an XML violation
+     * @throws IOException if IO goes wrong
+     * @see nu.xom.Builder#build(java.lang.String)
+     */
+    @Override
+    public Document build(String uri) throws ParsingException,
+            ValidityException, IOException {
+        return build(new InputSource(uri));
+    }
+
+    /**
+     * Gets the node factory.
+     *
+     * @return the node factory held by this builder
+     */
+    public SimpleNodeFactory getSimpleNodeFactory() {
+        return simpleNodeFactory;
+    }
+
+    /**
+     * Sets the resolver that is consulted when an input source has a system
+     * id but no stream.
+     *
+     * @param resolver the resolver, or null for none
+     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
+     */
+    public void setEntityResolver(EntityResolver resolver) {
+        entityResolver = resolver;
+    }
+
+    /**
+     * Sets the handler used for both tokenizer and tree builder errors.
+     *
+     * @param handler the error handler
+     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
+     */
+    public void setErrorHandler(ErrorHandler handler) {
+        errorHandler = handler;
+        treeBuilderErrorHandler = handler;
+        // Drop the driver so lazyInit() rebuilds it with the new handler.
+        driver = null;
+    }
+
+    /**
+     * Sets the tokenizer transition handler. (The method name is spelled
+     * "Hander"; it is public API and is kept as is.)
+     *
+     * @param handler the transition handler
+     */
+    public void setTransitionHander(TransitionHandler handler) {
+        transitionHandler = handler;
+        // Drop the driver so lazyInit() rebuilds it with the new handler.
+        driver = null;
+    }
+
+    /**
+     * Indicates whether NFC normalization of source is being checked.
+     * @return <code>true</code> if NFC normalization of source is being checked.
+     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
+     */
+    public boolean isCheckingNormalization() {
+        return checkingNormalization;
+    }
+
+    /**
+     * Toggles the checking of the NFC normalization of source.
+     * @param enable <code>true</code> to check normalization
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
+     */
+    public void setCheckingNormalization(boolean enable) {
+        this.checkingNormalization = enable;
+        // Forward immediately when a driver already exists; otherwise
+        // lazyInit() applies the stored value.
+        if (driver != null) {
+            driver.setCheckingNormalization(checkingNormalization);
+        }
+    }
+
+    /**
+     * Sets the policy for consecutive hyphens in comments.
+     * @param commentPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
+        this.commentPolicy = commentPolicy;
+        if (driver != null) {
+            driver.setCommentPolicy(commentPolicy);
+        }
+    }
+
+    /**
+     * Sets the policy for non-XML characters except white space.
+     * @param contentNonXmlCharPolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentNonXmlCharPolicy(
+            XmlViolationPolicy contentNonXmlCharPolicy) {
+        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
+        // Unlike the neighbouring setters this one discards the driver, so the
+        // policy takes effect when lazyInit() builds a new one.
+        driver = null;
+    }
+
+    /**
+     * Sets the policy for non-XML white space.
+     * @param contentSpacePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
+        this.contentSpacePolicy = contentSpacePolicy;
+        if (driver != null) {
+            driver.setContentSpacePolicy(contentSpacePolicy);
+        }
+    }
+
+    /**
+     * Whether the parser considers scripting to be enabled for noscript treatment.
+     *
+     * @return <code>true</code> if enabled
+     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
+     */
+    public boolean isScriptingEnabled() {
+        return scriptingEnabled;
+    }
+
+    /**
+     * Sets whether the parser considers scripting to be enabled for noscript treatment.
+     * @param scriptingEnabled <code>true</code> to enable
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
+     */
+    public void setScriptingEnabled(boolean scriptingEnabled) {
+        this.scriptingEnabled = scriptingEnabled;
+        if (treeBuilder != null) {
+            treeBuilder.setScriptingEnabled(scriptingEnabled);
+        }
+    }
+
+    /**
+     * Returns the doctype expectation.
+     *
+     * @return the doctypeExpectation
+     */
+    public DoctypeExpectation getDoctypeExpectation() {
+        return doctypeExpectation;
+    }
+
+    /**
+     * Sets the doctype expectation.
+     *
+     * @param doctypeExpectation
+     *            the doctypeExpectation to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
+     */
+    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
+        this.doctypeExpectation = doctypeExpectation;
+        if (treeBuilder != null) {
+            treeBuilder.setDoctypeExpectation(doctypeExpectation);
+        }
+    }
+
+    /**
+     * Returns the document mode handler.
+     *
+     * @return the documentModeHandler
+     */
+    public DocumentModeHandler getDocumentModeHandler() {
+        return documentModeHandler;
+    }
+
+    /**
+     * Sets the document mode handler.
+     *
+     * @param documentModeHandler
+     *            the documentModeHandler to set
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
+     */
+    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
+        this.documentModeHandler = documentModeHandler;
+        // lazyInit() only copies this field while no driver exists, so forward
+        // it right away (as the sibling tree builder setters do); otherwise a
+        // handler set after the first parse would never be used.
+        if (treeBuilder != null) {
+            treeBuilder.setDocumentModeHandler(documentModeHandler);
+        }
+    }
+
+    /**
+     * Returns the streamabilityViolationPolicy.
+     *
+     * @return the streamabilityViolationPolicy
+     */
+    public XmlViolationPolicy getStreamabilityViolationPolicy() {
+        return streamabilityViolationPolicy;
+    }
+
+    /**
+     * Sets the streamabilityViolationPolicy.
+     *
+     * NOTE(review): lazyInit() does not forward this value to the driver or
+     * the tree builder; confirm whether it is consumed anywhere else.
+     *
+     * @param streamabilityViolationPolicy
+     *            the streamabilityViolationPolicy to set
+     */
+    public void setStreamabilityViolationPolicy(
+            XmlViolationPolicy streamabilityViolationPolicy) {
+        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
+        // Discards the driver; lazyInit() creates a new one on the next parse.
+        driver = null;
+    }
+
+    /**
+     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     * @param html4ModeCompatibleWithXhtml1Schemata the new setting
+     */
+    public void setHtml4ModeCompatibleWithXhtml1Schemata(
+            boolean html4ModeCompatibleWithXhtml1Schemata) {
+        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
+        if (driver != null) {
+            driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
+        }
+    }
+
+    /**
+     * Returns the <code>Locator</code> during parse.
+     *
+     * The driver is dereferenced without a null check, so this throws
+     * <code>NullPointerException</code> while no driver exists (before
+     * lazyInit() has run, or after a setter has reset the driver to null).
+     *
+     * @return the <code>Locator</code>
+     */
+    public Locator getDocumentLocator() {
+        return driver.getDocumentLocator();
+    }
+
+    /**
+     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
+     * the name in the value.
+     *
+     * @return the html4ModeCompatibleWithXhtml1Schemata
+     */
+    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
+        return html4ModeCompatibleWithXhtml1Schemata;
+    }
+
+    /**
+     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+     * @param mappingLangToXmlLang the new setting
+     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
+     */
+    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
+        this.mappingLangToXmlLang = mappingLangToXmlLang;
+        if (driver != null) {
+            driver.setMappingLangToXmlLang(mappingLangToXmlLang);
+        }
+    }
+
+    /**
+     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
+     *
+     * @return the mappingLangToXmlLang
+     */
+    public boolean isMappingLangToXmlLang() {
+        return mappingLangToXmlLang;
+    }
+
+    /**
+     * Whether the <code>xmlns</code> attribute on the root element is
+     * passed through. (FATAL not allowed.)
+     * @param xmlnsPolicy the policy; anything but FATAL
+     * @throws IllegalArgumentException if the policy is FATAL
+     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
+        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
+            throw new IllegalArgumentException("Can't use FATAL here.");
+        }
+        this.xmlnsPolicy = xmlnsPolicy;
+        if (driver != null) {
+            driver.setXmlnsPolicy(xmlnsPolicy);
+        }
+    }
+
+    /**
+     * Returns the xmlnsPolicy.
+     *
+     * @return the xmlnsPolicy
+     */
+    public XmlViolationPolicy getXmlnsPolicy() {
+        return xmlnsPolicy;
+    }
+
+    /**
+     * Returns the commentPolicy.
+     *
+     * @return the commentPolicy
+     */
+    public XmlViolationPolicy getCommentPolicy() {
+        return commentPolicy;
+    }
+
+    /**
+     * Returns the contentNonXmlCharPolicy.
+     *
+     * @return the contentNonXmlCharPolicy
+     */
+    public XmlViolationPolicy getContentNonXmlCharPolicy() {
+        return contentNonXmlCharPolicy;
+    }
+
+    /**
+     * Returns the contentSpacePolicy.
+     *
+     * @return the contentSpacePolicy
+     */
+    public XmlViolationPolicy getContentSpacePolicy() {
+        return contentSpacePolicy;
+    }
+
+    /**
+     * Sets the reportingDoctype flag and forwards it to the tree builder.
+     *
+     * @param reportingDoctype the new setting
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
+     */
+    public void setReportingDoctype(boolean reportingDoctype) {
+        this.reportingDoctype = reportingDoctype;
+        if (treeBuilder != null) {
+            treeBuilder.setReportingDoctype(reportingDoctype);
+        }
+    }
+
+    /**
+     * Returns the reportingDoctype.
+     *
+     * @return the reportingDoctype
+     */
+    public boolean isReportingDoctype() {
+        return reportingDoctype;
+    }
+
+    /**
+     * The policy for non-NCName element and attribute names.
+     * @param namePolicy the policy
+     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
+     */
+    public void setNamePolicy(XmlViolationPolicy namePolicy) {
+        this.namePolicy = namePolicy;
+        // Both the driver and the tree builder receive the name policy, but
+        // only once a driver exists; before that lazyInit() applies it.
+        if (driver != null) {
+            driver.setNamePolicy(namePolicy);
+            treeBuilder.setNamePolicy(namePolicy);
+        }
+    }
+
+    /**
+     * Sets the encoding sniffing heuristics.
+     *
+     * @param heuristics the heuristics to set
+     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
+     */
+    public void setHeuristics(Heuristics heuristics) {
+        this.heuristics = heuristics;
+        if (driver != null) {
+            driver.setHeuristics(heuristics);
+        }
+    }
+
+    /**
+     * Returns the encoding sniffing heuristics.
+     *
+     * @return the heuristics
+     */
+    public Heuristics getHeuristics() {
+        return this.heuristics;
+    }
+
+    /**
+     * This is a catch-all convenience method for setting name, xmlns, content space,
+     * content non-XML char and comment policies in one go. This does not affect the
+     * streamability policy or doctype reporting.
+     *
+     * @param xmlPolicy the policy to apply to all five settings
+     */
+    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
+        setNamePolicy(xmlPolicy);
+        // setXmlnsPolicy() rejects FATAL, so FATAL is mapped to ALTER_INFOSET.
+        setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
+        setContentSpacePolicy(xmlPolicy);
+        setContentNonXmlCharPolicy(xmlPolicy);
+        setCommentPolicy(xmlPolicy);
+    }
+
+    /**
+     * The policy for non-NCName element and attribute names.
+     *
+     * @return the namePolicy
+     */
+    public XmlViolationPolicy getNamePolicy() {
+        return namePolicy;
+    }
+
+    /**
+     * Does nothing.
+     * @param bogusXmlnsPolicy ignored
+     * @deprecated
+     */
+    public void setBogusXmlnsPolicy(
+            XmlViolationPolicy bogusXmlnsPolicy) {
+    }
+
+    /**
+     * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
+     * @deprecated
+     * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
+     */
+    public XmlViolationPolicy getBogusXmlnsPolicy() {
+        return XmlViolationPolicy.ALTER_INFOSET;
+    }
+
+    /**
+     * Registers a character handler. The handler is remembered so that
+     * lazyInit() can re-register it on a newly created driver, and it is
+     * also added to the current driver when one exists.
+     *
+     * @param characterHandler the handler to add
+     */
+    public void addCharacterHandler(CharacterHandler characterHandler) {
+        this.characterHandlers.add(characterHandler);
+        if (driver != null) {
+            driver.addCharacterHandler(characterHandler);
+        }
+    }
+
+
+    /**
+     * Sets whether comment nodes appear in the tree.
+     * @param ignoreComments <code>true</code> to ignore comments
+     * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
+     */
+    public void setIgnoringComments(boolean ignoreComments) {
+        treeBuilder.setIgnoringComments(ignoreComments);
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
new file mode 100644
index 0000000000..3b76b1421e
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.xom.Document;
+import nu.xom.Element;
+
+/**
+ * Document with <code>Mode</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+public class ModalDocument extends Document implements Mode {
+
+    // The document mode; stays null until setMode() is called.
+    private DocumentMode mode = null;
+
+    /**
+     * Copy constructor (<code>Mode</code>-aware): when the source document
+     * also implements <code>Mode</code>, its mode is copied as well.
+     * @param doc the document to copy
+     */
+    public ModalDocument(Document doc) {
+        super(doc);
+        if (doc instanceof Mode) {
+            Mode modal = (Mode) doc;
+            setMode(modal.getMode());
+        }
+    }
+
+    /**
+     * With root.
+     *
+     * @param elt the root element
+     */
+    public ModalDocument(Element elt) {
+        super(elt);
+    }
+
+    /**
+     * Gets the mode.
+     * @return the mode, or null if none has been set
+     * @see nu.validator.htmlparser.xom.Mode#getMode()
+     */
+    public DocumentMode getMode() {
+        return mode;
+    }
+
+    /**
+     * Sets the mode.
+     * @param mode the mode to set
+     * @see nu.validator.htmlparser.xom.Mode#setMode(nu.validator.htmlparser.common.DocumentMode)
+     */
+    public void setMode(DocumentMode mode) {
+        this.mode = mode;
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
new file mode 100644
index 0000000000..bd2dcbc26b
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+
+/**
+ * Interface for attaching a <code>DocumentMode</code> on a <code>Document</code>.
+ * @version $Id$
+ * @author hsivonen
+ */
+public interface Mode {
+
+    /**
+     * Returns the mode.
+     *
+     * @return the mode
+     */
+    public abstract DocumentMode getMode();
+
+    /**
+     * Sets the mode.
+     *
+     * @param mode the mode to set
+     */
+    public abstract void setMode(DocumentMode mode);
+
+}
\ No newline at end of file
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
new file mode 100644
index 0000000000..147b5d9304
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.xom.Attribute;
+import nu.xom.Comment;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Text;
+import nu.xom.Attribute.Type;
+
+/**
+ * A simpler node factory that does not use <code>Nodes</code>.
+ *
+ * @version $Id$
+ * @author hsivonen
+ */
+public class SimpleNodeFactory {
+
+    /**
+     * Creates an attribute. This implementation does
+     * <code>return new Attribute(localName, uri, value, type);</code>
+     * @param localName the attribute name
+     * @param uri the attribute namespace URI
+     * @param value the attribute value
+     * @param type the attribute type
+     * @return the new attribute
+     */
+    public Attribute makeAttribute(String localName, String uri, String value, Type type) {
+        return new Attribute(localName, uri, value, type);
+    }
+
+    /**
+     * Creates a text node. This implementation does
+     * <code>return new Text(string);</code>
+     * @param string the text content
+     * @return the new text node
+     */
+    public Text makeText(String string) {
+        return new Text(string);
+    }
+
+    /**
+     * Creates a comment node. This implementation does
+     * <code>return new Comment(string);</code>
+     * @param string the comment content
+     * @return the new comment node
+     */
+    public Comment makeComment(String string) {
+        return new Comment(string);
+    }
+
+    /**
+     * Creates an element. This implementation does
+     * <code>return new Element(name, namespace);</code>
+     * @param name the element name
+     * @param namespace the element namespace URI
+     * @return the new element
+     */
+    public Element makeElement(String name, String namespace) {
+        return new Element(name, namespace);
+    }
+
+    /**
+     * Creates an element that carries a form pointer. This implementation does
+     * <code>return new FormPtrElement(name, namespace, form);</code>
+     * @param name the element name
+     * @param namespace the element namespace URI
+     * @param form the form element passed on to <code>FormPtrElement</code>
+     * @return the new element
+     */
+    public Element makeElement(String name, String namespace, Element form) {
+        return new FormPtrElement(name, namespace, form);
+    }
+
+    /**
+     * Creates the document with a placeholder root element. This implementation does
+     * <code>return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));</code>
+     *
+     * <p>Subclasses are advised to return an instance of <code>Mode</code>. (Not required, though.)
+     *
+     * @return the new document
+     */
+    public Document makeDocument() {
+        return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));
+    }
+
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
new file mode 100644
index 0000000000..623f319270
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008-2010 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.xom;
+
+import nu.validator.htmlparser.common.DocumentMode;
+import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.xom.Attribute;
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Node;
+import nu.xom.Nodes;
+import nu.xom.ParentNode;
+import nu.xom.Text;
+import nu.xom.XMLException;
+
+import org.xml.sax.SAXException;
+
+/**
+ * Tree builder that materializes the parse as a XOM tree, creating every node
+ * through a {@link SimpleNodeFactory}. XOM's unchecked <code>XMLException</code>
+ * is routed to <code>fatal()</code> in every callback.
+ */
+class XOMTreeBuilder extends CoalescingTreeBuilder<Element> {
+
+    private final SimpleNodeFactory nodeFactory;
+
+    // The document under construction; handed out (and cleared) by
+    // getDocument() / getDocumentFragment().
+    private Document document;
+
+    // Cache of the position of the most recently looked-up table within its
+    // parent, used while foster parenting.
+    private int cachedTableIndex = -1;
+
+    private Element cachedTable = null;
+
+    protected XOMTreeBuilder(SimpleNodeFactory nodeFactory) {
+        super();
+        this.nodeFactory = nodeFactory;
+    }
+
+    /**
+     * Builds the XOM attribute for entry <code>i</code> of the holder. The
+     * type string is compared by value, so the result does not depend on the
+     * holder returning an interned <code>"ID"</code> constant.
+     */
+    private Attribute attributeAt(HtmlAttributes attributes, int i) {
+        return nodeFactory.makeAttribute(
+                attributes.getLocalNameNoBoundsCheck(i),
+                attributes.getURINoBoundsCheck(i),
+                attributes.getValueNoBoundsCheck(i),
+                "ID".equals(attributes.getTypeNoBoundsCheck(i)) ? Attribute.Type.ID
+                        : Attribute.Type.CDATA);
+    }
+
+    /**
+     * Adds every attribute of the holder to a freshly created element.
+     */
+    private void copyAttributes(Element element, HtmlAttributes attributes) {
+        for (int i = 0; i < attributes.getLength(); i++) {
+            element.addAttribute(attributeAt(attributes, i));
+        }
+    }
+
+    /**
+     * Adds to an existing element those attributes it does not already have.
+     */
+    @Override
+    protected void addAttributesToElement(Element element, HtmlAttributes attributes)
+            throws SAXException {
+        try {
+            for (int i = 0; i < attributes.getLength(); i++) {
+                String localName = attributes.getLocalNameNoBoundsCheck(i);
+                String uri = attributes.getURINoBoundsCheck(i);
+                // Attributes already present on the element win.
+                if (element.getAttribute(localName, uri) == null) {
+                    element.addAttribute(attributeAt(attributes, i));
+                }
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Appends text to the parent, extending a trailing text node when there
+     * is one instead of creating an adjacent second text node.
+     */
+    @Override
+    protected void appendCharacters(Element parent, String text)
+            throws SAXException {
+        try {
+            int childCount = parent.getChildCount();
+            Node lastChild;
+            if (childCount != 0
+                    && ((lastChild = parent.getChild(childCount - 1)) instanceof Text)) {
+                Text lastAsText = (Text) lastChild;
+                lastAsText.setValue(lastAsText.getValue() + text);
+                return;
+            }
+            parent.appendChild(nodeFactory.makeText(text));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Moves all children of <code>oldParent</code> to the end of
+     * <code>newParent</code>, preserving their order.
+     */
+    @Override
+    protected void appendChildrenToNewParent(Element oldParent,
+            Element newParent) throws SAXException {
+        try {
+            Nodes children = oldParent.removeChildren();
+            for (int i = 0; i < children.size(); i++) {
+                newParent.appendChild(children.get(i));
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    @Override
+    protected void appendComment(Element parent, String comment) throws SAXException {
+        try {
+            parent.appendChild(nodeFactory.makeComment(comment));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Adds a comment at document level: before the root while the root is
+     * still the placeholder element, after it otherwise.
+     */
+    @Override
+    protected void appendCommentToDocument(String comment)
+            throws SAXException {
+        try {
+            Element root = document.getRootElement();
+            if ("http://www.xom.nu/fakeRoot".equals(root.getNamespaceURI())) {
+                document.insertChild(nodeFactory.makeComment(comment), document.indexOf(root));
+            } else {
+                document.appendChild(nodeFactory.makeComment(comment));
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    @Override
+    protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element intendedParent) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement(name, ns);
+            copyAttributes(rv, attributes);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Creates the <code>html</code> element and installs it as the document
+     * root, replacing the previous root.
+     */
+    @Override
+    protected Element createHtmlElementSetAsRoot(
+            HtmlAttributes attributes) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement("html",
+                    "http://www.w3.org/1999/xhtml");
+            copyAttributes(rv, attributes);
+            document.setRootElement(rv);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    @Override
+    protected void detachFromParent(Element element) throws SAXException {
+        try {
+            element.detach();
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    @Override
+    protected void appendElement(Element child,
+            Element newParent) throws SAXException {
+        try {
+            // Detach first so the child can be appended to its new parent.
+            child.detach();
+            newParent.appendChild(child);
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    @Override
+    protected boolean hasChildren(Element element) throws SAXException {
+        try {
+            return element.getChildCount() != 0;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Returns the document and releases this builder's reference to it.
+     *
+     * @return the document
+     */
+    Document getDocument() {
+        Document rv = document;
+        document = null;
+        return rv;
+    }
+
+    /**
+     * Returns the children of the document's root element as a fragment and
+     * releases this builder's reference to the document.
+     *
+     * @return the fragment nodes, detached from the root
+     */
+    Nodes getDocumentFragment() {
+        Element rootElt = document.getRootElement();
+        Nodes rv = rootElt.removeChildren();
+        document = null;
+        return rv;
+    }
+
+    /**
+     * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String,
+     *      java.lang.String, org.xml.sax.Attributes, java.lang.Object)
+     */
+    @Override
+    protected Element createElement(String ns, String name,
+            HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
+        try {
+            Element rv = nodeFactory.makeElement(name,
+                    ns, form);
+            copyAttributes(rv, attributes);
+            return rv;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * @see nu.validator.htmlparser.impl.TreeBuilder#start()
+     */
+    @Override
+    protected void start(boolean fragment) throws SAXException {
+        document = nodeFactory.makeDocument();
+        cachedTableIndex = -1;
+        cachedTable = null;
+    }
+
+    /**
+     * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
+     *      java.lang.String, java.lang.String, boolean)
+     */
+    @Override
+    protected void documentMode(DocumentMode mode, String publicIdentifier,
+            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
+            throws SAXException {
+        // Only documents that implement Mode can record the mode; for any
+        // other Document the information is dropped.
+        if (document instanceof Mode) {
+            Mode modal = (Mode) document;
+            modal.setMode(mode);
+        }
+    }
+
+    /**
+     * Creates an element and foster-parents it: it is inserted immediately
+     * before the table when the table has a parent, and appended to
+     * <code>stackParent</code> otherwise.
+     */
+    @Override
+    protected Element createAndInsertFosterParentedElement(String ns, String name,
+            HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);
+            if (parent != null) { // always an element if not null
+                ParentNode tableParent = (ParentNode) parent;
+                // The index must be that of the table within the node we
+                // insert into, which is not necessarily stackParent.
+                tableParent.insertChild(child, indexOfTable(table, tableParent));
+                cachedTableIndex++;
+            } else {
+                stackParent.appendChild(child);
+            }
+            return child;
+        } catch (XMLException e) {
+            fatal(e);
+            throw new RuntimeException("Unreachable");
+        }
+    }
+
+    /**
+     * Foster-parents text: it is merged into, or inserted as, the node just
+     * before the table when the table has a parent; otherwise it is appended
+     * to <code>stackParent</code> (again merging with a trailing text node).
+     */
+    @Override
+    protected void insertFosterParentedCharacters(String text,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            if (parent != null) { // always an element if not null
+                Element parentAsElt = (Element) parent;
+                int tableIndex = indexOfTable(table, parentAsElt);
+                Node prevSibling;
+                if (tableIndex != 0
+                        && ((prevSibling = parentAsElt.getChild(tableIndex - 1)) instanceof Text)) {
+                    Text prevAsText = (Text) prevSibling;
+                    prevAsText.setValue(prevAsText.getValue() + text);
+                    return;
+                }
+                parentAsElt.insertChild(nodeFactory.makeText(text), tableIndex);
+                // A node was inserted before the table, so it moved down by one.
+                cachedTableIndex++;
+                return;
+            }
+            int childCount = stackParent.getChildCount();
+            Node lastChild;
+            if (childCount != 0
+                    && ((lastChild = stackParent.getChild(childCount - 1)) instanceof Text)) {
+                Text lastAsText = (Text) lastChild;
+                lastAsText.setValue(lastAsText.getValue() + text);
+                return;
+            }
+            stackParent.appendChild(nodeFactory.makeText(text));
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Foster-parents an existing element: it is inserted immediately before
+     * the table when the table has a parent, and appended to
+     * <code>stackParent</code> otherwise.
+     */
+    @Override
+    protected void insertFosterParentedChild(Element child,
+            Element table, Element stackParent) throws SAXException {
+        try {
+            Node parent = table.getParent();
+            if (parent != null) { // always an element if not null
+                ParentNode tableParent = (ParentNode) parent;
+                tableParent.insertChild(child, indexOfTable(table, tableParent));
+                cachedTableIndex++;
+            } else {
+                stackParent.appendChild(child);
+            }
+        } catch (XMLException e) {
+            fatal(e);
+        }
+    }
+
+    /**
+     * Returns the index of the table within its parent, reusing the cached
+     * value when the same table is asked for again. Callers bump the cached
+     * index after inserting a node in front of the table.
+     *
+     * @param table the table element
+     * @param tableParent the node that actually contains the table
+     * @return the index of the table among the children of its parent
+     */
+    private int indexOfTable(Element table, ParentNode tableParent) {
+        if (table != cachedTable) {
+            cachedTable = table;
+            cachedTableIndex = tableParent.indexOf(table);
+        }
+        return cachedTableIndex;
+    }
+
+    /**
+     * @see nu.validator.htmlparser.impl.TreeBuilder#end()
+     */
+    @Override
+    protected void end() throws SAXException {
+        cachedTableIndex = -1;
+        cachedTable = null;
+    }
+}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
new file mode 100644
index 0000000000..a936d5e3ab
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html
@@ -0,0 +1,29 @@
+
+
+Package Overview
+
+
+
+

This package provides an HTML5 parser that exposes the document through the XOM API.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java new file mode 100644 index 0000000000..f17ce3f893 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A CDATA section. + * @version $Id$ + * @author hsivonen + */ +public final class CDATA extends ParentNode { + + /** + * The constructor. 
+ * @param locator the locator + */ + public CDATA(Locator locator) { + super(locator); + } + + /** + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startCDATA(this); + } + + /** + * + * @throws SAXException if things go wrong + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endCDATA(endLocator); + } + + /** + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.CDATA; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java new file mode 100644 index 0000000000..55c7715f66 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; + +/** + * A common superclass for character buffer node classes. + * @version $Id$ + * @author hsivonen + */ +public abstract class CharBufferNode extends Node { + + /** + * The buffer. + */ + protected final char[] buffer; + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + CharBufferNode(Locator locator, char[] buf, int start, int length) { + super(locator); + this.buffer = new char[length]; + System.arraycopy(buf, start, buffer, 0, length); + } + + /** + * Returns the wrapped buffer as a string. + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return new String(buffer); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java new file mode 100644 index 0000000000..b8cc2d6d65 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A run of characters + * @version $Id$ + * @author hsivonen + */ +public final class Characters extends CharBufferNode { + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset in the buffer + * @param length the length + */ + public Characters(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.characters(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.CHARACTERS; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java new file mode 100644 index 0000000000..f010462fbf --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal 
in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A comment. + * + * @version $Id$ + * @author hsivonen + */ +public final class Comment extends CharBufferNode { + + /** + * The constructor. 
+ * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + public Comment(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.comment(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.COMMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java new file mode 100644 index 0000000000..2169e05719 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A doctype. + * @version $Id$ + * @author hsivonen + */ +public final class DTD extends ParentNode { + + /** + * The name. + */ + private final String name; + + /** + * The public id. + */ + private final String publicIdentifier; + + /** + * The system id. + */ + private final String systemIdentifier; + + /** + * The constructor. + * @param locator the locator + * @param name the name + * @param publicIdentifier the public id + * @param systemIdentifier the system id + */ + public DTD(Locator locator, String name, String publicIdentifier, String systemIdentifier) { + super(locator); + this.name = name; + this.publicIdentifier = publicIdentifier; + this.systemIdentifier = systemIdentifier; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startDTD(name, publicIdentifier, systemIdentifier, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endDTD(endLocator); + } + + /** + * Returns the name. + * + * @return the name + */ + public String getName() { + return name; + } + + /** + * Returns the publicIdentifier. + * + * @return the publicIdentifier + */ + public String getPublicIdentifier() { + return publicIdentifier; + } + + /** + * Returns the systemIdentifier. 
+ * + * @return the systemIdentifier + */ + public String getSystemIdentifier() { + return systemIdentifier; + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.DTD; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java new file mode 100644 index 0000000000..3bb6f09c72 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A document. + * @version $Id$ + * @author hsivonen + */ +public final class Document extends ParentNode { + + /** + * The constructor. 
+ * @param locator the locator + */ + public Document(Locator locator) { + super(locator); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startDocument(this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endDocument(endLocator); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.DOCUMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java new file mode 100644 index 0000000000..06816932f2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.helpers.LocatorImpl; + +/** + * A document fragment. + * + * @version $Id$ + * @author hsivonen + */ +public final class DocumentFragment extends ParentNode { + + /** + * The constructor. + */ + public DocumentFragment() { + super(new LocatorImpl()); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override void visit(TreeParser treeParser) { + // nothing + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override public NodeType getNodeType() { + return NodeType.DOCUMENT_FRAGMENT; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java new file mode 100644 index 0000000000..3d33164e5f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +/** + * An element. + * @version $Id$ + * @author hsivonen + */ +public final class Element extends ParentNode { + + /** + * The namespace URI. + */ + private final String uri; + + /** + * The local name. + */ + private final String localName; + + /** + * The qualified name. + */ + private final String qName; + + /** + * The attributes. + */ + private final Attributes attributes; + + /** + * The namespace prefix mappings. + */ + private final List prefixMappings; + + /** + * The contructor. + * @param locator the locator. 
+ * @param uri the namespace URI + * @param localName the local name + * @param qName the qualified name + * @param atts the attributes + * @param retainAttributes true to retain the attributes instead of copying + * @param prefixMappings the prefix mappings + */ + public Element(Locator locator, String uri, String localName, String qName, + Attributes atts, boolean retainAttributes, + List prefixMappings) { + super(locator); + this.uri = uri; + this.localName = localName; + this.qName = qName; + if (retainAttributes) { + this.attributes = atts; + } else { + this.attributes = new AttributesImpl(atts); + } + this.prefixMappings = prefixMappings; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + if (prefixMappings != null) { + for (PrefixMapping mapping : prefixMappings) { + treeParser.startPrefixMapping(mapping.getPrefix(), + mapping.getUri(), this); + } + } + treeParser.startElement(uri, localName, qName, attributes, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endElement(uri, localName, qName, endLocator); + if (prefixMappings != null) { + for (PrefixMapping mapping : prefixMappings) { + treeParser.endPrefixMapping(mapping.getPrefix(), endLocator); + } + } + } + + /** + * Returns the attributes. + * + * @return the attributes + */ + public Attributes getAttributes() { + return attributes; + } + + /** + * Returns the localName. + * + * @return the localName + */ + public String getLocalName() { + return localName; + } + + /** + * Returns the prefixMappings. + * + * @return the prefixMappings + */ + public List getPrefixMappings() { + return prefixMappings; + } + + /** + * Returns the qName. + * + * @return the qName + */ + public String getQName() { + return qName; + } + + /** + * Returns the uri. 
+ * + * @return the uri + */ + public String getUri() { + return uri; + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.ELEMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java new file mode 100644 index 0000000000..0910137369 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * An entity. + * @version $Id$ + * @author hsivonen + */ +public final class Entity extends ParentNode { + + /** + * The name. + */ + private final String name; + + /** + * The constructor. 
+ * @param locator the locator + * @param name the name + */ + public Entity(Locator locator, String name) { + super(locator); + this.name = name; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startEntity(name, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endEntity(name, endLocator); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.ENTITY; + } + + /** + * Returns the name. + * + * @return the name + */ + public String getName() { + return name; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java new file mode 100644 index 0000000000..e5fcf350fd --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A run ignorable whitespace. + * @version $Id$ + * @author hsivonen + */ +public final class IgnorableWhitespace extends CharBufferNode { + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + public IgnorableWhitespace(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.ignorableWhitespace(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.IGNORABLE_WHITESPACE; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java new file mode 100644 index 0000000000..37c0c63251 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
/**
 * An immutable snapshot of a SAX {@link Locator}.
 *
 * <p>The four values are read once, in the constructor; later changes to the
 * source locator are not reflected.
 *
 * @version $Id$
 * @author hsivonen
 */
public final class LocatorImpl implements Locator {

    /**
     * The system id.
     */
    private final String systemId;

    /**
     * The public id.
     */
    private final String publicId;

    /**
     * The column, or -1 when no source locator was given.
     */
    private final int column;

    /**
     * The line, or -1 when no source locator was given.
     */
    private final int line;

    /**
     * Captures the current values of {@code locator}.
     *
     * @param locator the locator to copy from; when {@code null}, both ids
     *        become {@code null} and both positions become -1
     */
    public LocatorImpl(Locator locator) {
        final boolean absent = (locator == null);
        this.systemId = absent ? null : locator.getSystemId();
        this.publicId = absent ? null : locator.getPublicId();
        this.column = absent ? -1 : locator.getColumnNumber();
        this.line = absent ? -1 : locator.getLineNumber();
    }

    /**
     * Returns the captured system id.
     *
     * @see org.xml.sax.Locator#getSystemId()
     */
    public String getSystemId() {
        return systemId;
    }

    /**
     * Returns the captured public id.
     *
     * @see org.xml.sax.Locator#getPublicId()
     */
    public String getPublicId() {
        return publicId;
    }

    /**
     * Returns the captured line number.
     *
     * @see org.xml.sax.Locator#getLineNumber()
     */
    public int getLineNumber() {
        return line;
    }

    /**
     * Returns the captured column number.
     *
     * @see org.xml.sax.Locator#getColumnNumber()
     */
    public int getColumnNumber() {
        return column;
    }
}
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * The common node superclass. + * @version $Id$ + * @author hsivonen + */ +public abstract class Node implements Locator { + + /** + * The system id. + */ + private final String systemId; + + /** + * The public id. + */ + private final String publicId; + + /** + * The column. + */ + private final int column; + + /** + * The line. + */ + private final int line; + + /** + * The next sibling. + */ + private Node nextSibling = null; + + /** + * The parent. + */ + private ParentNode parentNode = null; + + /** + * The constructor. 
+ * + * @param locator the locator + */ + Node(Locator locator) { + if (locator == null) { + this.systemId = null; + this.publicId = null; + this.column = -1; + this.line = -1; + } else { + this.systemId = locator.getSystemId(); + this.publicId = locator.getPublicId(); + this.column = locator.getColumnNumber(); + this.line = locator.getLineNumber(); + } + } + + /** + * + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + return column; + } + + /** + * + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + return line; + } + + /** + * + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + return publicId; + } + + /** + * + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + return systemId; + } + + /** + * Visit the node. + * + * @param treeParser the visitor + * @throws SAXException if stuff goes wrong + */ + abstract void visit(TreeParser treeParser) throws SAXException; + + /** + * Revisit the node. + * + * @param treeParser the visitor + * @throws SAXException if stuff goes wrong + */ + void revisit(TreeParser treeParser) throws SAXException { + return; + } + + /** + * Return the first child. + * @return the first child + */ + public Node getFirstChild() { + return null; + } + + /** + * Returns the nextSibling. + * + * @return the nextSibling + */ + public final Node getNextSibling() { + return nextSibling; + } + + /** + * Returns the previous sibling + * @return the previous sibling + */ + public final Node getPreviousSibling() { + Node prev = null; + Node next = parentNode.getFirstChild(); + for(;;) { + if (this == next) { + return prev; + } + prev = next; + next = next.nextSibling; + } + } + + /** + * Sets the nextSibling. + * + * @param nextSibling the nextSibling to set + */ + void setNextSibling(Node nextSibling) { + this.nextSibling = nextSibling; + } + + + /** + * Returns the parentNode. 
+ * + * @return the parentNode + */ + public final ParentNode getParentNode() { + return parentNode; + } + + /** + * Sets the parentNode. + * + * @param parentNode the parentNode to set + */ + void setParentNode(ParentNode parentNode) { + this.parentNode = parentNode; + } + + /** + * Return the node type. + * @return the node type + */ + public abstract NodeType getNodeType(); + + // Subclass-specific accessors that are hoisted here to + // avoid casting. + + /** + * Detach this node from its parent; does nothing when there is no parent. This node's own nextSibling reference is not reset. + */ + public void detach() { + if (parentNode != null) { + parentNode.removeChild(this); + parentNode = null; + } + } + + /** + * Returns the name. + * This base implementation always throws UnsupportedOperationException. + * @return the name + */ + public String getName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the publicIdentifier. + * This base implementation always throws UnsupportedOperationException. + * @return the publicIdentifier + */ + public String getPublicIdentifier() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the systemIdentifier. + * This base implementation always throws UnsupportedOperationException. + * @return the systemIdentifier + */ + public String getSystemIdentifier() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the attributes. + * This base implementation always throws UnsupportedOperationException. + * @return the attributes + */ + public Attributes getAttributes() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the localName. + * This base implementation always throws UnsupportedOperationException. + * @return the localName + */ + public String getLocalName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the prefixMappings. + * This base implementation always throws UnsupportedOperationException. + * @return the prefixMappings + */ + public List getPrefixMappings() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the qName. + * This base implementation always throws UnsupportedOperationException. + * @return the qName + */ + public String getQName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the uri. + * This base implementation always throws UnsupportedOperationException. + * @return the uri + */ + public String getUri() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the data.
+ * + * @return the data + */ + public String getData() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the target. + * + * @return the target + */ + public String getTarget() { + throw new UnsupportedOperationException(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java new file mode 100644 index 0000000000..c3c927f0dc --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +/** + * The node type. + * @version $Id$ + * @author hsivonen + */ +public enum NodeType { + /** + * A CDATA section. + */ + CDATA, + /** + * A run of characters. + */ + CHARACTERS, + /** + * A comment. 
+ */ + COMMENT, + /** + * A document. + */ + DOCUMENT, + /** + * A document fragment. + */ + DOCUMENT_FRAGMENT, + /** + * A DTD. + */ + DTD, + /** + * An element. + */ + ELEMENT, + /** + * An entity. + */ + ENTITY, + /** + * A run of ignorable whitespace. + */ + IGNORABLE_WHITESPACE, + /** + * A processing instruction. + */ + PROCESSING_INSTRUCTION, + /** + * A skipped entity. + */ + SKIPPED_ENTITY +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java new file mode 100644 index 0000000000..de63f3b57c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NullLexicalHandler.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.saxtree; + +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +/** + * A lexical handler that does nothing. + * @version $Id$ + * @author hsivonen + */ +final class NullLexicalHandler implements LexicalHandler { + + /** + * + * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int) + */ + public void comment(char[] arg0, int arg1, int arg2) throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endCDATA() + */ + public void endCDATA() throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endDTD() + */ + public void endDTD() throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String) + */ + public void endEntity(String arg0) throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startCDATA() + */ + public void startCDATA() throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String) + */ + public void startDTD(String arg0, String arg1, String arg2) throws SAXException { + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String) + */ + public void startEntity(String arg0) throws SAXException { + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java new file mode 100644 index 0000000000..6cc96003f1 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ParentNode.java @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; + +/** + * Common superclass for parent nodes. + * @version $Id$ + * @author hsivonen + */ +public abstract class ParentNode extends Node { + + /** + * The end locator. + */ + protected Locator endLocator; + + /** + * The first child. + */ + private Node firstChild = null; + + /** + * The last child (for efficiency). + */ + private Node lastChild = null; + + /** + * The constructor. + * @param locator the locator + */ + ParentNode(Locator locator) { + super(locator); + } + + /** + * Sets the endLocator. + * Stores a new LocatorImpl constructed from the argument rather than the argument object itself. + * @param endLocator the endLocator to set + */ + public void setEndLocator(Locator endLocator) { + this.endLocator = new LocatorImpl(endLocator); + } + + /** + * Copies the endLocator from another node. + * The same locator object is shared with the other node; no new object is created. + * @param another the other node + */ + public void copyEndLocator(ParentNode another) { + this.endLocator = another.endLocator; + } + + /** + * Returns the firstChild. + * + * @return the firstChild + */ + public final Node getFirstChild() { + return firstChild; + } + + /** + * Returns the lastChild.
+ * + * @return the lastChild + */ + public final Node getLastChild() { + return lastChild; + } + + /** + * Insert a new child before a pre-existing child and return the newly inserted child. The new child is detached from any current parent first, and the insertion point is found by scanning from the first child. NOTE(review): passing the same node as child and sibling detaches the reference node before the scan, so the scan cannot find it. + * @param child the new child + * @param sibling the existing child before which to insert (must be a child of this node) or null to append + * @return child + */ + public Node insertBefore(Node child, Node sibling) { + assert sibling == null || this == sibling.getParentNode(); + if (sibling == null) { + return appendChild(child); + } + child.detach(); + child.setParentNode(this); + if (firstChild == sibling) { + child.setNextSibling(sibling); + firstChild = child; + } else { + Node prev = firstChild; + Node next = firstChild.getNextSibling(); + while (next != sibling) { + prev = next; + next = next.getNextSibling(); + } + prev.setNextSibling(child); + child.setNextSibling(next); + } + return child; + } + + public Node insertBetween(Node child, Node prev, Node next) { + assert prev == null || this == prev.getParentNode(); + assert next == null || this == next.getParentNode(); + assert prev != null || next == firstChild; + assert next != null || prev == lastChild; + assert prev == null || next == null || prev.getNextSibling() == next; + if (next == null) { + return appendChild(child); + } + child.detach(); + child.setParentNode(this); + child.setNextSibling(next); + if (prev == null) { + firstChild = child; + } else { + prev.setNextSibling(child); + } + return child; + } + + /** + * Append a child to this node and return the child. + * The child is detached from any current parent first. NOTE(review): the child's nextSibling is not reset here, so a child that previously had a following sibling keeps that reference after being appended; confirm callers only append nodes whose nextSibling is null. + * @param child the child to append. + * @return child + */ + public Node appendChild(Node child) { + child.detach(); + child.setParentNode(this); + if (firstChild == null) { + firstChild = child; + } else { + lastChild.setNextSibling(child); + } + lastChild = child; + return child; + } + + /** + * Append the children of another node to this node, removing them from the other node. Does nothing when the other node has no children.
+ * @param parent the other node whose children to append to this one + */ + public void appendChildren(Node parent) { + Node child = parent.getFirstChild(); + if (child == null) { + return; + } + ParentNode another = (ParentNode) parent; + if (firstChild == null) { + firstChild = child; + } else { + lastChild.setNextSibling(child); + } + lastChild = another.lastChild; + do { + child.setParentNode(this); + } while ((child = child.getNextSibling()) != null); + another.firstChild = null; + another.lastChild = null; + } + + /** + * Remove a child from this node. Only this node's firstChild/lastChild and the preceding sibling's link are updated; the removed node's own parentNode and nextSibling fields are not modified here. + * @param node the child to remove (must be a child of this node) + */ + void removeChild(Node node) { + assert this == node.getParentNode(); + if (firstChild == node) { + firstChild = node.getNextSibling(); + if (lastChild == node) { + lastChild = null; + } + } else { + Node prev = firstChild; + Node next = firstChild.getNextSibling(); + while (next != node) { + prev = next; + next = next.getNextSibling(); + } + prev.setNextSibling(node.getNextSibling()); + if (lastChild == node) { + lastChild = prev; + } + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java new file mode 100644 index 0000000000..8ffaf4a2ce --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +/** + * A prefix mapping. + * @version $Id$ + * @author hsivonen + */ +public final class PrefixMapping { + /** + * The namespace prefix. + */ + private final String prefix; + /** + * The namespace URI. + */ + private final String uri; + /** + * Constructor. + * @param prefix the prefix + * @param uri the URI + */ + public PrefixMapping(final String prefix, final String uri) { + this.prefix = prefix; + this.uri = uri; + } + /** + * Returns the prefix. + * + * @return the prefix + */ + public String getPrefix() { + return prefix; + } + /** + * Returns the uri. 
+ * + * @return the uri + */ + public String getUri() { + return uri; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java new file mode 100644 index 0000000000..014e63821d --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A processing instruction. + * @version $Id$ + * @author hsivonen + */ +public final class ProcessingInstruction extends Node { + + /** + * PI target. + */ + private final String target; + + /** + * PI data. + */ + private final String data; + + /** + * Constructor. 
+ * @param locator the locator + * @param target PI target + * @param data PI data + */ + public ProcessingInstruction(Locator locator, String target, String data) { + super(locator); + this.target = target; + this.data = data; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.processingInstruction(target, data, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.PROCESSING_INSTRUCTION; + } + + /** + * Returns the data. + * + * @return the data + */ + public String getData() { + return data; + } + + /** + * Returns the target. + * + * @return the target + */ + public String getTarget() { + return target; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java new file mode 100644 index 0000000000..01ca614909 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A skipped entity. + * @version $Id$ + * @author hsivonen + */ +public final class SkippedEntity extends Node { + + /** + * The name. + */ + private final String name; + + /** + * Constructor. + * @param locator the locator + * @param name the name + */ + public SkippedEntity(Locator locator, String name) { + super(locator); + this.name = name; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.skippedEntity(name, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.SKIPPED_ENTITY; + } + + /** + * Returns the name. 
+ * + * @return the name + */ + public String getName() { + return name; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java new file mode 100644 index 0000000000..39fe236b36 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.LinkedList; +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +/** + * Builds a SAX Tree representation of a document or a fragment + * streamed as ContentHandler and + * LexicalHandler events. 
The start/end event matching + * is expected to adhere to the SAX API contract. Things will + * simply break if this is not the case. Fragments are expected to + * omit startDocument() and endDocument() + * calls. + * + * @version $Id$ + * @author hsivonen + */ +public class TreeBuilder implements ContentHandler, LexicalHandler { + + /** + * The locator. + */ + private Locator locator; + + /** + * The current node. + */ + private ParentNode current; + + /** + * Whether to retain attribute objects. + */ + private final boolean retainAttributes; + + /** + * The prefix mappings for the next element to be inserted. + */ + private List prefixMappings; + + /** + * Constructs a reusable TreeBuilder that builds + * Documents and copies attributes. + */ + public TreeBuilder() { + this(false, false); + } + + /** + * The constructor. The instance will be reusable if building a full + * document and not reusable if building a fragment. + * + * @param fragment whether this TreeBuilder should build + * a DocumentFragment instead of a Document. + * @param retainAttributes whether instances of the Attributes + * interface passed to startElement should be retained + * (the alternative is copying).
+ */ + public TreeBuilder(boolean fragment, boolean retainAttributes) { + if (fragment) { + current = new DocumentFragment(); + } + this.retainAttributes = retainAttributes; + } + + /** + * Appends a Characters node to the current node. + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + public void characters(char[] ch, int start, int length) throws SAXException { + current.appendChild(new Characters(locator, ch, start, length)); + } + + /** + * Records the end locator on the current node; the current node is left unchanged. + * @see org.xml.sax.ContentHandler#endDocument() + */ + public void endDocument() throws SAXException { + current.setEndLocator(locator); + } + + /** + * Records the end locator on the current node and makes its parent the current node. + * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) + */ + public void endElement(String uri, String localName, String qName) throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * Does nothing. + * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) + */ + public void endPrefixMapping(String prefix) throws SAXException { + } + + /** + * Appends an IgnorableWhitespace node to the current node. + * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) + */ + public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { + current.appendChild(new IgnorableWhitespace(locator, ch, start, length)); + } + + /** + * Appends a ProcessingInstruction node to the current node. + * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) + */ + public void processingInstruction(String target, String data) throws SAXException { + current.appendChild(new ProcessingInstruction(locator, target, data)); + } + + /** + * Stores the locator that is passed to the constructors of subsequently created nodes. + * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator) + */ + public void setDocumentLocator(Locator locator) { + this.locator = locator; + } + + public void skippedEntity(String name) throws SAXException { + current.appendChild(new SkippedEntity(locator, name)); + } + + /** + * + * @see org.xml.sax.ContentHandler#startDocument() + */ + public void startDocument() throws SAXException { + current = new
Document(locator); + } + + /** + * + * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) + */ + public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { + current = (ParentNode) current.appendChild(new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings)); + prefixMappings = null; + } + + /** + * + * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) + */ + public void startPrefixMapping(String prefix, String uri) throws SAXException { + if (prefixMappings == null) { + prefixMappings = new LinkedList(); + } + prefixMappings.add(new PrefixMapping(prefix, uri)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int) + */ + public void comment(char[] ch, int start, int length) throws SAXException { + current.appendChild(new Comment(locator, ch, start, length)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endCDATA() + */ + public void endCDATA() throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endDTD() + */ + public void endDTD() throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String) + */ + public void endEntity(String name) throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startCDATA() + */ + public void startCDATA() throws SAXException { + current = (ParentNode) current.appendChild(new CDATA(locator)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String) + */ + public void startDTD(String name, String publicId, String systemId) throws SAXException { + current = 
(ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String) + */ + public void startEntity(String name) throws SAXException { + current = (ParentNode) current.appendChild(new Entity(locator, name)); + } + + /** + * Returns the root (Document if building a full document or + * DocumentFragment if building a fragment.). + * + * @return the root + */ + public ParentNode getRoot() { + return current; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java new file mode 100644 index 0000000000..a9d92deb0c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.saxtree; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +/** + * A tree visitor that replays a tree as SAX events. + * @version $Id$ + * @author hsivonen + */ +public final class TreeParser implements Locator { + + /** + * The content handler. + */ + private final ContentHandler contentHandler; + + /** + * The lexical handler. + */ + private final LexicalHandler lexicalHandler; + + /** + * The current locator. + */ + private Locator locatorDelegate; + + /** + * The constructor. + * + * @param contentHandler + * must not be null + * @param lexicalHandler + * may be null + */ + public TreeParser(final ContentHandler contentHandler, + final LexicalHandler lexicalHandler) { + if (contentHandler == null) { + throw new IllegalArgumentException("contentHandler was null."); + } + this.contentHandler = contentHandler; + if (lexicalHandler == null) { + this.lexicalHandler = new NullLexicalHandler(); + } else { + this.lexicalHandler = lexicalHandler; + } + } + + /** + * Causes SAX events for the tree rooted at the argument to be emitted. + * startDocument() and endDocument() are only + * emitted for a Document node. 
+ * + * @param node + * the root + * @throws SAXException + */ + public void parse(Node node) throws SAXException { + contentHandler.setDocumentLocator(this); + Node current = node; + Node next; + for (;;) { + current.visit(this); + if ((next = current.getFirstChild()) != null) { + current = next; + continue; + } + for (;;) { + current.revisit(this); + if (current == node) { + return; + } + if ((next = current.getNextSibling()) != null) { + current = next; + break; + } + current = current.getParentNode(); + } + } + } + + /** + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + void characters(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.characters(ch, start, length); + } + + /** + * @see org.xml.sax.ContentHandler#endDocument() + */ + void endDocument(Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.endDocument(); + } + + /** + * @see org.xml.sax.ContentHandler#endElement(java.lang.String, + * java.lang.String, java.lang.String) + */ + void endElement(String uri, String localName, String qName, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.endElement(uri, localName, qName); + } + + /** + * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) + */ + void endPrefixMapping(String prefix, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.endPrefixMapping(prefix); + } + + /** + * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) + */ + void ignorableWhitespace(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.ignorableWhitespace(ch, start, length); + } + + /** + * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, + * java.lang.String) + */ + void processingInstruction(String target, String data, Locator locator) + 
throws SAXException { + this.locatorDelegate = locator; + contentHandler.processingInstruction(target, data); + } + + /** + * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) + */ + void skippedEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.skippedEntity(name); + } + + /** + * @see org.xml.sax.ContentHandler#startDocument() + */ + void startDocument(Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.startDocument(); + } + + /** + * @see org.xml.sax.ContentHandler#startElement(java.lang.String, + * java.lang.String, java.lang.String, org.xml.sax.Attributes) + */ + void startElement(String uri, String localName, String qName, + Attributes atts, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.startElement(uri, localName, qName, atts); + } + + /** + * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, + * java.lang.String) + */ + void startPrefixMapping(String prefix, String uri, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.startPrefixMapping(prefix, uri); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int) + */ + void comment(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.comment(ch, start, length); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endCDATA() + */ + void endCDATA(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.endCDATA(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endDTD() + */ + void endDTD(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.endDTD(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String) + */ + void endEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + 
lexicalHandler.endEntity(name); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startCDATA() + */ + void startCDATA(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startCDATA(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, + * java.lang.String, java.lang.String) + */ + void startDTD(String name, String publicId, String systemId, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startDTD(name, publicId, systemId); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String) + */ + void startEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startEntity(name); + } + + /** + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + if (locatorDelegate == null) { + return -1; + } else { + return locatorDelegate.getColumnNumber(); + } + } + + /** + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + if (locatorDelegate == null) { + return -1; + } else { + return locatorDelegate.getLineNumber(); + } + } + + /** + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + if (locatorDelegate == null) { + return null; + } else { + + return locatorDelegate.getPublicId(); + } + } + + /** + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + if (locatorDelegate == null) { + return null; + } else { + return locatorDelegate.getSystemId(); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html new file mode 100644 index 0000000000..0c34dad819 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html @@ -0,0 +1,46 @@ + + +Package Overview + + + +

This package provides SAX Tree: a tree model optimized for creation from SAX +events and replay as SAX events.

+

Design Principles

+
    +
  1. Preserve information exposed through ContentHandler, +LexicalHandler and Locator. +
  2. Creation from SAX events or as part of the parse of a conforming +HTML5 document should be fast.
  3. +
  4. Emitting SAX events based on the tree should be fast.
  5. +
  6. Mutations should be possible but should not make the above +"fast" cases slower.
  7. +
  8. Concurrent reads should work without locking when there are no +concurrent mutations.
  9. +
  10. The user of the API has the responsibility of using the API properly: +for the sake of performance, the model does not check if it is being +used properly. Improper use may, therefore, put the model in an +inconsistent state.
  11. +
+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java new file mode 100644 index 0000000000..f323f1e31d --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/java/io/IOException.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package java.io; + +public class IOException extends Exception { + + public IOException() { + } + + public IOException(String arg0) { + super(arg0); + } + + public IOException(Throwable arg0) { + super(arg0); + } + + public IOException(String arg0, Throwable arg1) { + super(arg0, arg1); + } + +} diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java new file mode 100644 index 0000000000..b25432d45d --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java @@ -0,0 +1,257 @@ +// Attributes.java - attribute list with Namespace support +// http://www.saxproject.org +// Written by David Megginson +// NO WARRANTY! This class is in the public domain. +// $Id: Attributes.java,v 1.13 2004/03/18 12:28:05 dmegginson Exp $ + +package org.xml.sax; + + +/** + * Interface for a list of XML attributes. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

This interface allows access to a list of attributes in + * three different ways:

+ * + *
    + *
  1. by attribute index;
  2. + *
  3. by Namespace-qualified name; or
  4. + *
  5. by qualified (prefixed) name.
  6. + *
+ * + *

The list will not contain attributes that were declared + * #IMPLIED but not specified in the start tag. It will also not + * contain attributes used as Namespace declarations (xmlns*) unless + * the http://xml.org/sax/features/namespace-prefixes + * feature is set to true (it is false by + * default). + * Because SAX2 conforms to the original "Namespaces in XML" + * recommendation, it normally does not + * give namespace declaration attributes a namespace URI. + *

+ * + *

Some SAX2 parsers may support using an optional feature flag + * (http://xml.org/sax/features/xmlns-uris) to request + * that those attributes be given URIs, conforming to a later + * backwards-incompatible revision of that recommendation. (The + * attribute's "local name" will be the prefix, or "xmlns" when + * defining a default element namespace.) For portability, handler + * code should always resolve that conflict, rather than requiring + * parsers that can change the setting of that feature flag.

+ * + *

If the namespace-prefixes feature (see above) is + * false, access by qualified name may not be available; if + * the http://xml.org/sax/features/namespaces feature is + * false, access by Namespace-qualified names may not be + * available.

+ * + *

This interface replaces the now-deprecated SAX1 {@link + * org.xml.sax.AttributeList AttributeList} interface, which does not + * contain Namespace support. In addition to Namespace support, it + * adds the getIndex methods (below).

+ * + *

The order of attributes in the list is unspecified, and will + * vary from implementation to implementation.

+ * + * @since SAX 2.0 + * @author David Megginson + * @version 2.0.1 (sax2r2) + * @see org.xml.sax.helpers.AttributesImpl + * @see org.xml.sax.ext.DeclHandler#attributeDecl + */ +public interface Attributes +{ + + + //////////////////////////////////////////////////////////////////// + // Indexed access. + //////////////////////////////////////////////////////////////////// + + + /** + * Return the number of attributes in the list. + * + *

Once you know the number of attributes, you can iterate + * through the list.

+ * + * @return The number of attributes in the list. + * @see #getURI(int) + * @see #getLocalName(int) + * @see #getQName(int) + * @see #getType(int) + * @see #getValue(int) + */ + public abstract int getLength (); + + + /** + * Look up an attribute's Namespace URI by index. + * + * @param index The attribute index (zero-based). + * @return The Namespace URI, or the empty string if none + * is available, or null if the index is out of + * range. + * @see #getLength + */ + public abstract String getURI (int index); + + + /** + * Look up an attribute's local name by index. + * + * @param index The attribute index (zero-based). + * @return The local name, or the empty string if Namespace + * processing is not being performed, or null + * if the index is out of range. + * @see #getLength + */ + public abstract String getLocalName (int index); + + + /** + * Look up an attribute's XML qualified (prefixed) name by index. + * + * @param index The attribute index (zero-based). + * @return The XML qualified name, or the empty string + * if none is available, or null if the index + * is out of range. + * @see #getLength + */ + public abstract String getQName (int index); + + + /** + * Look up an attribute's type by index. + * + *

The attribute type is one of the strings "CDATA", "ID", + * "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", + * or "NOTATION" (always in upper case).

+ * + *

If the parser has not read a declaration for the attribute, + * or if the parser does not report attribute types, then it must + * return the value "CDATA" as stated in the XML 1.0 Recommendation + * (clause 3.3.3, "Attribute-Value Normalization").

+ * + *

For an enumerated attribute that is not a notation, the + * parser will report the type as "NMTOKEN".

+ * + * @param index The attribute index (zero-based). + * @return The attribute's type as a string, or null if the + * index is out of range. + * @see #getLength + */ + public abstract String getType (int index); + + + /** + * Look up an attribute's value by index. + * + *

If the attribute value is a list of tokens (IDREFS, + * ENTITIES, or NMTOKENS), the tokens will be concatenated + * into a single string with each token separated by a + * single space.

+ * + * @param index The attribute index (zero-based). + * @return The attribute's value as a string, or null if the + * index is out of range. + * @see #getLength + */ + public abstract String getValue (int index); + + + + //////////////////////////////////////////////////////////////////// + // Name-based query. + //////////////////////////////////////////////////////////////////// + + + /** + * Look up the index of an attribute by Namespace name. + * + * @param uri The Namespace URI, or the empty string if + * the name has no Namespace URI. + * @param localName The attribute's local name. + * @return The index of the attribute, or -1 if it does not + * appear in the list. + */ + public int getIndex (String uri, String localName); + + + /** + * Look up the index of an attribute by XML qualified (prefixed) name. + * + * @param qName The qualified (prefixed) name. + * @return The index of the attribute, or -1 if it does not + * appear in the list. + */ + public int getIndex (String qName); + + + /** + * Look up an attribute's type by Namespace name. + * + *

See {@link #getType(int) getType(int)} for a description + * of the possible types.

+ * + * @param uri The Namespace URI, or the empty String if the + * name has no Namespace URI. + * @param localName The local name of the attribute. + * @return The attribute type as a string, or null if the + * attribute is not in the list or if Namespace + * processing is not being performed. + */ + public abstract String getType (String uri, String localName); + + + /** + * Look up an attribute's type by XML qualified (prefixed) name. + * + *

See {@link #getType(int) getType(int)} for a description + * of the possible types.

+ * + * @param qName The XML qualified name. + * @return The attribute type as a string, or null if the + * attribute is not in the list or if qualified names + * are not available. + */ + public abstract String getType (String qName); + + + /** + * Look up an attribute's value by Namespace name. + * + *

See {@link #getValue(int) getValue(int)} for a description + * of the possible values.

+ * + * @param uri The Namespace URI, or the empty String if the + * name has no Namespace URI. + * @param localName The local name of the attribute. + * @return The attribute value as a string, or null if the + * attribute is not in the list. + */ + public abstract String getValue (String uri, String localName); + + + /** + * Look up an attribute's value by XML qualified (prefixed) name. + * + *

See {@link #getValue(int) getValue(int)} for a description + * of the possible values.

+ * + * @param qName The XML qualified name. + * @return The attribute value as a string, or null if the + * attribute is not in the list or if qualified names + * are not available. + */ + public abstract String getValue (String qName); + +} + +// end of Attributes.java diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java new file mode 100644 index 0000000000..37d2501438 --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java @@ -0,0 +1,139 @@ +// SAX error handler. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: ErrorHandler.java,v 1.10 2004/03/08 13:01:00 dmegginson Exp $ + +package org.xml.sax; + + +/** + * Basic interface for SAX error handlers. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

If a SAX application needs to implement customized error + * handling, it must implement this interface and then register an + * instance with the XML reader using the + * {@link org.xml.sax.XMLReader#setErrorHandler setErrorHandler} + * method. The parser will then report all errors and warnings + * through this interface.

+ * + *

WARNING: If an application does not + * register an ErrorHandler, XML parsing errors will go unreported, + * except that SAXParseExceptions will be thrown for fatal errors. + * In order to detect validity errors, an ErrorHandler that does something + * with {@link #error error()} calls must be registered.

+ * + *

For XML processing errors, a SAX driver must use this interface + * in preference to throwing an exception: it is up to the application + * to decide whether to throw an exception for different types of + * errors and warnings. Note, however, that there is no requirement that + * the parser continue to report additional errors after a call to + * {@link #fatalError fatalError}. In other words, a SAX driver class + * may throw an exception after reporting any fatalError. + * Also parsers may throw appropriate exceptions for non-XML errors. + * For example, {@link XMLReader#parse XMLReader.parse()} would throw + * an IOException for errors accessing entities or the document.

+ * + * @since SAX 1.0 + * @author David Megginson + * @version 2.0.1+ (sax2r3pre1) + * @see org.xml.sax.XMLReader#setErrorHandler + * @see org.xml.sax.SAXParseException + */ +public interface ErrorHandler { + + + /** + * Receive notification of a warning. + * + *

SAX parsers will use this method to report conditions that + * are not errors or fatal errors as defined by the XML + * recommendation. The default behaviour is to take no + * action.

+ * + *

The SAX parser must continue to provide normal parsing events + * after invoking this method: it should still be possible for the + * application to process the document through to the end.

+ * + *

Filters may use this method to report other, non-XML warnings + * as well.

+ * + * @param exception The warning information encapsulated in a + * SAX parse exception. + * @exception org.xml.sax.SAXException Any SAX exception, possibly + * wrapping another exception. + * @see org.xml.sax.SAXParseException + */ + public abstract void warning (SAXParseException exception) + throws SAXException; + + + /** + * Receive notification of a recoverable error. + * + *

This corresponds to the definition of "error" in section 1.2 + * of the W3C XML 1.0 Recommendation. For example, a validating + * parser would use this callback to report the violation of a + * validity constraint. The default behaviour is to take no + * action.

+ * + *

The SAX parser must continue to provide normal parsing + * events after invoking this method: it should still be possible + * for the application to process the document through to the end. + * If the application cannot do so, then the parser should report + * a fatal error even if the XML recommendation does not require + * it to do so.

+ * + *

Filters may use this method to report other, non-XML errors + * as well.

+ * + * @param exception The error information encapsulated in a + * SAX parse exception. + * @exception org.xml.sax.SAXException Any SAX exception, possibly + * wrapping another exception. + * @see org.xml.sax.SAXParseException + */ + public abstract void error (SAXParseException exception) + throws SAXException; + + + /** + * Receive notification of a non-recoverable error. + * + *

There is an apparent contradiction between the + * documentation for this method and the documentation for {@link + * org.xml.sax.ContentHandler#endDocument}. Until this ambiguity + * is resolved in a future major release, clients should make no + * assumptions about whether endDocument() will or will not be + * invoked when the parser has reported a fatalError() or thrown + * an exception.

+ * + *

This corresponds to the definition of "fatal error" in + * section 1.2 of the W3C XML 1.0 Recommendation. For example, a + * parser would use this callback to report the violation of a + * well-formedness constraint.

+ * + *

The application must assume that the document is unusable + * after the parser has invoked this method, and should continue + * (if at all) only for the sake of collecting additional error + * messages: in fact, SAX parsers are free to stop reporting any + * other events once this method has been invoked.

+ * + * @param exception The error information encapsulated in a + * SAX parse exception. + * @exception org.xml.sax.SAXException Any SAX exception, possibly + * wrapping another exception. + * @see org.xml.sax.SAXParseException + */ + public abstract void fatalError (SAXParseException exception) + throws SAXException; + +} + +// end of ErrorHandler.java diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java new file mode 100644 index 0000000000..f8f3484c10 --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java @@ -0,0 +1,136 @@ +// SAX locator interface for document events. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: Locator.java,v 1.8 2002/01/30 21:13:47 dbrownell Exp $ + +package org.xml.sax; + + +/** + * Interface for associating a SAX event with a document location. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

If a SAX parser provides location information to the SAX + * application, it does so by implementing this interface and then + * passing an instance to the application using the content + * handler's {@link org.xml.sax.ContentHandler#setDocumentLocator + * setDocumentLocator} method. The application can use the + * object to obtain the location of any other SAX event + * in the XML source document.

+ * + *

Note that the results returned by the object will be valid only + * during the scope of each callback method: the application + * will receive unpredictable results if it attempts to use the + * locator at any other time, or after parsing completes.

+ * + *

SAX parsers are not required to supply a locator, but they are + * very strongly encouraged to do so. If the parser supplies a + * locator, it must do so before reporting any other document events. + * If no locator has been set by the time the application receives + * the {@link org.xml.sax.ContentHandler#startDocument startDocument} + * event, the application should assume that a locator is not + * available.

+ * + * @since SAX 1.0 + * @author David Megginson + * @version 2.0.1 (sax2r2) + * @see org.xml.sax.ContentHandler#setDocumentLocator + */ +public interface Locator { + + + /** + * Return the public identifier for the current document event. + * + *

The return value is the public identifier of the document + * entity or of the external parsed entity in which the markup + * triggering the event appears.

+ * + * @return A string containing the public identifier, or + * null if none is available. + * @see #getSystemId + */ + public abstract String getPublicId (); + + + /** + * Return the system identifier for the current document event. + * + *

The return value is the system identifier of the document + * entity or of the external parsed entity in which the markup + * triggering the event appears.

+ * + *

If the system identifier is a URL, the parser must resolve it + * fully before passing it to the application. For example, a file + * name must always be provided as a file:... URL, and other + * kinds of relative URI are also resolved against their bases.

+ * + * @return A string containing the system identifier, or null + * if none is available. + * @see #getPublicId + */ + public abstract String getSystemId (); + + + /** + * Return the line number where the current document event ends. + * Lines are delimited by line ends, which are defined in + * the XML specification. + * + *

Warning: The return value from the method + * is intended only as an approximation for the sake of diagnostics; + * it is not intended to provide sufficient information + * to edit the character content of the original XML document. + * In some cases, these "line" numbers match what would be displayed + * as columns, and in others they may not match the source text + * due to internal entity expansion.

+ * + *

The return value is an approximation of the line number + * in the document entity or external parsed entity where the + * markup triggering the event appears.

+ * + *

If possible, the SAX driver should provide the line position + * of the first character after the text associated with the document + * event. The first line is line 1.

+ * + * @return The line number, or -1 if none is available. + * @see #getColumnNumber + */ + public abstract int getLineNumber (); + + + /** + * Return the column number where the current document event ends. + * This is one-based number of Java char values since + * the last line end. + * + *

Warning: The return value from the method + * is intended only as an approximation for the sake of diagnostics; + * it is not intended to provide sufficient information + * to edit the character content of the original XML document. + * For example, when lines contain combining character sequences, wide + * characters, surrogate pairs, or bi-directional text, the value may + * not correspond to the column in a text editor's display.

+ * + *

The return value is an approximation of the column number + * in the document entity or external parsed entity where the + * markup triggering the event appears.

+ * + *

If possible, the SAX driver should provide the column position + * of the first character after the text associated with the document + * event. The first column in each line is column 1.

+ * + * @return The column number, or -1 if none is available. + * @see #getLineNumber + */ + public abstract int getColumnNumber (); + +} + +// end of Locator.java diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java new file mode 100644 index 0000000000..256719cef4 --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java @@ -0,0 +1,153 @@ +// SAX exception class. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: SAXException.java,v 1.7 2002/01/30 21:13:48 dbrownell Exp $ + +package org.xml.sax; + +/** + * Encapsulate a general SAX error or warning. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

This class can contain basic error or warning information from + * either the XML parser or the application: a parser writer or + * application writer can subclass it to provide additional + * functionality. SAX handlers may throw this exception or + * any exception subclassed from it.

+ * + *

If the application needs to pass through other types of + * exceptions, it must wrap those exceptions in a SAXException + * or an exception derived from a SAXException.

+ * + *

If the parser or application needs to include information about a + * specific location in an XML document, it should use the + * {@link org.xml.sax.SAXParseException SAXParseException} subclass.

+ * + * @since SAX 1.0 + * @author David Megginson + * @version 2.0.1 (sax2r2) + * @see org.xml.sax.SAXParseException + */ +public class SAXException extends Exception { + + + /** + * Create a new SAXException. + */ + public SAXException () + { + super(); + this.exception = null; + } + + + /** + * Create a new SAXException. + * + * @param message The error or warning message. + */ + public SAXException (String message) { + super(message); + this.exception = null; + } + + + /** + * Create a new SAXException wrapping an existing exception. + * + *

The existing exception will be embedded in the new + * one, and its message will become the default message for + * the SAXException.

+ * + * @param e The exception to be wrapped in a SAXException. + */ + public SAXException (Exception e) + { + super(); + this.exception = e; + } + + + /** + * Create a new SAXException from an existing exception. + * + *

The existing exception will be embedded in the new + * one, but the new exception will have its own message.

+ * + * @param message The detail message. + * @param e The exception to be wrapped in a SAXException. + */ + public SAXException (String message, Exception e) + { + super(message); + this.exception = e; + } + + + /** + * Return a detail message for this exception. + * + *

If there is an embedded exception, and if the SAXException + * has no detail message of its own, this method will return + * the detail message from the embedded exception.

+ * + * @return The error or warning message. + */ + public String getMessage () + { + String message = super.getMessage(); + + if (message == null && exception != null) { + return exception.getMessage(); + } else { + return message; + } + } + + + /** + * Return the embedded exception, if any. + * + * @return The embedded exception, or null if there is none. + */ + public Exception getException () + { + return exception; + } + + + /** + * Override toString to pick up any embedded exception. + * + * @return A string representation of this exception. + */ + public String toString () + { + if (exception != null) { + return exception.toString(); + } else { + return super.toString(); + } + } + + + + ////////////////////////////////////////////////////////////////////// + // Internal state. + ////////////////////////////////////////////////////////////////////// + + + /** + * @serial The embedded exception if tunnelling, or null. + */ + private Exception exception; + +} + +// end of SAXException.java diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java new file mode 100644 index 0000000000..1df5e14231 --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java @@ -0,0 +1,269 @@ +// SAX exception class. +// http://www.saxproject.org +// No warranty; no copyright -- use this as you will. +// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $ + +package org.xml.sax; + +/** + * Encapsulate an XML parse error or warning. + * + *
+ * This module, both source code and documentation, is in the + * Public Domain, and comes with NO WARRANTY. + * See http://www.saxproject.org + * for further information. + *
+ * + *

This exception may include information for locating the error + * in the original XML document, as if it came from a {@link Locator} + * object. Note that although the application + * will receive a SAXParseException as the argument to the handlers + * in the {@link org.xml.sax.ErrorHandler ErrorHandler} interface, + * the application is not actually required to throw the exception; + * instead, it can simply read the information in it and take a + * different action.

+ * + *

Since this exception is a subclass of {@link org.xml.sax.SAXException + * SAXException}, it inherits the ability to wrap another exception.

+ * + * @since SAX 1.0 + * @author David Megginson + * @version 2.0.1 (sax2r2) + * @see org.xml.sax.SAXException + * @see org.xml.sax.Locator + * @see org.xml.sax.ErrorHandler + */ +public class SAXParseException extends SAXException { + + + ////////////////////////////////////////////////////////////////////// + // Constructors. + ////////////////////////////////////////////////////////////////////// + + + /** + * Create a new SAXParseException from a message and a Locator. + * + *

This constructor is especially useful when an application is + * creating its own exception from within a {@link org.xml.sax.ContentHandler + * ContentHandler} callback.

+ * + * @param message The error or warning message. + * @param locator The locator object for the error or warning (may be + * null). + * @see org.xml.sax.Locator + */ + public SAXParseException (String message, Locator locator) { + super(message); + if (locator != null) { + init(locator.getPublicId(), locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()); + } else { + init(null, null, -1, -1); + } + } + + + /** + * Wrap an existing exception in a SAXParseException. + * + *

This constructor is especially useful when an application is + * creating its own exception from within a {@link org.xml.sax.ContentHandler + * ContentHandler} callback, and needs to wrap an existing exception that is not a + * subclass of {@link org.xml.sax.SAXException SAXException}.

+ * + * @param message The error or warning message, or null to + * use the message from the embedded exception. + * @param locator The locator object for the error or warning (may be + * null). + * @param e Any exception. + * @see org.xml.sax.Locator + */ + public SAXParseException (String message, Locator locator, + Exception e) { + super(message, e); + if (locator != null) { + init(locator.getPublicId(), locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()); + } else { + init(null, null, -1, -1); + } + } + + + /** + * Create a new SAXParseException. + * + *

This constructor is most useful for parser writers.

+ * + *

All parameters except the message are as if + * they were provided by a {@link Locator}. For example, if the + * system identifier is a URL (including relative filename), the + * caller must resolve it fully before creating the exception.

+ * + * + * @param message The error or warning message. + * @param publicId The public identifier of the entity that generated + * the error or warning. + * @param systemId The system identifier of the entity that generated + * the error or warning. + * @param lineNumber The line number of the end of the text that + * caused the error or warning. + * @param columnNumber The column number of the end of the text that + * caused the error or warning. + */ + public SAXParseException (String message, String publicId, String systemId, + int lineNumber, int columnNumber) + { + super(message); + init(publicId, systemId, lineNumber, columnNumber); + } + + + /** + * Create a new SAXParseException with an embedded exception. + * + *

This constructor is most useful for parser writers who + * need to wrap an exception that is not a subclass of + * {@link org.xml.sax.SAXException SAXException}.

+ * + *

All parameters except the message and exception are as if + * they were provided by a {@link Locator}. For example, if the + * system identifier is a URL (including relative filename), the + * caller must resolve it fully before creating the exception.

+ * + * @param message The error or warning message, or null to use + * the message from the embedded exception. + * @param publicId The public identifier of the entity that generated + * the error or warning. + * @param systemId The system identifier of the entity that generated + * the error or warning. + * @param lineNumber The line number of the end of the text that + * caused the error or warning. + * @param columnNumber The column number of the end of the text that + * caused the error or warning. + * @param e Another exception to embed in this one. + */ + public SAXParseException (String message, String publicId, String systemId, + int lineNumber, int columnNumber, Exception e) + { + super(message, e); + init(publicId, systemId, lineNumber, columnNumber); + } + + + /** + * Internal initialization method. + * + * @param publicId The public identifier of the entity which generated the exception, + * or null. + * @param systemId The system identifier of the entity which generated the exception, + * or null. + * @param lineNumber The line number of the error, or -1. + * @param columnNumber The column number of the error, or -1. + */ + private void init (String publicId, String systemId, + int lineNumber, int columnNumber) + { + this.publicId = publicId; + this.systemId = systemId; + this.lineNumber = lineNumber; + this.columnNumber = columnNumber; + } + + + /** + * Get the public identifier of the entity where the exception occurred. + * + * @return A string containing the public identifier, or null + * if none is available. + * @see org.xml.sax.Locator#getPublicId + */ + public String getPublicId () + { + return this.publicId; + } + + + /** + * Get the system identifier of the entity where the exception occurred. + * + *

If the system identifier is a URL, it will have been resolved + * fully.

+ * + * @return A string containing the system identifier, or null + * if none is available. + * @see org.xml.sax.Locator#getSystemId + */ + public String getSystemId () + { + return this.systemId; + } + + + /** + * The line number of the end of the text where the exception occurred. + * + *

The first line is line 1.

+ * + * @return An integer representing the line number, or -1 + * if none is available. + * @see org.xml.sax.Locator#getLineNumber + */ + public int getLineNumber () + { + return this.lineNumber; + } + + + /** + * The column number of the end of the text where the exception occurred. + * + *

The first column in a line is position 1.

+ * + * @return An integer representing the column number, or -1 + * if none is available. + * @see org.xml.sax.Locator#getColumnNumber + */ + public int getColumnNumber () + { + return this.columnNumber; + } + + + ////////////////////////////////////////////////////////////////////// + // Internal state. + ////////////////////////////////////////////////////////////////////// + + + /** + * @serial The public identifier, or null. + * @see #getPublicId + */ + private String publicId; + + + /** + * @serial The system identifier, or null. + * @see #getSystemId + */ + private String systemId; + + + /** + * @serial The line number, or -1. + * @see #getLineNumber + */ + private int lineNumber; + + + /** + * @serial The column number, or -1. + * @see #getColumnNumber + */ + private int columnNumber; + +} + +// end of SAXParseException.java diff --git a/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html new file mode 100644 index 0000000000..dd7030e24b --- /dev/null +++ b/parser/html/java/htmlparser/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html @@ -0,0 +1,297 @@ + + + + +

This package provides the core SAX APIs. +Some SAX1 APIs are deprecated to encourage integration of +namespace-awareness into designs of new applications +and into maintenance of existing infrastructure.

+ +

See http://www.saxproject.org +for more information about SAX.

+ + +

SAX2 Standard Feature Flags

+ +

One of the essential characteristics of SAX2 is that it added +feature flags which can be used to examine and perhaps modify +parser modes, in particular modes such as validation. +Since features are identified by (absolute) URIs, anyone +can define such features. +Currently defined standard feature URIs have the prefix +http://xml.org/sax/features/ before an identifier such as +validation. Turn features on or off using +setFeature. Those standard identifiers are:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Feature ID | Access | Default | Description
external-general-entities | read/write | unspecified | Reports whether this parser processes external + general entities; always true if validating. +
external-parameter-entities | read/write | unspecified | Reports whether this parser processes external + parameter entities; always true if validating. +
is-standalone | (parsing) read-only, (not parsing) none | not applicable | May be examined only during a parse, after the + startDocument() callback has been completed; read-only. + The value is true if the document specified standalone="yes" in + its XML declaration, and otherwise is false. +
lexical-handler/parameter-entities | read/write | unspecified | A value of "true" indicates that the LexicalHandler will report + the beginning and end of parameter entities. +
namespaces | read/write | true | A value of "true" indicates namespace URIs and unprefixed local names + for element and attribute names will be available. +
namespace-prefixes | read/write | false | A value of "true" indicates that XML qualified names (with prefixes) and + attributes (including xmlns* attributes) will be available. +
resolve-dtd-uris | read/write | true | A value of "true" indicates that system IDs in declarations will + be absolutized (relative to their base URIs) before reporting. + (That is the default behavior for all SAX2 XML parsers.) + A value of "false" indicates those IDs will not be absolutized; + parsers will provide the base URI from + Locator.getSystemId(). + This applies to system IDs passed in
    +
  • DTDHandler.notationDecl(), +
  • DTDHandler.unparsedEntityDecl(), and +
  • DeclHandler.externalEntityDecl(). +
+ It does not apply to EntityResolver.resolveEntity(), + which is not used to report declarations, or to + LexicalHandler.startDTD(), which already provides + the non-absolutized URI. +
string-interning | read/write | unspecified | Has a value of "true" if all XML names (for elements, prefixes, + attributes, entities, notations, and local names), + as well as Namespace URIs, will have been interned + using java.lang.String.intern. This supports fast + testing of equality/inequality against string constants, + rather than forcing slower calls to String.equals(). +
unicode-normalization-checking | read/write | false | Controls whether the parser reports Unicode normalization + errors as described in section 2.13 and Appendix B of the + XML 1.1 Recommendation. If true, Unicode normalization + errors are reported using the ErrorHandler.error() callback. + Such errors are not fatal in themselves (though, obviously, + other Unicode-related encoding errors may be). +
use-attributes2 | read-only | not applicable | Returns "true" if the Attributes objects passed by + this parser in ContentHandler.startElement() + implement the org.xml.sax.ext.Attributes2 interface. + That interface exposes additional DTD-related information, + such as whether the attribute was specified in the + source text rather than defaulted. +
use-locator2 | read-only | not applicable | Returns "true" if the Locator objects passed by + this parser in ContentHandler.setDocumentLocator() + implement the org.xml.sax.ext.Locator2 interface. + That interface exposes additional entity information, + such as the character encoding and XML version used. +
use-entity-resolver2 | read/write | true | Returns "true" if, when setEntityResolver is given + an object implementing the org.xml.sax.ext.EntityResolver2 interface, + those new methods will be used. + Returns "false" to indicate that those methods will not be used. +
validation | read/write | unspecified | Controls whether the parser is reporting all validity + errors; if true, all external entities will be read. +
xmlns-uris | read/write | false | Controls whether, when the namespace-prefixes feature + is set, the parser treats namespace declaration attributes as + being in the http://www.w3.org/2000/xmlns/ namespace. + By default, SAX2 conforms to the original "Namespaces in XML" + Recommendation, which explicitly states that such attributes are + not in any namespace. + Setting this optional flag to "true" makes the SAX2 events conform to + a later backwards-incompatible revision of that recommendation, + placing those attributes in a namespace. +
xml-1.1 | read-only | not applicable | Returns "true" if the parser supports both XML 1.1 and XML 1.0. + Returns "false" if the parser supports only XML 1.0. +
+ +

Support for the default values of the +namespaces and namespace-prefixes +feature flags is required. +Support for any other feature flags is entirely optional. +

+ +

For default values not specified by SAX2, +each XMLReader implementation specifies its default, +or may choose not to expose the feature flag. +Unless otherwise specified here, +implementations may support changing current values +of these standard feature flags, but not while parsing. +

+ +

SAX2 Standard Handler and Property IDs

+ +

For parser interface characteristics that are described +as objects, a separate namespace is defined. The +objects in this namespace are again identified by URI, and +the standard property URIs have the prefix +http://xml.org/sax/properties/ before an identifier such as +lexical-handler or +dom-node. Manage those properties using +setProperty(). Those identifiers are:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Property ID | Description
declaration-handler Used to see most DTD declarations except those treated + as lexical ("document element name is ...") or which are + mandatory for all SAX parsers (DTDHandler). + The Object must implement org.xml.sax.ext.DeclHandler. +
document-xml-version May be examined only during a parse, after the startDocument() + callback has been completed; read-only. This property is a + literal string describing the actual XML version of the document, + such as "1.0" or "1.1". +
dom-node For "DOM Walker" style parsers, which ignore their + parser.parse() parameters, this is used to + specify the DOM (sub)tree being walked by the parser. + The Object must implement the + org.w3c.dom.Node interface. +
lexical-handler Used to see some syntax events that are essential in some + applications: comments, CDATA delimiters, selected general + entity inclusions, and the start and end of the DTD + (and declaration of document element name). + The Object must implement org.xml.sax.ext.LexicalHandler. +
xml-string Readable only during a parser callback, this exposes a TBS + chunk of characters responsible for the current event.
+ +

All of these standard properties are optional; +XMLReader implementations need not support them. +

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java new file mode 100644 index 0000000000..395f9eb154 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/Big5Tester.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding.test; + +import nu.validator.encoding.Encoding; + +public class Big5Tester extends EncodingTester { + + public static void main(String[] args) { + new Big5Tester().test(); + } + + private void test() { + // ASCII + decodeBig5("\u6162", "\u0061\u0062"); + // Edge cases + decodeBig5("\u8740", "\u43F0"); + decodeBig5("\uFEFE", "\u79D4"); + decodeBig5("\uFEFD", "\uD864\uDD0D"); + decodeBig5("\u8862", "\u00CA\u0304"); + decodeBig5("\u8864", "\u00CA\u030C"); + decodeBig5("\u8866", "\u00CA"); + decodeBig5("\u88A3", "\u00EA\u0304"); + decodeBig5("\u88A5", "\u00EA\u030C"); + decodeBig5("\u88A7", "\u00EA"); + decodeBig5("\u99D4", "\u8991"); + decodeBig5("\u99D5", "\uD85E\uDD67"); + decodeBig5("\u99D6", "\u8A29"); + // Edge cases surrounded with ASCII + decodeBig5("\u6187\u4062", "\u0061\u43F0\u0062"); + decodeBig5("\u61FE\uFE62", "\u0061\u79D4\u0062"); + decodeBig5("\u61FE\uFD62", "\u0061\uD864\uDD0D\u0062"); + decodeBig5("\u6188\u6262", "\u0061\u00CA\u0304\u0062"); + decodeBig5("\u6188\u6462", "\u0061\u00CA\u030C\u0062"); + decodeBig5("\u6188\u6662", "\u0061\u00CA\u0062"); + decodeBig5("\u6188\uA362", "\u0061\u00EA\u0304\u0062"); + decodeBig5("\u6188\uA562", "\u0061\u00EA\u030C\u0062"); + decodeBig5("\u6188\uA762", "\u0061\u00EA\u0062"); + decodeBig5("\u6199\uD462", "\u0061\u8991\u0062"); + decodeBig5("\u6199\uD562", "\u0061\uD85E\uDD67\u0062"); + decodeBig5("\u6199\uD662", "\u0061\u8A29\u0062"); + // Bad sequences + decodeBig5("\u8061", "\uFFFD\u0061"); + decodeBig5("\uFF61", "\uFFFD\u0061"); + decodeBig5("\uFE39", "\uFFFD\u0039"); + decodeBig5("\u8766", "\uFFFD\u0066"); + decodeBig5("\u8140", "\uFFFD\u0040"); + decodeBig5("\u6181", "\u0061\uFFFD"); + + // ASCII + encodeBig5("\u0061\u0062", "\u6162"); + // Edge cases + encodeBig5("\u9EA6\u0061", "\u3F61"); + encodeBig5("\uD858\uDE6B\u0061", "\u3F61"); + encodeBig5("\u3000", "\uA140"); + encodeBig5("\u20AC", "\uA3E1"); + encodeBig5("\u4E00", "\uA440"); + encodeBig5("\uD85D\uDE07", 
"\uC8A4"); + encodeBig5("\uFFE2", "\uC8CD"); + encodeBig5("\u79D4", "\uFEFE"); + // Not in index + encodeBig5("\u2603\u0061", "\u3F61"); + // duplicate low bits + encodeBig5("\uD840\uDFB5", "\uFD6A"); + // prefer last + encodeBig5("\u2550", "\uF9F9"); + } + + private void decodeBig5(String input, String expectation) { + decode(input, expectation, Encoding.BIG5); + } + + private void encodeBig5(String input, String expectation) { + encode(input, expectation, Encoding.BIG5); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java new file mode 100644 index 0000000000..a910a01e93 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/encoding/test/EncodingTester.java @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding.test; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +import nu.validator.encoding.Encoding; + +public class EncodingTester { + + protected byte[] stringToBytes(String str) { + byte[] bytes = new byte[str.length() * 2]; + for (int i = 0; i < str.length(); i++) { + int pair = (int) str.charAt(i); + bytes[i * 2] = (byte) (pair >> 8); + bytes[i * 2 + 1] = (byte) (pair & 0xFF); + } + return bytes; + } + + protected void decode(String input, String expectation, Encoding encoding) { + // Use the convenience method from Charset + + byte[] bytes = stringToBytes(input); + ByteBuffer byteBuf = ByteBuffer.wrap(bytes); + CharBuffer charBuf = encoding.decode(byteBuf); + + if (charBuf.remaining() != expectation.length()) { + err("When decoding from a single long buffer, the output length was wrong. 
Expected: " + + expectation.length() + ", got: " + charBuf.remaining(), + bytes, expectation); + return; + } + + for (int i = 0; i < expectation.length(); i++) { + char expect = expectation.charAt(i); + char actual = charBuf.get(); + if (actual != expect) { + err("When decoding from a single long buffer, failed at position " + + i + + ", expected: " + + charToHex(expect) + + ", got: " + + charToHex(actual), bytes, expectation); + return; + } + } + + // Decode with a 1-byte input buffer + + byteBuf = ByteBuffer.allocate(1); + charBuf = CharBuffer.allocate(expectation.length() + 2); + CharsetDecoder decoder = encoding.newDecoder(); + decoder.onMalformedInput(CodingErrorAction.REPLACE); + for (int i = 0; i < bytes.length; i++) { + byteBuf.position(0); + byteBuf.put(bytes[i]); + byteBuf.position(0); + CoderResult result = decoder.decode(byteBuf, charBuf, + (i + 1) == bytes.length); + if (result.isMalformed()) { + err("Decoder reported a malformed sequence when asked to replace at index: " + + i, bytes, expectation); + return; + } else if (result.isUnmappable()) { + err("Decoder claimed unmappable sequence, which none of these decoders should do.", + bytes, expectation); + return; + } else if (result.isOverflow()) { + err("Decoder claimed overflow when the output buffer is know to be large enough.", + bytes, expectation); + } else if (!result.isUnderflow()) { + err("Bogus coder result, expected underflow.", bytes, + expectation); + } + } + CoderResult result = decoder.flush(charBuf); + if (result.isMalformed()) { + err("Decoder reported a malformed sequence when asked to replace when flushing.", + bytes, expectation); + return; + } else if (result.isUnmappable()) { + err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do.", + bytes, expectation); + return; + } else if (result.isOverflow()) { + err("Decoder claimed overflow when flushing when the output buffer is know to be large enough.", + bytes, expectation); + } else if 
(!result.isUnderflow()) { + err("Bogus coder result when flushing, expected underflow.", bytes, + expectation); + } + + charBuf.limit(charBuf.position()); + charBuf.position(0); + + for (int i = 0; i < expectation.length(); i++) { + char expect = expectation.charAt(i); + char actual = charBuf.get(); + if (actual != expect) { + err("When decoding one byte at a time in REPORT mode, failed at position " + + i + + ", expected: " + + charToHex(expect) + + ", got: " + + charToHex(actual), bytes, expectation); + return; + } + } + + // Decode with 1-char output buffer + + byteBuf = ByteBuffer.wrap(bytes); + charBuf = CharBuffer.allocate(1); + + decoder.reset(); // Let's test this while at it + decoder.onMalformedInput(CodingErrorAction.REPLACE); + int codeUnitPos = 0; + while (byteBuf.hasRemaining()) { + charBuf.position(0); + charBuf.put('\u0000'); + charBuf.position(0); + result = decoder.decode(byteBuf, charBuf, false); + if (result.isMalformed()) { + err("Decoder reported a malformed sequence when asked to replace at index (decoding one output code unit at a time): " + + byteBuf.position(), bytes, expectation); + return; + } else if (result.isUnmappable()) { + err("Decoder claimed unmappable sequence (decoding one output code unit at a time), which none of these decoders should do.", + bytes, expectation); + return; + } else if (result.isUnderflow()) { + if (byteBuf.hasRemaining()) { + err("When decoding one output code unit at a time, decoder claimed underflow when there was input remaining.", + bytes, expectation); + return; + } + } else if (!result.isOverflow()) { + err("Bogus coder result, expected overflow.", bytes, + expectation); + } + if (charBuf.position() == 1) { + charBuf.position(0); + char actual = charBuf.get(); + char expect = expectation.charAt(codeUnitPos); + if (actual != expect) { + err("When decoding one output code unit at a time in REPLACE mode, failed at position " + + byteBuf.position() + + ", expected: " + + charToHex(expect) + ", got: " + 
charToHex(actual), + bytes, expectation); + return; + } + codeUnitPos++; + } + } + + charBuf.position(0); + charBuf.put('\u0000'); + charBuf.position(0); + result = decoder.decode(byteBuf, charBuf, true); + + if (charBuf.position() == 1) { + charBuf.position(0); + char actual = charBuf.get(); + char expect = expectation.charAt(codeUnitPos); + if (actual != expect) { + err("When decoding one output code unit at a time in REPLACE mode, failed at position " + + byteBuf.position() + + ", expected: " + + charToHex(expect) + ", got: " + charToHex(actual), + bytes, expectation); + return; + } + codeUnitPos++; + } + + charBuf.position(0); + charBuf.put('\u0000'); + charBuf.position(0); + result = decoder.flush(charBuf); + if (result.isMalformed()) { + err("Decoder reported a malformed sequence when asked to replace when flushing (one output at a time).", + bytes, expectation); + return; + } else if (result.isUnmappable()) { + err("Decoder claimed unmappable sequence when flushing, which none of these decoders should do (one output at a time).", + bytes, expectation); + return; + } else if (result.isOverflow()) { + err("Decoder claimed overflow when flushing when the output buffer is know to be large enough (one output at a time).", + bytes, expectation); + } else if (!result.isUnderflow()) { + err("Bogus coder result when flushing, expected underflow (one output at a time).", + bytes, expectation); + } + + if (charBuf.position() == 1) { + charBuf.position(0); + char actual = charBuf.get(); + char expect = expectation.charAt(codeUnitPos); + if (actual != expect) { + err("When decoding one output code unit at a time in REPLACE mode, failed when flushing, expected: " + + charToHex(expect) + ", got: " + charToHex(actual), + bytes, expectation); + return; + } + } + + // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting + // at 1 + } + + protected void encode(String input, String expectation, Encoding encoding) { + byte[] expectedBytes = 
stringToBytes(expectation); + CharBuffer charBuf = CharBuffer.wrap(input); + + // Use the convenience method from Charset + + ByteBuffer byteBuf = encoding.encode(charBuf); + + if (byteBuf.remaining() != expectedBytes.length) { + err("When encoding from a single long buffer, the output length was wrong. Expected: " + + expectedBytes.length + ", got: " + byteBuf.remaining(), + input, expectedBytes); + return; + } + + for (int i = 0; i < expectedBytes.length; i++) { + byte expect = expectedBytes[i]; + byte actual = byteBuf.get(); + if (actual != expect) { + err("When encoding from a single long buffer, failed at position " + + i + + ", expected: " + + byteToHex(expect) + + ", got: " + + byteToHex(actual), input, expectedBytes); + return; + } + } + + // Encode with a 1-char input buffer + + charBuf = CharBuffer.allocate(1); + byteBuf = ByteBuffer.allocate(expectedBytes.length + 2); + CharsetEncoder encoder = encoding.newEncoder(); + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + for (int i = 0; i < input.length(); i++) { + charBuf.position(0); + charBuf.put(input.charAt(i)); + charBuf.position(0); + CoderResult result = encoder.encode(charBuf, byteBuf, + (i + 1) == input.length()); + if (result.isMalformed()) { + err("Encoder reported a malformed sequence when asked to replace at index: " + + i, input, expectedBytes); + return; + } else if (result.isUnmappable()) { + err("Encoder reported an upmappable sequence when asked to replace at index: " + + i, input, expectedBytes); + return; + } else if (result.isOverflow()) { + err("Encoder claimed overflow when the output buffer is know to be large enough.", + input, expectedBytes); + } else if (!result.isUnderflow()) { + err("Bogus coder result, expected underflow.", input, + expectedBytes); + } + } + CoderResult result = encoder.flush(byteBuf); + if (result.isMalformed()) { + err("Encoder reported a malformed sequence when asked to replace when 
flushing.", + input, expectedBytes); + return; + } else if (result.isUnmappable()) { + err("Encoder reported an unmappable sequence when asked to replace when flushing.", + input, expectedBytes); + return; + } else if (result.isOverflow()) { + err("Encoder claimed overflow when flushing when the output buffer is know to be large enough.", + input, expectedBytes); + } else if (!result.isUnderflow()) { + err("Bogus coder result when flushing, expected underflow.", input, + expectedBytes); + + } + + byteBuf.limit(byteBuf.position()); + byteBuf.position(0); + + for (int i = 0; i < expectedBytes.length; i++) { + byte expect = expectedBytes[i]; + byte actual = byteBuf.get(); + if (actual != expect) { + err("When encoding one char at a time in REPORT mode, failed at position " + + i + + ", expected: " + + byteToHex(expect) + + ", got: " + + byteToHex(actual), input, expectedBytes); + return; + } + } + + // Decode with 1-byte output buffer + + charBuf = CharBuffer.wrap(input); + byteBuf = ByteBuffer.allocate(1); + + encoder.reset(); // Let's test this while at it + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + int bytePos = 0; + while (charBuf.hasRemaining()) { + byteBuf.position(0); + byteBuf.put((byte)0); + byteBuf.position(0); + result = encoder.encode(charBuf, byteBuf, false); + if (result.isMalformed()) { + err("Encoder reported a malformed sequence when asked to replace at index (decoding one output code unit at a time): " + + charBuf.position(), input, expectedBytes); + return; + } else if (result.isUnmappable()) { + err("Encoder reported an unmappable sequence when asked to replace at index (decoding one output code unit at a time): " + + charBuf.position(), input, expectedBytes); + return; + } else if (result.isUnderflow()) { + if (charBuf.hasRemaining()) { + err("When encoding one output byte at a time, encoder claimed underflow when there was input remaining.", + input, expectedBytes); + 
return; + } + } else if (!result.isOverflow()) { + err("Bogus coder result, expected overflow.", input, expectedBytes); + } + if (byteBuf.position() == 1) { + byteBuf.position(0); + byte actual = byteBuf.get(); + byte expect = expectedBytes[bytePos]; + if (actual != expect) { + err("When encoding one output byte at a time in REPLACE mode, failed at position " + + charBuf.position() + + ", expected: " + + byteToHex(expect) + ", got: " + byteToHex(actual), + input, expectedBytes); + return; + } + bytePos++; + } + } + + byteBuf.position(0); + byteBuf.put((byte)0); + byteBuf.position(0); + result = encoder.encode(charBuf, byteBuf, true); + + if (byteBuf.position() == 1) { + byteBuf.position(0); + byte actual = byteBuf.get(); + byte expect = expectedBytes[bytePos]; + if (actual != expect) { + err("When encoding one output byte at a time in REPLACE mode, failed at position " + + charBuf.position() + + ", expected: " + + byteToHex(expect) + ", got: " + byteToHex(actual), + input, expectedBytes); + return; + } + bytePos++; + } + + byteBuf.position(0); + byteBuf.put((byte)0); + byteBuf.position(0); + result = encoder.flush(byteBuf); + if (result.isMalformed()) { + err("Encoder reported a malformed sequence when asked to replace when flushing (one output at a time).", + input, expectedBytes); + return; + } else if (result.isUnmappable()) { + err("Encoder reported an unmappable sequence when asked to replace when flushing (one output at a time).", + input, expectedBytes); + return; + } else if (result.isOverflow()) { + err("Encoder claimed overflow when flushing when the output buffer is know to be large enough (one output at a time).", + input, expectedBytes); + } else if (!result.isUnderflow()) { + err("Bogus coder result when flushing, expected underflow (one output at a time).", + input, expectedBytes); + } + + if (byteBuf.position() == 1) { + byteBuf.position(0); + byte actual = byteBuf.get(); + byte expect = expectedBytes[bytePos]; + if (actual != expect) { + err("When 
encoding one output code unit at a time in REPLACE mode, failed when flushing, expected: " + + byteToHex(expect) + ", got: " + byteToHex(actual), + input, expectedBytes); + return; + } + } + + // TODO: 2 bytes at a time starting at 0 and 2 bytes at a time starting + // at 1 + } + + private String charToHex(char c) { + String hex = Integer.toHexString(c); + switch (hex.length()) { + case 1: + return "000" + hex; + case 2: + return "00" + hex; + case 3: + return "0" + hex; + default: + return hex; + } + } + + private String byteToHex(byte b) { + String hex = Integer.toHexString(((int) b & 0xFF)); + switch (hex.length()) { + case 1: + return "0" + hex; + default: + return hex; + } + } + + private void err(String msg, byte[] bytes, String expectation) { + System.err.println(msg); + System.err.print("Input:"); + for (int i = 0; i < bytes.length; i++) { + System.err.print(' '); + System.err.print(byteToHex(bytes[i])); + } + System.err.println(); + System.err.print("Expect:"); + for (int i = 0; i < expectation.length(); i++) { + System.err.print(' '); + System.err.print(charToHex(expectation.charAt(i))); + } + System.err.println(); + } + + private void err(String msg, String chars, byte[] expectation) { + System.err.println(msg); + System.err.print("Input:"); + for (int i = 0; i < chars.length(); i++) { + System.err.print(' '); + System.err.print(charToHex(chars.charAt(i))); + } + System.err.println(); + System.err.print("Expect:"); + for (int i = 0; i < expectation.length; i++) { + System.err.print(' '); + System.err.print(byteToHex(expectation[i])); + } + System.err.println(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java new file mode 100644 index 0000000000..3337a65557 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 
2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.io.Encoding; +import nu.validator.htmlparser.io.HtmlInputStreamReader; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; + +/** + * Round-trip check for HtmlInputStreamReader: a U+FEFF, zero to three 'x' + * padding characters and a long run of astral characters are encoded as + * UTF-8 and must be read back as the same UTF-16 code units. + */ +public class DecoderLoopTester { + + private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + + private static final int NUMBER_OR_ASTRAL_CHARS = 24500; + + /** + * Builds the input for one padding value, encodes it as UTF-8 and checks + * the decoded output twice: with a reader constructed with + * Heuristics.NONE and with a reader given the UTF-8 encoding explicitly. + * + * @param padding number of 'x' characters between the U+FEFF and the surrogate pairs + * @throws SAXException + * @throws IOException + */ + private void runTest(int padding) throws SAXException, IOException { + Encoding utf8 = Encoding.forName("UTF-8"); + char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS]; + byte[] byteArr; + int i = 0; + charArr[i++] = '\uFEFF'; + for (int j = 0; j < padding; j++) { + charArr[i++] = 'x'; + } + for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) { + int value = 0x10000 + j; + charArr[i++] = (char) (LEAD_OFFSET + (value >> 10)); + charArr[i++] = (char) (0xDC00 + (value & 0x3FF)); +// charArr[i++] = 'y'; +// charArr[i++] = 'z'; + + } + CharBuffer charBuffer = CharBuffer.wrap(charArr); + CharsetEncoder enc = utf8.newEncoder(); + enc.onMalformedInput(CodingErrorAction.REPORT); + enc.onUnmappableCharacter(CodingErrorAction.REPORT); + ByteBuffer byteBuffer = enc.encode(charBuffer); + byteArr = new byte[byteBuffer.limit()]; + byteBuffer.get(byteArr); + + ErrorHandler eh = new SystemErrErrorHandler(); + compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr); + compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr); + } + + /** + * Reads the reader to EOF and fails with a RuntimeException unless the + * output is exactly charArr: no differing code unit, no extra output + * and no early end of stream. + * + * @param reader the reader under test + * @param padding not used by the comparison + * @param charArr the expected UTF-16 code units + * @param byteArr not used by the comparison + * @throws SAXException + * @throws IOException + */ + private void compare(HtmlInputStreamReader 
reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException { + char[] readBuffer = new char[2048]; + int offset = 0; + int num = 0; + int readNum = 0; + while ((num = reader.read(readBuffer)) != -1) { + /* Report over-production as a test failure instead of an ArrayIndexOutOfBoundsException. */ + if (offset + num > charArr.length) { + throw new RuntimeException("Test failed. Reader produced more than the expected " + charArr.length + " chars. readNum: " + readNum); + } + for (int j = 0; j < num; j++) { + if (readBuffer[j] != charArr[offset + j]) { + throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum); + } + } + offset += num; + readNum++; + } + /* A reader that stops early used to pass silently. */ + if (offset != charArr.length) { + throw new RuntimeException("Test failed. Expected " + charArr.length + " chars but read only " + offset + "."); + } + } + + void runTests() throws SAXException, IOException { + for (int i = 0; i < 4; i++) { + runTest(i); + } + } + + /** + * @param args + * @throws IOException + * @throws SAXException + */ + public static void main(String[] args) throws IOException, SAXException { + new DecoderLoopTester().runTests(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java new file mode 100644 index 0000000000..a3866f5d93 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomIdTester.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import java.io.IOException; +import java.io.StringReader; + +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import nu.validator.htmlparser.dom.HtmlDocumentBuilder; + +/** + * Parses a small HTML string with HtmlDocumentBuilder and prints the local + * name of the element found by getElementById("foo"). + */ +public class DomIdTester { + + /* NOTE(review): the markup of this literal was lost when the patch text was extracted; only the two text nodes survived. This stand-in keeps them and supplies the id "foo" that main() looks up. Restore the upstream literal if it is available. */ + private static final String testSrc = "<html><body><div id='foo'>buoeoa</div><div id='bar'>uoeuo</div></body></html>"; + + /** + * @param args + * @throws IOException + * @throws SAXException + */ + public static void main(String[] args) throws SAXException, IOException { + HtmlDocumentBuilder builder = new HtmlDocumentBuilder(); + Document doc = builder.parse(new InputSource(new StringReader(testSrc))); + /* Fail with a readable message instead of a NullPointerException when the ID lookup finds nothing. */ + if (doc.getElementById("foo") == null) { + throw new RuntimeException("Test failed. getElementById(\"foo\") returned null."); + } + System.out.println(doc.getElementById("foo").getLocalName()); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java new file mode 100644 index 0000000000..07d054b9e4 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/DomTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +/** + * Scratch program: creates an empty namespace-aware DOM document through + * JAXP, creates an XHTML "html" element and sets an attribute named + * "xmlns:foo" on it with the non-namespace setAttribute call. Nothing is + * printed or asserted, and the element is not attached to the document. + */ +public class DomTest { + public static void main(String[] args) throws Exception { + DocumentBuilderFactory f = DocumentBuilderFactory.newInstance(); + f.setNamespaceAware(true); // not setting this causes pain and suffering with SVG + DocumentBuilder b = f.newDocumentBuilder(); + Document d = b.newDocument(); + Element e = d.createElementNS("http://www.w3.org/1999/xhtml", "html"); + e.setAttribute("xmlns:foo", "bar"); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java new file mode 100644 index 0000000000..95cd3018ee --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/EncodingTester.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; + +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.io.Encoding; +import nu.validator.htmlparser.io.HtmlInputStreamReader; + +import org.xml.sax.SAXException; + +/** + * Runs encoding-sniffing tests from an aggregate test file. Each test is a + * label line, a data section read through UntilHashInputStream, a second + * label line and a line naming the expected encoding. The outcome of every + * test is written to System.err. + */ +public class EncodingTester { + + private final InputStream aggregateStream; + + private final StringBuilder builder = new StringBuilder(); + + /** + * @param aggregateStream the stream holding all tests; it is read but not closed by this class + */ + public EncodingTester(InputStream aggregateStream) { + this.aggregateStream = aggregateStream; + } + + /** + * Runs tests until runTest() reports that the input is used up. + */ + private void runTests() throws IOException, SAXException { + while (runTest()) { + // spin + } + } + + /** + * Runs one test: lets HtmlInputStreamReader pick a charset for the data + * section, reads the expected encoding name up to the next line feed and + * prints "Success." or a failure report. + * + * @return true if a test was run, false if the input ended + */ + private boolean runTest() throws IOException, SAXException { + if (skipLabel()) { + return false; + } + UntilHashInputStream stream = new UntilHashInputStream(aggregateStream); + HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null, + null, null, Heuristics.NONE); + Charset charset = reader.getCharset(); + stream.close(); + if (skipLabel()) { + System.err.println("Premature end of test data."); + return false; + } + builder.setLength(0); + loop: for (;;) { + int b = aggregateStream.read(); + switch (b) { + case '\n': + break loop; + case -1: + System.err.println("Premature end of test data."); + return false; + default: + builder.append(((char) b)); + } + } + String sniffed = charset.name(); + String expected = Encoding.forName(builder.toString()).newDecoder().charset().name(); + if (expected.equalsIgnoreCase(sniffed)) { + System.err.println("Success."); + // System.err.println(stream); + } else { + System.err.println("Failure. 
Expected: " + expected + " got " + + sniffed + "."); + System.err.println(stream); + } + return true; + } + + /** + * Consumes input up to and including the next line feed. + * + * @return true if the end of the stream was reached first, false otherwise + */ + private boolean skipLabel() throws IOException { + int b = aggregateStream.read(); + if (b == -1) { + return true; + } + for (;;) { + b = aggregateStream.read(); + if (b == -1) { + return true; + } else if (b == 0x0A) { + return false; + } + } + } + + /** + * Runs the tests in each file named on the command line. Every file is + * closed after its tests have run, also when a test throws. + * + * @param args paths of aggregate test files + * @throws SAXException + * @throws IOException + */ + public static void main(String[] args) throws IOException, SAXException { + for (int i = 0; i < args.length; i++) { + InputStream in = new FileInputStream(args[i]); + try { + EncodingTester tester = new EncodingTester(in); + tester.runTests(); + } finally { + in.close(); + } + } + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java new file mode 100644 index 0000000000..2fcfc4960e --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.impl.ElementName; +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.Tokenizer; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import com.sdicons.json.model.JSONArray; +import com.sdicons.json.model.JSONBoolean; +import com.sdicons.json.model.JSONNull; +import com.sdicons.json.model.JSONObject; +import com.sdicons.json.model.JSONString; +/** + * Token and error handler that records what the tokenizer reports as a + * JSONArray. Each token is itself a JSONArray whose first item names the + * token type; character data is buffered and emitted as a single + * "Character" token before the next token, error or end of file. A new + * array is started by every startTokenization call. + */ +public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler { + + private static final JSONString DOCTYPE = new JSONString("DOCTYPE"); + + private static final JSONString START_TAG = new JSONString("StartTag"); + + private static final JSONString END_TAG = new JSONString("EndTag"); + + private static final JSONString COMMENT = new JSONString("Comment"); + + private static final JSONString CHARACTER = new JSONString("Character"); + + private static final JSONString PARSE_ERROR = new JSONString("ParseError"); + + private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + + private final StringBuilder builder = new StringBuilder(); + + private JSONArray array = null; + + private int contentModelFlag; + + private String contentModelElement; + /** + * Stores the tokenizer state and end tag expectation that + * startTokenization applies when contentModelElement is not null. + */ + public void setContentModelFlag(int contentModelFlag, String contentModelElement) { + this.contentModelFlag = contentModelFlag; + this.contentModelElement = contentModelElement; + } + + public void characters(char[] buf, int start, int length) + throws SAXException { + builder.append(buf, start, length); + } + /** + * Emits the buffered character data as one ["Character", text] token and + * clears the buffer; does nothing when the buffer is empty. + */ + private void flushCharacters() { + if (builder.length() > 
0) { + JSONArray token = new JSONArray(); + token.getValue().add(CHARACTER); + token.getValue().add(new JSONString(builder.toString())); + array.getValue().add(token); + builder.setLength(0); + } + } + + public void comment(char[] buf, int start, int length) throws SAXException { + flushCharacters(); + JSONArray token = new JSONArray(); + token.getValue().add(COMMENT); + token.getValue().add(new JSONString(new String(buf, start, length))); + array.getValue().add(token); + } + /** + * Records ["DOCTYPE", name, public id or JSON null, system id or JSON + * null, flag]. Note that the flag is the negation of forceQuirks. + */ + public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException { + flushCharacters(); + JSONArray token = new JSONArray(); + token.getValue().add(DOCTYPE); + token.getValue().add(new JSONString(name)); + token.getValue().add(publicIdentifier == null ? JSONNull.NULL : new JSONString(publicIdentifier)); + token.getValue().add(systemIdentifier == null ? JSONNull.NULL : new JSONString(systemIdentifier)); + token.getValue().add(new JSONBoolean(!forceQuirks)); + array.getValue().add(token); + } + + public void endTag(ElementName eltName) throws SAXException { + String name = eltName.name; + flushCharacters(); + JSONArray token = new JSONArray(); + token.getValue().add(END_TAG); + token.getValue().add(new JSONString(name)); + array.getValue().add(token); + } + + public void eof() throws SAXException { + flushCharacters(); + } + + public void startTokenization(Tokenizer self) throws SAXException { + array = new JSONArray(); + if (contentModelElement != null) { + self.setStateAndEndTagExpectation(contentModelFlag, contentModelElement); + } + } + /** + * Records ["StartTag", name, attributes] and appends JSON true as a + * fourth item only when the tag is self-closing. + */ + public void startTag(ElementName eltName, HtmlAttributes attributes, + boolean selfClosing) throws SAXException { + String name = eltName.name; + flushCharacters(); + JSONArray token = new JSONArray(); + token.getValue().add(START_TAG); + token.getValue().add(new JSONString(name)); + JSONObject attrs = new JSONObject(); + for (int i = 0; i < attributes.getLength(); i++) { + 
attrs.getValue().put(attributes.getQNameNoBoundsCheck(i), + new JSONString(attributes.getValueNoBoundsCheck(i))); + } + token.getValue().add(attrs); + if (selfClosing) { + token.getValue().add(JSONBoolean.TRUE); + } + array.getValue().add(token); + } + + public boolean wantsComments() throws SAXException { + return true; + } + /** + * Records a parse error as the bare "ParseError" string, not wrapped in a + * token array, after flushing pending character data. + */ + public void error(SAXParseException exception) throws SAXException { + flushCharacters(); + array.getValue().add(PARSE_ERROR); + } + + public void fatalError(SAXParseException exception) throws SAXException { + throw new RuntimeException("Should never happen."); + } + + public void warning(SAXParseException exception) throws SAXException { + } + + /** + * Returns the array. + * + * @return the array + */ + public JSONArray getArray() { + return array; + } + + public void endTokenization() throws SAXException { + + } + + @Override public void zeroOriginatingReplacementCharacter() + throws SAXException { + builder.append(REPLACEMENT_CHARACTER, 0, 1); + } + + @Override public boolean cdataSectionAllowed() throws SAXException { + return false; + } + + @Override public void ensureBufferSpace(int inputLength) + throws SAXException { + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java new file mode 100644 index 0000000000..9a207f2779 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/ListErrorHandler.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import java.util.LinkedList; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class ListErrorHandler implements ErrorHandler { + + private boolean fatal = false; + + private LinkedList errors = new LinkedList(); + + public void error(SAXParseException spe) throws SAXException { + errors.add(Integer.toString(spe.getColumnNumber()) + ": " + spe.getMessage()); + } + + public void fatalError(SAXParseException arg0) throws SAXException { + fatal = true; + } + + public void warning(SAXParseException arg0) throws SAXException { + } + + /** + * Returns the errors. + * + * @return the errors + */ + public LinkedList getErrors() { + return errors; + } + + /** + * Returns the fatal. 
+ * + * @return the fatal + */ + public boolean isFatal() { + return fatal; + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java new file mode 100644 index 0000000000..9ee490b9ed --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2005, 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; + +import javax.xml.transform.ErrorListener; +import javax.xml.transform.SourceLocator; +import javax.xml.transform.TransformerException; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +/** + * @version $Id$ + * @author hsivonen + */ +public class SystemErrErrorHandler implements ErrorHandler, ErrorListener { + + private Writer out; + + private boolean inError = false; + + public SystemErrErrorHandler() { + try { + out = new OutputStreamWriter(System.err, "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + /** + * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) + */ + public void warning(SAXParseException e) throws SAXException { + try { + out.write("Warning:\n"); + out.write(e.getMessage()); + out.write("\nFile: "); + String systemId = e.getSystemId(); + out.write((systemId == null) ? "Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(e.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(e.getColumnNumber())); + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new SAXException(e1); + } + } + + /** + * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) + */ + public void error(SAXParseException e) throws SAXException { + inError = true; + try { + out.write("Error:\n"); + out.write(e.getMessage()); + out.write("\nFile: "); + String systemId = e.getSystemId(); + out.write((systemId == null) ? 
"Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(e.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(e.getColumnNumber())); + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new SAXException(e1); + } + } + + /** + * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) + */ + public void fatalError(SAXParseException e) throws SAXException { + inError = true; + try { + out.write("Fatal Error:\n"); + out.write(e.getMessage()); + out.write("\nFile: "); + String systemId = e.getSystemId(); + out.write((systemId == null) ? "Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(e.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(e.getColumnNumber())); + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new SAXException(e1); + } + } + + /** + * Returns the inError. + * + * @return the inError + */ + public boolean isInError() { + return inError; + } + + public void reset() { + inError = false; + } + + public void error(TransformerException e) throws TransformerException { + inError = true; + try { + out.write("Error:\n"); + out.write(e.getMessage()); + SourceLocator sourceLocator = e.getLocator(); + if (sourceLocator != null) { + out.write("\nFile: "); + String systemId = sourceLocator.getSystemId(); + out.write((systemId == null) ? 
"Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(sourceLocator.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(sourceLocator.getColumnNumber())); + } + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new TransformerException(e1); + } + } + + public void fatalError(TransformerException e) + throws TransformerException { + inError = true; + try { + out.write("Fatal Error:\n"); + out.write(e.getMessage()); + SourceLocator sourceLocator = e.getLocator(); + if (sourceLocator != null) { + out.write("\nFile: "); + String systemId = sourceLocator.getSystemId(); + out.write((systemId == null) ? "Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(sourceLocator.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(sourceLocator.getColumnNumber())); + } + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new TransformerException(e1); + } + } + + public void warning(TransformerException e) + throws TransformerException { + try { + out.write("Warning:\n"); + out.write(e.getMessage()); + SourceLocator sourceLocator = e.getLocator(); + if (sourceLocator != null) { + out.write("\nFile: "); + String systemId = sourceLocator.getSystemId(); + out.write((systemId == null) ? 
"Unknown" : systemId); + out.write("\nLine: "); + out.write(Integer.toString(sourceLocator.getLineNumber())); + out.write(" Col: "); + out.write(Integer.toString(sourceLocator.getColumnNumber())); + } + out.write("\n\n"); + out.flush(); + } catch (IOException e1) { + throw new TransformerException(e1); + } + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java new file mode 100644 index 0000000000..0fa5972c8a --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.TokenHandler;
+import nu.validator.htmlparser.impl.ElementName;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.HtmlAttributes;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+/** Token and error handler that writes a line-oriented text dump of what it receives to a Writer. */
+public class TokenPrinter implements TokenHandler, ErrorHandler {
+    /** Destination of the dump; flushed and closed by endTokenization(). */
+    private final Writer writer;
+    /** Writes the text after a '-'; a newline char is printed as backslash-n and ends the '-' line. */
+    public void characters(char[] buf, int start, int length)
+            throws SAXException {
+        try {
+            boolean lineStarted = true;
+            writer.write('-');
+            for (int i = start; i < start + length; i++) {
+                if (!lineStarted) {
+                    writer.write("\n-");
+                    lineStarted = true;
+                }
+                char c = buf[i];
+                if (c == '\n') {
+                    writer.write("\\n");
+                    lineStarted = false;
+                } else {
+                    writer.write(c);
+                }
+            }
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes '!' followed by the comment text and a newline. */
+    public void comment(char[] buf, int start, int length) throws SAXException {
+        try {
+            writer.write('!');
+            writer.write(buf, start, length);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes 'D', the doctype name, a space and the forceQuirks flag; the identifiers are not printed. */
+    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
+        try {
+            writer.write('D');
+            writer.write(name);
+            writer.write(' ');
+            writer.write("" + forceQuirks);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes ')' followed by the element name and a newline. */
+    public void endTag(ElementName eltName) throws SAXException {
+        try {
+            writer.write(')');
+            writer.write(eltName.name);
+            writer.write('\n');
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes an "E" line. */
+    public void eof() throws SAXException {
+        try {
+            writer.write("E\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Does nothing. */
+    public void startTokenization(Tokenizer self) throws SAXException {
+
+    }
+    /** Writes '(' plus the element name, then one 'A' line per attribute; selfClosing is not printed. */
+    public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
+            throws SAXException {
+        try {
+            writer.write('(');
+            writer.write(eltName.name);
+            writer.write('\n');
+            for (int i = 0; i < attributes.getLength(); i++) {
+                writer.write('A');
+                writer.write(attributes.getQNameNoBoundsCheck(i));
+                writer.write(' ');
+                writer.write(attributes.getValueNoBoundsCheck(i));
+                writer.write('\n');
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Always returns {@code true}. */
+    public boolean wantsComments() throws SAXException {
+        return true;
+    }
+    /** Tokenizes the file named by {@code args[0]} and prints the dump to System.out as UTF-8. */
+    public static void main(String[] args) throws SAXException, IOException {
+        TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
+        Driver tokenizer = new Driver(new ErrorReportingTokenizer(printer));
+        tokenizer.setErrorHandler(printer);
+        File file = new File(args[0]);
+        InputSource is = new InputSource(new FileInputStream(file)); // NOTE(review): stream is never closed here
+        is.setSystemId(file.toURI().toASCIIString());
+        tokenizer.tokenize(is);
+    }
+
+    /**
+     * @param writer destination of the dump; endTokenization() flushes and closes it
+     */
+    public TokenPrinter(final Writer writer) {
+        this.writer = writer;
+    }
+    /** Writes "R " followed by the exception message and a newline. */
+    public void error(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("R ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes "F " followed by the exception message and a newline. */
+    public void fatalError(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("F ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes "W " followed by the exception message and a newline. */
+    public void warning(SAXParseException exception) throws SAXException {
+        try {
+            writer.write("W ");
+            writer.write(exception.getMessage());
+            writer.write("\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Flushes and closes the writer. */
+    public void endTokenization() throws SAXException {
+        try {
+            writer.flush();
+            writer.close();
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes a "0" line. */
+    @Override public void zeroOriginatingReplacementCharacter()
+            throws SAXException {
+        try {
+            writer.write("0\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Always returns {@code false}. */
+    @Override public boolean cdataSectionAllowed() throws SAXException {
+        return false;
+    }
+    /** Does nothing. */
+    @Override public void ensureBufferSpace(int inputLength)
+            throws SAXException {
+    }
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
new file mode 100644
index 0000000000..76ea7543a8
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenizerTester.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
+import nu.validator.htmlparser.impl.Tokenizer;
+import nu.validator.htmlparser.io.Driver;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import antlr.RecognitionException;
+import antlr.TokenStreamException;
+
+import com.sdicons.json.model.JSONArray;
+import com.sdicons.json.model.JSONObject;
+import com.sdicons.json.model.JSONString;
+import com.sdicons.json.model.JSONValue;
+import com.sdicons.json.parser.JSONParser;
+/** Runs JSON-described tokenizer tests and reports "Success" or "Failure" per run on System.out. */
+public class TokenizerTester {
+    // Initial-state names as they are matched against the entries of a test's "initialStates" array.
+    private static JSONString PLAINTEXT = new JSONString("PLAINTEXT state");
+
+    private static JSONString PCDATA = new JSONString("DATA state");
+
+    private static JSONString RCDATA = new JSONString("RCDATA state");
+
+    private static JSONString RAWTEXT = new JSONString("RAWTEXT state");
+    /** Compares simple values with equals() and arrays/objects with equals() on their getValue(). */
+    private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
+        if (one.isSimple()) {
+            return one.equals(other);
+        } else if (one.isArray()) {
+            if (other.isArray()) {
+                JSONArray oneArr = (JSONArray) one;
+                JSONArray otherArr = (JSONArray) other;
+                return oneArr.getValue().equals(otherArr.getValue());
+            } else {
+                return false;
+            }
+        } else if (one.isObject()) {
+            if (other.isObject()) {
+                JSONObject oneObject = (JSONObject) one;
+                JSONObject otherObject = (JSONObject) other;
+                return oneObject.getValue().equals(otherObject.getValue());
+            } else {
+                return false;
+            }
+        } else {
+            throw new RuntimeException("Should never happen.");
+        }
+    }
+    /** Test cases taken from the "tests" key of the file or, if that is absent, "xmlViolationTests". */
+    private JSONArray tests;
+    /** Token handler and error handler of the driver; getArray() yields the tokens of a run. */
+    private final JSONArrayTokenHandler tokenHandler;
+    /** Tokenizer driver reused for every test of the file. */
+    private final Driver driver;
+    /** Report sink: System.out encoded as UTF-8. */
+    private final Writer writer;
+    /** Sets up the driver and reads the test file; xmlViolationTests use ALTER_INFOSET policies. */
+    private TokenizerTester(InputStream stream) throws TokenStreamException,
+            RecognitionException, UnsupportedEncodingException {
+        tokenHandler = new JSONArrayTokenHandler();
+        driver = new Driver(new ErrorReportingTokenizer(tokenHandler));
+        driver.setCommentPolicy(XmlViolationPolicy.ALLOW);
+        driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
+        driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
+        driver.setNamePolicy(XmlViolationPolicy.ALLOW);
+        driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
+        driver.setErrorHandler(tokenHandler);
+        writer = new OutputStreamWriter(System.out, "UTF-8");
+        JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
+                "UTF-8"));
+        JSONObject obj = (JSONObject) jsonParser.nextValue();
+        tests = (JSONArray) obj.get("tests");
+        if (tests == null) {
+            tests = (JSONArray) obj.get("xmlViolationTests");
+            driver.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
+            driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
+        }
+    }
+    /** Runs every test case of the file, then flushes the report. */
+    private void runTests() throws SAXException, IOException {
+        for (JSONValue val : tests.getValue()) {
+            runTest((JSONObject) val);
+        }
+        writer.flush();
+    }
+    /** Runs one test once per listed initial state, or once in DATA if "initialStates" is absent. */
+    private void runTest(JSONObject test) throws SAXException, IOException {
+        String inputString = ((JSONString) test.get("input")).getValue();
+        JSONArray expectedTokens = (JSONArray) test.get("output");
+        String description = ((JSONString) test.get("description")).getValue();
+        JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
+        String lastStartTag = lastStartTagJSON == null ? null
+                : lastStartTagJSON.getValue();
+        JSONArray contentModelFlags = (JSONArray) test.get("initialStates");
+        if (contentModelFlags == null) {
+            runTestInner(inputString, expectedTokens, description,
+                    Tokenizer.DATA, null); // lastStartTag is not passed on in this branch
+        } else {
+            for (JSONValue value : contentModelFlags.getValue()) {
+                if (PCDATA.equals(value)) {
+                    runTestInner(inputString, expectedTokens, description,
+                            Tokenizer.DATA, lastStartTag);
+                } else if (RAWTEXT.equals(value)) {
+                    runTestInner(inputString, expectedTokens, description,
+                            Tokenizer.RAWTEXT, lastStartTag);
+                } else if (RCDATA.equals(value)) {
+                    runTestInner(inputString, expectedTokens, description,
+                            Tokenizer.RCDATA, lastStartTag);
+                } else if (PLAINTEXT.equals(value)) {
+                    runTestInner(inputString, expectedTokens, description,
+                            Tokenizer.PLAINTEXT, lastStartTag);
+                } else {
+                    throw new RuntimeException("Broken test data.");
+                }
+            }
+        }
+    }
+
+    /**
+     * Tokenizes {@code inputString} and writes "Success" or a "Failure" report to the writer.
+     * A mismatch prints description, input, expected and actual tokens; any Throwable is
+     * caught and printed with description, input and stack trace instead of propagating.
+     * @param contentModelFlag initial tokenizer state (a {@code Tokenizer} constant)
+     * @param contentModelElement last start tag name given to the token handler; may be null
+     */
+    private void runTestInner(String inputString, JSONArray expectedTokens,
+            String description, int contentModelFlag,
+            String contentModelElement) throws SAXException, IOException {
+        tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
+        InputSource is = new InputSource(new StringReader(inputString));
+        try {
+            driver.tokenize(is);
+            JSONArray actualTokens = tokenHandler.getArray();
+            if (jsonDeepEquals(actualTokens, expectedTokens)) {
+                writer.write("Success\n");
+            } else {
+                writer.write("Failure\n");
+                writer.write(description);
+                writer.write("\nInput:\n");
+                writer.write(inputString);
+                writer.write("\nExpected tokens:\n");
+                writer.write(expectedTokens.render(false));
+                writer.write("\nActual tokens:\n");
+                writer.write(actualTokens.render(false));
+                writer.write("\n");
+            }
+        } catch (Throwable t) {
+            writer.write("Failure\n");
+            writer.write(description);
+            writer.write("\nInput:\n");
+            writer.write(inputString);
+            writer.write("\n");
+            t.printStackTrace(new PrintWriter(writer, false));
+        }
+    }
+
+    /** Runs the tokenizer tests of each JSON test file named on the command line.
+     * @param args paths of the test files
+     * @throws RecognitionException propagated from reading a JSON test file
+     * @throws TokenStreamException propagated from reading a JSON test file
+     * @throws IOException if reading a file or writing the report fails
+     * @throws SAXException propagated from running the tests
+     */
+    public static void main(String[] args) throws TokenStreamException,
+            RecognitionException, SAXException, IOException {
+        for (int i = 0; i < args.length; i++) {
+            TokenizerTester tester = new TokenizerTester(new FileInputStream(
+                    args[i])); // NOTE(review): this stream is never closed explicitly
+            tester.runTests();
+        }
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
new file mode 100644
index 0000000000..9b95b763e7
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ * Copyright (c) 2008 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.htmlparser.test;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+/** SAX handler that writes the events it receives as an indented "| " tree dump to a Writer. */
+public class TreeDumpContentHandler implements ContentHandler, LexicalHandler {
+    /** Destination of the dump. */
+    private final Writer writer;
+    /** Current nesting depth; printLead() writes two spaces per level. */
+    private int level = 0;
+    /** True while a quoted text line is open and its closing quote is still to be written. */
+    private boolean inCharacters = false;
+    /** Whether endDocument() flushes and closes the writer. */
+    private final boolean close;
+
+    /**
+     * Creates a dumper writing to {@code writer}; {@code close} says whether endDocument() closes it.
+     */
+    public TreeDumpContentHandler(final Writer writer, boolean close) {
+        this.writer = writer;
+        this.close = close;
+    }
+    /** Creates a dumper that flushes and closes {@code writer} in endDocument(). */
+    public TreeDumpContentHandler(final Writer writer) {
+        this(writer, true);
+    }
+    /** Ends a pending text line, then writes "| " followed by two spaces per nesting level. */
+    private void printLead() throws IOException {
+        if (inCharacters) {
+            writer.write("\"\n");
+            inCharacters = false;
+        }
+        writer.write("| ");
+        for (int i = 0; i < level; i++) {
+            writer.write("  "); // two spaces per level, as in the html5lib tree dump format
+        }
+    }
+    /** Appends text to the open quoted line, first opening one (lead plus '"') if none is open. */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            if (!inCharacters) {
+                printLead();
+                writer.write('"');
+                inCharacters = true;
+            }
+            writer.write(ch, start, length);
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Ends a pending text line and un-indents; an XHTML template un-indents twice. */
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        try {
+            if (inCharacters) {
+                writer.write("\"\n");
+                inCharacters = false;
+            }
+            level--;
+            if ("http://www.w3.org/1999/xhtml".equals(uri) &&
+                    "template".equals(localName)) {
+                // decrement level for the "content"
+                level--;
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes the element line (math/svg/otherns prefixed), then its attributes sorted by name. */
+    public void startElement(String uri, String localName, String qName,
+            Attributes atts) throws SAXException {
+        try {
+            printLead();
+            writer.write('<');
+            if ("http://www.w3.org/1998/Math/MathML".equals(uri)) {
+                writer.write("math ");
+            } else if ("http://www.w3.org/2000/svg".equals(uri)) {
+                writer.write("svg ");
+            } else if (!"http://www.w3.org/1999/xhtml".equals(uri)) {
+                writer.write("otherns ");
+            }
+            writer.write(localName);
+            writer.write(">\n");
+            level++;
+            TreeMap<String, String> map = new TreeMap<String, String>();
+            for (int i = 0; i < atts.getLength(); i++) {
+                String ns = atts.getURI(i);
+                String name;
+                if ("http://www.w3.org/1999/xlink".equals(ns)) {
+                    name = "xlink " + atts.getLocalName(i);
+                } else if ("http://www.w3.org/XML/1998/namespace".equals(ns)) {
+                    name = "xml " + atts.getLocalName(i);
+                } else if ("http://www.w3.org/2000/xmlns/".equals(ns)) {
+                    name = "xmlns " + atts.getLocalName(i);
+                } else if ("".equals(ns)) { // no-namespace attribute; test ns, not the element uri
+                    name = atts.getLocalName(i);
+                } else {
+                    name = "otherns " + atts.getLocalName(i);
+                }
+                map.put(name, atts.getValue(i));
+            }
+            for (Map.Entry<String, String> entry : map.entrySet()) {
+                printLead();
+                writer.write(entry.getKey());
+                writer.write("=\"");
+                writer.write(entry.getValue());
+                writer.write("\"\n");
+            }
+            if ("http://www.w3.org/1999/xhtml".equals(uri) &&
+                    "template".equals(localName)) {
+                printLead();
+                level++;
+                writer.write("content\n");
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes the comment text between "<!-- " and " -->" on its own line. */
+    public void comment(char[] ch, int offset, int len) throws SAXException {
+        try {
+            printLead();
+            writer.write("<!-- ");
+            writer.write(ch, offset, len);
+            writer.write(" -->\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Writes the doctype line; both identifiers are printed, quoted, if either is non-empty. */
+    public void startDTD(String name, String publicIdentifier,
+            String systemIdentifier) throws SAXException {
+        try {
+            printLead();
+            writer.write("<!DOCTYPE ");
+            writer.write(name);
+            if (publicIdentifier.length() > 0 || systemIdentifier.length() > 0) { // NOTE(review): assumes non-null ids
+                writer.write(' ');
+                writer.write('\"');
+                writer.write(publicIdentifier);
+                writer.write('\"');
+                writer.write(' ');
+                writer.write('\"');
+                writer.write(systemIdentifier);
+                writer.write('\"');
+            }
+            writer.write(">\n");
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    /** Ends a pending text line and, if requested at construction, flushes and closes the writer. */
+    public void endDocument() throws SAXException {
+        try {
+            if (inCharacters) {
+                writer.write("\"\n");
+                inCharacters = false;
+            }
+            if (close) {
+                writer.flush();
+                writer.close();
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
+    }
+    // The remaining SAX callbacks below are deliberately empty: they contribute nothing to the dump.
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+    }
+
+    public void startEntity(String arg0) throws SAXException {
+    }
+
+    public void endCDATA() throws SAXException {
+    }
+
+    public void endDTD() throws SAXException {
+    }
+
+    public void endEntity(String arg0) throws SAXException {
+    }
+
+    public void startCDATA() throws SAXException {
+    }
+
+    public void endPrefixMapping(String prefix) throws SAXException {
+    }
+
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+    }
+
+    public void processingInstruction(String target, String data)
+            throws SAXException {
+    }
+
+    public void setDocumentLocator(Locator locator) {
+    }
+
+    public void skippedEntity(String name) throws SAXException {
+    }
+
+    public void startDocument() throws SAXException {
+    }
+
+}
diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
new file mode 100644
index 0000000000..c091693839
--- /dev/null
+++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreePrinter.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission
notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; + +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + +public class TreePrinter { + + public static void main(String[] args) throws SAXException, IOException { + TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(new OutputStreamWriter(System.out, "UTF-8")); + HtmlParser htmlParser = new HtmlParser(); + htmlParser.setContentHandler(treeDumpContentHandler); + htmlParser.setLexicalHandler(treeDumpContentHandler); + htmlParser.setErrorHandler(new SystemErrErrorHandler()); + htmlParser.setXmlPolicy(XmlViolationPolicy.ALLOW); + File file = new File(args[0]); + InputSource is = new InputSource(new FileInputStream(file)); + is.setSystemId(file.toURI().toASCIIString()); + htmlParser.parse(is); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java new file mode 100644 index 0000000000..62d3ab5307 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TreeTester.java @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.StringWriter; +import java.util.LinkedList; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; + +import org.xml.sax.InputSource; +import org.xml.sax.SAXParseException; + +public class TreeTester { + + private final BufferedInputStream aggregateStream; + + private boolean streaming = false; + + /** + * @param aggregateStream + */ + public TreeTester(InputStream aggregateStream) { + this.aggregateStream = new BufferedInputStream(aggregateStream); + } + + private void runTests() throws Throwable { + if (aggregateStream.read() != '#') { + System.err.println("No hash at start!"); + return; + } + while (runTest()) { + // spin + } + } + + private boolean runTest() throws Throwable { + UntilHashInputStream stream = null; + try { + String context = null; + boolean scriptingEnabled = true; + boolean hadScriptingDirective = false; + aggregateStream.mark(12288); + if (skipLabel()) { // #data + return false; + } + stream = new UntilHashInputStream(aggregateStream); + while (stream.read() != -1) { + // spin + } + if (skipLabel()) { // #errors + System.err.println("Premature end of test data."); + return false; + } + stream = new UntilHashInputStream(aggregateStream); + while (stream.read() != -1) { + // spin + } + + StringBuilder sb = new StringBuilder(); + int c; + while ((c = aggregateStream.read()) != '\n') { + sb.append((char) c); + } + String label = sb.toString(); + if ("document-fragment".equals(label)) { + sb.setLength(0); + while ((c = aggregateStream.read()) != '\n') { + sb.append((char) c); + } + context = sb.toString(); + // Now potentially gather #script-on/off + sb.setLength(0); + while ((c = aggregateStream.read()) != '\n') { + sb.append((char) c); + } 
+ label = sb.toString(); + } + if ("script-on".equals(label)) { + hadScriptingDirective = true; + } else if ("script-off".equals(label)) { + hadScriptingDirective = true; + scriptingEnabled = false; + } + aggregateStream.reset(); + if (skipLabel()) { // #data + System.err.println("Premature end of test data."); + return false; + } + stream = new UntilHashInputStream(aggregateStream); + InputSource is = new InputSource(stream); + is.setEncoding("UTF-8"); + StringWriter sw = new StringWriter(); + ListErrorHandler leh = new ListErrorHandler(); + TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler( + sw); + HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW); + if (streaming) { + htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL); + } + htmlParser.setContentHandler(treeDumpContentHandler); + htmlParser.setLexicalHandler(treeDumpContentHandler); + htmlParser.setErrorHandler(leh); + htmlParser.setScriptingEnabled(scriptingEnabled); + try { + if (context == null) { + htmlParser.parse(is); + } else { + String ns = "http://www.w3.org/1999/xhtml"; + if (context.startsWith("svg ")) { + ns = "http://www.w3.org/2000/svg"; + context = context.substring(4); + } else if (context.startsWith("math ")) { + ns = "http://www.w3.org/1998/Math/MathML"; + context = context.substring(5); + } + htmlParser.parseFragment(is, context, ns); + treeDumpContentHandler.endDocument(); + } + } catch (SAXParseException e) { + // ignore + } + stream.close(); + + if (skipLabel()) { // #errors + System.err.println("Premature end of test data."); + return false; + } + LinkedList expectedErrors = new LinkedList(); + BufferedReader br = new BufferedReader(new InputStreamReader( + new UntilHashInputStream(aggregateStream), "UTF-8")); + String line = null; + while ((line = br.readLine()) != null) { + expectedErrors.add(line); + } + + if (context != null) { + if (skipLabel()) { // #document-fragment + System.err.println("Premature end of test data."); + 
return false; + } + UntilHashInputStream stream2 = new UntilHashInputStream(aggregateStream); + while (stream2.read() != -1) { + // spin + } + } + if (hadScriptingDirective && skipLabel()) { // #script-on/off + System.err.println("Premature end of test data."); + return false; + } + + if (skipLabel()) { // #document + System.err.println("Premature end of test data."); + return false; + } + + StringBuilder expectedBuilder = new StringBuilder(); + br = new BufferedReader(new InputStreamReader( + new UntilHashInputStream(aggregateStream), "UTF-8")); + int ch; + while ((ch = br.read()) != -1) { + expectedBuilder.append((char)ch); + } + String expected = expectedBuilder.toString(); + String actual = sw.toString(); + + LinkedList actualErrors = leh.getErrors(); + + if (expected.equals(actual) || (streaming && leh.isFatal()) /* + * && expectedErrors.size() == + * actualErrors.size() + */) { + System.err.println("Success."); + // System.err.println(stream); + } else { + System.err.print("Failure.\nData:\n" + stream + "\nExpected:\n" + + expected + "Got: \n" + actual); + System.err.println("Expected errors:"); + for (String err : expectedErrors) { + System.err.println(err); + } + System.err.println("Actual errors:"); + for (String err : actualErrors) { + System.err.println(err); + } + } + } catch (Throwable t) { + System.err.println("Failure.\nData:\n" + stream); + throw t; + } + return true; + } + + private boolean skipLabel() throws IOException { + int b = aggregateStream.read(); + if (b == -1) { + return true; + } + for (;;) { + b = aggregateStream.read(); + if (b == -1) { + return true; + } else if (b == 0x0A) { + return false; + } + } + } + + /** + * @param args + * @throws Throwable + */ + public static void main(String[] args) throws Throwable { + for (int i = 0; i < args.length; i++) { + TreeTester tester = new TreeTester(new FileInputStream(args[i])); + tester.runTests(); + } + } + +} diff --git 
a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java new file mode 100644 index 0000000000..473a9f7f90 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import java.io.IOException; +import java.io.InputStream; + +public class UntilHashInputStream extends InputStream { + + private final StringBuilder builder = new StringBuilder(); + + private final InputStream delegate; + + private int buffer = -1; + + private boolean closed = false; + + /** + * @param delegate + * @throws IOException + */ + public UntilHashInputStream(final InputStream delegate) throws IOException { + this.delegate = delegate; + this.buffer = delegate.read(); + if (buffer == '#') { + closed = true; + } + } + + public int read() throws IOException { + if (closed) { + return -1; + } + int rv = buffer; + buffer = delegate.read(); + if (buffer == '#' && rv == '\n') { + // end of stream + closed = true; + return -1; + } else { + if (rv >= 0x20 && rv < 0x80) { + builder.append(((char)rv)); + } else { + builder.append("0x"); + builder.append(Integer.toHexString(rv)); + } + return rv; + } + } + + /** + * @see java.io.InputStream#close() + */ + @Override + public void close() throws IOException { + super.close(); + if (closed) { + return; + } + for (;;) { + int b = delegate.read(); + if (b == 0x23 || b == -1) { + break; + } + } + closed = true; + } + + /** + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return builder.toString(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java new file mode 100644 index 0000000000..0d23fda3ca --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.test; + +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import nu.validator.htmlparser.sax.XmlSerializer; + +public class XmlSerializerTester { + + + + /** + * @param args + * @throws SAXException + */ + public static void main(String[] args) throws SAXException { + AttributesImpl attrs = new AttributesImpl(); + XmlSerializer serializer = new XmlSerializer(System.out); + serializer.startDocument(); + serializer.startElement("1", "a", null, attrs); + serializer.startElement("1", "b", null, attrs); + serializer.endElement("1", "b", null); + serializer.startElement("2", "c", null, attrs); + serializer.endElement("2", "c", null); + attrs.addAttribute("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about", null, "CDATA", ""); + serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "d", null, attrs); + serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "d", null); + serializer.startPrefixMapping("rdf", "foo"); + serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "e", null, attrs); + 
serializer.startPrefixMapping("p0", "bar"); + serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "f", null, attrs); + serializer.characters("a\uD834\uDD21a\uD834a\uDD21a".toCharArray(), 0, 8); + serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "f", null); + serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "e", null); + + serializer.endPrefixMapping("rdf"); + serializer.endElement("1", "a", null); + serializer.endDocument(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java new file mode 100644 index 0000000000..66d706ae95 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/XomTest.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.test; + +import nu.xom.Attribute; +import nu.xom.Element; + +public class XomTest { + public static void main(String[] args) { + Element elt = new Element("html", "http://www.w3.org/1999/xhtml"); + elt.addAttribute(new Attribute("xmlns:foo", "bar")); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html new file mode 100644 index 0000000000..57809b84e0 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/package.html @@ -0,0 +1,29 @@ + + +Package Overview + + + +

Test drivers.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java new file mode 100644 index 0000000000..5e2cf1f58a --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2HTML.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.tools; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.TransformerException; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; +import nu.validator.htmlparser.sax.HtmlSerializer; +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.htmlparser.test.SystemErrErrorHandler; + +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +public class HTML2HTML { + + /** + * @param args + */ + public static void main(String[] args) throws SAXException, + ParserConfigurationException, MalformedURLException, IOException, + TransformerException { + InputStream in; + OutputStream out; + + switch (args.length) { + case 0: + in = System.in; + out = System.out; + break; + case 1: + in = new FileInputStream(args[0]); + out = System.out; + break; + case 2: + in = new FileInputStream(args[0]); + out = new FileOutputStream(args[1]); + break; + default: + System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); + System.exit(1); + return; + } + + ContentHandler serializer = new HtmlSerializer(out); + + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW); + + parser.setErrorHandler(new SystemErrErrorHandler()); + parser.setContentHandler(serializer); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", + serializer); + parser.parse(new InputSource(in)); + out.flush(); + out.close(); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java new file mode 100644 index 0000000000..57666f93b9 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/HTML2XML.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.tools; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.TransformerException; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlParser; +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.htmlparser.test.SystemErrErrorHandler; + +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +public class HTML2XML { + + /** + * @param args + */ + public static void main(String[] args) throws SAXException, + ParserConfigurationException, MalformedURLException, IOException, + TransformerException { + InputStream in; + OutputStream out; + + switch (args.length) { + case 0: + in = System.in; + out = System.out; + break; + case 1: + in = new FileInputStream(args[0]); + out = System.out; + break; + case 2: + in = new FileInputStream(args[0]); + out = new FileOutputStream(args[1]); + break; + default: + System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); + System.exit(1); + return; + } + + ContentHandler serializer = new XmlSerializer(out); + + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); + + parser.setErrorHandler(new SystemErrErrorHandler()); + parser.setContentHandler(serializer); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", + serializer); + parser.parse(new InputSource(in)); + out.flush(); + out.close(); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java new file mode 100644 index 0000000000..dad89a5b27 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2HTML.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.tools; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.TransformerException; + +import nu.validator.htmlparser.sax.HtmlSerializer; +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.htmlparser.test.SystemErrErrorHandler; + +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +public class XML2HTML { + + /** + * @param args + */ + public static void main(String[] args) throws SAXException, + ParserConfigurationException, MalformedURLException, IOException, + TransformerException { + InputStream in; + OutputStream out; + + switch (args.length) { + case 0: + in = System.in; + out = System.out; + break; + case 1: + in = new FileInputStream(args[0]); + out = System.out; + break; + case 2: + in = new FileInputStream(args[0]); + out = new FileOutputStream(args[1]); + break; + default: + System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); + System.exit(1); + return; + } + + ContentHandler serializer = new HtmlSerializer(out); + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setValidating(false); + XMLReader parser = factory.newSAXParser().getXMLReader(); + parser.setErrorHandler(new SystemErrErrorHandler()); + parser.setContentHandler(serializer); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", + serializer); + parser.parse(new InputSource(in)); + out.flush(); + out.close(); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java new file mode 100644 index 0000000000..2f6aa24d8c --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XML2XML.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.tools; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.TransformerException; + +import nu.validator.htmlparser.sax.NameCheckingXmlSerializer; +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.htmlparser.test.SystemErrErrorHandler; + +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +public class XML2XML { + + /** + * @param args + */ + public static void main(String[] args) throws SAXException, + ParserConfigurationException, MalformedURLException, IOException, + TransformerException { + InputStream in; + OutputStream out; + + switch (args.length) { + case 0: + in = System.in; + out = System.out; + break; + case 1: + in = new FileInputStream(args[0]); + out = System.out; + break; + case 2: + in = new FileInputStream(args[0]); + out = new FileOutputStream(args[1]); + break; + default: + System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); + System.exit(1); + return; + } + + ContentHandler serializer = new NameCheckingXmlSerializer(out); + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setValidating(false); + XMLReader parser = factory.newSAXParser().getXMLReader(); + parser.setErrorHandler(new SystemErrErrorHandler()); + parser.setContentHandler(serializer); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", + serializer); + parser.parse(new InputSource(in)); + out.flush(); + out.close(); + } +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java new file mode 100644 index 0000000000..05d8193c10 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.tools; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.MalformedURLException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.Templates; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.sax.SAXResult; +import javax.xml.transform.sax.SAXTransformerFactory; +import javax.xml.transform.sax.TemplatesHandler; +import javax.xml.transform.sax.TransformerHandler; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.dom.HtmlDocumentBuilder; +import nu.validator.htmlparser.sax.HtmlParser; +import nu.validator.htmlparser.sax.HtmlSerializer; +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.htmlparser.test.SystemErrErrorHandler; + +import org.w3c.dom.Document; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.ext.LexicalHandler; + +public class XSLT4HTML5 { + + private enum Mode { + STREAMING_SAX, BUFFERED_SAX, DOM, + } + + private static final String TEMPLATE = "--template="; + + private static final String INPUT_HTML = "--input-html="; + + private static final String INPUT_XML = "--input-xml="; + + private static final String OUTPUT_HTML = "--output-html="; + + private static final String OUTPUT_XML = "--output-xml="; + + private static final String 
MODE = "--mode="; + + /** + * @param args + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException + * @throws MalformedURLException + * @throws TransformerException + */ + public static void main(String[] args) throws SAXException, + ParserConfigurationException, MalformedURLException, IOException, TransformerException { + if (args.length == 0) { + System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); + System.exit(0); + } + String template = null; + String input = null; + boolean inputHtml = false; + String output = null; + boolean outputHtml = false; + Mode mode = null; + for (int i = 0; i < args.length; i++) { + String arg = args[i]; + if (arg.startsWith(TEMPLATE)) { + if (template == null) { + template = arg.substring(TEMPLATE.length()); + } else { + System.err.println("Tried to set template twice."); + System.exit(1); + } + } else if (arg.startsWith(INPUT_HTML)) { + if (input == null) { + input = arg.substring(INPUT_HTML.length()); + inputHtml = true; + } else { + System.err.println("Tried to set input twice."); + System.exit(2); + } + } else if (arg.startsWith(INPUT_XML)) { + if (input == null) { + input = arg.substring(INPUT_XML.length()); + inputHtml = false; + } else { + System.err.println("Tried to set input twice."); + System.exit(2); + } + } else if (arg.startsWith(OUTPUT_HTML)) { + if (output == null) { + output = arg.substring(OUTPUT_HTML.length()); + outputHtml = true; + } else { + System.err.println("Tried to set output twice."); + System.exit(3); + } + } else if (arg.startsWith(OUTPUT_XML)) { + if (output == null) { + output = arg.substring(OUTPUT_XML.length()); + outputHtml = false; + } else { + System.err.println("Tried to set output twice."); + System.exit(3); + } + } else if (arg.startsWith(MODE)) { + if (mode == null) { + String modeStr = arg.substring(MODE.length()); + if ("dom".equals(modeStr)) { + mode = Mode.DOM; + } else if 
("sax-buffered".equals(modeStr)) { + mode = Mode.BUFFERED_SAX; + } else if ("sax-streaming".equals(modeStr)) { + mode = Mode.STREAMING_SAX; + } else { + System.err.println("Unrecognized mode."); + System.exit(5); + } + } else { + System.err.println("Tried to set mode twice."); + System.exit(4); + } + } + } + + if (template == null) { + System.err.println("No template specified."); + System.exit(6); + } + if (input == null) { + System.err.println("No input specified."); + System.exit(7); + } + if (output == null) { + System.err.println("No output specified."); + System.exit(8); + } + if (mode == null) { + mode = Mode.BUFFERED_SAX; + } + + SystemErrErrorHandler errorHandler = new SystemErrErrorHandler(); + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setValidating(false); + XMLReader reader = factory.newSAXParser().getXMLReader(); + reader.setErrorHandler(errorHandler); + + SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(); + transformerFactory.setErrorListener(errorHandler); + TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler(); + reader.setContentHandler(templatesHandler); + reader.parse(new File(template).toURI().toASCIIString()); + + Templates templates = templatesHandler.getTemplates(); + + FileOutputStream outputStream = new FileOutputStream(output); + ContentHandler serializer; + if (outputHtml) { + serializer = new HtmlSerializer(outputStream); + } else { + serializer = new XmlSerializer(outputStream); + } + SAXResult result = new SAXResult(new XmlnsDropper(serializer)); + result.setLexicalHandler((LexicalHandler) serializer); + + if (mode == Mode.DOM) { + Document inputDoc; + DocumentBuilder builder; + if (inputHtml) { + builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET); + } else { + DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + try { + 
builder = builderFactory.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + throw new RuntimeException(e); + } + } + inputDoc = builder.parse(new File(input)); + DOMSource inputSource = new DOMSource(inputDoc, + new File(input).toURI().toASCIIString()); + Transformer transformer = templates.newTransformer(); + transformer.setErrorListener(errorHandler); + transformer.transform(inputSource, result); + } else { + if (inputHtml) { + reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); + if (mode == Mode.STREAMING_SAX) { + reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL); + } + } + TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates); + transformerHandler.setResult(result); + reader.setErrorHandler(errorHandler); + reader.setContentHandler(transformerHandler); + reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler); + reader.parse(new File(input).toURI().toASCIIString()); + } + outputStream.flush(); + outputStream.close(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java new file mode 100644 index 0000000000..b364cc5211 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.tools; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.sax.HtmlSerializer; +import nu.validator.htmlparser.xom.HtmlBuilder; +import nu.xom.Builder; +import nu.xom.Document; +import nu.xom.Element; +import nu.xom.Nodes; +import nu.xom.ParsingException; +import nu.xom.Serializer; +import nu.xom.ValidityException; +import nu.xom.converters.SAXConverter; +import nu.xom.xslt.XSLException; +import nu.xom.xslt.XSLTransform; + +import org.xml.sax.SAXException; + +public class XSLT4HTML5XOM { + + private static final String TEMPLATE = "--template="; + + private static final String INPUT_HTML = "--input-html="; + + private static final String INPUT_XML = "--input-xml="; + + private static final String OUTPUT_HTML = "--output-html="; + + private static final String OUTPUT_XML = "--output-xml="; + + /** + * @param args + * @throws IOException + * @throws ParsingException + * @throws ValidityException + * @throws XSLException + * @throws SAXException + */ + public static void main(String[] args) throws ValidityException, + ParsingException, IOException, XSLException, SAXException { + if (args.length == 0) { + System.out.println("--template=file --input-[html|xml]=file 
--output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); + System.exit(0); + } + String template = null; + String input = null; + boolean inputHtml = false; + String output = null; + boolean outputHtml = false; + for (int i = 0; i < args.length; i++) { + String arg = args[i]; + if (arg.startsWith(TEMPLATE)) { + if (template == null) { + template = arg.substring(TEMPLATE.length()); + } else { + System.err.println("Tried to set template twice."); + System.exit(1); + } + } else if (arg.startsWith(INPUT_HTML)) { + if (input == null) { + input = arg.substring(INPUT_HTML.length()); + inputHtml = true; + } else { + System.err.println("Tried to set input twice."); + System.exit(2); + } + } else if (arg.startsWith(INPUT_XML)) { + if (input == null) { + input = arg.substring(INPUT_XML.length()); + inputHtml = false; + } else { + System.err.println("Tried to set input twice."); + System.exit(2); + } + } else if (arg.startsWith(OUTPUT_HTML)) { + if (output == null) { + output = arg.substring(OUTPUT_HTML.length()); + outputHtml = true; + } else { + System.err.println("Tried to set output twice."); + System.exit(3); + } + } else if (arg.startsWith(OUTPUT_XML)) { + if (output == null) { + output = arg.substring(OUTPUT_XML.length()); + outputHtml = false; + } else { + System.err.println("Tried to set output twice."); + System.exit(3); + } + } + } + + if (template == null) { + System.err.println("No template specified."); + System.exit(6); + } + if (input == null) { + System.err.println("No input specified."); + System.exit(7); + } + if (output == null) { + System.err.println("No output specified."); + System.exit(8); + } + + Builder builder = new Builder(); + + Document transformationDoc = builder.build(new File(template)); + + XSLTransform transform = new XSLTransform(transformationDoc); + + FileOutputStream outputStream = new FileOutputStream(output); + + Document inputDoc; + if (inputHtml) { + builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET); + } + 
inputDoc = builder.build(new File(input)); + Nodes result = transform.transform(inputDoc); + Document outputDoc = new Document((Element) result.get(0)); + if (outputHtml) { + HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream); + SAXConverter converter = new SAXConverter(htmlSerializer); + converter.setLexicalHandler(htmlSerializer); + converter.convert(outputDoc); + } else { + Serializer serializer = new Serializer(outputStream); + serializer.write(outputDoc); + } + outputStream.flush(); + outputStream.close(); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java new file mode 100644 index 0000000000..0e6d4b1c26 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Content handler wrapper that forwards every SAX event to another handler
 * but removes attributes whose qualified name is {@code xmlns} or starts with
 * {@code xmlns:} from start tags. Quick and dirty hack to work around Xalan
 * xmlns weirdness.
 *
 * @version $Id$
 * @author hsivonen
 */
class XmlnsDropper implements ContentHandler {

    private final ContentHandler delegate;

    /**
     * Wraps a handler.
     *
     * @param delegate the handler that receives the filtered events
     */
    public XmlnsDropper(final ContentHandler delegate) {
        this.delegate = delegate;
    }

    /**
     * Tells whether a qualified attribute name is a namespace declaration.
     *
     * @param qName the qualified name; may be {@code null}
     * @return {@code true} for {@code xmlns} and for names starting with
     *         {@code xmlns:}, {@code false} otherwise (including {@code null})
     */
    private static boolean isXmlnsDeclaration(String qName) {
        return qName != null
                && ("xmlns".equals(qName) || qName.startsWith("xmlns:"));
    }

    /**
     * Forwards the start tag with a copy of the attributes that leaves out
     * the namespace declarations.
     *
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        AttributesImpl kept = new AttributesImpl();
        final int count = atts.getLength();
        for (int index = 0; index < count; index++) {
            String attrQName = atts.getQName(index);
            if (!isXmlnsDeclaration(attrQName)) {
                kept.addAttribute(atts.getURI(index), atts.getLocalName(index),
                        attrQName, atts.getType(index), atts.getValue(index));
            }
        }
        delegate.startElement(uri, localName, qName, kept);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        delegate.endElement(uri, localName, qName);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        delegate.startDocument();
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
        delegate.endDocument();
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        delegate.startPrefixMapping(prefix, uri);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
     */
    public void endPrefixMapping(String prefix) throws SAXException {
        delegate.endPrefixMapping(prefix);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        delegate.characters(ch, start, length);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        delegate.ignorableWhitespace(ch, start, length);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
     */
    public void processingInstruction(String target, String data) throws SAXException {
        delegate.processingInstruction(target, data);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
     */
    public void skippedEntity(String name) throws SAXException {
        delegate.skippedEntity(name);
    }

    /**
     * Forwarded unchanged.
     *
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator locator) {
        delegate.setDocumentLocator(locator);
    }

}

Demo apps.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java new file mode 100644 index 0000000000..df391d4b4d --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/PassThruPrinter.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.saxtree.test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; + +import nu.validator.htmlparser.sax.XmlSerializer; +import nu.validator.saxtree.Node; +import nu.validator.saxtree.TreeBuilder; +import nu.validator.saxtree.TreeParser; + +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.ext.LexicalHandler; + +public class PassThruPrinter { + public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setValidating(false); + XMLReader reader = factory.newSAXParser().getXMLReader(); + + TreeBuilder treeBuilder = new TreeBuilder(); + reader.setContentHandler(treeBuilder); + reader.setProperty("http://xml.org/sax/properties/lexical-handler", treeBuilder); + + File file = new File(args[0]); + InputSource is = new InputSource(new FileInputStream(file)); + is.setSystemId(file.toURI().toASCIIString()); + reader.parse(is); + + Node doc = treeBuilder.getRoot(); + + ContentHandler xmlSerializer = new XmlSerializer(System.out); + + TreeParser treeParser = new TreeParser(xmlSerializer, (LexicalHandler) xmlSerializer); + treeParser.parse(doc); + } + +} diff --git a/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html new file mode 100644 index 0000000000..57809b84e0 --- /dev/null +++ b/parser/html/java/htmlparser/test-src/nu/validator/saxtree/test/package.html @@ -0,0 +1,29 @@ + + +Package Overview + + + +

Test drivers.

+ + \ No newline at end of file diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java new file mode 100644 index 0000000000..337394a89f --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java @@ -0,0 +1,139 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import java.util.List; + +import japa.parser.ast.expr.AnnotationExpr; +import japa.parser.ast.expr.MarkerAnnotationExpr; +import japa.parser.ast.type.ReferenceType; +import japa.parser.ast.visitor.VoidVisitorAdapter; + +public class AnnotationHelperVisitor extends VoidVisitorAdapter { + + protected List currentAnnotations; + + protected boolean nsUri() { + return hasAnnotation("NsUri"); + } + + protected boolean prefix() { + return hasAnnotation("Prefix"); + } + + protected boolean local() { + return hasAnnotation("Local"); + } + + protected boolean literal() { + return hasAnnotation("Literal"); + } + + protected boolean inline() { + return hasAnnotation("Inline"); + } + + protected boolean noLength() { + return hasAnnotation("NoLength"); + } + + protected boolean auto() { + return hasAnnotation("Auto"); + } + + protected boolean virtual() { + return hasAnnotation("Virtual"); + } + + protected boolean isConst() { + return hasAnnotation("Const"); + } + + protected boolean characterName() { + return hasAnnotation("CharacterName"); + } + + private boolean hasAnnotation(String anno) { + if (currentAnnotations == null) { + return false; + } + for (AnnotationExpr ann : currentAnnotations) { + if (ann instanceof MarkerAnnotationExpr) { + MarkerAnnotationExpr marker = (MarkerAnnotationExpr) ann; + if (marker.getName().getName().equals(anno)) { + return true; + } + } + } + return 
false; + } + + protected Type convertType(japa.parser.ast.type.Type type, int modifiers) { + if (type instanceof ReferenceType) { + ReferenceType referenceType = (ReferenceType) type; + return new Type(convertTypeName(referenceType.getType().toString()), referenceType.getArrayCount(), noLength(), modifiers); + } else { + return new Type(convertTypeName(type.toString()), 0, false, modifiers); + } + } + + private String convertTypeName(String name) { + if ("String".equals(name)) { + if (local()) { + return "@Local"; + } + if (nsUri()) { + return "@NsUri"; + } + if (prefix()) { + return "@Prefix"; + } + if (literal()) { + return "@Literal"; + } + if (auto()) { + return "@Auto"; + } + if (characterName()) { + return "@CharacterName"; + } + } + return name; + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java new file mode 100644 index 0000000000..587b81604f --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java @@ -0,0 +1,70 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2010 + * the Initial Developer. 
/**
 * Input stream filter that removes every occurrence of the byte sequence
 * {@code "// CPPONLY:"} from the wrapped stream, so whatever follows the
 * marker is delivered as ordinary content.
 */
public class CppOnlyInputStream extends InputStream {

    /** The marker that is dropped from the stream. */
    private static final String DROP = "// CPPONLY:";

    /** Buffered view of the wrapped stream; buffering provides mark/reset. */
    private final InputStream delegate;

    /**
     * Wraps a stream.
     *
     * @param delegate the stream to filter; it is closed by {@link #close()}
     */
    public CppOnlyInputStream(InputStream delegate) {
        this.delegate = new BufferedInputStream(delegate);
    }

    /**
     * Returns the next byte, skipping over a marker that starts here.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override public int read() throws IOException {
        int c = delegate.read();
        if (c == DROP.charAt(0)) {
            // Possible start of the marker: remember the position so the
            // look-ahead can be undone when the rest does not match.
            delegate.mark(DROP.length());
            for (int i = 1; i < DROP.length(); ++i) {
                int d = delegate.read();
                if (d != DROP.charAt(i)) {
                    // Not the marker (or end of stream): rewind and deliver
                    // the first byte as it was.
                    delegate.reset();
                    return c;
                }
            }
            // The whole marker was consumed; continue with what follows it.
            return delegate.read();
        }
        return c;
    }

    /**
     * Closes the wrapped stream. Without this override the inherited no-op
     * {@code close()} would leave the underlying stream open.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override public void close() throws IOException {
        delegate.close();
    }

}
b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java @@ -0,0 +1,445 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008-2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
/**
 * Supplies the C++ spellings (type names, literals, macros, include lists)
 * that the translator prints in place of Java constructs, and optionally
 * records every atom it hands out in an atom list file.
 */
public class CppTypes {

    /**
     * The license for the atom list written by this program.
     */
    private static final String ATOM_LICENSE = "/*\n"
            + " * Copyright (c) 2008-2010 Mozilla Foundation\n"
            + " *\n"
            + " * Permission is hereby granted, free of charge, to any person obtaining a \n"
            + " * copy of this software and associated documentation files (the \"Software\"), \n"
            + " * to deal in the Software without restriction, including without limitation \n"
            + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n"
            + " * and/or sell copies of the Software, and to permit persons to whom the \n"
            + " * Software is furnished to do so, subject to the following conditions:\n"
            + " *\n"
            + " * The above copyright notice and this permission notice shall be included in \n"
            + " * all copies or substantial portions of the Software.\n"
            + " *\n"
            + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n"
            + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n"
            + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n"
            + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n"
            + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n"
            + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n"
            + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n";

    /**
     * Names that must never be used verbatim as an atom identifier; an
     * atom whose natural name is listed here gets a trailing underscore.
     */
    private static final Set<String> RESERVED_WORDS = new HashSet<String>(
            Arrays.asList("small", "for", "false", "true", "default",
                    "class", "switch", "union", "template", "int", "char",
                    "operator", "or", "and", "not", "xor", "unicode"));

    private static final String[] TREE_BUILDER_INCLUDES = { "nsContentUtils",
            "nsIAtom", "nsHtml5AtomTable", "nsITimer", "nsString",
            "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt", "jArray",
            "nsHtml5DocumentMode", "nsHtml5ArrayCopy", "nsHtml5Parser",
            "nsHtml5Atoms", "nsHtml5TreeOperation", "nsHtml5StateSnapshot",
            "nsHtml5StackNode", "nsHtml5TreeOpExecutor", "nsHtml5StreamParser",
            "nsAHtml5TreeBuilderState", "nsHtml5Highlighter",
            "nsHtml5PlainTextUtils", "nsHtml5ViewSourceUtils",
            "mozilla/Likely", "nsIContentHandle", "nsHtml5OplessBuilder" };

    private static final String[] TOKENIZER_INCLUDES = { "nsIAtom",
            "nsHtml5AtomTable", "nsString", "nsIContent", "nsTraceRefcnt",
            "jArray", "nsHtml5DocumentMode", "nsHtml5ArrayCopy",
            "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel",
            "nsHtml5Atoms", "nsAHtml5TreeBuilderState", "nsHtml5Macros",
            "nsHtml5Highlighter", "nsHtml5TokenizerLoopPolicies" };

    private static final String[] INCLUDES = { "nsIAtom", "nsHtml5AtomTable",
            "nsString", "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt",
            "jArray", "nsHtml5ArrayCopy", "nsAHtml5TreeBuilderState",
            "nsHtml5Atoms", "nsHtml5ByteReadable", "nsIUnicodeDecoder",
            "nsHtml5Macros", "nsIContentHandle" };

    private static final String[] OTHER_DECLARATIONS = {};

    private static final String[] TREE_BUILDER_OTHER_DECLARATIONS = {};

    private static final String[] NAMED_CHARACTERS_INCLUDES = { "jArray",
            "nscore", "nsDebug", "prlog", "mozilla/ArrayUtils" };

    private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser" };

    private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = {
            "MetaScanner", "Tokenizer", "TreeBuilder", "UTF16Buffer", };

    private static final String[] STATE_LOOP_POLICIES = {
            "nsHtml5ViewSourcePolicy", "nsHtml5SilentPolicy" };

    /** Maps each literal seen so far to the atom identifier chosen for it. */
    private final Map<String, String> atomMap = new HashMap<String, String>();

    /**
     * The identifiers already handed out (the values of {@link #atomMap}),
     * kept as a set so that collision checks do not scan the whole map.
     */
    private final Set<String> usedAtomNames = new HashSet<String>();

    /** Destination of the atom list, or <code>null</code> when none is written. */
    private final Writer atomWriter;

    /**
     * @param atomList
     *            file that receives the license header followed by one
     *            <code>HTML5_ATOM</code> line per new atom, or
     *            <code>null</code> to write no atom list
     * @throws RuntimeException
     *             wrapping the <code>IOException</code> if the file cannot
     *             be opened or the header cannot be written
     */
    public CppTypes(File atomList) {
        if (atomList == null) {
            atomWriter = null;
        } else {
            atomWriter = openAtomWriter(atomList);
        }
    }

    /**
     * Opens the atom list as UTF-8 and writes the license header. The file
     * stream is closed again if anything fails after it was opened.
     */
    private static Writer openAtomWriter(File atomList) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(atomList);
            Writer writer = new OutputStreamWriter(out, "utf-8");
            writer.write(ATOM_LICENSE);
            return writer;
        } catch (IOException e) {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // The original failure is the one worth reporting.
                }
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Flushes and closes the atom list, if one is being written. The writer
     * is closed even when the flush fails.
     *
     * @throws RuntimeException
     *             wrapping the <code>IOException</code> on I/O failure
     */
    public void finished() {
        if (atomWriter == null) {
            return;
        }
        try {
            try {
                atomWriter.flush();
            } finally {
                atomWriter.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Prefix prepended to Java class names to form C++ class names. */
    public String classPrefix() {
        return "nsHtml5";
    }

    public String booleanType() {
        return "bool";
    }

    public String byteType() {
        return "int8_t";
    }

    public String charType() {
        return "char16_t";
    }

    /**
     * Only used for named characters.
     *
     * @return the C++ unsigned 16-bit integer type name
     */
    public String unsignedShortType() {
        return "uint16_t";
    }

    public String intType() {
        return "int32_t";
    }

    public String stringType() {
        return "nsString*";
    }

    public String localType() {
        return "nsIAtom*";
    }

    public String prefixType() {
        return "nsIAtom*";
    }

    public String nsUriType() {
        return "int32_t";
    }

    public String falseLiteral() {
        return "false";
    }

    public String trueLiteral() {
        return "true";
    }

    public String nullLiteral() {
        return "nullptr";
    }

    public String encodingDeclarationHandlerType() {
        return "nsHtml5StreamParser*";
    }

    public String nodeType() {
        return "nsIContentHandle*";
    }

    public String xhtmlNamespaceLiteral() {
        return "kNameSpaceID_XHTML";
    }

    public String svgNamespaceLiteral() {
        return "kNameSpaceID_SVG";
    }

    public String xmlnsNamespaceLiteral() {
        return "kNameSpaceID_XMLNS";
    }

    public String xmlNamespaceLiteral() {
        return "kNameSpaceID_XML";
    }

    public String noNamespaceLiteral() {
        return "kNameSpaceID_None";
    }

    public String xlinkNamespaceLiteral() {
        return "kNameSpaceID_XLink";
    }

    public String mathmlNamespaceLiteral() {
        return "kNameSpaceID_MathML";
    }

    public String arrayTemplate() {
        return "jArray";
    }

    public String autoArrayTemplate() {
        return "autoJArray";
    }

    /**
     * Returns the C++ expression naming the atom for the given literal.
     * The first request for a literal picks an identifier for it and, when
     * an atom list is being written, appends an <code>HTML5_ATOM</code>
     * line; later requests for the same literal reuse the identifier.
     *
     * @param literal
     *            the atom's string value
     * @return <code>nsHtml5Atoms::</code> followed by the atom identifier
     * @throws RuntimeException
     *             wrapping the <code>IOException</code> if the atom list
     *             cannot be written
     */
    public String localForLiteral(String literal) {
        String atom = atomMap.get(literal);
        if (atom == null) {
            atom = createAtomName(literal);
            atomMap.put(literal, atom);
            usedAtomNames.add(atom);
            if (atomWriter != null) {
                try {
                    atomWriter.write("HTML5_ATOM(" + atom + ", \"" + literal
                            + "\")\n");
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }
        return "nsHtml5Atoms::" + atom;
    }

    /**
     * Derives an unused identifier from a literal: every character outside
     * <code>[a-zA-Z0-9_]</code> becomes an underscore, the empty string
     * becomes <code>emptystring</code>, and underscores are appended until
     * the name is neither reserved nor already taken.
     */
    private String createAtomName(String literal) {
        String candidate = literal.replaceAll("[^a-zA-Z0-9_]", "_");
        if ("".equals(candidate)) {
            candidate = "emptystring";
        }
        while (usedAtomNames.contains(candidate)
                || RESERVED_WORDS.contains(candidate)) {
            candidate = candidate + '_';
        }
        return candidate;
    }

    /** Wraps the literal in double quotes; the content is not escaped here. */
    public String stringForLiteral(String literal) {
        return '"' + literal + '"';
    }

    public String staticArrayTemplate() {
        return "staticJArray";
    }

    public String newArrayCreator() {
        return "newJArray";
    }

    /**
     * @param javaClass
     *            the Java class being translated
     * @return the header names (without <code>.h</code>) to include for it;
     *         <code>TreeBuilder</code> and <code>Tokenizer</code> have their
     *         own lists, every other class shares one
     */
    public String[] boilerplateIncludes(String javaClass) {
        if ("TreeBuilder".equals(javaClass)) {
            return TREE_BUILDER_INCLUDES;
        } else if ("Tokenizer".equals(javaClass)) {
            return TOKENIZER_INCLUDES;
        } else {
            return INCLUDES;
        }
    }

    public String[] boilerplateDeclarations(String javaClass) {
        if ("TreeBuilder".equals(javaClass)) {
            return TREE_BUILDER_OTHER_DECLARATIONS;
        } else {
            return OTHER_DECLARATIONS;
        }
    }

    public String[] namedCharactersIncludes() {
        return NAMED_CHARACTERS_INCLUDES;
    }

    public String[] boilerplateForwardDeclarations() {
        return FORWARD_DECLARATIONS;
    }

    public String documentModeHandlerType() {
        return "nsHtml5TreeBuilder*";
    }

    public String documentModeType() {
        return "nsHtml5DocumentMode";
    }

    public String arrayCopy() {
        return "nsHtml5ArrayCopy::arraycopy";
    }

    public String maxInteger() {
        return "INT32_MAX";
    }

    public String constructorBoilerplate(String className) {
        return "MOZ_COUNT_CTOR(" + className + ");";
    }

    public String destructorBoilderplate(String className) {
        return "MOZ_COUNT_DTOR(" + className + ");";
    }

    public String literalType() {
        return "const char*";
    }

    /**
     * @param javaClass
     *            a Java class name; may be <code>null</code>
     * @return whether the class is one of those listed as needing a
     *         supplement
     */
    public boolean hasSupplement(String javaClass) {
        // A plain membership test: independent of the order in which the
        // array happens to be written, and tolerant of null.
        return Arrays.asList(CLASSES_THAT_NEED_SUPPLEMENT).contains(javaClass);
    }

    public String internerType() {
        return "nsHtml5AtomTable*";
    }

    public String treeBuilderStateInterface() {
        return "nsAHtml5TreeBuilderState";
    }

    public String treeBuilderStateType() {
        return "nsAHtml5TreeBuilderState*";
    }

    public String arrayLengthMacro() {
        return "MOZ_ARRAY_LENGTH";
    }

    public String staticAssert() {
        return "PR_STATIC_ASSERT";
    }

    public String abortIfFalse() {
        return "NS_ABORT_IF_FALSE";
    }

    public String continueMacro() {
        return "NS_HTML5_CONTINUE";
    }

    public String breakMacro() {
        return "NS_HTML5_BREAK";
    }

    public String characterNameType() {
        return "nsHtml5CharacterName&";
    }

    public String characterNameTypeDeclaration() {
        return "nsHtml5CharacterName";
    }

    public String transition() {
        return "P::transition";
    }

    public String tokenizerErrorCondition() {
        return "P::reportErrors";
    }

    public String firstTransitionArg() {
        return "mViewSource";
    }

    public String errorHandler() {
        return this.unlikely() + "(mViewSource)";
    }

    public String unlikely() {
        return "MOZ_UNLIKELY";
    }

    public String completedCharacterReference() {
        return "P::completedNamedCharacterReference(mViewSource)";
    }

    public String[] stateLoopPolicies() {
        return STATE_LOOP_POLICIES;
    }

    public String assertionMacro() {
        return "MOZ_ASSERT";
    }

    public String releaseAssertionMacro() {
        return "MOZ_RELEASE_ASSERT";
    }

    public String crashMacro() {
        return "MOZ_CRASH";
    }
}
It was derived from DumpVisitor + * which was part of Java 1.5 parser and Abstract Syntax Tree and came with the following notice: + * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>. + */ +/* + * Created on 05/10/2006 + */ +package nu.validator.htmlparser.cpptranslate; + +import japa.parser.ast.BlockComment; +import japa.parser.ast.CompilationUnit; +import japa.parser.ast.ImportDeclaration; +import japa.parser.ast.LineComment; +import japa.parser.ast.Node; +import japa.parser.ast.PackageDeclaration; +import japa.parser.ast.TypeParameter; +import japa.parser.ast.body.AnnotationDeclaration; +import japa.parser.ast.body.AnnotationMemberDeclaration; +import japa.parser.ast.body.BodyDeclaration; +import japa.parser.ast.body.ClassOrInterfaceDeclaration; +import japa.parser.ast.body.ConstructorDeclaration; +import japa.parser.ast.body.EmptyMemberDeclaration; +import japa.parser.ast.body.EmptyTypeDeclaration; +import japa.parser.ast.body.EnumConstantDeclaration; +import japa.parser.ast.body.EnumDeclaration; +import japa.parser.ast.body.FieldDeclaration; +import japa.parser.ast.body.InitializerDeclaration; +import japa.parser.ast.body.JavadocComment; +import japa.parser.ast.body.MethodDeclaration; +import japa.parser.ast.body.ModifierSet; +import japa.parser.ast.body.Parameter; +import
japa.parser.ast.body.TypeDeclaration; +import japa.parser.ast.body.VariableDeclarator; +import japa.parser.ast.body.VariableDeclaratorId; +import japa.parser.ast.expr.ArrayAccessExpr; +import japa.parser.ast.expr.ArrayCreationExpr; +import japa.parser.ast.expr.ArrayInitializerExpr; +import japa.parser.ast.expr.AssignExpr; +import japa.parser.ast.expr.BinaryExpr; +import japa.parser.ast.expr.BooleanLiteralExpr; +import japa.parser.ast.expr.CastExpr; +import japa.parser.ast.expr.CharLiteralExpr; +import japa.parser.ast.expr.ClassExpr; +import japa.parser.ast.expr.ConditionalExpr; +import japa.parser.ast.expr.DoubleLiteralExpr; +import japa.parser.ast.expr.EnclosedExpr; +import japa.parser.ast.expr.Expression; +import japa.parser.ast.expr.FieldAccessExpr; +import japa.parser.ast.expr.InstanceOfExpr; +import japa.parser.ast.expr.IntegerLiteralExpr; +import japa.parser.ast.expr.IntegerLiteralMinValueExpr; +import japa.parser.ast.expr.LongLiteralExpr; +import japa.parser.ast.expr.LongLiteralMinValueExpr; +import japa.parser.ast.expr.MarkerAnnotationExpr; +import japa.parser.ast.expr.MemberValuePair; +import japa.parser.ast.expr.MethodCallExpr; +import japa.parser.ast.expr.NameExpr; +import japa.parser.ast.expr.NormalAnnotationExpr; +import japa.parser.ast.expr.NullLiteralExpr; +import japa.parser.ast.expr.ObjectCreationExpr; +import japa.parser.ast.expr.QualifiedNameExpr; +import japa.parser.ast.expr.SingleMemberAnnotationExpr; +import japa.parser.ast.expr.StringLiteralExpr; +import japa.parser.ast.expr.SuperExpr; +import japa.parser.ast.expr.ThisExpr; +import japa.parser.ast.expr.UnaryExpr; +import japa.parser.ast.expr.VariableDeclarationExpr; +import japa.parser.ast.stmt.AssertStmt; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.CatchClause; +import japa.parser.ast.stmt.ContinueStmt; +import japa.parser.ast.stmt.DoStmt; +import japa.parser.ast.stmt.EmptyStmt; +import 
/**
 * Accumulates generated source text and tracks the current indentation
 * depth. Indentation is inserted lazily, in front of the first text
 * printed after a line break.
 */
public class SourcePrinter {

    // NOTE(review): the unit is reproduced exactly as it appears in this
    // copy of the source; confirm against upstream in case runs of
    // whitespace were collapsed.
    private static final String INDENT_UNIT = " ";

    /** Number of indent units placed in front of each new line. */
    private int depth = 0;

    /** True once the current line has received its indentation. */
    private boolean lineStarted = false;

    private final StringBuilder text = new StringBuilder();

    /** Increases the indentation depth by one unit. */
    public void indent() {
        depth += 1;
    }

    /** Decreases the indentation depth by one unit. */
    public void unindent() {
        depth -= 1;
    }

    /**
     * Appends the text as is and marks the line as not yet indented, so
     * the next {@link #print(String)} emits the indentation first.
     */
    public void printWithoutIndent(String arg) {
        text.append(arg);
        lineStarted = false;
    }

    /**
     * Appends the text, preceded by the current indentation if nothing has
     * been indented on this line yet.
     */
    public void print(String arg) {
        if (!lineStarted) {
            for (int remaining = depth; remaining > 0; remaining--) {
                text.append(INDENT_UNIT);
            }
            lineStarted = true;
        }
        text.append(arg);
    }

    /** Prints the text and then ends the line. */
    public void printLn(String arg) {
        print(arg);
        printLn();
    }

    /** Ends the current line. */
    public void printLn() {
        lineStarted = false;
        text.append("\n");
    }

    /** @return everything printed so far */
    public String getSource() {
        return text.toString();
    }

    @Override public String toString() {
        return getSource();
    }
}
members) { + if ("Tokenizer".equals(javaClassName) + && member instanceof MethodDeclaration + && "stateLoop".equals(((MethodDeclaration) member).getName())) { + reportTransitions = true; + } + member.accept(this, arg); + reportTransitions = false; + } + } + + private void printTypeArgs(List args, LocalSymbolTable arg) { + // if (args != null) { + // printer.print("<"); + // for (Iterator i = args.iterator(); i.hasNext();) { + // Type t = i.next(); + // t.accept(this, arg); + // if (i.hasNext()) { + // printer.print(", "); + // } + // } + // printer.print(">"); + // } + } + + private void printTypeParameters(List args, + LocalSymbolTable arg) { + // if (args != null) { + // printer.print("<"); + // for (Iterator i = args.iterator(); i.hasNext();) { + // TypeParameter t = i.next(); + // t.accept(this, arg); + // if (i.hasNext()) { + // printer.print(", "); + // } + // } + // printer.print(">"); + // } + } + + public void visit(Node n, LocalSymbolTable arg) { + throw new IllegalStateException(n.getClass().getName()); + } + + public void visit(CompilationUnit n, LocalSymbolTable arg) { + if (n.getTypes() != null) { + for (Iterator i = n.getTypes().iterator(); i.hasNext();) { + i.next().accept(this, arg); + printer.printLn(); + if (i.hasNext()) { + printer.printLn(); + } + } + } + } + + public void visit(PackageDeclaration n, LocalSymbolTable arg) { + throw new IllegalStateException(n.getClass().getName()); + } + + public void visit(NameExpr n, LocalSymbolTable arg) { + if ("mappingLangToXmlLang".equals(n.getName())) { + printer.print("0"); + } else if ("LANG_NS".equals(n.getName())) { + printer.print("ALL_NO_NS"); + } else if ("LANG_PREFIX".equals(n.getName())) { + printer.print("ALL_NO_PREFIX"); + } else if ("HTML_LOCAL".equals(n.getName())) { + printer.print(cppTypes.localForLiteral("html")); + } else if ("documentModeHandler".equals(n.getName())) { + printer.print("this"); + } else if ("errorHandler".equals(n.getName())) { + printer.print(cppTypes.errorHandler()); + 
} else { + String prefixedName = javaClassName + "." + n.getName(); + String constant = symbolTable.cppDefinesByJavaNames.get(prefixedName); + if (constant != null) { + printer.print(constant); + } else { + printer.print(n.getName()); + } + } + } + + public void visit(QualifiedNameExpr n, LocalSymbolTable arg) { + n.getQualifier().accept(this, arg); + printer.print("."); + printer.print(n.getName()); + } + + public void visit(ImportDeclaration n, LocalSymbolTable arg) { + throw new IllegalStateException(n.getClass().getName()); + } + + public void visit(ClassOrInterfaceDeclaration n, LocalSymbolTable arg) { + javaClassName = n.getName(); + className = cppTypes.classPrefix() + javaClassName; + definePrefix = makeDefinePrefix(className); + + startClassDeclaration(); + + if (n.getMembers() != null) { + printMembers(n.getMembers(), arg); + } + + endClassDeclaration(); + } + + private String makeDefinePrefix(String name) { + StringBuilder sb = new StringBuilder(); + boolean prevWasLowerCase = true; + for (int i = 0; i < name.length(); i++) { + char c = name.charAt(i); + if (c >= 'a' && c <= 'z') { + sb.append((char) (c - 0x20)); + prevWasLowerCase = true; + } else if (c >= 'A' && c <= 'Z') { + if (prevWasLowerCase) { + sb.append('_'); + } + sb.append(c); + prevWasLowerCase = false; + } else if (c >= '0' && c <= '9') { + sb.append(c); + prevWasLowerCase = false; + } + } + sb.append('_'); + return sb.toString(); + } + + protected void endClassDeclaration() { + printer.printLn("void"); + printer.print(className); + printer.printLn("::initializeStatics()"); + printer.printLn("{"); + printer.print(staticInitializerPrinter.getSource()); + printer.printLn("}"); + printer.printLn(); + + printer.printLn("void"); + printer.print(className); + printer.printLn("::releaseStatics()"); + printer.printLn("{"); + printer.indent(); + for (String del : staticReleases) { + printer.print(del); + printer.printLn(";"); + } + printer.unindent(); + printer.printLn("}"); + printer.printLn(); + + 
if (cppTypes.hasSupplement(javaClassName)) { + printer.printLn(); + printer.print("#include \""); + printer.print(className); + printer.printLn("CppSupplement.h\""); + } + } + + protected void startClassDeclaration() { + printer.print("#define "); + printer.print(className); + printer.printLn("_cpp__"); + printer.printLn(); + + String[] incs = cppTypes.boilerplateIncludes(javaClassName); + for (int i = 0; i < incs.length; i++) { + String inc = incs[i]; + printer.print("#include \""); + printer.print(inc); + printer.printLn(".h\""); + } + + printer.printLn(); + + for (int i = 0; i < Main.H_LIST.length; i++) { + String klazz = Main.H_LIST[i]; + if (!klazz.equals(javaClassName)) { + printer.print("#include \""); + printer.print(cppTypes.classPrefix()); + printer.print(klazz); + printer.printLn(".h\""); + } + } + + printer.printLn(); + printer.print("#include \""); + printer.print(className); + printer.printLn(".h\""); + if ("AttributeName".equals(javaClassName) + || "ElementName".equals(javaClassName)) { + printer.print("#include \""); + printer.print(cppTypes.classPrefix()); + printer.print("Releasable"); + printer.print(javaClassName); + printer.printLn(".h\""); + } + printer.printLn(); + } + + public void visit(EmptyTypeDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + printer.print(";"); + } + + public void visit(JavadocComment n, LocalSymbolTable arg) { + printer.print("/**"); + printer.print(n.getContent()); + printer.printLn("*/"); + } + + public void visit(ClassOrInterfaceType n, LocalSymbolTable arg) { + if (n.getScope() != null) { + n.getScope().accept(this, arg); + printer.print("."); + throw new IllegalStateException("Can't translate nested classes."); + } + String name = n.getName(); + if ("String".equals(name)) { + if (local()) { + name = cppTypes.localType(); + } else if (prefix()) { + name = cppTypes.prefixType(); + } else if (nsUri()) { + name = cppTypes.nsUriType(); + } else if 
(literal()) { + name = cppTypes.literalType(); + } else if (characterName()) { + name = cppTypes.characterNameType(); + } else { + name = cppTypes.stringType(); + } + } else if ("T".equals(name) || "Object".equals(name)) { + name = cppTypes.nodeType(); + } else if ("TokenHandler".equals(name)) { + name = cppTypes.classPrefix() + "TreeBuilder*"; + } else if ("EncodingDeclarationHandler".equals(name)) { + name = cppTypes.encodingDeclarationHandlerType(); + } else if ("Interner".equals(name)) { + name = cppTypes.internerType(); + } else if ("TreeBuilderState".equals(name)) { + name = cppTypes.treeBuilderStateType(); + } else if ("DocumentModeHandler".equals(name)) { + name = cppTypes.documentModeHandlerType(); + } else if ("DocumentMode".equals(name)) { + name = cppTypes.documentModeType(); + } else { + name = cppTypes.classPrefix() + name + (suppressPointer ? "" : "*"); + } + printer.print(name); + printTypeArgs(n.getTypeArgs(), arg); + } + + protected boolean inHeader() { + return false; + } + + public void visit(TypeParameter n, LocalSymbolTable arg) { + printer.print(n.getName()); + if (n.getTypeBound() != null) { + printer.print(" extends "); + for (Iterator i = n.getTypeBound().iterator(); i.hasNext();) { + ClassOrInterfaceType c = i.next(); + c.accept(this, arg); + if (i.hasNext()) { + printer.print(" & "); + } + } + } + } + + public void visit(PrimitiveType n, LocalSymbolTable arg) { + switch (n.getType()) { + case Boolean: + printer.print(cppTypes.booleanType()); + break; + case Byte: + printer.print(cppTypes.byteType()); + break; + case Char: + printer.print(cppTypes.charType()); + break; + case Double: + throw new IllegalStateException("Unsupported primitive."); + case Float: + throw new IllegalStateException("Unsupported primitive."); + case Int: + printer.print(cppTypes.intType()); + break; + case Long: + throw new IllegalStateException("Unsupported primitive."); + case Short: + throw new IllegalStateException("Unsupported primitive."); + } + } + + public 
void visit(ReferenceType n, LocalSymbolTable arg) { + if (isConst()) { + printer.print("const "); + } + if (noLength()) { + n.getType().accept(this, arg); + for (int i = 0; i < n.getArrayCount(); i++) { + if (!inPrimitiveNoLengthFieldDeclarator) { + printer.print("*"); + } + } + } else { + for (int i = 0; i < n.getArrayCount(); i++) { + if (inStatic) { + printer.print(cppTypes.staticArrayTemplate()); + } else { + if (auto()) { + printer.print(cppTypes.autoArrayTemplate()); + } else { + printer.print(cppTypes.arrayTemplate()); + } + } + printer.print("<"); + } + n.getType().accept(this, arg); + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print(","); + printer.print(cppTypes.intType()); + printer.print(">"); + } + } + } + + public void visit(WildcardType n, LocalSymbolTable arg) { + printer.print("?"); + if (n.getExtends() != null) { + printer.print(" extends "); + n.getExtends().accept(this, arg); + } + if (n.getSuper() != null) { + printer.print(" super "); + n.getSuper().accept(this, arg); + } + } + + public void visit(FieldDeclaration n, LocalSymbolTable arg) { + currentAnnotations = n.getAnnotations(); + fieldDeclaration(n, arg); + currentAnnotations = null; + } + + protected boolean isNonToCharArrayMethodCall(Expression exp) { + if (exp instanceof MethodCallExpr) { + MethodCallExpr mce = (MethodCallExpr) exp; + return !"toCharArray".equals(mce.getName()); + } else { + return false; + } + } + + protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { + tempPrinterHolder = printer; + printer = staticInitializerPrinter; + int modifiers = n.getModifiers(); + List variables = n.getVariables(); + VariableDeclarator declarator = variables.get(0); + if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) + && !(n.getType() instanceof PrimitiveType) + && declarator.getInit() != null) { + if (n.getType() instanceof ReferenceType) { + ReferenceType rt = (ReferenceType) n.getType(); + currentArrayCount = rt.getArrayCount(); + if 
(currentArrayCount > 0) { + if (currentArrayCount != 1) { + throw new IllegalStateException( + "Multidimensional arrays not supported. " + n); + } + if (noLength()) { + if (rt.getType() instanceof PrimitiveType) { + inPrimitiveNoLengthFieldDeclarator = true; + printer = tempPrinterHolder; + n.getType().accept(this, arg); + printer.print(" "); + printer.print(className); + printer.print("::"); + declarator.getId().accept(this, arg); + + printer.print(" = "); + + declarator.getInit().accept(this, arg); + + printer.printLn(";"); + printer = staticInitializerPrinter; + } else { + printer = tempPrinterHolder; + n.getType().accept(this, arg); + printer.print(" "); + printer.print(className); + printer.print("::"); + declarator.getId().accept(this, arg); + + printer.printLn(" = 0;"); + printer = staticInitializerPrinter; + + staticReleases.add("delete[] " + + declarator.getId().getName()); + + ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); + + declarator.getId().accept(this, arg); + printer.print(" = new "); + // suppressPointer = true; + rt.getType().accept(this, arg); + // suppressPointer = false; + printer.print("["); + printer.print("" + aie.getValues().size()); + printer.printLn("];"); + + printArrayInit(declarator.getId(), aie.getValues(), + arg); + } + } else if ((rt.getType() instanceof PrimitiveType) || "String".equals(rt.getType().toString())) { + printer = tempPrinterHolder; + printer.print("static "); + rt.getType().accept(this, arg); + printer.print(" const "); + declarator.getId().accept(this, arg); + printer.print("_DATA[] = "); + declarator.getInit().accept(this, arg); + printer.printLn(";"); + printer.print(cppTypes.staticArrayTemplate()); + printer.print("<"); + suppressPointer = true; + rt.getType().accept(this, arg); + suppressPointer = false; + printer.print(","); + printer.print(cppTypes.intType()); + printer.print("> "); + printer.print(className); + printer.print("::"); + declarator.getId().accept(this, arg); + 
printer.print(" = { "); + declarator.getId().accept(this, arg); + printer.print("_DATA, "); + printer.print(cppTypes.arrayLengthMacro()); + printer.print("("); + declarator.getId().accept(this, arg); + printer.printLn("_DATA) };"); + printer = staticInitializerPrinter; + } else if (isNonToCharArrayMethodCall(declarator.getInit())) { + staticReleases.add(declarator.getId().getName() + + ".release()"); + declarator.getId().accept(this, arg); + printer.print(" = "); + if (declarator.getInit() instanceof ArrayInitializerExpr) { + + ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); + printer.print(cppTypes.arrayTemplate()); + printer.print("<"); + suppressPointer = true; + rt.getType().accept(this, arg); + suppressPointer = false; + printer.print(","); + printer.print(cppTypes.intType()); + printer.print(">::"); + printer.print(cppTypes.newArrayCreator()); + printer.print("("); + printer.print("" + aie.getValues().size()); + printer.printLn(");"); + printArrayInit(declarator.getId(), aie.getValues(), + arg); + } else { + declarator.getInit().accept(this, arg); + printer.printLn(";"); + } + } + } else { + if (ModifierSet.isStatic(modifiers)) { + printer = tempPrinterHolder; + n.getType().accept(this, arg); + printer.print(" "); + printer.print(className); + printer.print("::"); + if ("AttributeName".equals(n.getType().toString())) { + printer.print("ATTR_"); + } else if ("ElementName".equals(n.getType().toString())) { + printer.print("ELT_"); + } + declarator.getId().accept(this, arg); + printer.print(" = "); + printer.print(cppTypes.nullLiteral()); + printer.printLn(";"); + printer = staticInitializerPrinter; + } + + if ("AttributeName".equals(n.getType().toString())) { + printer.print("ATTR_"); + staticReleases.add("delete ATTR_" + + declarator.getId().getName()); + } else if ("ElementName".equals(n.getType().toString())) { + printer.print("ELT_"); + staticReleases.add("delete ELT_" + + declarator.getId().getName()); + } else { + 
staticReleases.add("delete " + + declarator.getId().getName()); + } + declarator.accept(this, arg); + printer.printLn(";"); + } + } else { + throw new IllegalStateException( + "Non-reference, non-primitive fields not supported."); + } + } + currentArrayCount = 0; + printer = tempPrinterHolder; + inPrimitiveNoLengthFieldDeclarator = false; + } + + private void printArrayInit(VariableDeclaratorId variableDeclaratorId, + List values, LocalSymbolTable arg) { + for (int i = 0; i < values.size(); i++) { + Expression exp = values.get(i); + variableDeclaratorId.accept(this, arg); + printer.print("["); + printer.print("" + i); + printer.print("] = "); + if (exp instanceof NameExpr) { + if ("AttributeName".equals(javaClassName)) { + printer.print("ATTR_"); + } else if ("ElementName".equals(javaClassName)) { + printer.print("ELT_"); + } + } + exp.accept(this, arg); + printer.printLn(";"); + } + } + + public void visit(VariableDeclarator n, LocalSymbolTable arg) { + n.getId().accept(this, arg); + + if (n.getInit() != null) { + printer.print(" = "); + n.getInit().accept(this, arg); + } + } + + public void visit(VariableDeclaratorId n, LocalSymbolTable arg) { + printer.print(n.getName()); + if (noLength()) { + for (int i = 0; i < currentArrayCount; i++) { + if (inPrimitiveNoLengthFieldDeclarator) { + printer.print("[]"); + } + } + } + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("[]"); + } + } + + public void visit(ArrayInitializerExpr n, LocalSymbolTable arg) { + printer.print("{"); + if (n.getValues() != null) { + printer.print(" "); + for (Iterator i = n.getValues().iterator(); i.hasNext();) { + Expression expr = i.next(); + expr.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + printer.print(" "); + } + printer.print("}"); + } + + public void visit(VoidType n, LocalSymbolTable arg) { + printer.print("void"); + } + + public void visit(ArrayAccessExpr n, LocalSymbolTable arg) { + n.getName().accept(this, arg); + printer.print("["); + 
n.getIndex().accept(this, arg); + printer.print("]"); + } + + public void visit(ArrayCreationExpr n, LocalSymbolTable arg) { + // printer.print("new "); + // n.getType().accept(this, arg); + // printTypeArgs(n.getTypeArgs(), arg); + + if (n.getDimensions() != null) { + if (noLength()) { + for (Expression dim : n.getDimensions()) { + printer.print("new "); + n.getType().accept(this, arg); + printer.print("["); + dim.accept(this, arg); + printer.print("]"); + } + } else { + for (Expression dim : n.getDimensions()) { + printer.print(cppTypes.arrayTemplate()); + printer.print("<"); + n.getType().accept(this, arg); + printer.print(","); + printer.print(cppTypes.intType()); + printer.print(">::"); + printer.print(cppTypes.newArrayCreator()); + printer.print("("); + dim.accept(this, arg); + printer.print(")"); + } + } + if (n.getArrayCount() > 0) { + throw new IllegalStateException( + "Nested array allocation not supported. " + + n.toString()); + } + } else { + throw new IllegalStateException( + "Array initializer as part of array creation not supported. 
" + + n.toString()); + } + } + + public void visit(AssignExpr n, LocalSymbolTable arg) { + if (inConstructorBody) { + n.getTarget().accept(this, arg); + printer.print("("); + n.getValue().accept(this, arg); + printer.print(")"); + } else { + n.getTarget().accept(this, arg); + printer.print(" "); + switch (n.getOperator()) { + case assign: + printer.print("="); + break; + case and: + printer.print("&="); + break; + case or: + printer.print("|="); + break; + case xor: + printer.print("^="); + break; + case plus: + printer.print("+="); + break; + case minus: + printer.print("-="); + break; + case rem: + printer.print("%="); + break; + case slash: + printer.print("/="); + break; + case star: + printer.print("*="); + break; + case lShift: + printer.print("<<="); + break; + case rSignedShift: + printer.print(">>="); + break; + case rUnsignedShift: + printer.print(">>>="); + break; + } + printer.print(" "); + n.getValue().accept(this, arg); + } + } + + public void visit(BinaryExpr n, LocalSymbolTable arg) { + Expression right = n.getRight(); + switch (n.getOperator()) { + case notEquals: + if (right instanceof NullLiteralExpr) { + printer.print("!!"); + n.getLeft().accept(this, arg); + return; + } else if (right instanceof IntegerLiteralExpr) { + IntegerLiteralExpr ile = (IntegerLiteralExpr) right; + if ("0".equals(ile.getValue())) { + n.getLeft().accept(this, arg); + return; + } + } + case equals: + if (right instanceof NullLiteralExpr) { + printer.print("!"); + n.getLeft().accept(this, arg); + return; + } else if (right instanceof IntegerLiteralExpr) { + IntegerLiteralExpr ile = (IntegerLiteralExpr) right; + if ("0".equals(ile.getValue())) { + printer.print("!"); + n.getLeft().accept(this, arg); + return; + } + } + default: + // fall thru + } + + n.getLeft().accept(this, arg); + printer.print(" "); + switch (n.getOperator()) { + case or: + printer.print("||"); + break; + case and: + printer.print("&&"); + break; + case binOr: + printer.print("|"); + break; + case 
binAnd: + printer.print("&"); + break; + case xor: + printer.print("^"); + break; + case equals: + printer.print("=="); + break; + case notEquals: + printer.print("!="); + break; + case less: + printer.print("<"); + break; + case greater: + printer.print(">"); + break; + case lessEquals: + printer.print("<="); + break; + case greaterEquals: + printer.print(">="); + break; + case lShift: + printer.print("<<"); + break; + case rSignedShift: + printer.print(">>"); + break; + case rUnsignedShift: + printer.print(">>>"); + break; + case plus: + printer.print("+"); + break; + case minus: + printer.print("-"); + break; + case times: + printer.print("*"); + break; + case divide: + printer.print("/"); + break; + case remainder: + printer.print("%"); + break; + } + printer.print(" "); + n.getRight().accept(this, arg); + } + + public void visit(CastExpr n, LocalSymbolTable arg) { + printer.print("("); + n.getType().accept(this, arg); + printer.print(") "); + n.getExpr().accept(this, arg); + } + + public void visit(ClassExpr n, LocalSymbolTable arg) { + n.getType().accept(this, arg); + printer.print(".class"); + } + + public void visit(ConditionalExpr n, LocalSymbolTable arg) { + n.getCondition().accept(this, arg); + printer.print(" ? 
"); + n.getThenExpr().accept(this, arg); + printer.print(" : "); + n.getElseExpr().accept(this, arg); + } + + public void visit(EnclosedExpr n, LocalSymbolTable arg) { + printer.print("("); + n.getInner().accept(this, arg); + printer.print(")"); + } + + public void visit(FieldAccessExpr n, LocalSymbolTable arg) { + Expression scope = n.getScope(); + String field = n.getField(); + if (inConstructorBody && (scope instanceof ThisExpr)) { + printer.print(field); + } else if ("length".equals(field) && !(scope instanceof ThisExpr)) { + scope.accept(this, arg); + printer.print(".length"); + } else if ("MAX_VALUE".equals(field) + && "Integer".equals(scope.toString())) { + printer.print(cppTypes.maxInteger()); + } else { + String clazzName = classNameFromExpression(scope); + if (clazzName == null) { + if ("DocumentMode".equals(scope.toString())) { + // printer.print(cppTypes.documentModeType()); + // printer.print("."); + } else { + scope.accept(this, arg); + printer.print("->"); + } + } else { + String prefixedName = clazzName + "." 
+ field; + String constant = symbolTable.cppDefinesByJavaNames.get(prefixedName); + if (constant != null) { + printer.print(constant); + return; + } else { + printer.print(cppTypes.classPrefix()); + printer.print(clazzName); + printer.print("::"); + if (symbolTable.isNotAnAttributeOrElementName(field)) { + if ("AttributeName".equals(clazzName)) { + printer.print("ATTR_"); + } else if ("ElementName".equals(clazzName)) { + printer.print("ELT_"); + } + } + } + } + printer.print(field); + } + } + + public void visit(InstanceOfExpr n, LocalSymbolTable arg) { + n.getExpr().accept(this, arg); + printer.print(" instanceof "); + n.getType().accept(this, arg); + } + + public void visit(CharLiteralExpr n, LocalSymbolTable arg) { + printCharLiteral(n.getValue()); + } + + private void printCharLiteral(String val) { + if (val.length() != 1) { + printer.print("'"); + printer.print(val); + printer.print("'"); + return; + } + char c = val.charAt(0); + switch (c) { + case 0: + printer.print("'\\0'"); + break; + case '\n': + printer.print("'\\n'"); + break; + case '\t': + printer.print("'\\t'"); + break; + case 0xB: + printer.print("'\\v'"); + break; + case '\b': + printer.print("'\\b'"); + break; + case '\r': + printer.print("'\\r'"); + break; + case 0xC: + printer.print("'\\f'"); + break; + case 0x7: + printer.print("'\\a'"); + break; + case '\\': + printer.print("'\\\\'"); + break; + case '?': + printer.print("'\\?'"); + break; + case '\'': + printer.print("'\\''"); + break; + case '"': + printer.print("'\\\"'"); + break; + default: + if (c >= 0x20 && c <= 0x7F) { + printer.print("'" + c); + printer.print("'"); + } else { + printer.print("0x"); + printer.print(Integer.toHexString(c)); + } + break; + } + } + + public void visit(DoubleLiteralExpr n, LocalSymbolTable arg) { + printer.print(n.getValue()); + } + + public void visit(IntegerLiteralExpr n, LocalSymbolTable arg) { + printer.print(n.getValue()); + } + + public void visit(LongLiteralExpr n, LocalSymbolTable arg) { + 
printer.print(n.getValue()); + } + + public void visit(IntegerLiteralMinValueExpr n, LocalSymbolTable arg) { + printer.print(n.getValue()); + } + + public void visit(LongLiteralMinValueExpr n, LocalSymbolTable arg) { + printer.print(n.getValue()); + } + + public void visit(StringLiteralExpr n, LocalSymbolTable arg) { + String val = n.getValue(); + if ("http://www.w3.org/1999/xhtml".equals(val)) { + printer.print(cppTypes.xhtmlNamespaceLiteral()); + } else if ("http://www.w3.org/2000/svg".equals(val)) { + printer.print(cppTypes.svgNamespaceLiteral()); + } else if ("http://www.w3.org/2000/xmlns/".equals(val)) { + printer.print(cppTypes.xmlnsNamespaceLiteral()); + } else if ("http://www.w3.org/XML/1998/namespace".equals(val)) { + printer.print(cppTypes.xmlNamespaceLiteral()); + } else if ("http://www.w3.org/1999/xlink".equals(val)) { + printer.print(cppTypes.xlinkNamespaceLiteral()); + } else if ("http://www.w3.org/1998/Math/MathML".equals(val)) { + printer.print(cppTypes.mathmlNamespaceLiteral()); + } else if ("".equals(val) && "AttributeName".equals(javaClassName)) { + printer.print(cppTypes.noNamespaceLiteral()); + } else if (val.startsWith("-/") || val.startsWith("+//") + || val.startsWith("http://") || val.startsWith("XSLT")) { + printer.print(cppTypes.stringForLiteral(val)); + } else if (("hidden".equals(val) || "isindex".equals(val) + || "text/html".equals(val) + || "application/xhtml+xml".equals(val) || "content-type".equals(val)) + && "TreeBuilder".equals(javaClassName)) { + printer.print(cppTypes.stringForLiteral(val)); + } else if ("isQuirky".equals(currentMethod) && "html".equals(val)) { + printer.print(cppTypes.stringForLiteral(val)); + } else { + printer.print(cppTypes.localForLiteral(val)); + } + } + + public void visit(BooleanLiteralExpr n, LocalSymbolTable arg) { + if (n.getValue()) { + printer.print(cppTypes.trueLiteral()); + } else { + printer.print(cppTypes.falseLiteral()); + } + } + + public void visit(NullLiteralExpr n, LocalSymbolTable arg) { + 
printer.print(cppTypes.nullLiteral()); + } + + public void visit(ThisExpr n, LocalSymbolTable arg) { + if (n.getClassExpr() != null) { + n.getClassExpr().accept(this, arg); + printer.print("."); + } + printer.print("this"); + } + + public void visit(SuperExpr n, LocalSymbolTable arg) { + if (n.getClassExpr() != null) { + n.getClassExpr().accept(this, arg); + printer.print("."); + } + printer.print("super"); + } + + public void visit(MethodCallExpr n, LocalSymbolTable arg) { + if ("releaseArray".equals(n.getName()) + && "Portability".equals(n.getScope().toString())) { + n.getArgs().get(0).accept(this, arg); + printer.print(".release()"); + } else if ("deleteArray".equals(n.getName()) + && "Portability".equals(n.getScope().toString())) { + printer.print("delete[] "); + n.getArgs().get(0).accept(this, arg); + } else if ("delete".equals(n.getName()) + && "Portability".equals(n.getScope().toString())) { + printer.print("delete "); + n.getArgs().get(0).accept(this, arg); + } else if (("retainElement".equals(n.getName()) || "releaseElement".equals(n.getName())) + && "Portability".equals(n.getScope().toString())) { + // ignore for now + } else if ("transition".equals(n.getName()) + && n.getScope() == null) { + visitTransition(n, arg); + } else if ("arraycopy".equals(n.getName()) + && "System".equals(n.getScope().toString())) { + printer.print(cppTypes.arrayCopy()); + printer.print("("); + if (n.getArgs().get(0).toString().equals( + n.getArgs().get(2).toString())) { + n.getArgs().get(0).accept(this, arg); + printer.print(", "); + n.getArgs().get(1).accept(this, arg); + printer.print(", "); + n.getArgs().get(3).accept(this, arg); + printer.print(", "); + n.getArgs().get(4).accept(this, arg); + } else if (n.getArgs().get(1).toString().equals("0") + && n.getArgs().get(3).toString().equals("0")) { + n.getArgs().get(0).accept(this, arg); + printer.print(", "); + n.getArgs().get(2).accept(this, arg); + printer.print(", "); + n.getArgs().get(4).accept(this, arg); + } else { + for 
(Iterator i = n.getArgs().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + } else if ("binarySearch".equals(n.getName()) + && "Arrays".equals(n.getScope().toString())) { + n.getArgs().get(0).accept(this, arg); + printer.print(".binarySearch("); + n.getArgs().get(1).accept(this, arg); + printer.print(")"); + } else { + Expression scope = n.getScope(); + if (scope != null) { + if (scope instanceof StringLiteralExpr) { + StringLiteralExpr strLit = (StringLiteralExpr) scope; + String str = strLit.getValue(); + if (!"toCharArray".equals(n.getName())) { + throw new IllegalStateException( + "Unsupported method call on string literal: " + + n.getName()); + } + printer.print("{ "); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (i != 0) { + printer.print(", "); + } + printCharLiteral("" + c); + } + printer.print(" }"); + return; + } else { + String clazzName = classNameFromExpression(scope); + if (clazzName == null) { + scope.accept(this, arg); + if ("length".equals(n.getName()) + || "charAt".equals(n.getName())) { + printer.print("."); + } else { + printer.print("->"); + } + } else { + printer.print(cppTypes.classPrefix()); + printer.print(clazzName); + printer.print("::"); + } + } + } + printTypeArgs(n.getTypeArgs(), arg); + printer.print(n.getName()); + if ("stateLoop".equals(n.getName()) + && "Tokenizer".equals(javaClassName) + && cppTypes.stateLoopPolicies().length > 0) { + printer.print("<"); + printer.print(cppTypes.stateLoopPolicies()[stateLoopCallCount]); + printer.print(">"); + stateLoopCallCount++; + } + printer.print("("); + if (n.getArgs() != null) { + for (Iterator i = n.getArgs().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + } + } + + public void visit(ObjectCreationExpr n, LocalSymbolTable arg) { + if (n.getScope() 
!= null) { + n.getScope().accept(this, arg); + printer.print("."); + } + + printer.print("new "); + + suppressPointer = true; + printTypeArgs(n.getTypeArgs(), arg); + if ("createAttributeName".equals(currentMethod) + || "elementNameByBuffer".equals(currentMethod)) { + printer.print(cppTypes.classPrefix()); + printer.print("Releasable"); + printer.print(n.getType().getName()); + } else { + n.getType().accept(this, arg); + } + suppressPointer = false; + + if ("AttributeName".equals(n.getType().getName())) { + List args = n.getArgs(); + while (args.size() > 3) { + args.remove(3); + } + } + + printer.print("("); + if (n.getArgs() != null) { + for (Iterator i = n.getArgs().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + + if (n.getAnonymousClassBody() != null) { + printer.printLn(" {"); + printer.indent(); + printMembers(n.getAnonymousClassBody(), arg); + printer.unindent(); + printer.print("}"); + } + } + + public void visit(UnaryExpr n, LocalSymbolTable arg) { + switch (n.getOperator()) { + case positive: + printer.print("+"); + break; + case negative: + printer.print("-"); + break; + case inverse: + printer.print("~"); + break; + case not: + printer.print("!"); + break; + case preIncrement: + printer.print("++"); + break; + case preDecrement: + printer.print("--"); + break; + } + + n.getExpr().accept(this, arg); + + switch (n.getOperator()) { + case posIncrement: + printer.print("++"); + break; + case posDecrement: + printer.print("--"); + break; + } + } + + public void visit(ConstructorDeclaration n, LocalSymbolTable arg) { + if ("TreeBuilder".equals(javaClassName)) { + return; + } + + arg = new LocalSymbolTable(javaClassName, symbolTable); + + // if (n.getJavaDoc() != null) { + // n.getJavaDoc().accept(this, arg); + // } + currentAnnotations = n.getAnnotations(); + + printModifiers(n.getModifiers()); + + printMethodNamespace(); + 
printConstructorExplicit(n.getParameters()); + printer.print(className); + currentAnnotations = null; + + printer.print("("); + if (n.getParameters() != null) { + for (Iterator i = n.getParameters().iterator(); i.hasNext();) { + Parameter p = i.next(); + p.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + + printConstructorBody(n.getBlock(), arg); + } + + protected void printConstructorExplicit(List params) { + } + + protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { + inConstructorBody = true; + List statements = block.getStmts(); + List nonAssigns = new LinkedList(); + int i = 0; + boolean needOutdent = false; + for (Statement statement : statements) { + if (statement instanceof ExpressionStmt + && ((ExpressionStmt) statement).getExpression() instanceof AssignExpr) { + if (i == 0) { + printer.printLn(); + printer.indent(); + printer.print(": "); + needOutdent = true; + } else { + printer.print(","); + printer.printLn(); + printer.print(" "); + } + statement.accept(this, arg); + i++; + } else { + nonAssigns.add(statement); + } + } + if (needOutdent) { + printer.unindent(); + } + inConstructorBody = false; + printer.printLn(); + printer.printLn("{"); + printer.indent(); + String boilerplate = cppTypes.constructorBoilerplate(className); + if (boilerplate != null) { + printer.printLn(boilerplate); + } + for (Statement statement : nonAssigns) { + statement.accept(this, arg); + printer.printLn(); + } + printer.unindent(); + printer.printLn("}"); + printer.printLn(); + } + + public void visit(MethodDeclaration n, LocalSymbolTable arg) { + arg = new LocalSymbolTable(javaClassName, symbolTable); + if (isPrintableMethod(n.getModifiers()) + && !(n.getName().equals("endCoalescing") || n.getName().equals( + "startCoalescing"))) { + printMethodDeclaration(n, arg); + } + } + + private boolean isPrintableMethod(int modifiers) { + return !(ModifierSet.isAbstract(modifiers) || 
(ModifierSet.isProtected(modifiers) && !(ModifierSet.isFinal(modifiers) || "Tokenizer".equals(javaClassName)))); + } + + protected void printMethodDeclaration(MethodDeclaration n, + LocalSymbolTable arg) { + if (n.getName().startsWith("fatal") || n.getName().startsWith("err") + || n.getName().startsWith("warn") + || n.getName().startsWith("maybeErr") + || n.getName().startsWith("maybeWarn") + || n.getName().startsWith("note") + || "releaseArray".equals(n.getName()) + || "deleteArray".equals(n.getName()) + || "delete".equals(n.getName())) { + return; + } + + currentMethod = n.getName(); + + destructor = "destructor".equals(currentMethod); + + // if (n.getJavaDoc() != null) { + // n.getJavaDoc().accept(this, arg); + // } + currentAnnotations = n.getAnnotations(); + boolean isInline = inline(); + if (isInline && !inHeader()) { + return; + } + + if (destructor) { + printModifiers(ModifierSet.PUBLIC); + } else { + printModifiers(n.getModifiers()); + } + + if ("stateLoop".equals(currentMethod) + && "Tokenizer".equals(javaClassName) + && cppTypes.stateLoopPolicies().length > 0) { + printer.print("template"); + if (inHeader()) { + printer.print(" "); + } else { + printer.printLn(); + } + } + + printTypeParameters(n.getTypeParameters(), arg); + if (n.getTypeParameters() != null) { + printer.print(" "); + } + if (!destructor) { + n.getType().accept(this, arg); + printer.print(" "); + } + printMethodNamespace(); + if (destructor) { + printer.print("~"); + printer.print(className); + } else { + printer.print(n.getName()); + } + + currentAnnotations = null; + printer.print("("); + if (n.getParameters() != null) { + for (Iterator i = n.getParameters().iterator(); i.hasNext();) { + Parameter p = i.next(); + p.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("[]"); + } + + if (inHeader() == isInline) { + printMethodBody(n.getBody(), arg); + } else { + 
printer.printLn(";"); + } + } + + private void printMethodBody(BlockStmt n, LocalSymbolTable arg) { + if (n == null) { + printer.print(";"); + } else { + printer.printLn(); + printer.printLn("{"); + printer.indent(); + if (destructor) { + String boilerplate = cppTypes.destructorBoilderplate(className); + if (boilerplate != null) { + printer.printLn(boilerplate); + } + } + if (n.getStmts() != null) { + for (Statement s : n.getStmts()) { + s.accept(this, arg); + printer.printLn(); + } + } + printer.unindent(); + printer.print("}"); + } + printer.printLn(); + printer.printLn(); + } + + protected void printMethodNamespace() { + printer.printLn(); + printer.print(className); + printer.print("::"); + } + + public void visit(Parameter n, LocalSymbolTable arg) { + currentAnnotations = n.getAnnotations(); + + arg.putLocalType(n.getId().getName(), convertType(n.getType(), + n.getModifiers())); + + n.getType().accept(this, arg); + if (n.isVarArgs()) { + printer.print("..."); + } + printer.print(" "); + n.getId().accept(this, arg); + currentAnnotations = null; + } + + public void visit(ExplicitConstructorInvocationStmt n, LocalSymbolTable arg) { + if (n.isThis()) { + printTypeArgs(n.getTypeArgs(), arg); + printer.print("this"); + } else { + if (n.getExpr() != null) { + n.getExpr().accept(this, arg); + printer.print("."); + } + printTypeArgs(n.getTypeArgs(), arg); + printer.print("super"); + } + printer.print("("); + if (n.getArgs() != null) { + for (Iterator i = n.getArgs().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(");"); + } + + public void visit(VariableDeclarationExpr n, LocalSymbolTable arg) { + currentAnnotations = n.getAnnotations(); + + arg.putLocalType(n.getVars().get(0).toString(), convertType( + n.getType(), n.getModifiers())); + + n.getType().accept(this, arg); + printer.print(" "); + + for (Iterator i = n.getVars().iterator(); i.hasNext();) { + 
VariableDeclarator v = i.next(); + v.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + currentAnnotations = null; + } + + public void visit(TypeDeclarationStmt n, LocalSymbolTable arg) { + n.getTypeDeclaration().accept(this, arg); + } + + public void visit(AssertStmt n, LocalSymbolTable arg) { + String message = null; + Expression msg = n.getMessage(); + boolean hasCheck = true; + if (msg != null) { + if (msg instanceof StringLiteralExpr) { + StringLiteralExpr sle = (StringLiteralExpr) msg; + message = sle.getValue(); + } else { + throw new RuntimeException("Bad assertion message."); + } + } + String macro = cppTypes.assertionMacro(); + if (message != null && message.startsWith("RELEASE: ")) { + message = message.substring("RELEASE: ".length()); + macro = cppTypes.releaseAssertionMacro(); + Expression check = n.getCheck(); + if (check instanceof BooleanLiteralExpr) { + BooleanLiteralExpr expr = (BooleanLiteralExpr) check; + if (!expr.getValue()) { + hasCheck = false; + macro = cppTypes.crashMacro(); + } + } + } + if (macro != null) { + printer.print(macro); + printer.print("("); + if (hasCheck) { + n.getCheck().accept(this, arg); + } + if (message != null) { + if (hasCheck) { + printer.print(", "); + } + printer.print("\""); + for (int i = 0; i < message.length(); i++) { + char c = message.charAt(i); + if (c == '"') { + printer.print("\""); + } else if (c >= ' ' && c <= '~') { + printer.print("" + c); + } else { + throw new RuntimeException("Bad assertion message string."); + } + } + printer.print("\""); + } + printer.print(");"); + } + } + + public void visit(BlockStmt n, LocalSymbolTable arg) { + printer.printLn("{"); + if (n.getStmts() != null) { + printer.indent(); + for (Statement s : n.getStmts()) { + s.accept(this, arg); + printer.printLn(); + } + printer.unindent(); + } + printer.print("}"); + + } + + public void visit(LabeledStmt n, LocalSymbolTable arg) { + // Only conditionless for loops are needed and supported + // Not 
implementing general Java continue semantics in order + // to keep the generated C++ more readable. + Statement stmt = n.getStmt(); + if (stmt instanceof ForStmt) { + ForStmt forLoop = (ForStmt) stmt; + if (!(forLoop.getInit() == null && forLoop.getCompare() == null && forLoop.getUpdate() == null)) { + forLoopsWithCondition.add(n.getLabel()); + } + } else { + throw new IllegalStateException( + "Only for loop supported as labeled statement. Line: " + + n.getBeginLine()); + } + String label = n.getLabel(); + if (labels.contains(label)) { + printer.print(label); + printer.print(": "); + } + stmt.accept(this, arg); + printer.printLn(); + label += "_end"; + if (labels.contains(label)) { + printer.print(label); + printer.print(": ;"); + } + } + + public void visit(EmptyStmt n, LocalSymbolTable arg) { + printer.print(";"); + } + + public void visit(ExpressionStmt n, LocalSymbolTable arg) { + Expression e = n.getExpression(); + if (isCompletedCharacterReference(e)) { + printer.print(cppTypes.completedCharacterReference()); + printer.print(";"); + return; + } + boolean needsCondition = isTokenizerErrorReportingExpression(e); + if (!needsCondition && isDroppedExpression(e)) { + return; + } + if (needsCondition) { + printer.print("if ("); + printer.print(cppTypes.tokenizerErrorCondition()); + printer.printLn(") {"); + printer.indent(); + } + e.accept(this, arg); + if (!inConstructorBody) { + printer.print(";"); + } + if (needsCondition) { + printer.printLn(); + printer.unindent(); + printer.print("}"); + } + } + + private void visitTransition(MethodCallExpr call, LocalSymbolTable arg) { + List args = call.getArgs(); + if (reportTransitions) { + printer.print(cppTypes.transition()); + printer.print("("); + printer.print(cppTypes.firstTransitionArg()); + printer.print(", "); + args.get(1).accept(this, arg); + printer.print(", "); + args.get(2).accept(this, arg); + printer.print(", "); + args.get(3).accept(this, arg); + printer.print(")"); + } else { + args.get(1).accept(this, 
arg); + } + } + + private boolean isTokenizerErrorReportingExpression(Expression e) { + if (!reportTransitions) { + return false; + } + if (e instanceof MethodCallExpr) { + MethodCallExpr methodCallExpr = (MethodCallExpr) e; + String name = methodCallExpr.getName(); + if (supportErrorReporting && !name.startsWith("errHtml4") + && ("stateLoop".equals(currentMethod)) + && (name.startsWith("err") || name.startsWith("maybeErr"))) { + return true; + } + } + return false; + } + + private boolean isCompletedCharacterReference(Expression e) { + if (!reportTransitions) { + return false; + } + if (e instanceof MethodCallExpr) { + MethodCallExpr methodCallExpr = (MethodCallExpr) e; + String name = methodCallExpr.getName(); + if (name.equals("completedNamedCharacterReference")) { + return true; + } + } + return false; + } + + private boolean isDroppedExpression(Expression e) { + if (e instanceof MethodCallExpr) { + MethodCallExpr methodCallExpr = (MethodCallExpr) e; + String name = methodCallExpr.getName(); + if (name.startsWith("fatal") || name.startsWith("note") + || name.startsWith("errHtml4") || name.startsWith("warn") + || name.startsWith("maybeWarn")) { + return true; + } + if (supportErrorReporting + && ("stateLoop".equals(currentMethod) && !reportTransitions) + && (name.startsWith("err") || name.startsWith("maybeErr"))) { + return true; + } + if (name.equals("completedNamedCharacterReference") + && !reportTransitions) { + return true; + } + } + return false; + } + + public void visit(SwitchStmt n, LocalSymbolTable arg) { + printer.print("switch("); + n.getSelector().accept(this, arg); + printer.printLn(") {"); + if (n.getEntries() != null) { + printer.indent(); + for (SwitchEntryStmt e : n.getEntries()) { + e.accept(this, arg); + } + printer.unindent(); + } + printer.print("}"); + + } + + public void visit(SwitchEntryStmt n, LocalSymbolTable arg) { + if (n.getLabel() != null) { + boolean isMenuitem = n.getLabel().toString().equals("MENUITEM"); + if (isMenuitem) { + 
printer.printWithoutIndent("#ifdef ENABLE_VOID_MENUITEM\n"); + } + printer.print("case "); + n.getLabel().accept(this, arg); + printer.print(":"); + if (isMenuitem) { + printer.printWithoutIndent("\n#endif"); + } + } else { + printer.print("default:"); + } + if (isNoStatement(n.getStmts())) { + printer.printLn(); + printer.indent(); + if (n.getLabel() == null) { + printer.printLn("; // fall through"); + } + printer.unindent(); + } else { + printer.printLn(" {"); + printer.indent(); + for (Statement s : n.getStmts()) { + s.accept(this, arg); + printer.printLn(); + } + printer.unindent(); + printer.printLn("}"); + } + } + + private boolean isNoStatement(List stmts) { + if (stmts == null) { + return true; + } + for (Statement statement : stmts) { + if (!isDroppableStatement(statement)) { + return false; + } + } + return true; + } + + private boolean isDroppableStatement(Statement statement) { + if (statement instanceof AssertStmt) { + return true; + } else if (statement instanceof ExpressionStmt) { + ExpressionStmt es = (ExpressionStmt) statement; + if (isDroppedExpression(es.getExpression())) { + return true; + } + } + return false; + } + + public void visit(BreakStmt n, LocalSymbolTable arg) { + if (n.getId() != null) { + printer.print(cppTypes.breakMacro()); + printer.print("("); + printer.print(n.getId()); + printer.print(")"); + } else { + printer.print("break"); + } + printer.print(";"); + } + + public void visit(ReturnStmt n, LocalSymbolTable arg) { + printer.print("return"); + if (n.getExpr() != null) { + printer.print(" "); + n.getExpr().accept(this, arg); + } + printer.print(";"); + } + + public void visit(EnumDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + currentAnnotations = n.getAnnotations(); + // if (annotations != null) { + // for (AnnotationExpr a : annotations) { + // a.accept(this, arg); + // printer.printLn(); + // } + // } + printModifiers(n.getModifiers()); + + printer.print("enum 
"); + printer.print(n.getName()); + + currentAnnotations = null; + + if (n.getImplements() != null) { + printer.print(" implements "); + for (Iterator i = n.getImplements().iterator(); i.hasNext();) { + ClassOrInterfaceType c = i.next(); + c.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + + printer.printLn(" {"); + printer.indent(); + if (n.getEntries() != null) { + printer.printLn(); + for (Iterator i = n.getEntries().iterator(); i.hasNext();) { + EnumConstantDeclaration e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + if (n.getMembers() != null) { + printer.printLn(";"); + printMembers(n.getMembers(), arg); + } else { + if (n.getEntries() != null) { + printer.printLn(); + } + } + printer.unindent(); + printer.print("}"); + } + + public void visit(EnumConstantDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + currentAnnotations = n.getAnnotations(); + // if (annotations != null) { + // for (AnnotationExpr a : annotations) { + // a.accept(this, arg); + // printer.printLn(); + // } + // } + printer.print(n.getName()); + + currentAnnotations = null; + + if (n.getArgs() != null) { + printer.print("("); + for (Iterator i = n.getArgs().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + printer.print(")"); + } + + if (n.getClassBody() != null) { + printer.printLn(" {"); + printer.indent(); + printMembers(n.getClassBody(), arg); + printer.unindent(); + printer.printLn("}"); + } + } + + public void visit(EmptyMemberDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + printer.print(";"); + } + + public void visit(InitializerDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + if (n.isStatic()) { + printer.print("static "); + } + 
n.getBlock().accept(this, arg); + } + + public void visit(IfStmt n, LocalSymbolTable arg) { + if (TranslatorUtils.isDocumentModeHandlerNullCheck(n.getCondition())) { + Statement then = n.getThenStmt(); + if (then instanceof BlockStmt) { + BlockStmt block = (BlockStmt) then; + List statements = block.getStmts(); + if (statements != null && statements.size() == 1) { + statements.get(0).accept(this, arg); + } else { + then.accept(this, arg); + } + } else { + then.accept(this, arg); + } + } else if (!TranslatorUtils.isErrorHandlerIf(n.getCondition(), supportErrorReporting)) { + if (TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), supportErrorReporting)) { + if (n.getElseStmt() != null + && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { + printer.print("if ("); + if (n.getCondition() instanceof BinaryExpr) { + BinaryExpr binExpr = (BinaryExpr) n.getCondition(); + switch (binExpr.getOperator()) { + case equals: + binExpr.getLeft().accept(this, arg); + printer.print(" != "); + binExpr.getRight().accept(this, arg); + break; + case notEquals: + binExpr.getLeft().accept(this, arg); + printer.print(" == "); + binExpr.getRight().accept(this, arg); + break; + default: + printer.print("!("); + formatCondition(n.getCondition(), arg); + printer.print(")"); + break; + } + } else { + printer.print("!("); + formatCondition(n.getCondition(), arg); + printer.print(")"); + } + printer.print(") "); + n.getElseStmt().accept(this, arg); + } + } else { + boolean unlikely = (currentMethod != null) + && (Arrays.binarySearch( + METHODS_WITH_UNLIKELY_CONDITIONS, + currentMethod) >= 0); + printer.print("if ("); + if (unlikely) { + printer.print(cppTypes.unlikely()); + printer.print("("); + } + formatCondition(n.getCondition(), arg); + if (unlikely) { + printer.print(")"); + } + printer.print(") "); + n.getThenStmt().accept(this, arg); + if (n.getElseStmt() != null + && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { + printer.print(" 
else "); + n.getElseStmt().accept(this, arg); + } + } + } + } + + private void formatCondition(Expression expr, LocalSymbolTable arg) { + if (expr instanceof BinaryExpr) { + BinaryExpr binExpr = (BinaryExpr) expr; + switch (binExpr.getOperator()) { + case notEquals: + if (binExpr.getRight() instanceof NullLiteralExpr) { + binExpr.getLeft().accept(this, arg); + return; + } + break; + default: + break; + } + } + expr.accept(this, arg); + } + + + public void visit(WhileStmt n, LocalSymbolTable arg) { + printer.print("while ("); + n.getCondition().accept(this, arg); + printer.print(") "); + n.getBody().accept(this, arg); + } + + public void visit(ContinueStmt n, LocalSymbolTable arg) { + // Not supporting the general Java continue semantics. + // Instead, making the generated code more readable for the + // case at hand. + if (n.getId() != null) { + printer.print(cppTypes.continueMacro()); + printer.print("("); + printer.print(n.getId()); + printer.print(")"); + if (forLoopsWithCondition.contains(n.getId())) { + throw new IllegalStateException( + "Continue attempted with a loop that has a condition. 
" + + className + " " + n.getId()); + } + } else { + printer.print("continue"); + } + printer.print(";"); + } + + public void visit(DoStmt n, LocalSymbolTable arg) { + printer.print("do "); + n.getBody().accept(this, arg); + printer.print(" while ("); + n.getCondition().accept(this, arg); + printer.print(");"); + } + + public void visit(ForeachStmt n, LocalSymbolTable arg) { + printer.print("for ("); + n.getVariable().accept(this, arg); + printer.print(" : "); + n.getIterable().accept(this, arg); + printer.print(") "); + n.getBody().accept(this, arg); + } + + public void visit(ForStmt n, LocalSymbolTable arg) { + printer.print("for ("); + if (n.getInit() != null) { + for (Iterator i = n.getInit().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print("; "); + if (n.getCompare() != null) { + n.getCompare().accept(this, arg); + } + printer.print("; "); + if (n.getUpdate() != null) { + for (Iterator i = n.getUpdate().iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(") "); + n.getBody().accept(this, arg); + } + + public void visit(ThrowStmt n, LocalSymbolTable arg) { + printer.print("throw "); + n.getExpr().accept(this, arg); + printer.print(";"); + } + + public void visit(SynchronizedStmt n, LocalSymbolTable arg) { + printer.print("synchronized ("); + n.getExpr().accept(this, arg); + printer.print(") "); + n.getBlock().accept(this, arg); + } + + public void visit(TryStmt n, LocalSymbolTable arg) { + printer.print("try "); + n.getTryBlock().accept(this, arg); + if (n.getCatchs() != null) { + for (CatchClause c : n.getCatchs()) { + c.accept(this, arg); + } + } + if (n.getFinallyBlock() != null) { + printer.print(" finally "); + n.getFinallyBlock().accept(this, arg); + } + } + + public void visit(CatchClause n, LocalSymbolTable arg) { + printer.print(" catch ("); + 
n.getExcept().accept(this, arg); + printer.print(") "); + n.getCatchBlock().accept(this, arg); + + } + + public void visit(AnnotationDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + currentAnnotations = n.getAnnotations(); + // if (annotations != null) { + // for (AnnotationExpr a : annotations) { + // a.accept(this, arg); + // printer.printLn(); + // } + // } + printModifiers(n.getModifiers()); + + printer.print("@interface "); + printer.print(n.getName()); + currentAnnotations = null; + printer.printLn(" {"); + printer.indent(); + if (n.getMembers() != null) { + printMembers(n.getMembers(), arg); + } + printer.unindent(); + printer.print("}"); + } + + public void visit(AnnotationMemberDeclaration n, LocalSymbolTable arg) { + if (n.getJavaDoc() != null) { + n.getJavaDoc().accept(this, arg); + } + currentAnnotations = n.getAnnotations(); + // if (annotations != null) { + // for (AnnotationExpr a : annotations) { + // a.accept(this, arg); + // printer.printLn(); + // } + // } + printModifiers(n.getModifiers()); + + n.getType().accept(this, arg); + printer.print(" "); + printer.print(n.getName()); + currentAnnotations = null; + printer.print("()"); + if (n.getDefaultValue() != null) { + printer.print(" default "); + n.getDefaultValue().accept(this, arg); + } + printer.print(";"); + } + + public void visit(MarkerAnnotationExpr n, LocalSymbolTable arg) { + printer.print("@"); + n.getName().accept(this, arg); + } + + public void visit(SingleMemberAnnotationExpr n, LocalSymbolTable arg) { + printer.print("@"); + n.getName().accept(this, arg); + printer.print("("); + n.getMemberValue().accept(this, arg); + printer.print(")"); + } + + public void visit(NormalAnnotationExpr n, LocalSymbolTable arg) { + printer.print("@"); + n.getName().accept(this, arg); + printer.print("("); + if (n.getPairs() != null) { + for (Iterator i = n.getPairs().iterator(); i.hasNext();) { + MemberValuePair m = i.next(); + 
m.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + } + + public void visit(MemberValuePair n, LocalSymbolTable arg) { + printer.print(n.getName()); + printer.print(" = "); + n.getValue().accept(this, arg); + } + + public void visit(LineComment n, LocalSymbolTable arg) { + printer.print("//"); + printer.printLn(n.getContent()); + } + + public void visit(BlockComment n, LocalSymbolTable arg) { + printer.print("/*"); + printer.print(n.getContent()); + printer.printLn("*/"); + } + + public void setLabels(Set labels) { + this.labels = labels; + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java new file mode 100644 index 0000000000..3d642c0e0f --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java @@ -0,0 +1,70 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. 
/**
 * Reads {@code GK_ATOM(identifier, "value")} lines from a character stream
 * and collects them into a map.
 *
 * <p>Only lines that begin with {@code GK_ATOM(} (no leading whitespace) and
 * carry a double-quoted second argument are recognized; all other lines are
 * ignored.
 */
public class GkAtomParser {

    /** Group 1 is the atom identifier, group 2 the quoted string value. */
    private static final Pattern ATOM = Pattern.compile("^GK_ATOM\\(([^,]+),\\s*\"([^\"]*)\"\\).*$");

    private final BufferedReader reader;

    /**
     * @param reader the source of the atom list; it is wrapped in a
     *        {@link BufferedReader} and is not closed by this class
     */
    public GkAtomParser(Reader reader) {
        this.reader = new BufferedReader(reader);
    }

    /**
     * Consumes the reader line by line until its end.
     *
     * @return a map from each atom's string value to its identifier; when
     *         several atoms share a string value, the last one read wins
     * @throws IOException if reading from the underlying reader fails
     */
    public Map<String, String> parse() throws IOException {
        Map<String, String> identifiersByValue = new HashMap<String, String>();
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            Matcher atom = ATOM.matcher(line);
            if (atom.matches()) {
                identifiersByValue.put(atom.group(2), atom.group(1));
            }
        }
        return identifiersByValue;
    }

}
b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java @@ -0,0 +1,306 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import java.util.LinkedList; +import java.util.List; + +import japa.parser.ast.body.FieldDeclaration; +import japa.parser.ast.body.MethodDeclaration; +import japa.parser.ast.body.ModifierSet; +import japa.parser.ast.body.Parameter; +import japa.parser.ast.body.VariableDeclarator; +import japa.parser.ast.expr.IntegerLiteralExpr; +import japa.parser.ast.expr.MethodCallExpr; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.type.PrimitiveType; +import japa.parser.ast.type.ReferenceType; +import japa.parser.ast.type.Type; + +public class HVisitor extends CppVisitor { + + private enum Visibility { + NONE, PRIVATE, PUBLIC, PROTECTED, + } + + private Visibility previousVisibility = Visibility.NONE; + + private List defines = new LinkedList(); + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printMethodNamespace() + */ + @Override protected void printMethodNamespace() { + } + + public HVisitor(CppTypes cppTypes, SymbolTable symbolTable) { + super(cppTypes, symbolTable); + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#startClassDeclaration() + */ + @Override protected void startClassDeclaration() { + printer.print("#ifndef "); + printer.print(className); + printer.printLn("_h"); + printer.print("#define "); + printer.print(className); + printer.printLn("_h"); + + printer.printLn(); + + String[] incs = cppTypes.boilerplateIncludes(javaClassName); + for (int i = 0; i < incs.length; i++) { + String inc = incs[i]; + if (className.equals(inc)) { + continue; + } + printer.print("#include \""); + printer.print(inc); + printer.printLn(".h\""); + } + + printer.printLn(); + + String[] forwDecls = cppTypes.boilerplateForwardDeclarations(); + for (int i = 0; i < forwDecls.length; i++) { + String decl = forwDecls[i]; + printer.print("class "); + printer.print(decl); + printer.printLn(";"); + } + + printer.printLn(); + + for (int i = 0; i < 
Main.H_LIST.length; i++) { + String klazz = Main.H_LIST[i]; + if (!(klazz.equals(javaClassName) || klazz.equals("StackNode"))) { + printer.print("class "); + printer.print(cppTypes.classPrefix()); + printer.print(klazz); + printer.printLn(";"); + } + } + + printer.printLn(); + + String[] otherDecls = cppTypes.boilerplateDeclarations(javaClassName); + for (int i = 0; i < otherDecls.length; i++) { + String decl = otherDecls[i]; + printer.printLn(decl); + } + + printer.printLn(); + + printer.print("class "); + printer.print(className); + if ("StateSnapshot".equals(javaClassName) || "TreeBuilder".equals(javaClassName)) { + printer.print(" : public "); + printer.print(cppTypes.treeBuilderStateInterface()); + } + printer.printLn(); + printer.printLn("{"); + printer.indent(); + printer.indent(); + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#endClassDeclaration() + */ + @Override protected void endClassDeclaration() { + printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); + printer.printLn("void initializeStatics();"); + printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); + printer.printLn("void releaseStatics();"); + + printer.unindent(); + printer.unindent(); + + if (cppTypes.hasSupplement(javaClassName)) { + printer.printLn(); + printer.print("#include \""); + printer.print(className); + printer.printLn("HSupplement.h\""); + } + + printer.printLn("};"); + printer.printLn(); + + for (String define : defines) { + printer.printLn(define); + } + + printer.printLn(); + printer.printLn(); + printer.printLn("#endif"); + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printModifiers(int) + */ + @Override protected void printModifiers(int modifiers) { + if (ModifierSet.isPrivate(modifiers)) { + if (previousVisibility != Visibility.PRIVATE) { + printer.unindent(); + printer.printLn("private:"); + printer.indent(); + previousVisibility = Visibility.PRIVATE; + } + } else if (ModifierSet.isProtected(modifiers)) { + if 
(previousVisibility != Visibility.PROTECTED) { + printer.unindent(); + printer.printLn("protected:"); + printer.indent(); + previousVisibility = Visibility.PROTECTED; + } + } else { + if (previousVisibility != Visibility.PUBLIC) { + printer.unindent(); + printer.printLn("public:"); + printer.indent(); + previousVisibility = Visibility.PUBLIC; + } + } + if (inline()) { + printer.print("inline "); + } + if (virtual()) { + printer.print("virtual "); + } + if (ModifierSet.isStatic(modifiers)) { + printer.print("static "); + } + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#fieldDeclaration(japa.parser.ast.body.FieldDeclaration, java.lang.LocalSymbolTable) + */ + @Override protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { + int modifiers = n.getModifiers(); + List variables = n.getVariables(); + VariableDeclarator declarator = variables.get(0); + if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) + && n.getType() instanceof PrimitiveType) { + PrimitiveType type = (PrimitiveType) n.getType(); + if (type.getType() != PrimitiveType.Primitive.Int) { + throw new IllegalStateException( + "Only int constant #defines supported."); + } + if (variables.size() != 1) { + throw new IllegalStateException( + "More than one variable declared by one declarator."); + } + String name = javaClassName + "." 
+ declarator.getId().getName(); + String value = declarator.getInit().toString(); + if ("Integer.MAX_VALUE".equals(value)) { + value = cppTypes.maxInteger(); + } + String longName = definePrefix + declarator.getId().getName(); + if (symbolTable.cppDefinesByJavaNames.containsKey(name)) { + throw new IllegalStateException( + "Duplicate #define constant local name: " + name); + } + symbolTable.cppDefinesByJavaNames.put(name, longName); + defines.add("#define " + longName + " " + value); + } else { + if (n.getType() instanceof ReferenceType) { + ReferenceType rt = (ReferenceType) n.getType(); + currentArrayCount = rt.getArrayCount(); + if (currentArrayCount > 0 + && (rt.getType() instanceof PrimitiveType) && declarator.getInit() != null) { + if (!ModifierSet.isStatic(modifiers)) { + throw new IllegalStateException( + "Non-static array case not supported here." + declarator); + } + if (noLength()) { + inPrimitiveNoLengthFieldDeclarator = true; + } + } + } + printModifiers(modifiers); + inStatic = ModifierSet.isStatic(modifiers); + n.getType().accept(this, arg); + printer.print(" "); + if (ModifierSet.isStatic(modifiers)) { + if ("AttributeName".equals(n.getType().toString())) { + printer.print("ATTR_"); + } else if ("ElementName".equals(n.getType().toString())) { + printer.print("ELT_"); + } + } + declarator.getId().accept(this, arg); + printer.printLn(";"); + currentArrayCount = 0; + inStatic = false; + inPrimitiveNoLengthFieldDeclarator = false; + } + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorExplicit(java.util.List) + */ + @Override protected void printConstructorExplicit(List params) { + if (params != null && params.size() == 1) { + printer.print("explicit "); + } + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorBody(japa.parser.ast.stmt.BlockStmt, java.lang.LocalSymbolTable) + */ + @Override protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { + 
printer.printLn(";"); + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#visit(japa.parser.ast.body.MethodDeclaration, java.lang.LocalSymbolTable) + */ + @Override public void visit(MethodDeclaration n, LocalSymbolTable arg) { + arg = new LocalSymbolTable(javaClassName, symbolTable); + printMethodDeclaration(n, arg); + } + + /** + * @see nu.validator.htmlparser.cpptranslate.CppVisitor#inHeader() + */ + @Override protected boolean inHeader() { + return true; + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java new file mode 100644 index 0000000000..f27d465a3c --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java @@ -0,0 +1,84 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.ContinueStmt; +import japa.parser.ast.visitor.VoidVisitorAdapter; + +import java.util.HashSet; +import java.util.Set; + +public class LabelVisitor extends VoidVisitorAdapter { + + private final Set labels = new HashSet(); + + public LabelVisitor() { + } + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.BreakStmt, java.lang.Object) + */ + @Override + public void visit(BreakStmt n, Object arg) { + String label = n.getId(); + if (label != null) { + labels.add(label + "_end"); + } + } + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.ContinueStmt, java.lang.Object) + */ + @Override + public void visit(ContinueStmt n, Object arg) { + String label = n.getId(); + if (label != null) { + labels.add(label); + } + } + + /** + * Returns the labels. 
+ * + * @return the labels + */ + public Set getLabels() { + return labels; + } +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java new file mode 100644 index 0000000000..e4030f4388 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java @@ -0,0 +1,75 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
/**
 * Extracts the leading text of a file up to and including the first
 * {@code *}{@code /} sequence, i.e. through the end of the first block
 * comment.
 *
 * <p>An instance is single-use: {@link #extract()} closes the underlying file
 * when it returns.
 */
public class LicenseExtractor {

    private final Reader reader;

    /**
     * Opens the given file for reading as UTF-8.
     *
     * @param file the file to read
     * @throws IOException if the file cannot be opened
     */
    public LicenseExtractor(File file) throws IOException {
        this.reader = new InputStreamReader(new FileInputStream(file), "utf-8");
    }

    /**
     * Reads characters until an asterisk immediately followed by a slash has
     * been seen, then closes the file.
     *
     * @return everything read up to and including that terminator, or the
     *         empty string if the end of the file is reached without one
     * @throws IOException if reading or closing the file fails
     */
    public String extract() throws IOException {
        try {
            boolean prevWasAsterisk = false;
            StringBuilder sb = new StringBuilder();
            int c;
            while ((c = reader.read()) != -1) {
                sb.append((char) c);
                if (c == '/' && prevWasAsterisk) {
                    return sb.toString();
                }
                prevWasAsterisk = (c == '*');
            }
            return "";
        } finally {
            // The reader is private and has no other owner, so it has to be
            // released here; otherwise the file handle would leak.
            reader.close();
        }
    }
}
the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import java.util.HashMap; +import java.util.Map; + +public class LocalSymbolTable { + + private final Map locals = new HashMap(); + + private final String javaClassName; + + private final SymbolTable delegate; + + /** + * @param javaClassName + * @param delegate + */ + public LocalSymbolTable(String javaClassName, SymbolTable delegate) { + this.javaClassName = javaClassName; + this.delegate = delegate; + } + + public void putLocalType(String name, Type type) { + locals.put(name, type); + } + + /** + * @param klazz + * @param variable + * @return + * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getFieldType(java.lang.String, java.lang.String) + */ + public Type getVariableType(String klazz, String variable) { + if (klazz == null) { + Type type = locals.get(variable); + if (type != null) { + return type; + } + } + return delegate.getFieldType(((klazz == null || "this".equals(klazz)) ? javaClassName : klazz), variable); + } + + /** + * @param klazz may be null or "this" + * @param method + * @return + * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getMethodReturnType(java.lang.String, java.lang.String) + */ + public Type getMethodReturnType(String klazz, String method) { + return delegate.getMethodReturnType(((klazz == null || "this".equals(klazz)) ? 
javaClassName : klazz), method); + } +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java new file mode 100644 index 0000000000..53347bd424 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Main.java @@ -0,0 +1,148 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import japa.parser.JavaParser; +import japa.parser.ParseException; +import japa.parser.ast.CompilationUnit; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; + +public class Main { + + static final String[] H_LIST = { + "Tokenizer", + "TreeBuilder", + "MetaScanner", + "AttributeName", + "ElementName", + "HtmlAttributes", + "StackNode", + "UTF16Buffer", + "StateSnapshot", + "Portability", + }; + + private static final String[] CPP_LIST = { + "Tokenizer", + "TreeBuilder", + "MetaScanner", + "AttributeName", + "ElementName", + "HtmlAttributes", + "StackNode", + "UTF16Buffer", + "StateSnapshot", + }; + + /** + * @param args + * @throws ParseException + * @throws IOException + */ + public static void main(String[] args) throws ParseException, IOException { + CppTypes cppTypes = new CppTypes(new File(args[2])); + SymbolTable symbolTable = new SymbolTable(); + + File javaDirectory = new File(args[0]); + File targetDirectory = new File(args[1]); + File cppDirectory = targetDirectory; + File javaCopyDirectory = new File(targetDirectory, "javasrc"); + + for (int i = 0; i < H_LIST.length; i++) { + parseFile(cppTypes, javaDirectory, cppDirectory, H_LIST[i], ".h", new HVisitor(cppTypes, symbolTable)); + copyFile(new File(javaDirectory, H_LIST[i] + ".java"), new File(javaCopyDirectory, H_LIST[i] + ".java")); + } + for (int i = 0; i < CPP_LIST.length; i++) { + parseFile(cppTypes, javaDirectory, cppDirectory, CPP_LIST[i], ".cpp", new CppVisitor(cppTypes, symbolTable)); + } + cppTypes.finished(); + } + + private 
static void copyFile(File input, File output) throws IOException { + if (input.getCanonicalFile().equals(output.getCanonicalFile())) { + return; // files are the same! + } + // This is horribly inefficient, but perf is not really much of a concern here. + FileInputStream in = new FileInputStream(input); + FileOutputStream out = new FileOutputStream(output); + int b; + while ((b = in.read()) != -1) { + out.write(b); + } + out.flush(); + out.close(); + in.close(); + } + + private static void parseFile(CppTypes cppTypes, File javaDirectory, + File cppDirectory, String className, String fne, CppVisitor visitor) + throws FileNotFoundException, UnsupportedEncodingException, + IOException { + File file = null; + try { + file = new File(javaDirectory, className + ".java"); + String license = new LicenseExtractor(file).extract(); + CompilationUnit cu = JavaParser.parse(new NoCppInputStream( + new CppOnlyInputStream(new FileInputStream(file))), "utf-8"); + LabelVisitor labelVisitor = new LabelVisitor(); + cu.accept(labelVisitor, null); + visitor.setLabels(labelVisitor.getLabels()); + cu.accept(visitor, null); + FileOutputStream out = new FileOutputStream(new File(cppDirectory, + cppTypes.classPrefix() + className + fne)); + OutputStreamWriter w = new OutputStreamWriter(out, "utf-8"); + w.write(license); + w.write("\n\n/*\n * THIS IS A GENERATED FILE. 
PLEASE DO NOT EDIT.\n * Please edit " + + className + ".java instead and regenerate.\n */\n\n"); + w.write(visitor.getSource()); + w.close(); + } catch (ParseException e) { + System.err.println(file); + e.printStackTrace(); + } + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java new file mode 100644 index 0000000000..86f9ae7ff7 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java @@ -0,0 +1,86 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
/**
 * An input stream filter that drops everything between a
 * <code>[NOCPP[</code> marker and the matching <code>]NOCPP]</code> marker.
 *
 * The opening marker itself is passed through to the reader; the hidden
 * content and the closing marker are consumed silently. Matching is done on
 * raw byte values.
 */
public class NoCppInputStream extends InputStream {

    private final static char[] START = "[NOCPP[".toCharArray();

    private final static char[] END = "]NOCPP]".toCharArray();

    /** Number of leading START characters matched by the bytes returned so far. */
    private int state;

    private final InputStream delegate;

    /**
     * @param delegate the stream to filter
     */
    public NoCppInputStream(InputStream delegate) {
        this.delegate = delegate;
        this.state = 0;
    }

    /**
     * Returns the next byte that is not inside a hidden section, or -1 at end
     * of stream. If the delegate ends inside a hidden section, -1 is returned
     * instead of looping forever.
     */
    @Override public int read() throws IOException {
        int c;
        if (state == START.length) {
            // The previous call returned the last byte of the opening marker:
            // swallow input up to and including the closing marker.
            int endState = 0;
            while (endState != END.length) {
                c = delegate.read();
                if (c == -1) {
                    // Unterminated section; without this check the loop
                    // would spin on -1 indefinitely.
                    state = 0;
                    return -1;
                }
                endState = advance(END, endState, c);
            }
            state = 0;
        }
        c = delegate.read();
        state = advance(START, state, c);
        return c;
    }

    /** Closes the underlying stream. */
    @Override public void close() throws IOException {
        delegate.close();
    }

    /**
     * Computes the new match length for <code>marker</code> after seeing
     * <code>c</code>. On a mismatch the match restarts at 1 when
     * <code>c</code> is the marker's first character, so that inputs such as
     * <code>]]NOCPP]</code> are still recognized. This is sufficient for both
     * markers because their first character recurs only as their last.
     */
    private static int advance(char[] marker, int matched, int c) {
        if (marker[matched] == c) {
            return matched + 1;
        }
        return (marker[0] == c) ? 1 : 0;
    }

}
(the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
/**
 * Scans text line by line for string-literal declarations of the form
 * <code>(NAME = new nsString())->Assign(NS_LITERAL_STRING("VALUE"));</code>
 * and collects them into a map.
 */
public class StringLiteralParser {

    private static final Pattern STRING_DECL = Pattern.compile("^.*\\(([^ ]+) = new nsString\\(\\)\\)->Assign\\(NS_LITERAL_STRING\\(\"([^\"]*)\"\\)\\);.*$");

    private final BufferedReader reader;

    /**
     * @param reader the text to scan; it is wrapped in a buffered reader and
     *        is not closed by this class
     */
    public StringLiteralParser(Reader reader) {
        this.reader = new BufferedReader(reader);
    }

    /**
     * Reads the input to its end and returns the declarations found.
     *
     * @return a map from literal value (the quoted string) to the name it is
     *         assigned to; a later line with the same value replaces an
     *         earlier one
     * @throws IOException if reading fails
     */
    public Map<String, String> parse() throws IOException {
        Map<String, String> map = new HashMap<String, String>();
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = STRING_DECL.matcher(line);
            if (m.matches()) {
                // Keyed by the literal (group 2), not by the variable name.
                map.put(m.group(2), m.group(1));
            }
        }
        return map;
    }

}
/**
 * An immutable ordered pair of strings with value-based equality, suitable
 * for use as a map key.
 */
public class StringPair {

    private final String first;

    private final String second;

    /**
     * @param first the first component
     * @param second the second component
     */
    public StringPair(String first, String second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Two pairs are equal when both components are equal, position by
     * position.
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override public boolean equals(Object o) {
        if (!(o instanceof StringPair)) {
            return false;
        }
        StringPair that = (StringPair) o;
        if (!first.equals(that.first)) {
            return false;
        }
        return second.equals(that.second);
    }

    /**
     * Combines the component hashes with XOR.
     *
     * @see java.lang.Object#hashCode()
     */
    @Override public int hashCode() {
        int firstHash = first.hashCode();
        int secondHash = second.hashCode();
        return firstHash ^ secondHash;
    }

}
0000000000..970a2b64b2 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java @@ -0,0 +1,80 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import java.util.HashMap; +import java.util.Map; + +public class SymbolTable { + + public final Map cppDefinesByJavaNames = new HashMap(); + + private final Map fields = new HashMap(); + + private final Map methodReturns = new HashMap(); + + /** + * This is a sad hack to work around the fact the there's no real symbol + * table yet. + * + * @param name + * @return + */ + public boolean isNotAnAttributeOrElementName(String name) { + return !("ATTRIBUTE_HASHES".equals(name) + || "ATTRIBUTE_NAMES".equals(name) + || "ELEMENT_HASHES".equals(name) + || "ELEMENT_NAMES".equals(name) || "ALL_NO_NS".equals(name)); + } + + public void putFieldType(String klazz, String field, Type type) { + fields.put(new StringPair(klazz, field), type); + } + + public void putMethodReturnType(String klazz, String method, Type type) { + methodReturns.put(new StringPair(klazz, method), type); + } + + public Type getFieldType(String klazz, String field) { + return fields.get(new StringPair(klazz, field)); + } + + public Type getMethodReturnType(String klazz, String method) { + return methodReturns.get(new StringPair(klazz, method)); + } +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java new file mode 100644 index 0000000000..00f7c57415 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java @@ -0,0 +1,71 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.cpptranslate; + +import japa.parser.ast.body.ClassOrInterfaceDeclaration; +import japa.parser.ast.body.FieldDeclaration; +import japa.parser.ast.body.MethodDeclaration; + +public class SymbolTableVisitor extends AnnotationHelperVisitor { + + private String javaClassName; + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.FieldDeclaration, java.lang.Object) + */ + @Override public void visit(FieldDeclaration n, SymbolTable arg) { + currentAnnotations = n.getAnnotations(); + arg.putFieldType(javaClassName, n.getVariables().get(0).getId().getName(), convertType(n.getType(), n.getModifiers())); + } + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object) + */ + @Override public void visit(MethodDeclaration n, SymbolTable arg) { + currentAnnotations = n.getAnnotations(); + arg.putMethodReturnType(javaClassName, n.getName(), convertType(n.getType(), n.getModifiers())); + } + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.ClassOrInterfaceDeclaration, java.lang.Object) + */ + @Override public void visit(ClassOrInterfaceDeclaration n, SymbolTable arg) { + javaClassName = n.getName(); + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java new file mode 100644 index 0000000000..866db093de --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java @@ -0,0 +1,81 @@ +package nu.validator.htmlparser.cpptranslate; + +import japa.parser.ast.expr.BinaryExpr; +import japa.parser.ast.expr.BinaryExpr.Operator; +import japa.parser.ast.expr.Expression; +import japa.parser.ast.expr.MethodCallExpr; +import japa.parser.ast.expr.NameExpr; +import 
japa.parser.ast.expr.NullLiteralExpr; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.ExpressionStmt; +import japa.parser.ast.stmt.Statement; + +import java.util.List; + +public class TranslatorUtils { + public static boolean isErrorOnlyBlock(Statement elseStmt, boolean supportErrorReporting) { + if (supportErrorReporting) { + return false; + } + if (elseStmt instanceof BlockStmt) { + BlockStmt block = (BlockStmt) elseStmt; + List statements = block.getStmts(); + if (statements == null) { + return false; + } + if (statements.size() != 1) { + return false; + } + Statement statement = statements.get(0); + if (statement instanceof ExpressionStmt) { + ExpressionStmt exprStmt = (ExpressionStmt) statement; + Expression expr = exprStmt.getExpression(); + if (expr instanceof MethodCallExpr) { + MethodCallExpr call = (MethodCallExpr) expr; + if (call.getName().startsWith("err")) { + return true; + } + } + } + } + return false; + } + + public static boolean isErrorHandlerIf(Expression condition, boolean supportErrorReporting) { + if (supportErrorReporting) { + return false; + } + while (condition instanceof BinaryExpr) { + BinaryExpr binex = (BinaryExpr) condition; + condition = binex.getLeft(); + if (condition instanceof NameExpr) { + NameExpr name = (NameExpr) condition; + if ("errorHandler".equals(name.getName())) { + return true; + } + } + } + return false; + } + + public static boolean isDocumentModeHandlerNullCheck(Expression condition) { + if (condition instanceof BinaryExpr) { + BinaryExpr binex = (BinaryExpr) condition; + if (binex.getOperator() != Operator.notEquals) { + return false; + } + if (!(binex.getRight() instanceof NullLiteralExpr)) { + return false; + } + Expression left = binex.getLeft(); + if (left instanceof NameExpr) { + NameExpr name = (NameExpr) left; + if ("documentModeHandler".equals(name.getName())) { + return true; + } + } + } + return false; + } + +} diff --git 
a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java new file mode 100644 index 0000000000..783a3bbd0b --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/Type.java @@ -0,0 +1,99 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
/**
 * Immutable description of a translated type: a type name, an array count,
 * a "no length" flag and a modifier bit set, exactly as given to the
 * constructor.
 */
public class Type {

    private final String type;

    private final int arrayCount;

    private final boolean noLength;

    private final int modifiers;

    /**
     * Creates a type description from its four components.
     *
     * @param type the type name
     * @param arrayCount the array count
     * @param noLength the no-length flag
     * @param modifiers the modifiers
     */
    public Type(String type, int arrayCount, boolean noLength, int modifiers) {
        this.modifiers = modifiers;
        this.noLength = noLength;
        this.arrayCount = arrayCount;
        this.type = type;
    }

    /**
     * @return the type name passed to the constructor
     */
    public String getType() {
        return this.type;
    }

    /**
     * @return the array count passed to the constructor
     */
    public int getArrayCount() {
        return this.arrayCount;
    }

    /**
     * @return the no-length flag passed to the constructor
     */
    public boolean isNoLength() {
        return this.noLength;
    }

    /**
     * @return the modifiers passed to the constructor
     */
    public int getModifiers() {
        return this.modifiers;
    }

}
/**
 * Applies a workaround that splits the <code>stateLoop</code> method in the
 * tokenizer into two methods. This way, each method stays under 8000 bytes in
 * size. By default, HotSpot doesn't compile methods that are over 8000 bytes in
 * size, which is a performance problem.
 *
 * This program should have been written in Perl, but to avoid introducing new
 * dependencies, it's written in Java. No attempt at efficiency has been made.
 *
 * Warning! This modifies Tokenizer.java in place!
 *
 * @version $Id$
 * @author hsivonen
 */
public class ApplyHotSpotWorkaround {

    private static final String BEGIN_WORKAROUND = "// BEGIN HOTSPOT WORKAROUND";

    private static final String END_WORKAROUND = "// END HOTSPOT WORKAROUND";

    private static final String INSERTION_POINT = "// HOTSPOT WORKAROUND INSERTION POINT";

    private static final Pattern TRANSITION_CALL = Pattern.compile("state = transition\\(state, ([^,]*), reconsume, pos\\)");

    /**
     * Swaps the marked sections of the two files, inserts the resulting
     * workaround text at the insertion point of the tokenizer, rewrites
     * <code>state = transition(state, X, reconsume, pos)</code> to
     * <code>state = X</code> and writes the result over the tokenizer file.
     *
     * All validation happens before the tokenizer file is opened for writing,
     * so a malformed input leaves it untouched.
     *
     * @param args <code>args[0]</code> is the tokenizer file (overwritten),
     *        <code>args[1]</code> is the workaround file (read only)
     * @throws IllegalArgumentException if fewer than two arguments are given
     *         or a required marker is missing or out of order
     */
    public static void main(String[] args) throws Throwable {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Expected two arguments: the tokenizer file and the workaround file.");
        }
        String tokenizer = readFileIntoString(args[0]);
        String workaround = readFileIntoString(args[1]);

        int beginIndex = requireMarker(tokenizer, BEGIN_WORKAROUND, args[0]);
        int endIndex = requireMarker(tokenizer, END_WORKAROUND, args[0]);
        requireOrdered(beginIndex, endIndex, args[0]);
        String tokenizerHead = tokenizer.substring(0, beginIndex);
        String tokenizerMiddle = tokenizer.substring(beginIndex, endIndex);
        String tokenizerTail = tokenizer.substring(endIndex);

        beginIndex = requireMarker(workaround, BEGIN_WORKAROUND, args[1]);
        endIndex = requireMarker(workaround, END_WORKAROUND, args[1]);
        requireOrdered(beginIndex, endIndex, args[1]);
        String workaroundHead = workaround.substring(0, beginIndex);
        String workaroundMiddle = workaround.substring(beginIndex, endIndex);
        String workaroundTail = workaround.substring(endIndex);

        // Exchange the marked sections between the two texts.
        String newTokenizer = tokenizerHead + workaroundMiddle + tokenizerTail;
        String newWorkaround = workaroundHead + tokenizerMiddle
                + workaroundTail;

        int insertionPoint = requireMarker(newTokenizer, INSERTION_POINT, args[0]);

        tokenizerHead = newTokenizer.substring(0, insertionPoint);
        tokenizerTail = newTokenizer.substring(insertionPoint);

        newTokenizer = tokenizerHead + newWorkaround + tokenizerTail;

        Matcher m = TRANSITION_CALL.matcher(newTokenizer);
        newTokenizer = m.replaceAll("state = $1");

        Writer out = new OutputStreamWriter(new FileOutputStream(args[0]),
                "utf-8");
        try {
            out.write(newTokenizer);
            out.flush();
        } finally {
            out.close();
        }
    }

    /**
     * Returns the index of the first occurrence of <code>marker</code>, or
     * fails with a message naming the marker and the file instead of letting
     * a later <code>substring</code> call fail on -1.
     */
    private static int requireMarker(String text, String marker, String fileName) {
        int index = text.indexOf(marker);
        if (index < 0) {
            throw new IllegalArgumentException("Marker \"" + marker
                    + "\" not found in " + fileName);
        }
        return index;
    }

    /** Fails when the end marker precedes the begin marker. */
    private static void requireOrdered(int beginIndex, int endIndex, String fileName) {
        if (endIndex < beginIndex) {
            throw new IllegalArgumentException("\"" + END_WORKAROUND
                    + "\" precedes \"" + BEGIN_WORKAROUND + "\" in " + fileName);
        }
    }

    /** Reads a whole file, decoded as UTF-8, closing it even on failure. */
    private static String readFileIntoString(String name) throws IOException {
        Reader in = new InputStreamReader(new FileInputStream(name), "UTF-8");
        try {
            StringBuilder builder = new StringBuilder();
            int c;
            while ((c = in.read()) != -1) {
                builder.append((char) c);
            }
            return builder.toString();
        } finally {
            in.close();
        }
    }

}
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.generator; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class GenerateNamedCharacters { + + private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + + private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); + + private static String toUString(int c) { + String hexString = Integer.toHexString(c); + switch (hexString.length()) { + case 1: + return "\\u000" + hexString; + case 2: + return "\\u00" + hexString; + case 3: + return "\\u0" + hexString; + case 4: + return "\\u" + hexString; + default: + throw new RuntimeException("Unreachable."); + } + } + + private static int charToIndex(char c) { + if (c >= 'a' && c <= 'z') { + return c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + return c - 'A'; + } + throw new IllegalArgumentException("Bad char in named character name: " + + c); + } + + private static boolean allZero(int[] arr) { + for (int i = 0; i < arr.length; i++) { + if (arr[i] != 0) { + return false; + } + } + return true; + } + + /** + * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + TreeMap entities = new TreeMap(); + BufferedReader reader = new BufferedReader(new InputStreamReader( + System.in, "utf-8")); + String line; + while ((line = 
reader.readLine()) != null) { + Matcher m = LINE_PATTERN.matcher(line); + while (m.find()) { + String value; + if (m.group(3) != null) { + // two BMP chars + int firstIntVal = Integer.parseInt(m.group(2), 16); + int secondIntVal = Integer.parseInt(m.group(3), 16); + value = ("" + (char)firstIntVal) + (char)secondIntVal; + } else { + // one code point + int intVal = Integer.parseInt(m.group(2), 16); + if (intVal <= 0xFFFF) { + value = "" + (char)intVal; + } else { + int high = (LEAD_OFFSET + (intVal >> 10)); + int low = (0xDC00 + (intVal & 0x3FF)); + value = ("" + (char)high) + (char)low; + } + } + entities.put(m.group(1), value); + } + } + + // Java initializes arrays to zero. Zero is our magic value for no hilo + // value. + int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; + + String firstName = entities.entrySet().iterator().next().getKey(); + int firstKey = charToIndex(firstName.charAt(0)); + int secondKey = firstName.charAt(1); + int row = 0; + int lo = 0; + + System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n"); + for (Map.Entry entity : entities.entrySet()) { + String name = entity.getKey(); + int newFirst = charToIndex(name.charAt(0)); + int newSecond = name.charAt(1); + assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; + if (firstKey != newFirst || secondKey != newSecond) { + hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; + lo = row; + firstKey = newFirst; + secondKey = newSecond; + } + System.out.print("\""); + System.out.print(name.substring(2)); + System.out.print("\",\n"); + row++; + } + System.out.print("};\n"); + + hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; + + System.out.print("static final @NoLength char[][] VALUES = {\n"); + for (Map.Entry entity : entities.entrySet()) { + String value = entity.getValue(); + System.out.print("{"); + if (value.length() == 1) { + char c = value.charAt(0); + if (c == '\'') { + 
System.out.print("\'\\\'\'"); + } else if (c == '\n') { + System.out.print("\'\\n\'"); + } else if (c == '\\') { + System.out.print("\'\\\\\'"); + } else if (c <= 0xFFFF) { + System.out.print("\'"); + System.out.print(toUString(c)); + System.out.print("\'"); + } + } else { + System.out.print("\'"); + System.out.print(toUString(value.charAt(0))); + System.out.print("\', \'"); + System.out.print(toUString(value.charAt(1))); + System.out.print("\'"); + } + System.out.print("},\n"); + } + System.out.print("};\n"); + + System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n"); + for (int i = 0; i < hiLoTable.length; i++) { + if (allZero(hiLoTable[i])) { + System.out.print("null,\n"); + } else { + System.out.print("{"); + for (int j = 0; j < hiLoTable[i].length; j++) { + System.out.print(hiLoTable[i][j]); + System.out.print(", "); + } + System.out.print("},\n"); + } + } + System.out.print("};\n"); + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java new file mode 100644 index 0000000000..2cfe7b1123 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java @@ -0,0 +1,580 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser C++ Translator code. 
+ * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.generator; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import nu.validator.htmlparser.cpptranslate.CppTypes; + +public class GenerateNamedCharactersCpp { + + /** + * The license for the output of this program except for data files. 
+ */ + private static final String OUTPUT_LICENSE = "/*\n" + + " * Copyright (c) 2008-2010 Mozilla Foundation\n" + + " *\n" + + " * Permission is hereby granted, free of charge, to any person obtaining a \n" + + " * copy of this software and associated documentation files (the \"Software\"), \n" + + " * to deal in the Software without restriction, including without limitation \n" + + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n" + + " * and/or sell copies of the Software, and to permit persons to whom the \n" + + " * Software is furnished to do so, subject to the following conditions:\n" + + " *\n" + + " * The above copyright notice and this permission notice shall be included in \n" + + " * all copies or substantial portions of the Software.\n" + + " *\n" + + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n" + + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n" + + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n" + + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n" + + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n" + + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n" + + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n"; + + /** + * The license for the generated data files. 
+ */ + private static final String DATA_LICENSE = "/*\n" + + " * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera \n" + + " * Software ASA.\n" + + " * \n" + + " * You are granted a license to use, reproduce and create derivative works of \n" + + " * this document.\n" + " */\n\n"; + + private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + + private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); + + private static String toHexString(int c) { + String hexString = Integer.toHexString(c); + switch (hexString.length()) { + case 1: + return "0x000" + hexString; + case 2: + return "0x00" + hexString; + case 3: + return "0x0" + hexString; + case 4: + return "0x" + hexString; + default: + throw new RuntimeException("Unreachable."); + } + } + + /** + * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + TreeMap entities = new TreeMap(); + BufferedReader reader = new BufferedReader(new InputStreamReader( + new FileInputStream(args[0]), "utf-8")); + String line; + while ((line = reader.readLine()) != null) { + Matcher m = LINE_PATTERN.matcher(line); + while (m.find()) { + String value; + if (m.group(3) != null) { + // two BMP chars + int firstIntVal = Integer.parseInt(m.group(2), 16); + int secondIntVal = Integer.parseInt(m.group(3), 16); + value = ("" + (char)firstIntVal) + (char)secondIntVal; + } else { + // one code point + int intVal = Integer.parseInt(m.group(2), 16); + if (intVal <= 0xFFFF) { + value = "" + (char)intVal; + } else { + int high = (LEAD_OFFSET + (intVal >> 10)); + int low = (0xDC00 + (intVal & 0x3FF)); + value = ("" + (char)high) + (char)low; + } + } + entities.put(m.group(1), value); + } + } + + CppTypes cppTypes = new CppTypes(null); + File targetDirectory = new File(args[1]); + + generateH(targetDirectory, cppTypes, entities); + generateInclude(targetDirectory, cppTypes, entities); + generateCpp(targetDirectory, cppTypes, 
entities); + generateAccelH(targetDirectory, cppTypes, entities); + generateAccelCpp(targetDirectory, cppTypes, entities); + } + + private static void generateAccelCpp(File targetDirectory, + CppTypes cppTypes, TreeMap entities) throws IOException { + String includeFile = cppTypes.classPrefix() + + "NamedCharactersInclude.h"; + File cppFile = new File(targetDirectory, cppTypes.classPrefix() + + "NamedCharactersAccel.cpp"); + Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), + "utf-8"); + + out.write(DATA_LICENSE); + out.write('\n'); + out.write("#include \"" + cppTypes.classPrefix() + + "NamedCharactersAccel.h\"\n"); + out.write("\n"); + + // Java initializes arrays to zero. Zero is our magic value for no hilo + // value. + int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; + + String firstName = entities.entrySet().iterator().next().getKey(); + int firstKey = charToIndex(firstName.charAt(0)); + int secondKey = firstName.charAt(1); + int row = 0; + int lo = 0; + + for (Map.Entry entity : entities.entrySet()) { + String name = entity.getKey(); + int newFirst = charToIndex(name.charAt(0)); + int newSecond = name.charAt(1); + assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; + if (firstKey != newFirst || secondKey != newSecond) { + hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; + lo = row; + firstKey = newFirst; + secondKey = newSecond; + } + row++; + } + + hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; + + for (int i = 0; i < hiLoTable.length; i++) { + if (!allZero(hiLoTable[i])) { + out.write("static " + cppTypes.intType() + " const HILO_ACCEL_" + + i + "[] = {\n"); + for (int j = 0; j < hiLoTable[i].length; j++) { + if (j != 0) { + out.write(", "); + } + out.write("" + hiLoTable[i][j]); + } + out.write("\n};\n\n"); + } + } + + out.write("const int32_t* const " + cppTypes.classPrefix() + + "NamedCharactersAccel::HILO_ACCEL[] = {\n"); + for (int i = 0; i < 
hiLoTable.length; i++) { + if (i != 0) { + out.write(",\n"); + } + if (allZero(hiLoTable[i])) { + out.write(" 0"); + } else { + out.write(" HILO_ACCEL_" + i); + } + } + out.write("\n};\n\n"); + + out.flush(); + out.close(); + } + + private static void generateAccelH(File targetDirectory, CppTypes cppTypes, + TreeMap entities) throws IOException { + File hFile = new File(targetDirectory, cppTypes.classPrefix() + + "NamedCharactersAccel.h"); + Writer out = new OutputStreamWriter(new FileOutputStream(hFile), + "utf-8"); + out.write(DATA_LICENSE); + out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); + out.write("#define " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); + out.write('\n'); + + String[] includes = cppTypes.namedCharactersIncludes(); + for (int i = 0; i < includes.length; i++) { + String include = includes[i]; + out.write("#include \"" + include + ".h\"\n"); + } + + out.write('\n'); + + out.write("class " + cppTypes.classPrefix() + "NamedCharactersAccel\n"); + out.write("{\n"); + out.write(" public:\n"); + out.write(" static const " + cppTypes.intType() + + "* const HILO_ACCEL[];\n"); + out.write("};\n"); + + out.write("\n#endif // " + cppTypes.classPrefix() + + "NamedCharactersAccel_h\n"); + out.flush(); + out.close(); + } + + private static void generateH(File targetDirectory, CppTypes cppTypes, + Map entities) throws IOException { + File hFile = new File(targetDirectory, cppTypes.classPrefix() + + "NamedCharacters.h"); + Writer out = new OutputStreamWriter(new FileOutputStream(hFile), + "utf-8"); + out.write(OUTPUT_LICENSE); + out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharacters_h\n"); + out.write("#define " + cppTypes.classPrefix() + "NamedCharacters_h\n"); + out.write('\n'); + + String[] includes = cppTypes.namedCharactersIncludes(); + for (int i = 0; i < includes.length; i++) { + String include = includes[i]; + out.write("#include \"" + include + ".h\"\n"); + } + + out.write("\nstruct "); + 
out.write(cppTypes.characterNameTypeDeclaration()); + out.write(" {\n "); + out.write(cppTypes.unsignedShortType()); + out.write(" nameStart;\n "); + out.write(cppTypes.unsignedShortType()); + out.write(" nameLen;\n #ifdef DEBUG\n "); + out.write(cppTypes.intType()); + out.write(" n;\n #endif\n "); + out.write(cppTypes.intType()); + out.write(" length() const;\n "); + out.write(cppTypes.charType()); + out.write(" charAt("); + out.write(cppTypes.intType()); + out.write(" index) const;\n};\n\n"); + + out.write("class " + cppTypes.classPrefix() + "NamedCharacters\n"); + out.write("{\n"); + out.write(" public:\n"); + out.write(" static const " + cppTypes.characterNameTypeDeclaration() + " NAMES[];\n"); + out.write(" static const " + cppTypes.charType() + " VALUES[][2];\n"); + out.write(" static " + cppTypes.charType() + "** WINDOWS_1252;\n"); + out.write(" static void initializeStatics();\n"); + out.write(" static void releaseStatics();\n"); + out.write("};\n"); + + out.write("\n#endif // " + cppTypes.classPrefix() + + "NamedCharacters_h\n"); + out.flush(); + out.close(); + } + + private static void generateInclude(File targetDirectory, + CppTypes cppTypes, Map entities) throws IOException { + File includeFile = new File(targetDirectory, cppTypes.classPrefix() + + "NamedCharactersInclude.h"); + Writer out = new OutputStreamWriter(new FileOutputStream(includeFile), + "utf-8"); + + out.write(DATA_LICENSE); + out.write("/* Data generated from the table of named character references found at\n"); + out.write(" *\n"); + out.write(" * http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html#named-character-references\n"); + out.write(" *\n"); + out.write(" * Files that #include this file must #define NAMED_CHARACTER_REFERENCE as a\n"); + out.write(" * macro of four parameters:\n"); + out.write(" *\n"); + out.write(" * 1. a unique integer N identifying the Nth [0,1,..] macro expansion in this file,\n"); + out.write(" * 2. 
a comma-separated sequence of characters comprising the character name,\n"); + out.write(" * without the first two letters or 0 if the sequence would be empty. \n"); + out.write(" * See Tokenizer.java.\n"); + out.write(" * 3. the length of this sequence of characters,\n"); + out.write(" * 4. placeholder flag (0 if argument #is not a placeholder and 1 if it is),\n"); + out.write(" * 5. a comma-separated sequence of char16_t literals corresponding\n"); + out.write(" * to the code-point(s) of the named character.\n"); + out.write(" *\n"); + out.write(" * The macro expansion doesn't have to refer to all or any of these parameters,\n"); + out.write(" * but common sense dictates that it should involve at least one of them.\n"); + out.write(" */\n"); + out.write("\n"); + out.write("// This #define allows the NAMED_CHARACTER_REFERENCE macro to accept comma-\n"); + out.write("// separated sequences as single macro arguments. Using commas directly would\n"); + out.write("// split the sequence into multiple macro arguments.\n"); + out.write("#define _ ,\n"); + out.write("\n"); + + int i = 0; + for (Map.Entry entity : entities.entrySet()) { + out.write("NAMED_CHARACTER_REFERENCE(" + i++ + ", "); + String name = entity.getKey(); + writeNameInitializer(out, name, " _ "); + out.write(", " + (name.length() - 2) + ", "); + out.write((name.length() == 2 ? 
"1" : "0") + ", "); + writeValueInitializer(out, entity.getValue(), " _ "); + out.write(")\n"); + } + + out.write("\n"); + out.write("#undef _\n"); + + out.flush(); + out.close(); + } + + private static void writeNameInitializer(Writer out, + String name, String separator) + throws IOException { + out.write("/* " + name.charAt(0) + " " + name.charAt(1) + " */ "); + if (name.length() == 2) { + out.write("0"); + } else { + for (int i = 2; i < name.length(); i++) { + out.write("'" + name.charAt(i) + "'"); + if (i < name.length() - 1) + out.write(separator); + } + } + } + + private static void writeValueInitializer(Writer out, + String value, String separator) + throws IOException { + if (value.length() == 1) { + out.write(toHexString(value.charAt(0))); + out.write(separator); + out.write("0"); + } else { + out.write(toHexString(value.charAt(0))); + out.write(separator); + out.write(toHexString(value.charAt(1))); + } + } + + private static void defineMacroAndInclude(Writer out, String expansion, + String includeFile) throws IOException { + out.write("#define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" + + expansion + "\n"); + out.write("#include \"" + includeFile + "\"\n"); + out.write("#undef NAMED_CHARACTER_REFERENCE\n"); + } + + private static void defineMacroAndInclude(Writer out, String expansion, + String debugExpansion, String includeFile) throws IOException { + out.write("#ifdef DEBUG\n"); + out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" + + debugExpansion + "\n"); + out.write("#else\n"); + out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" + + expansion + "\n"); + out.write("#endif\n"); + out.write("#include \"" + includeFile + "\"\n"); + out.write("#undef NAMED_CHARACTER_REFERENCE\n"); + } + + private static void writeStaticMemberDeclaration(Writer out, + CppTypes cppTypes, String type, String name) throws IOException { + out.write(type + " " + cppTypes.classPrefix() + 
"NamedCharacters::" + + name + ";\n"); + } + + private static int charToIndex(char c) { + if (c >= 'a' && c <= 'z') { + return c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + return c - 'A'; + } + throw new IllegalArgumentException("Bad char in named character name: " + + c); + } + + private static boolean allZero(int[] arr) { + for (int i = 0; i < arr.length; i++) { + if (arr[i] != 0) { + return false; + } + } + return true; + } + + private static void generateCpp(File targetDirectory, CppTypes cppTypes, + Map entities) throws IOException { + String includeFile = cppTypes.classPrefix() + + "NamedCharactersInclude.h"; + File cppFile = new File(targetDirectory, cppTypes.classPrefix() + + "NamedCharacters.cpp"); + Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), + "utf-8"); + + out.write(OUTPUT_LICENSE); + out.write("#define " + cppTypes.classPrefix() + + "NamedCharacters_cpp_\n"); + + String[] includes = cppTypes.namedCharactersIncludes(); + for (int i = 0; i < includes.length; i++) { + String include = includes[i]; + out.write("#include \"" + include + ".h\"\n"); + } + + out.write('\n'); + out.write("#include \"" + cppTypes.classPrefix() + + "NamedCharacters.h\"\n"); + out.write("\n"); + + out.write("const " + cppTypes.charType() + " " + cppTypes.classPrefix() + + "NamedCharacters::VALUES[][2] = {\n"); + defineMacroAndInclude(out, "{ VALUE },", includeFile); + // The useless terminator entry makes the above macro simpler with + // compilers that whine about a comma after the last item + out.write("{0, 0} };\n\n"); + + String staticMemberType = cppTypes.charType() + "**"; + writeStaticMemberDeclaration(out, cppTypes, staticMemberType, + "WINDOWS_1252"); + + out.write("static " + cppTypes.charType() + + " const WINDOWS_1252_DATA[] = {\n"); + out.write(" 0x20AC,\n"); + out.write(" 0x0081,\n"); + out.write(" 0x201A,\n"); + out.write(" 0x0192,\n"); + out.write(" 0x201E,\n"); + out.write(" 0x2026,\n"); + out.write(" 0x2020,\n"); + out.write(" 
0x2021,\n"); + out.write(" 0x02C6,\n"); + out.write(" 0x2030,\n"); + out.write(" 0x0160,\n"); + out.write(" 0x2039,\n"); + out.write(" 0x0152,\n"); + out.write(" 0x008D,\n"); + out.write(" 0x017D,\n"); + out.write(" 0x008F,\n"); + out.write(" 0x0090,\n"); + out.write(" 0x2018,\n"); + out.write(" 0x2019,\n"); + out.write(" 0x201C,\n"); + out.write(" 0x201D,\n"); + out.write(" 0x2022,\n"); + out.write(" 0x2013,\n"); + out.write(" 0x2014,\n"); + out.write(" 0x02DC,\n"); + out.write(" 0x2122,\n"); + out.write(" 0x0161,\n"); + out.write(" 0x203A,\n"); + out.write(" 0x0153,\n"); + out.write(" 0x009D,\n"); + out.write(" 0x017E,\n"); + out.write(" 0x0178\n"); + out.write("};\n\n"); + + out.write("/**\n"); + out.write(" * To avoid having lots of pointers in the |charData| array, below,\n"); + out.write(" * which would cause us to have to do lots of relocations at library\n"); + out.write(" * load time, store all the string data for the names in one big array.\n"); + out.write(" * Then use tricks with enums to help us build an array that contains\n"); + out.write(" * the positions of each within the big arrays.\n"); + out.write(" */\n\n"); + + out.write("static const " + cppTypes.byteType() + " ALL_NAMES[] = {\n"); + + defineMacroAndInclude(out, "CHARS ,", includeFile); + + out.write("};\n\n"); + + out.write("enum NamePositions {\n"); + out.write(" DUMMY_INITIAL_NAME_POSITION = 0,\n"); + + out.write("/* enums don't take up space, so generate _START and _END */\n"); + defineMacroAndInclude(out, + "NAME_##N##_DUMMY, /* automatically one higher than previous */ \\\n" + + "NAME_##N##_START = NAME_##N##_DUMMY - 1, \\\n" + + "NAME_##N##_END = NAME_##N##_START + LEN + FLAG,", + includeFile); + + out.write(" DUMMY_FINAL_NAME_VALUE\n"); + out.write("};\n\n"); + + String arrayLengthMacro = cppTypes.arrayLengthMacro(); + String staticAssert = cppTypes.staticAssert(); + if (staticAssert != null && arrayLengthMacro != null) { + out.write("/* check that the start positions will fit in 16 
bits */\n"); + out.write(staticAssert + "(" + arrayLengthMacro + + "(ALL_NAMES) < 0x10000);\n\n"); + } + + out.write("const " + cppTypes.characterNameTypeDeclaration() + " " + cppTypes.classPrefix() + + "NamedCharacters::NAMES[] = {\n"); + defineMacroAndInclude(out, "{ NAME_##N##_START, LEN, },", "{ NAME_##N##_START, LEN, N },", includeFile); + out.write("};\n\n"); + + out.write(cppTypes.intType()); + out.write("\n"); + out.write(cppTypes.characterNameTypeDeclaration()); + out.write("::length() const\n{\n return nameLen;\n}\n\n"); + out.write(cppTypes.charType()); + out.write("\n"); + out.write(cppTypes.characterNameTypeDeclaration()); + out.write("::charAt("); + out.write("int32_t"); + out.write(" index) const\n{\n return static_cast<"); + out.write(cppTypes.charType()); + out.write("> (ALL_NAMES[nameStart + index]);\n}\n\n"); + + out.write("void\n"); + out.write(cppTypes.classPrefix() + + "NamedCharacters::initializeStatics()\n"); + out.write("{\n"); + out.write(" WINDOWS_1252 = new " + cppTypes.charType() + "*[32];\n"); + out.write(" for (" + cppTypes.intType() + " i = 0; i < 32; ++i) {\n"); + out.write(" WINDOWS_1252[i] = (" + cppTypes.charType() + + "*)&(WINDOWS_1252_DATA[i]);\n"); + out.write(" }\n"); + out.write("}\n"); + out.write("\n"); + + out.write("void\n"); + out.write(cppTypes.classPrefix() + + "NamedCharacters::releaseStatics()\n"); + out.write("{\n"); + out.write(" delete[] WINDOWS_1252;\n"); + out.write("}\n"); + out.flush(); + out.close(); + } +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java new file mode 100644 index 0000000000..b881073614 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/DuplicatingFallThroughRemover.java @@ -0,0 +1,79 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 
2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser Rust Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.rusttranslate; + +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.Statement; +import japa.parser.ast.stmt.SwitchEntryStmt; +import japa.parser.ast.stmt.SwitchStmt; +import japa.parser.ast.visitor.VoidVisitorAdapter; + +import java.util.LinkedList; +import java.util.List; + +public class DuplicatingFallThroughRemover extends VoidVisitorAdapter { + + private static final SwitchBreakAnalyzerVisitor ANALYZER_VISITOR = new SwitchBreakAnalyzerVisitor(); + + @Override public void visit(SwitchStmt sw, Object arg) { + if ("state".equals(sw.getSelector().toString())) { + super.visit(sw, arg); + return; + } + + List tail = new LinkedList(); + tail.add(new BreakStmt()); + + List entries = sw.getEntries(); + for (int i = entries.size() - 1; i >= 0; i--) { + SwitchEntryStmt stmt = entries.get(i); + List list = stmt.getStmts(); + if (list != null) { + if (!(list.size() > 0 + && list.get(list.size() - 1).accept(ANALYZER_VISITOR, true))) { + list.addAll(tail); + } + tail = list; + for (Statement statement : list) { + statement.accept(this, arg); + } + } + } + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java new file mode 100644 index 0000000000..97ded525fd --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/JavaVisitor.java @@ -0,0 +1,1349 @@ +/* + * Copyright (C) 2007 Júlio Vilmar Gesser. + * Copyright (C) 2012 Mozilla Foundation + * + * This file is part of Java 1.5 parser and Abstract Syntax Tree. 
+ * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see <http://www.gnu.org/licenses/>. + */ +/* + * Created on 05/10/2006 + */ +package nu.validator.htmlparser.rusttranslate; + +import japa.parser.ast.BlockComment; +import japa.parser.ast.CompilationUnit; +import japa.parser.ast.ImportDeclaration; +import japa.parser.ast.LineComment; +import japa.parser.ast.PackageDeclaration; +import japa.parser.ast.TypeParameter; +import japa.parser.ast.body.AnnotationDeclaration; +import japa.parser.ast.body.AnnotationMemberDeclaration; +import japa.parser.ast.body.BodyDeclaration; +import japa.parser.ast.body.ClassOrInterfaceDeclaration; +import japa.parser.ast.body.ConstructorDeclaration; +import japa.parser.ast.body.EmptyMemberDeclaration; +import japa.parser.ast.body.EmptyTypeDeclaration; +import japa.parser.ast.body.EnumConstantDeclaration; +import japa.parser.ast.body.EnumDeclaration; +import japa.parser.ast.body.FieldDeclaration; +import japa.parser.ast.body.InitializerDeclaration; +import japa.parser.ast.body.JavadocComment; +import japa.parser.ast.body.MethodDeclaration; +import japa.parser.ast.body.ModifierSet; +import japa.parser.ast.body.Parameter; +import japa.parser.ast.body.TypeDeclaration; +import japa.parser.ast.body.VariableDeclarator; +import japa.parser.ast.body.VariableDeclaratorId; +import
japa.parser.ast.expr.AnnotationExpr; +import japa.parser.ast.expr.ArrayAccessExpr; +import japa.parser.ast.expr.ArrayCreationExpr; +import japa.parser.ast.expr.ArrayInitializerExpr; +import japa.parser.ast.expr.AssignExpr; +import japa.parser.ast.expr.BinaryExpr; +import japa.parser.ast.expr.BooleanLiteralExpr; +import japa.parser.ast.expr.CastExpr; +import japa.parser.ast.expr.CharLiteralExpr; +import japa.parser.ast.expr.ClassExpr; +import japa.parser.ast.expr.ConditionalExpr; +import japa.parser.ast.expr.DoubleLiteralExpr; +import japa.parser.ast.expr.EnclosedExpr; +import japa.parser.ast.expr.Expression; +import japa.parser.ast.expr.FieldAccessExpr; +import japa.parser.ast.expr.InstanceOfExpr; +import japa.parser.ast.expr.IntegerLiteralExpr; +import japa.parser.ast.expr.IntegerLiteralMinValueExpr; +import japa.parser.ast.expr.LongLiteralExpr; +import japa.parser.ast.expr.LongLiteralMinValueExpr; +import japa.parser.ast.expr.MarkerAnnotationExpr; +import japa.parser.ast.expr.MemberValuePair; +import japa.parser.ast.expr.MethodCallExpr; +import japa.parser.ast.expr.NameExpr; +import japa.parser.ast.expr.NormalAnnotationExpr; +import japa.parser.ast.expr.NullLiteralExpr; +import japa.parser.ast.expr.ObjectCreationExpr; +import japa.parser.ast.expr.QualifiedNameExpr; +import japa.parser.ast.expr.SingleMemberAnnotationExpr; +import japa.parser.ast.expr.StringLiteralExpr; +import japa.parser.ast.expr.SuperExpr; +import japa.parser.ast.expr.ThisExpr; +import japa.parser.ast.expr.UnaryExpr; +import japa.parser.ast.expr.VariableDeclarationExpr; +import japa.parser.ast.stmt.AssertStmt; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.CatchClause; +import japa.parser.ast.stmt.ContinueStmt; +import japa.parser.ast.stmt.DoStmt; +import japa.parser.ast.stmt.EmptyStmt; +import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt; +import japa.parser.ast.stmt.ExpressionStmt; +import japa.parser.ast.stmt.ForStmt; 
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.ClassOrInterfaceType;
+import japa.parser.ast.type.PrimitiveType;
+import japa.parser.ast.type.ReferenceType;
+import japa.parser.ast.type.Type;
+import japa.parser.ast.type.VoidType;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.VoidVisitor;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Visitor that writes a japa.parser AST back out as Java source text.
+ *
+ * <p>Each <code>visit</code> method appends the textual form of its node to
+ * an internal buffer and delegates to child nodes through
+ * <code>accept(this, arg)</code>. The <code>arg</code> value is never
+ * inspected here; it is only handed on to children. The accumulated text is
+ * available from {@link #getSource()}.
+ *
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+
+public final class JavaVisitor implements VoidVisitor<Object> {
+
+    /**
+     * Text buffer with a nesting level. The first <code>print</code> after a
+     * line break emits the indentation for the current level.
+     */
+    private static class SourcePrinter {
+
+        private int level = 0;
+
+        // True once indentation has been written for the current line.
+        private boolean indented = false;
+
+        private final StringBuilder buf = new StringBuilder();
+
+        /** Increases the nesting level by one. */
+        public void indent() {
+            level++;
+        }
+
+        /** Decreases the nesting level by one. */
+        public void unindent() {
+            level--;
+        }
+
+        /** Appends one indent unit per nesting level. */
+        private void makeIndent() {
+            for (int i = 0; i < level; i++) {
+                // NOTE(review): whitespace runs in this copy of the file
+                // appear to have been collapsed; the upstream indent unit
+                // may be wider than shown - verify before relying on it.
+                buf.append(" ");
+            }
+        }
+
+        /**
+         * Appends <code>arg</code>, preceded by indentation if this is the
+         * first output on the current line.
+         */
+        public void print(String arg) {
+            if (!indented) {
+                makeIndent();
+                indented = true;
+            }
+            buf.append(arg);
+        }
+
+        /** Prints <code>arg</code> and then ends the line. */
+        public void printLn(String arg) {
+            print(arg);
+            printLn();
+        }
+
+        /** Ends the current line; the next print is indented again. */
+        public void printLn() {
+            buf.append("\n");
+            indented = false;
+        }
+
+        /** Returns everything printed so far. */
+        public String getSource() {
+            return buf.toString();
+        }
+
+        @Override
+        public String toString() {
+            return getSource();
+        }
+    }
+
+    private final SourcePrinter printer = new SourcePrinter();
+
+    /**
+     * Returns the source text produced by all visits so far.
+     *
+     * @return the accumulated output
+     */
+    public String getSource() {
+        return printer.getSource();
+    }
+
+    /**
+     * Prints the keyword (followed by a space) for every modifier bit set
+     * in <code>modifiers</code>, in a fixed order.
+     */
+    private void printModifiers(int modifiers) {
+        if (ModifierSet.isPrivate(modifiers)) {
+            printer.print("private ");
+        }
+        if (ModifierSet.isProtected(modifiers)) {
+            printer.print("protected ");
+        }
+        if (ModifierSet.isPublic(modifiers)) {
+            printer.print("public ");
+        }
+        if (ModifierSet.isAbstract(modifiers)) {
+            printer.print("abstract ");
+        }
+        if (ModifierSet.isStatic(modifiers)) {
+            printer.print("static ");
+        }
+        if (ModifierSet.isFinal(modifiers)) {
+            printer.print("final ");
+        }
+        if (ModifierSet.isNative(modifiers)) {
+            printer.print("native ");
+        }
+        if (ModifierSet.isStrictfp(modifiers)) {
+            printer.print("strictfp ");
+        }
+        if (ModifierSet.isSynchronized(modifiers)) {
+            printer.print("synchronized ");
+        }
+        if (ModifierSet.isTransient(modifiers)) {
+            printer.print("transient ");
+        }
+        if (ModifierSet.isVolatile(modifiers)) {
+            printer.print("volatile ");
+        }
+    }
+
+    /**
+     * Prints each member with a line break before and after it. The list
+     * itself must not be null; callers check that.
+     */
+    private void printMembers(List<BodyDeclaration> members, Object arg) {
+        for (BodyDeclaration member : members) {
+            printer.printLn();
+            member.accept(this, arg);
+            printer.printLn();
+        }
+    }
+
+    /** Prints each annotation on its own line; a null list prints nothing. */
+    private void printMemberAnnotations(List<AnnotationExpr> annotations, Object arg) {
+        if (annotations != null) {
+            for (AnnotationExpr a : annotations) {
+                a.accept(this, arg);
+                printer.printLn();
+            }
+        }
+    }
+
+    /** Prints each annotation followed by a space; a null list prints nothing. */
+    private void printAnnotations(List<AnnotationExpr> annotations, Object arg) {
+        if (annotations != null) {
+            for (AnnotationExpr a : annotations) {
+                a.accept(this, arg);
+                printer.print(" ");
+            }
+        }
+    }
+
+    /** Prints a comma-separated type argument list in angle brackets, or nothing for null. */
+    private void printTypeArgs(List<Type> args, Object arg) {
+        if (args != null) {
+            printer.print("<");
+            for (Iterator<Type> i = args.iterator(); i.hasNext();) {
+                Type t = i.next();
+                t.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+            printer.print(">");
+        }
+    }
+
+    /** Prints a comma-separated type parameter list in angle brackets, or nothing for null. */
+    private void printTypeParameters(List<TypeParameter> args, Object arg) {
+        if (args != null) {
+            printer.print("<");
+            for (Iterator<TypeParameter> i = args.iterator(); i.hasNext();) {
+                TypeParameter t = i.next();
+                t.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+            printer.print(">");
+        }
+    }
+
+    /**
+     * Prints a parenthesized, comma-separated argument list. The
+     * parentheses are printed even when <code>args</code> is null.
+     */
+    private void printArguments(List<Expression> args, Object arg) {
+        printer.print("(");
+        if (args != null) {
+            for (Iterator<Expression> i = args.iterator(); i.hasNext();) {
+                Expression e = i.next();
+                e.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(")");
+    }
+
+    /** Prints the Javadoc comment if there is one. */
+    private void printJavadoc(JavadocComment javadoc, Object arg) {
+        if (javadoc != null) {
+            javadoc.accept(this, arg);
+        }
+    }
+
+    /** Prints the package declaration, the imports and then every type, in that order. */
+    public void visit(CompilationUnit n, Object arg) {
+        if (n.getPackage() != null) {
+            n.getPackage().accept(this, arg);
+        }
+        if (n.getImports() != null) {
+            for (ImportDeclaration i : n.getImports()) {
+                i.accept(this, arg);
+            }
+            printer.printLn();
+        }
+        if (n.getTypes() != null) {
+            for (Iterator<TypeDeclaration> i = n.getTypes().iterator(); i.hasNext();) {
+                i.next().accept(this, arg);
+                printer.printLn();
+                if (i.hasNext()) {
+                    printer.printLn();
+                }
+            }
+        }
+    }
+
+    public void visit(PackageDeclaration n, Object arg) {
+        printAnnotations(n.getAnnotations(), arg);
+        printer.print("package ");
+        n.getName().accept(this, arg);
+        printer.printLn(";");
+        printer.printLn();
+    }
+
+    public void visit(NameExpr n, Object arg) {
+        printer.print(n.getName());
+    }
+
+    public void visit(QualifiedNameExpr n, Object arg) {
+        n.getQualifier().accept(this, arg);
+        printer.print(".");
+        printer.print(n.getName());
+    }
+
+    public void visit(ImportDeclaration n, Object arg) {
+        printer.print("import ");
+        if (n.isStatic()) {
+            printer.print("static ");
+        }
+        n.getName().accept(this, arg);
+        if (n.isAsterisk()) {
+            printer.print(".*");
+        }
+        printer.printLn(";");
+    }
+
+    /** Prints a class or interface header followed by its members in an indented body. */
+    public void visit(ClassOrInterfaceDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        if (n.isInterface()) {
+            printer.print("interface ");
+        } else {
+            printer.print("class ");
+        }
+
+        printer.print(n.getName());
+
+        printTypeParameters(n.getTypeParameters(), arg);
+
+        if (n.getExtends() != null) {
+            printer.print(" extends ");
+            for (Iterator<ClassOrInterfaceType> i = n.getExtends().iterator(); i.hasNext();) {
+                ClassOrInterfaceType c = i.next();
+                c.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+
+        if (n.getImplements() != null) {
+            printer.print(" implements ");
+            for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+                ClassOrInterfaceType c = i.next();
+                c.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+
+        printer.printLn(" {");
+        printer.indent();
+        if (n.getMembers() != null) {
+            printMembers(n.getMembers(), arg);
+        }
+        printer.unindent();
+        printer.print("}");
+    }
+
+    public void visit(EmptyTypeDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printer.print(";");
+    }
+
+    public void visit(JavadocComment n, Object arg) {
+        printer.print("/**");
+        printer.print(n.getContent());
+        printer.printLn("*/");
+    }
+
+    public void visit(ClassOrInterfaceType n, Object arg) {
+        if (n.getScope() != null) {
+            n.getScope().accept(this, arg);
+            printer.print(".");
+        }
+        printer.print(n.getName());
+        printTypeArgs(n.getTypeArgs(), arg);
+    }
+
+    public void visit(TypeParameter n, Object arg) {
+        printer.print(n.getName());
+        if (n.getTypeBound() != null) {
+            printer.print(" extends ");
+            for (Iterator<ClassOrInterfaceType> i = n.getTypeBound().iterator(); i.hasNext();) {
+                ClassOrInterfaceType c = i.next();
+                c.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(" & ");
+                }
+            }
+        }
+    }
+
+    public void visit(PrimitiveType n, Object arg) {
+        switch (n.getType()) {
+            case Boolean:
+                printer.print("boolean");
+                break;
+            case Byte:
+                printer.print("byte");
+                break;
+            case Char:
+                printer.print("char");
+                break;
+            case Double:
+                printer.print("double");
+                break;
+            case Float:
+                printer.print("float");
+                break;
+            case Int:
+                printer.print("int");
+                break;
+            case Long:
+                printer.print("long");
+                break;
+            case Short:
+                printer.print("short");
+                break;
+        }
+    }
+
+    public void visit(ReferenceType n, Object arg) {
+        n.getType().accept(this, arg);
+        for (int i = 0; i < n.getArrayCount(); i++) {
+            printer.print("[]");
+        }
+    }
+
+    public void visit(WildcardType n, Object arg) {
+        printer.print("?");
+        if (n.getExtends() != null) {
+            printer.print(" extends ");
+            n.getExtends().accept(this, arg);
+        }
+        if (n.getSuper() != null) {
+            printer.print(" super ");
+            n.getSuper().accept(this, arg);
+        }
+    }
+
+    public void visit(FieldDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+        n.getType().accept(this, arg);
+
+        printer.print(" ");
+        for (Iterator<VariableDeclarator> i = n.getVariables().iterator(); i.hasNext();) {
+            VariableDeclarator var = i.next();
+            var.accept(this, arg);
+            if (i.hasNext()) {
+                printer.print(", ");
+            }
+        }
+
+        printer.print(";");
+    }
+
+    public void visit(VariableDeclarator n, Object arg) {
+        n.getId().accept(this, arg);
+        if (n.getInit() != null) {
+            printer.print(" = ");
+            n.getInit().accept(this, arg);
+        }
+    }
+
+    public void visit(VariableDeclaratorId n, Object arg) {
+        printer.print(n.getName());
+        for (int i = 0; i < n.getArrayCount(); i++) {
+            printer.print("[]");
+        }
+    }
+
+    public void visit(ArrayInitializerExpr n, Object arg) {
+        printer.print("{");
+        if (n.getValues() != null) {
+            printer.print(" ");
+            for (Iterator<Expression> i = n.getValues().iterator(); i.hasNext();) {
+                Expression expr = i.next();
+                expr.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+            printer.print(" ");
+        }
+        printer.print("}");
+    }
+
+    public void visit(VoidType n, Object arg) {
+        printer.print("void");
+    }
+
+    public void visit(ArrayAccessExpr n, Object arg) {
+        n.getName().accept(this, arg);
+        printer.print("[");
+        n.getIndex().accept(this, arg);
+        printer.print("]");
+    }
+
+    /**
+     * Prints an array creation. With explicit dimensions, the sized
+     * brackets come first and the remaining empty brackets follow. Without
+     * them, only empty brackets are printed, followed by the initializer.
+     */
+    public void visit(ArrayCreationExpr n, Object arg) {
+        printer.print("new ");
+        n.getType().accept(this, arg);
+
+        if (n.getDimensions() != null) {
+            for (Expression dim : n.getDimensions()) {
+                printer.print("[");
+                dim.accept(this, arg);
+                printer.print("]");
+            }
+            for (int i = 0; i < n.getArrayCount(); i++) {
+                printer.print("[]");
+            }
+        } else {
+            for (int i = 0; i < n.getArrayCount(); i++) {
+                printer.print("[]");
+            }
+            printer.print(" ");
+            n.getInitializer().accept(this, arg);
+        }
+    }
+
+    public void visit(AssignExpr n, Object arg) {
+        n.getTarget().accept(this, arg);
+        printer.print(" ");
+        switch (n.getOperator()) {
+            case assign:
+                printer.print("=");
+                break;
+            case and:
+                printer.print("&=");
+                break;
+            case or:
+                printer.print("|=");
+                break;
+            case xor:
+                printer.print("^=");
+                break;
+            case plus:
+                printer.print("+=");
+                break;
+            case minus:
+                printer.print("-=");
+                break;
+            case rem:
+                printer.print("%=");
+                break;
+            case slash:
+                printer.print("/=");
+                break;
+            case star:
+                printer.print("*=");
+                break;
+            case lShift:
+                printer.print("<<=");
+                break;
+            case rSignedShift:
+                printer.print(">>=");
+                break;
+            case rUnsignedShift:
+                printer.print(">>>=");
+                break;
+        }
+        printer.print(" ");
+        n.getValue().accept(this, arg);
+    }
+
+    public void visit(BinaryExpr n, Object arg) {
+        n.getLeft().accept(this, arg);
+        printer.print(" ");
+        switch (n.getOperator()) {
+            case or:
+                printer.print("||");
+                break;
+            case and:
+                printer.print("&&");
+                break;
+            case binOr:
+                printer.print("|");
+                break;
+            case binAnd:
+                printer.print("&");
+                break;
+            case xor:
+                printer.print("^");
+                break;
+            case equals:
+                printer.print("==");
+                break;
+            case notEquals:
+                printer.print("!=");
+                break;
+            case less:
+                printer.print("<");
+                break;
+            case greater:
+                printer.print(">");
+                break;
+            case lessEquals:
+                printer.print("<=");
+                break;
+            case greaterEquals:
+                printer.print(">=");
+                break;
+            case lShift:
+                printer.print("<<");
+                break;
+            case rSignedShift:
+                printer.print(">>");
+                break;
+            case rUnsignedShift:
+                printer.print(">>>");
+                break;
+            case plus:
+                printer.print("+");
+                break;
+            case minus:
+                printer.print("-");
+                break;
+            case times:
+                printer.print("*");
+                break;
+            case divide:
+                printer.print("/");
+                break;
+            case remainder:
+                printer.print("%");
+                break;
+        }
+        printer.print(" ");
+        n.getRight().accept(this, arg);
+    }
+
+    public void visit(CastExpr n, Object arg) {
+        printer.print("(");
+        n.getType().accept(this, arg);
+        printer.print(") ");
+        n.getExpr().accept(this, arg);
+    }
+
+    public void visit(ClassExpr n, Object arg) {
+        n.getType().accept(this, arg);
+        printer.print(".class");
+    }
+
+    public void visit(ConditionalExpr n, Object arg) {
+        n.getCondition().accept(this, arg);
+        printer.print(" ? ");
+        n.getThenExpr().accept(this, arg);
+        printer.print(" : ");
+        n.getElseExpr().accept(this, arg);
+    }
+
+    public void visit(EnclosedExpr n, Object arg) {
+        printer.print("(");
+        n.getInner().accept(this, arg);
+        printer.print(")");
+    }
+
+    public void visit(FieldAccessExpr n, Object arg) {
+        n.getScope().accept(this, arg);
+        printer.print(".");
+        printer.print(n.getField());
+    }
+
+    public void visit(InstanceOfExpr n, Object arg) {
+        n.getExpr().accept(this, arg);
+        printer.print(" instanceof ");
+        n.getType().accept(this, arg);
+    }
+
+    /**
+     * Prints a char literal in single quotes, choosing the text from the
+     * first character of <code>getValue()</code>. Listed control characters
+     * and the single quote are written as backslash escapes. Other
+     * characters outside the range ' ' to '~' are written as four-digit
+     * hexadecimal Unicode escapes. Everything else is written literally.
+     */
+    public void visit(CharLiteralExpr n, Object arg) {
+        printer.print("'");
+        char c = n.getValue().charAt(0);
+        switch (c) {
+            case '\b':
+                printer.print("\\b");
+                break;
+            case '\t':
+                printer.print("\\t");
+                break;
+            case '\n':
+                printer.print("\\n");
+                break;
+            case '\f':
+                printer.print("\\f");
+                break;
+            case '\r':
+                printer.print("\\r");
+                break;
+            case '\'':
+                printer.print("\\'");
+                break;
+            case '\\':
+                // The whole value is printed unchanged here; presumably it
+                // already holds an escape sequence - verify against the parser.
+                printer.print(n.getValue());
+                break;
+            default:
+                if (c < ' ' || c > '~') {
+                    // Left-pad the hex digits with zeros to four places.
+                    String hex = Integer.toHexString(c);
+                    switch (hex.length()) {
+                        case 1:
+                            printer.print("\\u000"+hex);
+                            break;
+                        case 2:
+                            printer.print("\\u00"+hex);
+                            break;
+                        case 3:
+                            printer.print("\\u0"+hex);
+                            break;
+                        case 4:
+                            printer.print("\\u"+hex);
+                            break;
+                    }
+                } else {
+                    printer.print(""+c);
+                }
+                break;
+        }
+        printer.print("'");
+    }
+
+    public void visit(DoubleLiteralExpr n, Object arg) {
+        printer.print(n.getValue());
+    }
+
+    public void visit(IntegerLiteralExpr n, Object arg) {
+        printer.print(n.getValue());
+    }
+
+    public void visit(LongLiteralExpr n, Object arg) {
+        printer.print(n.getValue());
+    }
+
+    public void visit(IntegerLiteralMinValueExpr n, Object arg) {
+        printer.print(n.getValue());
+    }
+
+    public void visit(LongLiteralMinValueExpr n, Object arg) {
+        printer.print(n.getValue());
+    }
+
+    /** Prints the value between double quotes without any further escaping. */
+    public void visit(StringLiteralExpr n, Object arg) {
+        printer.print("\"");
+        printer.print(n.getValue());
+        printer.print("\"");
+    }
+
+    public void visit(BooleanLiteralExpr n, Object arg) {
+        printer.print(String.valueOf(n.getValue()));
+    }
+
+    public void visit(NullLiteralExpr n, Object arg) {
+        printer.print("null");
+    }
+
+    public void visit(ThisExpr n, Object arg) {
+        if (n.getClassExpr() != null) {
+            n.getClassExpr().accept(this, arg);
+            printer.print(".");
+        }
+        printer.print("this");
+    }
+
+    public void visit(SuperExpr n, Object arg) {
+        if (n.getClassExpr() != null) {
+            n.getClassExpr().accept(this, arg);
+            printer.print(".");
+        }
+        printer.print("super");
+    }
+
+    public void visit(MethodCallExpr n, Object arg) {
+        if (n.getScope() != null) {
+            n.getScope().accept(this, arg);
+            printer.print(".");
+        }
+        printTypeArgs(n.getTypeArgs(), arg);
+        printer.print(n.getName());
+        printArguments(n.getArgs(), arg);
+    }
+
+    public void visit(ObjectCreationExpr n, Object arg) {
+        if (n.getScope() != null) {
+            n.getScope().accept(this, arg);
+            printer.print(".");
+        }
+
+        printer.print("new ");
+
+        printTypeArgs(n.getTypeArgs(), arg);
+        n.getType().accept(this, arg);
+
+        printArguments(n.getArgs(), arg);
+
+        if (n.getAnonymousClassBody() != null) {
+            printer.printLn(" {");
+            printer.indent();
+            printMembers(n.getAnonymousClassBody(), arg);
+            printer.unindent();
+            printer.print("}");
+        }
+    }
+
+    /**
+     * Prints a unary expression. The first switch writes prefix operators
+     * before the operand and the second writes postfix operators after it.
+     * Operators not listed in a switch print nothing at that position.
+     */
+    public void visit(UnaryExpr n, Object arg) {
+        switch (n.getOperator()) {
+            case positive:
+                printer.print("+");
+                break;
+            case negative:
+                printer.print("-");
+                break;
+            case inverse:
+                printer.print("~");
+                break;
+            case not:
+                printer.print("!");
+                break;
+            case preIncrement:
+                printer.print("++");
+                break;
+            case preDecrement:
+                printer.print("--");
+                break;
+        }
+
+        n.getExpr().accept(this, arg);
+
+        switch (n.getOperator()) {
+            case posIncrement:
+                printer.print("++");
+                break;
+            case posDecrement:
+                printer.print("--");
+                break;
+        }
+    }
+
+    public void visit(ConstructorDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        printTypeParameters(n.getTypeParameters(), arg);
+        if (n.getTypeParameters() != null) {
+            printer.print(" ");
+        }
+        printer.print(n.getName());
+
+        printer.print("(");
+        if (n.getParameters() != null) {
+            for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+                Parameter p = i.next();
+                p.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(")");
+
+        if (n.getThrows() != null) {
+            printer.print(" throws ");
+            for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+                NameExpr name = i.next();
+                name.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(" ");
+        n.getBlock().accept(this, arg);
+    }
+
+    /** Prints a method declaration; a null body is printed as a terminating semicolon. */
+    public void visit(MethodDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        printTypeParameters(n.getTypeParameters(), arg);
+        if (n.getTypeParameters() != null) {
+            printer.print(" ");
+        }
+
+        n.getType().accept(this, arg);
+        printer.print(" ");
+        printer.print(n.getName());
+
+        printer.print("(");
+        if (n.getParameters() != null) {
+            for (Iterator<Parameter> i = n.getParameters().iterator(); i.hasNext();) {
+                Parameter p = i.next();
+                p.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(")");
+
+        for (int i = 0; i < n.getArrayCount(); i++) {
+            printer.print("[]");
+        }
+
+        if (n.getThrows() != null) {
+            printer.print(" throws ");
+            for (Iterator<NameExpr> i = n.getThrows().iterator(); i.hasNext();) {
+                NameExpr name = i.next();
+                name.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        if (n.getBody() == null) {
+            printer.print(";");
+        } else {
+            printer.print(" ");
+            n.getBody().accept(this, arg);
+        }
+    }
+
+    public void visit(Parameter n, Object arg) {
+        printAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        n.getType().accept(this, arg);
+        if (n.isVarArgs()) {
+            printer.print("...");
+        }
+        printer.print(" ");
+        n.getId().accept(this, arg);
+    }
+
+    public void visit(ExplicitConstructorInvocationStmt n, Object arg) {
+        if (n.isThis()) {
+            printTypeArgs(n.getTypeArgs(), arg);
+            printer.print("this");
+        } else {
+            if (n.getExpr() != null) {
+                n.getExpr().accept(this, arg);
+                printer.print(".");
+            }
+            printTypeArgs(n.getTypeArgs(), arg);
+            printer.print("super");
+        }
+        printArguments(n.getArgs(), arg);
+        printer.print(";");
+    }
+
+    public void visit(VariableDeclarationExpr n, Object arg) {
+        printAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        n.getType().accept(this, arg);
+        printer.print(" ");
+
+        for (Iterator<VariableDeclarator> i = n.getVars().iterator(); i.hasNext();) {
+            VariableDeclarator v = i.next();
+            v.accept(this, arg);
+            if (i.hasNext()) {
+                printer.print(", ");
+            }
+        }
+    }
+
+    public void visit(TypeDeclarationStmt n, Object arg) {
+        n.getTypeDeclaration().accept(this, arg);
+    }
+
+    public void visit(AssertStmt n, Object arg) {
+        printer.print("assert ");
+        n.getCheck().accept(this, arg);
+        if (n.getMessage() != null) {
+            printer.print(" : ");
+            n.getMessage().accept(this, arg);
+        }
+        printer.print(";");
+    }
+
+    /** Prints a braced block with one statement per line, indented one level. */
+    public void visit(BlockStmt n, Object arg) {
+        printer.printLn("{");
+        if (n.getStmts() != null) {
+            printer.indent();
+            for (Statement s : n.getStmts()) {
+                s.accept(this, arg);
+                printer.printLn();
+            }
+            printer.unindent();
+        }
+        printer.print("}");
+
+    }
+
+    public void visit(LabeledStmt n, Object arg) {
+        printer.print(n.getLabel());
+        printer.print(": ");
+        n.getStmt().accept(this, arg);
+    }
+
+    public void visit(EmptyStmt n, Object arg) {
+        printer.print(";");
+    }
+
+    public void visit(ExpressionStmt n, Object arg) {
+        n.getExpression().accept(this, arg);
+        printer.print(";");
+    }
+
+    public void visit(SwitchStmt n, Object arg) {
+        printer.print("switch(");
+        n.getSelector().accept(this, arg);
+        printer.printLn(") {");
+        if (n.getEntries() != null) {
+            printer.indent();
+            for (SwitchEntryStmt e : n.getEntries()) {
+                e.accept(this, arg);
+            }
+            printer.unindent();
+        }
+        printer.print("}");
+
+    }
+
+    /** Prints one switch entry; a null label is printed as the default entry. */
+    public void visit(SwitchEntryStmt n, Object arg) {
+        if (n.getLabel() != null) {
+            printer.print("case ");
+            n.getLabel().accept(this, arg);
+            printer.print(":");
+        } else {
+            printer.print("default:");
+        }
+        printer.printLn();
+        printer.indent();
+        if (n.getStmts() != null) {
+            for (Statement s : n.getStmts()) {
+                s.accept(this, arg);
+                printer.printLn();
+            }
+        }
+        printer.unindent();
+    }
+
+    public void visit(BreakStmt n, Object arg) {
+        printer.print("break");
+        if (n.getId() != null) {
+            printer.print(" ");
+            printer.print(n.getId());
+        }
+        printer.print(";");
+    }
+
+    public void visit(ReturnStmt n, Object arg) {
+        printer.print("return");
+        if (n.getExpr() != null) {
+            printer.print(" ");
+            n.getExpr().accept(this, arg);
+        }
+        printer.print(";");
+    }
+
+    /**
+     * Prints an enum: the constants on one line separated by commas, then,
+     * if members exist, a semicolon followed by the members.
+     */
+    public void visit(EnumDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        printer.print("enum ");
+        printer.print(n.getName());
+
+        if (n.getImplements() != null) {
+            printer.print(" implements ");
+            for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+                ClassOrInterfaceType c = i.next();
+                c.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+
+        printer.printLn(" {");
+        printer.indent();
+        if (n.getEntries() != null) {
+            printer.printLn();
+            for (Iterator<EnumConstantDeclaration> i = n.getEntries().iterator(); i.hasNext();) {
+                EnumConstantDeclaration e = i.next();
+                e.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        if (n.getMembers() != null) {
+            printer.printLn(";");
+            printMembers(n.getMembers(), arg);
+        } else {
+            if (n.getEntries() != null) {
+                printer.printLn();
+            }
+        }
+        printer.unindent();
+        printer.print("}");
+    }
+
+    public void visit(EnumConstantDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printer.print(n.getName());
+
+        if (n.getArgs() != null) {
+            printArguments(n.getArgs(), arg);
+        }
+
+        if (n.getClassBody() != null) {
+            printer.printLn(" {");
+            printer.indent();
+            printMembers(n.getClassBody(), arg);
+            printer.unindent();
+            printer.printLn("}");
+        }
+    }
+
+    public void visit(EmptyMemberDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printer.print(";");
+    }
+
+    public void visit(InitializerDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        if (n.isStatic()) {
+            printer.print("static ");
+        }
+        n.getBlock().accept(this, arg);
+    }
+
+    public void visit(IfStmt n, Object arg) {
+        printer.print("if (");
+        n.getCondition().accept(this, arg);
+        printer.print(") ");
+        n.getThenStmt().accept(this, arg);
+        if (n.getElseStmt() != null) {
+            printer.print(" else ");
+            n.getElseStmt().accept(this, arg);
+        }
+    }
+
+    public void visit(WhileStmt n, Object arg) {
+        printer.print("while (");
+        n.getCondition().accept(this, arg);
+        printer.print(") ");
+        n.getBody().accept(this, arg);
+    }
+
+    public void visit(ContinueStmt n, Object arg) {
+        printer.print("continue");
+        if (n.getId() != null) {
+            printer.print(" ");
+            printer.print(n.getId());
+        }
+        printer.print(";");
+    }
+
+    public void visit(DoStmt n, Object arg) {
+        printer.print("do ");
+        n.getBody().accept(this, arg);
+        printer.print(" while (");
+        n.getCondition().accept(this, arg);
+        printer.print(");");
+    }
+
+    public void visit(ForeachStmt n, Object arg) {
+        printer.print("for (");
+        n.getVariable().accept(this, arg);
+        printer.print(" : ");
+        n.getIterable().accept(this, arg);
+        printer.print(") ");
+        n.getBody().accept(this, arg);
+    }
+
+    public void visit(ForStmt n, Object arg) {
+        printer.print("for (");
+        if (n.getInit() != null) {
+            for (Iterator<Expression> i = n.getInit().iterator(); i.hasNext();) {
+                Expression e = i.next();
+                e.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print("; ");
+        if (n.getCompare() != null) {
+            n.getCompare().accept(this, arg);
+        }
+        printer.print("; ");
+        if (n.getUpdate() != null) {
+            for (Iterator<Expression> i = n.getUpdate().iterator(); i.hasNext();) {
+                Expression e = i.next();
+                e.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(") ");
+        n.getBody().accept(this, arg);
+    }
+
+    public void visit(ThrowStmt n, Object arg) {
+        printer.print("throw ");
+        n.getExpr().accept(this, arg);
+        printer.print(";");
+    }
+
+    public void visit(SynchronizedStmt n, Object arg) {
+        printer.print("synchronized (");
+        n.getExpr().accept(this, arg);
+        printer.print(") ");
+        n.getBlock().accept(this, arg);
+    }
+
+    public void visit(TryStmt n, Object arg) {
+        printer.print("try ");
+        n.getTryBlock().accept(this, arg);
+        if (n.getCatchs() != null) {
+            for (CatchClause c : n.getCatchs()) {
+                c.accept(this, arg);
+            }
+        }
+        if (n.getFinallyBlock() != null) {
+            printer.print(" finally ");
+            n.getFinallyBlock().accept(this, arg);
+        }
+    }
+
+    public void visit(CatchClause n, Object arg) {
+        printer.print(" catch (");
+        n.getExcept().accept(this, arg);
+        printer.print(") ");
+        n.getCatchBlock().accept(this, arg);
+
+    }
+
+    public void visit(AnnotationDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        printer.print("@interface ");
+        printer.print(n.getName());
+        printer.printLn(" {");
+        printer.indent();
+        if (n.getMembers() != null) {
+            printMembers(n.getMembers(), arg);
+        }
+        printer.unindent();
+        printer.print("}");
+    }
+
+    public void visit(AnnotationMemberDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        n.getType().accept(this, arg);
+        printer.print(" ");
+        printer.print(n.getName());
+        printer.print("()");
+        if (n.getDefaultValue() != null) {
+            printer.print(" default ");
+            n.getDefaultValue().accept(this, arg);
+        }
+        printer.print(";");
+    }
+
+    public void visit(MarkerAnnotationExpr n, Object arg) {
+        printer.print("@");
+        n.getName().accept(this, arg);
+    }
+
+    public void visit(SingleMemberAnnotationExpr n, Object arg) {
+        printer.print("@");
+        n.getName().accept(this, arg);
+        printer.print("(");
+        n.getMemberValue().accept(this, arg);
+        printer.print(")");
+    }
+
+    public void visit(NormalAnnotationExpr n, Object arg) {
+        printer.print("@");
+        n.getName().accept(this, arg);
+        printer.print("(");
+        if (n.getPairs() != null) {
+            for (Iterator<MemberValuePair> i = n.getPairs().iterator(); i.hasNext();) {
+                MemberValuePair m = i.next();
+                m.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+        printer.print(")");
+    }
+
+    public void visit(MemberValuePair n, Object arg) {
+        printer.print(n.getName());
+        printer.print(" = ");
+        n.getValue().accept(this, arg);
+    }
+
+    public void visit(LineComment n, Object arg) {
+        printer.print("//");
+        printer.printLn(n.getContent());
+    }
+
+    public void visit(BlockComment n, Object arg) {
+        printer.print("/*");
+        printer.print(n.getContent());
+        printer.printLn("*/");
+    }
+
+}
\ No newline at end of file
diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java
new file mode 100644
index 0000000000..384716e0b2
--- /dev/null
+++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/LoopBreakAnalyzerVisitor.java
@@ -0,0 +1,183 @@
+/*
+
* Copyright (C) 2008 Júlio Vilmar Gesser. + * Copyright (C) 2012 Mozilla Foundation + * + * This file is part of Java 1.5 parser and Abstract Syntax Tree. + * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . + */ +/* + * Created on 09/06/2008 + */ +package nu.validator.htmlparser.rusttranslate; + +import japa.parser.ast.stmt.AssertStmt; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.CatchClause; +import japa.parser.ast.stmt.ContinueStmt; +import japa.parser.ast.stmt.DoStmt; +import japa.parser.ast.stmt.EmptyStmt; +import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt; +import japa.parser.ast.stmt.ExpressionStmt; +import japa.parser.ast.stmt.ForStmt; +import japa.parser.ast.stmt.ForeachStmt; +import japa.parser.ast.stmt.IfStmt; +import japa.parser.ast.stmt.LabeledStmt; +import japa.parser.ast.stmt.ReturnStmt; +import japa.parser.ast.stmt.Statement; +import japa.parser.ast.stmt.SwitchEntryStmt; +import japa.parser.ast.stmt.SwitchStmt; +import japa.parser.ast.stmt.SynchronizedStmt; +import japa.parser.ast.stmt.ThrowStmt; +import japa.parser.ast.stmt.TryStmt; +import japa.parser.ast.stmt.TypeDeclarationStmt; +import japa.parser.ast.stmt.WhileStmt; +import japa.parser.ast.type.WildcardType; +import 
japa.parser.ast.visitor.GenericVisitorAdapter; + +import java.util.List; + +/** + * @author Julio Vilmar Gesser + * @author Henri Sivonen + */ +public class LoopBreakAnalyzerVisitor extends GenericVisitorAdapter { + + public Boolean visit(AssertStmt n, Boolean arg) { + return false; + } + + public Boolean visit(BlockStmt n, Boolean arg) { + for (Statement stmt : n.getStmts()) { + if (stmt.accept(this, arg)) { + return true; + } + } + return false; + } + + public Boolean visit(BreakStmt n, Boolean arg) { + return n.getId() != null; + } + + public Boolean visit(CatchClause n, Boolean arg) { + return n.getCatchBlock().accept(this, arg); + } + + public Boolean visit(ContinueStmt n, Boolean arg) { + return false; + } + + public Boolean visit(DoStmt n, Boolean arg) { + return n.getBody().accept(this, arg); + } + + public Boolean visit(EmptyStmt n, Boolean arg) { + return false; + } + + public Boolean visit(ExplicitConstructorInvocationStmt n, Boolean arg) { + return false; + } + + public Boolean visit(ExpressionStmt n, Boolean arg) { + return false; + } + + public Boolean visit(ForeachStmt n, Boolean arg) { + return n.getBody().accept(this, arg); + } + + public Boolean visit(ForStmt n, Boolean arg) { + //bogus + return false; + } + + public Boolean visit(IfStmt n, Boolean arg) { + if (n.getElseStmt() != null) { + if (n.getElseStmt().accept(this, arg)) { + return true; + } + } + if (n.getThenStmt().accept(this, arg)) { + return true; + } + return false; + } + + public Boolean visit(LabeledStmt n, Boolean arg) { + return n.getStmt().accept(this, arg); + } + + public Boolean visit(ReturnStmt n, Boolean arg) { + return true; + } + + public Boolean visit(SwitchEntryStmt n, Boolean arg) { + return false; + } + + public Boolean visit(SwitchStmt n, Boolean arg) { + /* + List entries = n.getEntries(); + for (int i = 0; i < array.length; i++) { + array_type array_element = array[i]; + + } + */ + return true; + } + + public Boolean visit(SynchronizedStmt n, Boolean arg) { + return 
n.getBlock().accept(this, arg); + } + + public Boolean visit(ThrowStmt n, Boolean arg) { + return true; + } + + public Boolean visit(TryStmt n, Boolean arg) { + if (n.getFinallyBlock() != null) { + return n.getFinallyBlock().accept(this, arg); + } + if (n.getCatchs() != null) { + for (CatchClause c : n.getCatchs()) { + boolean brk = c.accept(this, arg); + if (!brk) { + return false; + } + } + } + return n.getTryBlock().accept(this, arg); + } + + public Boolean visit(TypeDeclarationStmt n, Boolean arg) { + return false; + } + + public Boolean visit(WhileStmt n, Boolean arg) { + return n.getBody().accept(this, arg); + } + + public Boolean visit(WildcardType n, Boolean arg) { + if (n.getExtends() != null) { + n.getExtends().accept(this, arg); + } + if (n.getSuper() != null) { + n.getSuper().accept(this, arg); + } + return null; + } +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java new file mode 100644 index 0000000000..4e1b0a7ddc --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/Main.java @@ -0,0 +1,144 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is HTML Parser Rust Translator code. + * + * The Initial Developer of the Original Code is + * Mozilla Foundation. 
+ * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Henri Sivonen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +package nu.validator.htmlparser.rusttranslate; + +import japa.parser.JavaParser; +import japa.parser.ParseException; +import japa.parser.ast.CompilationUnit; +import japa.parser.ast.visitor.DumpVisitor; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; + +import nu.validator.htmlparser.cpptranslate.CppOnlyInputStream; +import nu.validator.htmlparser.cpptranslate.LicenseExtractor; +import nu.validator.htmlparser.cpptranslate.NoCppInputStream; + +public class Main { + + private static final String[] CLASSLIST = { + "Tokenizer", + "TreeBuilder", + "MetaScanner", + "AttributeName", + "ElementName", + "HtmlAttributes", + "StackNode", + "UTF16Buffer", + "StateSnapshot", + }; + + /** + * @param args + * @throws ParseException + 
* @throws IOException + */ + public static void main(String[] args) throws ParseException, IOException { + File javaDirectory = new File(args[0]); + File targetDirectory = new File(args[1]); + + for (int i = 0; i < CLASSLIST.length; i++) { + parseFile(javaDirectory, targetDirectory, CLASSLIST[i], ".java"); + } + } + + private static void parseFile(File javaDirectory, + File targetDirectory, String className, String fne) + throws FileNotFoundException, UnsupportedEncodingException, + IOException { + File file = null; +// try { +// file = new File(javaDirectory, className + ".java"); +// String license = new LicenseExtractor(file).extract(); +// CompilationUnit cu = JavaParser.parse(new FileInputStream(file), "utf-8"); +// +// ModeFallThroughRemover mftr = new ModeFallThroughRemover(); +// cu.accept(mftr, null); +// +// DuplicatingFallThroughRemover dftr = new DuplicatingFallThroughRemover(); +// cu.accept(dftr, null); +// +// JavaVisitor visitor = new JavaVisitor(); +// cu.accept(visitor, null); +// FileOutputStream out = new FileOutputStream(new File(targetDirectory, +// className + fne)); +// OutputStreamWriter w = new OutputStreamWriter(out, "utf-8"); +// w.write(license); +// w.write("\n\n/*\n * THIS IS A GENERATED FILE. 
PLEASE DO NOT EDIT.\n * Please edit " +// + className + ".java instead and regenerate.\n */\n\n"); +// w.write(visitor.getSource()); +// w.close(); +// } catch (ParseException e) { +// System.err.println(file); +// e.printStackTrace(); +// } + try { + file = new File(javaDirectory, className + ".java"); + String license = new LicenseExtractor(file).extract(); + CompilationUnit cu = JavaParser.parse(new NoCppInputStream( + new CppOnlyInputStream(new FileInputStream(file))), "utf-8"); + + ModeFallThroughRemover mftr = new ModeFallThroughRemover(); + cu.accept(mftr, null); + + DuplicatingFallThroughRemover dftr = new DuplicatingFallThroughRemover(); + cu.accept(dftr, null); + + RustVisitor visitor = new RustVisitor(); + cu.accept(visitor, null); + FileOutputStream out = new FileOutputStream(new File(targetDirectory, + className + ".rs")); + OutputStreamWriter w = new OutputStreamWriter(out, "utf-8"); + w.write(license); + w.write("\n\n/*\n * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.\n * Please edit " + + className + ".java instead and regenerate.\n */\n\n"); + w.write(visitor.getSource()); + w.close(); + } catch (ParseException e) { + System.err.println(file); + e.printStackTrace(); + } + } + +} diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java new file mode 100644 index 0000000000..a899267484 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/ModeFallThroughRemover.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2008 Júlio Vilmar Gesser. + * Copyright (C) 2012 Mozilla Foundation + * + * This file is part of Java 1.5 parser and Abstract Syntax Tree. 
+ * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . + */ +/* + * Created on 09/06/2008 + */ +package nu.validator.htmlparser.rusttranslate; + +import java.util.LinkedList; +import java.util.List; + +import japa.parser.ast.body.MethodDeclaration; +import japa.parser.ast.expr.BinaryExpr; +import japa.parser.ast.expr.BinaryExpr.Operator; +import japa.parser.ast.expr.Expression; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.IfStmt; +import japa.parser.ast.stmt.Statement; +import japa.parser.ast.stmt.SwitchEntryStmt; +import japa.parser.ast.stmt.SwitchStmt; +import japa.parser.ast.visitor.VoidVisitorAdapter; + +/** + * @author Julio Vilmar Gesser + * @author Henri Sivonen + */ +public class ModeFallThroughRemover extends VoidVisitorAdapter { + + private String method; + + public void visit(BlockStmt n, Object arg) { + if (!("startTag".equals(method) || "endTag".equals(method))) { + super.visit(n, arg); + return; + } + List list = n.getStmts(); + if (list != null) { + for (int i = 0; i < list.size(); i++) { + Statement s = list.get(i); + if (s instanceof SwitchStmt) { + SwitchStmt sw = (SwitchStmt) s; + if ("mode".equals(sw.getSelector().toString())) { + list.remove(i); + int j = 0; + for (SwitchEntryStmt entry : sw.getEntries()) { + List 
statements = entry.getStmts(); + if (statements == null) { + continue; + } + Statement last = statements.get(statements.size() - 1); + if (last instanceof BreakStmt) { + BreakStmt brk = (BreakStmt) last; + if (brk.getId() == null) { + statements.remove(last); + } + } + Statement stm; + Expression label = entry.getLabel(); + if (label == null) { + stm = new BlockStmt(statements); + } else { + Expression lte = new BinaryExpr( + sw.getSelector(), label, + Operator.lessEquals); + stm = new IfStmt(lte, + new BlockStmt(statements), null); + } + list.add(i + j, stm); + j++; + } + } else { + s.accept(this, arg); + } + } else { + s.accept(this, arg); + } + } + } + } + + /** + * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object) + */ + @Override public void visit(MethodDeclaration md, Object arg) { + method = md.getName(); + super.visit(md, arg); + } + +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java new file mode 100644 index 0000000000..36feced048 --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/RustVisitor.java @@ -0,0 +1,1586 @@ +/* + * Copyright (C) 2007 Júlio Vilmar Gesser. + * Copyright (C) 2012 Mozilla Foundation + * + * This file is part of Java 1.5 parser and Abstract Syntax Tree. + * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . + */ +/* + * Created on 05/10/2006 + */ +package nu.validator.htmlparser.rusttranslate; + +import japa.parser.ast.BlockComment; +import japa.parser.ast.CompilationUnit; +import japa.parser.ast.LineComment; +import japa.parser.ast.TypeParameter; +import japa.parser.ast.body.BodyDeclaration; +import japa.parser.ast.body.ClassOrInterfaceDeclaration; +import japa.parser.ast.body.ConstructorDeclaration; +import japa.parser.ast.body.EmptyMemberDeclaration; +import japa.parser.ast.body.EmptyTypeDeclaration; +import japa.parser.ast.body.EnumConstantDeclaration; +import japa.parser.ast.body.EnumDeclaration; +import japa.parser.ast.body.FieldDeclaration; +import japa.parser.ast.body.InitializerDeclaration; +import japa.parser.ast.body.JavadocComment; +import japa.parser.ast.body.MethodDeclaration; +import japa.parser.ast.body.ModifierSet; +import japa.parser.ast.body.Parameter; +import japa.parser.ast.body.TypeDeclaration; +import japa.parser.ast.body.VariableDeclarator; +import japa.parser.ast.body.VariableDeclaratorId; +import japa.parser.ast.expr.AnnotationExpr; +import japa.parser.ast.expr.ArrayAccessExpr; +import japa.parser.ast.expr.ArrayCreationExpr; +import japa.parser.ast.expr.ArrayInitializerExpr; +import japa.parser.ast.expr.AssignExpr; +import japa.parser.ast.expr.BinaryExpr; +import japa.parser.ast.expr.BooleanLiteralExpr; +import japa.parser.ast.expr.CastExpr; +import japa.parser.ast.expr.CharLiteralExpr; +import japa.parser.ast.expr.ClassExpr; +import japa.parser.ast.expr.ConditionalExpr; +import 
japa.parser.ast.expr.DoubleLiteralExpr; +import japa.parser.ast.expr.EnclosedExpr; +import japa.parser.ast.expr.Expression; +import japa.parser.ast.expr.FieldAccessExpr; +import japa.parser.ast.expr.InstanceOfExpr; +import japa.parser.ast.expr.IntegerLiteralExpr; +import japa.parser.ast.expr.IntegerLiteralMinValueExpr; +import japa.parser.ast.expr.LongLiteralExpr; +import japa.parser.ast.expr.LongLiteralMinValueExpr; +import japa.parser.ast.expr.MemberValuePair; +import japa.parser.ast.expr.MethodCallExpr; +import japa.parser.ast.expr.NameExpr; +import japa.parser.ast.expr.NullLiteralExpr; +import japa.parser.ast.expr.ObjectCreationExpr; +import japa.parser.ast.expr.QualifiedNameExpr; +import japa.parser.ast.expr.StringLiteralExpr; +import japa.parser.ast.expr.SuperExpr; +import japa.parser.ast.expr.ThisExpr; +import japa.parser.ast.expr.UnaryExpr; +import japa.parser.ast.expr.UnaryExpr.Operator; +import japa.parser.ast.expr.VariableDeclarationExpr; +import japa.parser.ast.stmt.AssertStmt; +import japa.parser.ast.stmt.BlockStmt; +import japa.parser.ast.stmt.BreakStmt; +import japa.parser.ast.stmt.CatchClause; +import japa.parser.ast.stmt.ContinueStmt; +import japa.parser.ast.stmt.DoStmt; +import japa.parser.ast.stmt.EmptyStmt; +import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt; +import japa.parser.ast.stmt.ExpressionStmt; +import japa.parser.ast.stmt.ForStmt; +import japa.parser.ast.stmt.ForeachStmt; +import japa.parser.ast.stmt.IfStmt; +import japa.parser.ast.stmt.LabeledStmt; +import japa.parser.ast.stmt.ReturnStmt; +import japa.parser.ast.stmt.Statement; +import japa.parser.ast.stmt.SwitchEntryStmt; +import japa.parser.ast.stmt.SwitchStmt; +import japa.parser.ast.stmt.SynchronizedStmt; +import japa.parser.ast.stmt.ThrowStmt; +import japa.parser.ast.stmt.TryStmt; +import japa.parser.ast.stmt.TypeDeclarationStmt; +import japa.parser.ast.stmt.WhileStmt; +import japa.parser.ast.type.ClassOrInterfaceType; +import japa.parser.ast.type.PrimitiveType; 
+import japa.parser.ast.type.ReferenceType; +import japa.parser.ast.type.Type; +import japa.parser.ast.type.VoidType; +import japa.parser.ast.type.WildcardType; +import japa.parser.ast.visitor.VoidVisitorAdapter; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import nu.validator.htmlparser.cpptranslate.TranslatorUtils; + +/** + * @author Julio Vilmar Gesser + * @author Henri Sivonen + */ + +public final class RustVisitor extends VoidVisitorAdapter { + + private static final String[] MODS = { + "Tokenizer", + "TreeBuilder", + "MetaScanner", + "AttributeName", + "ElementName", + "HtmlAttributes", + "StackNode", + "UTF16Buffer", + "StateSnapshot", + }; + + private boolean inMethodSignature = false; + + private Set fields = new HashSet(); + + private Set constants = new HashSet(); + + private Expression loopUpdate = null; + + private static class SourcePrinter { + + private int level = 0; + + private boolean indented = false; + + private final StringBuilder buf = new StringBuilder(); + + public void indent() { + level++; + } + + public void unindent() { + level--; + } + + private void makeIndent() { + for (int i = 0; i < level; i++) { + buf.append(" "); + } + } + + public void print(String arg) { + if (!indented) { + makeIndent(); + indented = true; + } + buf.append(arg); + } + + public void printLn(String arg) { + print(arg); + printLn(); + } + + public void printLn() { + buf.append("\n"); + indented = false; + } + + public String getSource() { + return buf.toString(); + } + + @Override + public String toString() { + return getSource(); + } + } + + private final SourcePrinter printer = new SourcePrinter(); + + public String getSource() { + return printer.getSource(); + } + + private void printModifiers(int modifiers) { + if (ModifierSet.isPrivate(modifiers)) { + printer.print("private "); + } + if (ModifierSet.isProtected(modifiers)) { + printer.print("protected "); + } + if 
(ModifierSet.isPublic(modifiers)) { + printer.print("public "); + } + if (ModifierSet.isAbstract(modifiers)) { + printer.print("abstract "); + } + if (ModifierSet.isStatic(modifiers)) { + printer.print("static "); + } + if (ModifierSet.isFinal(modifiers)) { + printer.print("final "); + } + if (ModifierSet.isNative(modifiers)) { + printer.print("native "); + } + if (ModifierSet.isStrictfp(modifiers)) { + printer.print("strictfp "); + } + if (ModifierSet.isSynchronized(modifiers)) { + printer.print("synchronized "); + } + if (ModifierSet.isTransient(modifiers)) { + printer.print("transient "); + } + if (ModifierSet.isVolatile(modifiers)) { + printer.print("volatile "); + } + } + + private void printMethods(List members, Object arg) { + for (BodyDeclaration member : members) { + if (member instanceof MethodDeclaration) { + MethodDeclaration meth = (MethodDeclaration) member; + if (meth.getName().startsWith("fatal") || meth.getName().startsWith("err") + || meth.getName().startsWith("warn") + || meth.getName().startsWith("maybeErr") + || meth.getName().startsWith("maybeWarn") + || meth.getName().startsWith("note") + || "releaseArray".equals(meth.getName()) + || "deleteArray".equals(meth.getName()) + || "delete".equals(meth.getName())) { + continue; + } + printer.printLn(); + member.accept(this, arg); + printer.printLn(); + } + } + } + + private void printFields(List members, Object arg) { + for (BodyDeclaration member : members) { + if (member instanceof FieldDeclaration) { + FieldDeclaration field = (FieldDeclaration) member; + int mods = field.getModifiers(); + if (ModifierSet.isStatic(mods) && ModifierSet.isFinal(mods)) { + continue; + } + fields.add(field.getVariables().get(0).getId().getName()); + printer.printLn(); + member.accept(this, arg); + printer.printLn(); + } + } + } + + private void printConstants(List members, Object arg) { + for (BodyDeclaration member : members) { + if (member instanceof FieldDeclaration) { + FieldDeclaration field = (FieldDeclaration) 
member; + int mods = field.getModifiers(); + if (!(ModifierSet.isStatic(mods) && ModifierSet.isFinal(mods))) { + continue; + } + constants.add(field.getVariables().get(0).getId().getName()); + printer.printLn(); + member.accept(this, arg); + printer.printLn(); + } + } + } + + private void printMemberAnnotations(List annotations, Object arg) { + if (annotations != null) { + for (AnnotationExpr a : annotations) { + a.accept(this, arg); + printer.printLn(); + } + } + } + + private void printArguments(List args, Object arg) { + printer.print("("); + if (args != null) { + for (Iterator i = args.iterator(); i.hasNext();) { + Expression e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + } + + private void printJavadoc(JavadocComment javadoc, Object arg) { + if (javadoc != null) { + javadoc.accept(this, arg); + } + } + + public void visit(CompilationUnit n, Object arg) { + if (n.getTypes() != null) { + for (Iterator i = n.getTypes().iterator(); i.hasNext();) { + i.next().accept(this, arg); + printer.printLn(); + if (i.hasNext()) { + printer.printLn(); + } + } + } + } + + public void visit(NameExpr n, Object arg) { + if (fields.contains(n.getName())) { + printer.print("self."); + } + printer.print(n.getName()); + } + + public void visit(QualifiedNameExpr n, Object arg) { + n.getQualifier().accept(this, arg); + printer.print("."); + printer.print(n.getName()); + } + + public void visit(ClassOrInterfaceDeclaration n, Object arg) { + for (int i = 0; i < MODS.length; i++) { + String mod = MODS[i]; + if (!mod.equals(n.getName())) { + printer.print("mod "); + printer.print(mod); + printer.printLn(";"); + } + } + + printJavadoc(n.getJavaDoc(), arg); + + + if (n.getMembers() != null) { + printConstants(n.getMembers(), arg); + } + printer.printLn(); + printer.printLn(); + + printer.print("struct "); + + printer.print(n.getName()); + + printer.printLn(" {"); + printer.indent(); + if (n.getMembers() != null) { + 
printFields(n.getMembers(), arg); + } + printer.unindent(); + printer.print("}"); + + printer.printLn(); + printer.printLn(); + + printer.print("impl "); + + printer.print(n.getName()); + + printer.printLn(" {"); + printer.indent(); + if (n.getMembers() != null) { + printMethods(n.getMembers(), arg); + } + printer.unindent(); + printer.print("}"); + + } + + public void visit(EmptyTypeDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); + printer.print(";"); + } + + public void visit(JavadocComment n, Object arg) { + printer.print("/**"); + printer.print(n.getContent()); + printer.printLn("*/"); + } + + public void visit(ClassOrInterfaceType n, Object arg) { + if (n.getScope() != null) { + n.getScope().accept(this, arg); + printer.print("."); + } + printer.print(n.getName()); + } + + public void visit(TypeParameter n, Object arg) { + printer.print(n.getName()); + if (n.getTypeBound() != null) { + printer.print(" extends "); + for (Iterator i = n.getTypeBound().iterator(); i.hasNext();) { + ClassOrInterfaceType c = i.next(); + c.accept(this, arg); + if (i.hasNext()) { + printer.print(" & "); + } + } + } + } + + public void visit(PrimitiveType n, Object arg) { + switch (n.getType()) { + case Boolean: + printer.print("bool"); + break; + case Byte: + printer.print("i8"); + break; + case Char: + printer.print("u16"); + break; + case Double: + printer.print("f64"); + break; + case Float: + printer.print("f32"); + break; + case Int: + printer.print("i32"); + break; + case Long: + printer.print("i64"); + break; + case Short: + printer.print("i16"); + break; + } + } + + public void visit(ReferenceType n, Object arg) { +// if (inMethodSignature) { +// printer.print("&"); +// } else { +// printer.print("~"); +// } + printer.print("@"); + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("["); + } + n.getType().accept(this, arg); + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("]"); + } + } + + public void visit(WildcardType n, Object 
arg) { + printer.print("?"); + if (n.getExtends() != null) { + printer.print(" extends "); + n.getExtends().accept(this, arg); + } + if (n.getSuper() != null) { + printer.print(" super "); + n.getSuper().accept(this, arg); + } + } + + public void visit(FieldDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); +// printMemberAnnotations(n.getAnnotations(), arg); + + boolean field = true; + int mods = n.getModifiers(); + if (ModifierSet.isStatic(mods) && ModifierSet.isFinal(mods)) { + if (!ModifierSet.isPrivate(mods)) { + printer.print("pub "); + } + printer.print("const "); + field = false; + } else if (!ModifierSet.isFinal(mods)) { + printer.print("mut "); + } + + List vars = n.getVariables(); + + printVariableDeclarator(n.getType(), vars, arg, field); + + printer.print(field ? "," : ";"); + } + + private void printVariableDeclarator(Type type, List vars, + Object arg, boolean field) { + if (vars.size() != 1) { + throw new RuntimeException(); + } + + VariableDeclarator decl = vars.get(0); + + VariableDeclaratorId id = decl.getId(); + + printer.print(id.getName()); + + printer.print(": "); + + for (int i = 0; i < id.getArrayCount(); i++) { + printer.print("["); + } + + type.accept(this, arg); + + for (int i = 0; i < id.getArrayCount(); i++) { + printer.print("]"); + } + + Expression init = decl.getInit(); + + if (init != null && !field) { + printer.print(" = "); + init.accept(this, arg); + } + } + + public void visit(ArrayInitializerExpr n, Object arg) { + printer.print("["); + if (n.getValues() != null) { + printer.print(" "); + for (Iterator i = n.getValues().iterator(); i.hasNext();) { + Expression expr = i.next(); + expr.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + printer.print(" "); + } + printer.print("]"); + } + + public void visit(VoidType n, Object arg) { + printer.print("void"); + } + + public void visit(ArrayAccessExpr n, Object arg) { + n.getName().accept(this, arg); + printer.print("["); + 
n.getIndex().accept(this, arg); + printer.print("]"); + } + + public void visit(ArrayCreationExpr n, Object arg) { + printer.print("new "); + n.getType().accept(this, arg); + + if (n.getDimensions() != null) { + for (Expression dim : n.getDimensions()) { + printer.print("["); + dim.accept(this, arg); + printer.print("]"); + } + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("[]"); + } + } else { + for (int i = 0; i < n.getArrayCount(); i++) { + printer.print("[]"); + } + printer.print(" "); + n.getInitializer().accept(this, arg); + } + } + + public void visit(AssignExpr n, Object arg) { + n.getTarget().accept(this, arg); + printer.print(" "); + switch (n.getOperator()) { + case assign: + printer.print("="); + break; + case and: + printer.print("&="); + break; + case or: + printer.print("|="); + break; + case xor: + printer.print("^="); + break; + case plus: + printer.print("+="); + break; + case minus: + printer.print("-="); + break; + case rem: + printer.print("%="); + break; + case slash: + printer.print("/="); + break; + case star: + printer.print("*="); + break; + case lShift: + printer.print("<<="); + break; + case rSignedShift: + printer.print(">>="); + break; + case rUnsignedShift: + printer.print(">>>="); + break; + } + printer.print(" "); + n.getValue().accept(this, arg); + } + + public void visit(BinaryExpr n, Object arg) { + n.getLeft().accept(this, arg); + printer.print(" "); + switch (n.getOperator()) { + case or: + printer.print("||"); + break; + case and: + printer.print("&&"); + break; + case binOr: + printer.print("|"); + break; + case binAnd: + printer.print("&"); + break; + case xor: + printer.print("^"); + break; + case equals: + printer.print("=="); + break; + case notEquals: + printer.print("!="); + break; + case less: + printer.print("<"); + break; + case greater: + printer.print(">"); + break; + case lessEquals: + printer.print("<="); + break; + case greaterEquals: + printer.print(">="); + break; + case lShift: + 
printer.print("<<"); + break; + case rSignedShift: + printer.print(">>"); + break; + case rUnsignedShift: + printer.print(">>>"); + break; + case plus: + printer.print("+"); + break; + case minus: + printer.print("-"); + break; + case times: + printer.print("*"); + break; + case divide: + printer.print("/"); + break; + case remainder: + printer.print("%"); + break; + } + printer.print(" "); + n.getRight().accept(this, arg); + } + + public void visit(CastExpr n, Object arg) { + printer.print("("); + n.getType().accept(this, arg); + printer.print(") "); + n.getExpr().accept(this, arg); + } + + public void visit(ClassExpr n, Object arg) { + n.getType().accept(this, arg); + printer.print(".class"); + } + + public void visit(ConditionalExpr n, Object arg) { + n.getCondition().accept(this, arg); + printer.print(" ? "); + n.getThenExpr().accept(this, arg); + printer.print(" : "); + n.getElseExpr().accept(this, arg); + } + + public void visit(EnclosedExpr n, Object arg) { + printer.print("("); + n.getInner().accept(this, arg); + printer.print(")"); + } + + public void visit(FieldAccessExpr n, Object arg) { + String scope = n.getScope().toString(); + printer.print(scope); + boolean mod = false; + for (int i = 0; i < MODS.length; i++) { + if (MODS[i].equals(scope)) { + mod = true; + break; + } + } + printer.print(mod ? 
"::" : "."); + if ("length".equals(n.getField())) { + printer.print("len() as i32"); + } else { + printer.print(n.getField()); + } + } + + public void visit(InstanceOfExpr n, Object arg) { + n.getExpr().accept(this, arg); + printer.print(" instanceof "); + n.getType().accept(this, arg); + } + + public void visit(CharLiteralExpr n, Object arg) { +// printer.print("'"); +// char c = n.getValue().charAt(0); +// switch (c) { +// case '\b': +// printer.print("\\b"); +// break; +// case '\t': +// printer.print("\\t"); +// break; +// case '\n': +// printer.print("\\n"); +// break; +// case '\f': +// printer.print("\\f"); +// break; +// case '\r': +// printer.print("\\r"); +// break; +// case '\'': +// printer.print("\\'"); +// break; +// case '\\': +// printer.print(n.getValue()); +// break; +// default: +// if (c < ' ' || c > '~') { +// String hex = Integer.toHexString(c); +// switch (hex.length()) { +// case 1: +// printer.print("\\u000"+hex); +// break; +// case 2: +// printer.print("\\u00"+hex); +// break; +// case 3: +// printer.print("\\u0"+hex); +// break; +// case 4: +// printer.print("\\u"+hex); +// break; +// } +// } else { +// printer.print(""+c); +// } +// break; +// } +// printer.print("'"); + String str = n.getValue(); + if (str.length() == 1) { + String hex = Integer.toHexString(str.charAt(0)); + switch (hex.length()) { + case 1: + printer.print("0x0"+hex); + break; + case 2: + printer.print("0x"+hex); + break; + case 3: + printer.print("0x0"+hex); + break; + case 4: + printer.print("0x"+hex); + break; + } + } else if ("\\n".equals(str)) { + printer.print("0x0A"); + } else if ("\\r".equals(str)) { + printer.print("0x0D"); + } else if ("\\t".equals(str)) { + printer.print("0x09"); + } else if ("\\\"".equals(str)) { + printer.print("0x22"); + } else if ("\\'".equals(str)) { + printer.print("0x27"); + } else { + throw new RuntimeException(str); + } + } + + public void visit(DoubleLiteralExpr n, Object arg) { + printer.print(n.getValue()); + } + + public void 
visit(IntegerLiteralExpr n, Object arg) { + printer.print(n.getValue()); + } + + public void visit(LongLiteralExpr n, Object arg) { + printer.print(n.getValue()); + } + + public void visit(IntegerLiteralMinValueExpr n, Object arg) { + printer.print(n.getValue()); + } + + public void visit(LongLiteralMinValueExpr n, Object arg) { + printer.print(n.getValue()); + } + + public void visit(StringLiteralExpr n, Object arg) { + printer.print("\""); + printer.print(n.getValue()); + printer.print("\""); + } + + public void visit(BooleanLiteralExpr n, Object arg) { + printer.print(String.valueOf(n.getValue())); + } + + public void visit(NullLiteralExpr n, Object arg) { + printer.print("null"); + } + + public void visit(ThisExpr n, Object arg) { + if (n.getClassExpr() != null) { + n.getClassExpr().accept(this, arg); + printer.print("."); + } + printer.print("self"); + } + + public void visit(SuperExpr n, Object arg) { + if (n.getClassExpr() != null) { + n.getClassExpr().accept(this, arg); + printer.print("."); + } + printer.print("super"); + } + + public void visit(MethodCallExpr n, Object arg) { + if (n.getScope() != null) { + n.getScope().accept(this, arg); + printer.print("."); + } + printer.print(n.getName()); + printArguments(n.getArgs(), arg); + } + + public void visit(ObjectCreationExpr n, Object arg) { + if (n.getScope() != null) { + n.getScope().accept(this, arg); + printer.print("."); + } + + printer.print("new "); + + n.getType().accept(this, arg); + + printArguments(n.getArgs(), arg); + + if (n.getAnonymousClassBody() != null) { + printer.printLn(" {"); + printer.indent(); + printMethods(n.getAnonymousClassBody(), arg); + printer.unindent(); + printer.print("}"); + } + } + + public void visit(UnaryExpr n, Object arg) { + Operator op = n.getOperator(); + if (op == null) { + n.getExpr().accept(this, arg); + return; + } + switch (op) { + case positive: + printer.print("+"); + n.getExpr().accept(this, arg); + break; + case negative: + printer.print("-"); + 
n.getExpr().accept(this, arg); + break; + case inverse: + printer.print("i32::compl("); + n.getExpr().accept(this, arg); + printer.print(")"); + break; + case not: + printer.print("!"); + n.getExpr().accept(this, arg); + break; + case preIncrement: + case posIncrement: + n.getExpr().accept(this, arg); + printer.print(" = "); + n.getExpr().accept(this, arg); + printer.print(" + 1"); + break; + case preDecrement: + case posDecrement: + n.getExpr().accept(this, arg); + printer.print(" = "); + n.getExpr().accept(this, arg); + printer.print(" - 1"); + break; + } + } + + public void visit(ConstructorDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); + printMemberAnnotations(n.getAnnotations(), arg); + printModifiers(n.getModifiers()); + + if (n.getTypeParameters() != null) { + printer.print(" "); + } + printer.print(n.getName()); + + printer.print("("); + if (n.getParameters() != null) { + for (Iterator i = n.getParameters().iterator(); i.hasNext();) { + Parameter p = i.next(); + p.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(")"); + + if (n.getThrows() != null) { + printer.print(" throws "); + for (Iterator i = n.getThrows().iterator(); i.hasNext();) { + NameExpr name = i.next(); + name.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + printer.print(" "); + n.getBlock().accept(this, arg); + } + + public void visit(MethodDeclaration n, Object arg) { + + printJavadoc(n.getJavaDoc(), arg); +// printMemberAnnotations(n.getAnnotations(), arg); +// printModifiers(n.getModifiers()); + +// printTypeParameters(n.getTypeParameters(), arg); +// if (n.getTypeParameters() != null) { +// printer.print(" "); +// } + + printer.print("fn "); + printer.print(n.getName()); + + printer.print("("); + inMethodSignature = true; + if (n.getParameters() != null) { + for (Iterator i = n.getParameters().iterator(); i.hasNext();) { + Parameter p = i.next(); + p.accept(this, arg); + if (i.hasNext()) { + 
printer.print(", "); + } + } + } + inMethodSignature = false; + printer.print(")"); + + Type type = n.getType(); + + if (!(type instanceof VoidType)) { + printer.print(" -> "); + type.accept(this, arg); + } + +// for (int i = 0; i < n.getArrayCount(); i++) { +// printer.print("[]"); +// } + +// if (n.getThrows() != null) { +// printer.print(" throws "); +// for (Iterator i = n.getThrows().iterator(); i.hasNext();) { +// NameExpr name = i.next(); +// name.accept(this, arg); +// if (i.hasNext()) { +// printer.print(", "); +// } +// } +// } + if (n.getBody() == null) { + printer.print(";"); + } else { + printer.print(" "); + n.getBody().accept(this, arg); + } + } + + public void visit(Parameter n, Object arg) { +// printAnnotations(n.getAnnotations(), arg); +// printModifiers(n.getModifiers()); + + VariableDeclaratorId id = n.getId(); + + printer.print(id.getName()); +// if (n.isVarArgs()) { +// printer.print("..."); +// } + printer.print(": "); + n.getType().accept(this, arg); + } + + public void visit(ExplicitConstructorInvocationStmt n, Object arg) { + if (n.isThis()) { + printer.print("this"); + } else { + if (n.getExpr() != null) { + n.getExpr().accept(this, arg); + printer.print("."); + } + printer.print("super"); + } + printArguments(n.getArgs(), arg); + printer.print(";"); + } + + public void visit(VariableDeclarationExpr n, Object arg) { +// printAnnotations(n.getAnnotations(), arg); + + printer.print("let "); + + if (!ModifierSet.isFinal(n.getModifiers())) { + printer.print("mut "); + } + +// printModifiers(n.getModifiers()); + + List vars = n.getVars(); + + printVariableDeclarator(n.getType(), vars, arg, false); + } + + public void visit(TypeDeclarationStmt n, Object arg) { + n.getTypeDeclaration().accept(this, arg); + } + + public void visit(AssertStmt n, Object arg) { + Expression check = n.getCheck(); + if (check instanceof BooleanLiteralExpr) { + BooleanLiteralExpr bool = (BooleanLiteralExpr) check; + if (!bool.getValue()) { + printer.print("fail;"); + 
return; + } + } + printer.print("assert "); + check.accept(this, arg); + printer.print(";"); + } + + public void visit(BlockStmt n, Object arg) { + printer.printLn("{"); + if (n.getStmts() != null) { + printer.indent(); + for (Statement s : n.getStmts()) { + s.accept(this, arg); + printer.printLn(); + } + printer.unindent(); + } + printer.print("}"); + } + + public void visit(LabeledStmt n, Object arg) { + assert arg == null; + n.getStmt().accept(this, n.getLabel()); + } + + public void visit(EmptyStmt n, Object arg) { + printer.print(";"); + } + + public void visit(ExpressionStmt n, Object arg) { + Expression plusplus = null; + Expression ex = n.getExpression(); + + if (ex instanceof MethodCallExpr) { + MethodCallExpr meth = (MethodCallExpr) ex; + if (meth.getName().startsWith("fatal") || meth.getName().startsWith("err") + || meth.getName().startsWith("warn") + || meth.getName().startsWith("maybeErr") + || meth.getName().startsWith("maybeWarn") + || meth.getName().startsWith("note") + || "releaseArray".equals(meth.getName()) + || "deleteArray".equals(meth.getName()) + || "delete".equals(meth.getName())) { + return; + } + } + + if (ex instanceof AssignExpr) { + AssignExpr ax = (AssignExpr) ex; + Expression left = ax.getTarget(); + if (left instanceof ArrayAccessExpr) { + ArrayAccessExpr aae = (ArrayAccessExpr) left; + Expression index = aae.getIndex(); + if (index instanceof UnaryExpr) { + UnaryExpr unex = (UnaryExpr) index; + if (unex.getOperator() == Operator.posIncrement) { + plusplus = unex.getExpr(); + unex.setOperator(null); + } + } + } + } + n.getExpression().accept(this, arg); + printer.print(";"); + if (plusplus != null) { + printer.printLn(); + plusplus.accept(this, arg); + printer.print(" = "); + plusplus.accept(this, arg); + printer.print(" + 1;"); + } + } + + public void visit(SwitchStmt n, Object arg) { + printer.print("match "); + n.getSelector().accept(this, arg); + printer.printLn(" {"); + if (n.getEntries() != null) { + printer.indent(); + List 
labels = new LinkedList<Expression>();
+            // Labels pile up across entries that carry no statements (stacked
+            // "case A: case B:") and are emitted together, joined by " | ",
+            // in front of the first entry that does have statements.
+            for (SwitchEntryStmt e : n.getEntries()) {
+                labels.add(e.getLabel());
+                List<Statement> stmts = e.getStmts();
+                if (stmts != null) {
+                    // A statement list that is present but empty has no last
+                    // element; indexing size() - 1 on it would throw.
+                    if (!stmts.isEmpty()
+                            && stmts.get(stmts.size() - 1) instanceof BreakStmt) {
+                        BreakStmt brk = (BreakStmt) stmts.get(stmts.size() - 1);
+                        if (brk.getId() == null) {
+                            // The trailing unlabeled break only ends the case,
+                            // so it is removed from the list and not printed.
+                            // Note that this edits the AST in place.
+                            stmts.remove(stmts.size() - 1);
+                        }
+                    }
+                    if (!stmts.isEmpty()) {
+                        boolean first = true;
+                        for (Expression label : labels) {
+                            if (!first) {
+                                printer.print(" | ");
+                            }
+                            first = false;
+                            if (label == null) {
+                                // A null label is the "default:" entry.
+                                printer.print("_");
+                            } else {
+                                label.accept(this, arg);
+                            }
+                        }
+                        printer.printLn(" => {");
+                        printer.indent();
+                        for (Statement statement : stmts) {
+                            statement.accept(this, arg);
+                            printer.printLn();
+                        }
+                        printer.unindent();
+                        printer.printLn("}");
+                    }
+                    // Labels are discarded even when nothing was printed, i.e.
+                    // a case consisting solely of "break;" produces no arm.
+                    labels.clear();
+                }
+            }
+            printer.unindent();
+        }
+        printer.print("}");
+
+    }
+
+    /**
+     * Always throws: switch entries are printed inline by
+     * {@code visit(SwitchStmt, Object)} and are never visited on their own.
+     */
+    public void visit(SwitchEntryStmt n, Object arg) {
+        throw new RuntimeException("Not supposed to come here.");
+    }
+
+    /**
+     * Prints {@code break}, followed by the label when there is one. The
+     * labels "charsetloop" and "charactersloop" are suppressed, so a break
+     * to either of them is printed as a plain {@code break;}.
+     */
+    public void visit(BreakStmt n, Object arg) {
+        printer.print("break");
+        if (n.getId() != null && !"charsetloop".equals(n.getId()) && !"charactersloop".equals(n.getId())) {
+            printer.print(" ");
+            printer.print(n.getId());
+        }
+        printer.print(";");
+    }
+
+    /**
+     * Prints {@code return}, followed by the returned expression if present.
+     */
+    public void visit(ReturnStmt n, Object arg) {
+        printer.print("return");
+        if (n.getExpr() != null) {
+            printer.print(" ");
+            n.getExpr().accept(this, arg);
+        }
+        printer.print(";");
+    }
+
+    /**
+     * Prints the enum in Java syntax and then throws a
+     * {@link RuntimeException}("Unsupported syntax."): enum declarations are
+     * not translated.
+     */
+    public void visit(EnumDeclaration n, Object arg) {
+        printJavadoc(n.getJavaDoc(), arg);
+        printMemberAnnotations(n.getAnnotations(), arg);
+        printModifiers(n.getModifiers());
+
+        printer.print("enum ");
+        printer.print(n.getName());
+
+        if (n.getImplements() != null) {
+            printer.print(" implements ");
+            for (Iterator<ClassOrInterfaceType> i = n.getImplements().iterator(); i.hasNext();) {
+                ClassOrInterfaceType c = i.next();
+                c.accept(this, arg);
+                if (i.hasNext()) {
+                    printer.print(", ");
+                }
+            }
+        }
+
+        printer.printLn(" {");
+        printer.indent();
+        if (n.getEntries() != null) {
+            
printer.printLn(); + for (Iterator i = n.getEntries().iterator(); i.hasNext();) { + EnumConstantDeclaration e = i.next(); + e.accept(this, arg); + if (i.hasNext()) { + printer.print(", "); + } + } + } + if (n.getMembers() != null) { + printer.printLn(";"); + printMethods(n.getMembers(), arg); + } else { + if (n.getEntries() != null) { + printer.printLn(); + } + } + printer.unindent(); + printer.print("}"); + throw new RuntimeException("Unsupported syntax."); + } + + public void visit(EnumConstantDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); + printMemberAnnotations(n.getAnnotations(), arg); + printer.print(n.getName()); + + if (n.getArgs() != null) { + printArguments(n.getArgs(), arg); + } + + if (n.getClassBody() != null) { + printer.printLn(" {"); + printer.indent(); + printMethods(n.getClassBody(), arg); + printer.unindent(); + printer.printLn("}"); + } + throw new RuntimeException("Unsupported syntax."); + } + + public void visit(EmptyMemberDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); + printer.print(";"); + } + + public void visit(InitializerDeclaration n, Object arg) { + printJavadoc(n.getJavaDoc(), arg); + if (n.isStatic()) { + printer.print("static "); + } + n.getBlock().accept(this, arg); + } + + public void visit(IfStmt n, Object arg) { + Expression cond = n.getCondition(); + if (cond instanceof BinaryExpr) { + BinaryExpr binex = (BinaryExpr) cond; + Expression left = binex.getLeft(); + if (left instanceof UnaryExpr) { + UnaryExpr unex = (UnaryExpr) left; + if (unex.getOperator() == Operator.preIncrement) { + unex.getExpr().accept(this, arg); + printer.print(" = "); + unex.getExpr().accept(this, arg); + printer.printLn(" + 1;"); + unex.setOperator(null); + } + } + } + + if (!TranslatorUtils.isErrorHandlerIf(n.getCondition(), false)) { + if (TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), false)) { + if (n.getElseStmt() != null + && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), false)) { + printer.print("if 
"); + if (n.getCondition() instanceof BinaryExpr) { + BinaryExpr binExpr = (BinaryExpr) n.getCondition(); + switch (binExpr.getOperator()) { + case equals: + binExpr.getLeft().accept(this, arg); + printer.print(" != "); + binExpr.getRight().accept(this, arg); + break; + case notEquals: + binExpr.getLeft().accept(this, arg); + printer.print(" == "); + binExpr.getRight().accept(this, arg); + break; + default: + printer.print("!("); + n.getCondition().accept(this, arg); + printer.print(")"); + break; + } + } else { + printer.print("!("); + n.getCondition().accept(this, arg); + printer.print(")"); + } + printer.print(" "); + n.getElseStmt().accept(this, arg); + } + } else { + printer.print("if "); + n.getCondition().accept(this, arg); + printer.print(" "); + n.getThenStmt().accept(this, arg); + if (n.getElseStmt() != null + && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), false)) { + printer.print(" else "); + n.getElseStmt().accept(this, arg); + } + } + } + + } + + public void visit(WhileStmt n, Object arg) { + printer.print("while "); + n.getCondition().accept(this, arg); + printer.print(" "); + n.getBody().accept(this, arg); + } + + public void visit(ContinueStmt n, Object arg) { + if (loopUpdate != null) { + loopUpdate.accept(this, arg); + printer.printLn(";"); + } + printer.print("loop"); + if (n.getId() != null) { + printer.print(" "); + printer.print(n.getId()); + } + printer.print(";"); + } + + public void visit(DoStmt n, Object arg) { + printer.print("do "); + n.getBody().accept(this, arg); + printer.print(" while ("); + n.getCondition().accept(this, arg); + printer.print(");"); + throw new RuntimeException("Unsupported syntax."); + } + + public void visit(ForeachStmt n, Object arg) { + printer.print("for ("); + n.getVariable().accept(this, arg); + printer.print(" : "); + n.getIterable().accept(this, arg); + printer.print(") "); + n.getBody().accept(this, arg); + throw new RuntimeException("Unsupported syntax."); + } + + public void visit(ForStmt n, 
Object arg) {
+        // visit(LabeledStmt) hands the label down through arg. Peel it off
+        // here so that nested statements are visited with a null arg again.
+        String label = null;
+        if (arg instanceof String) {
+            label = (String) arg;
+            arg = null;
+        }
+        if (n.getInit() == null && n.getCompare() == null && n.getUpdate() == null) {
+            // for (;;) is printed as a bare "loop", with its label if any.
+            printer.print("loop ");
+            if (label != null) {
+                printer.print(label);
+                printer.print(": ");
+            }
+            n.getBody().accept(this, arg);
+            return;
+        }
+
+        // Below this point the label is not printed; only these two labels
+        // are expected on such loops.
+        assert label == null || "charsetloop".equals(label) || "charactersloop".equals(label);
+
+        Expression oldLoopUpdate = loopUpdate;
+        // The update clause is optional even when init or compare is present
+        // (e.g. "for (int i = 0; i < n;)"), so it must not be dereferenced
+        // blindly. Only the first update expression is carried over.
+        loopUpdate = (n.getUpdate() == null || n.getUpdate().isEmpty())
+                ? null
+                : n.getUpdate().get(0);
+
+        // Only the first init expression is printed, ahead of the loop.
+        if (n.getInit() != null && !n.getInit().isEmpty()) {
+            n.getInit().get(0).accept(this, arg);
+            printer.printLn(";");
+        }
+
+        if (n.getCompare() == null) {
+            printer.print("loop ");
+        } else {
+            printer.print("while ");
+            n.getCompare().accept(this, arg);
+            printer.print(" ");
+        }
+
+        Statement body = n.getBody();
+        if (body instanceof BlockStmt) {
+            // The block is printed by hand instead of via accept() so that
+            // the update expression can be appended as its last statement.
+            BlockStmt blockStmt = (BlockStmt) body;
+            printer.printLn("{");
+            printer.indent();
+            if (blockStmt.getStmts() != null) {
+                for (Statement s : blockStmt.getStmts()) {
+                    s.accept(this, arg);
+                    printer.printLn();
+                }
+            }
+            if (loopUpdate != null) {
+                loopUpdate.accept(this, arg);
+                printer.printLn(";");
+            }
+            printer.unindent();
+            printer.print("}");
+        } else {
+            throw new RuntimeException("for loop body must be a block statement");
+        }
+
+        // Restore the enclosing loop's update for any continue that follows.
+        loopUpdate = oldLoopUpdate;
+    }
+
+    /**
+     * Prints the statement in Java syntax: {@code throw <expr>;}.
+     */
+    public void visit(ThrowStmt n, Object arg) {
+        printer.print("throw ");
+        n.getExpr().accept(this, arg);
+        printer.print(";");
+    }
+
+    /**
+     * Prints the statement in Java syntax: {@code synchronized (<expr>) <block>}.
+     */
+    public void visit(SynchronizedStmt n, Object arg) {
+        printer.print("synchronized (");
+        n.getExpr().accept(this, arg);
+        printer.print(") ");
+        n.getBlock().accept(this, arg);
+    }
+
+    /**
+     * Prints the statement in Java syntax: the try block, then each catch
+     * clause, then the finally block if there is one.
+     */
+    public void visit(TryStmt n, Object arg) {
+        printer.print("try ");
+        n.getTryBlock().accept(this, arg);
+        if (n.getCatchs() != null) {
+            for (CatchClause c : n.getCatchs()) {
+                c.accept(this, arg);
+            }
+        }
+        if (n.getFinallyBlock() != null) {
+            printer.print(" finally ");
+            n.getFinallyBlock().accept(this, arg);
+        }
+    }
+
+    public void visit(CatchClause n, Object arg) {
+        printer.print(" catch (");
+ n.getExcept().accept(this, arg); + printer.print(") "); + n.getCatchBlock().accept(this, arg); + + } + +// public void visit(AnnotationDeclaration n, Object arg) { +// printJavadoc(n.getJavaDoc(), arg); +// printMemberAnnotations(n.getAnnotations(), arg); +// printModifiers(n.getModifiers()); +// +// printer.print("@interface "); +// printer.print(n.getName()); +// printer.printLn(" {"); +// printer.indent(); +// if (n.getMembers() != null) { +// printMembers(n.getMembers(), arg); +// } +// printer.unindent(); +// printer.print("}"); +// } +// +// public void visit(AnnotationMemberDeclaration n, Object arg) { +// printJavadoc(n.getJavaDoc(), arg); +// printMemberAnnotations(n.getAnnotations(), arg); +// printModifiers(n.getModifiers()); +// +// n.getType().accept(this, arg); +// printer.print(" "); +// printer.print(n.getName()); +// printer.print("()"); +// if (n.getDefaultValue() != null) { +// printer.print(" default "); +// n.getDefaultValue().accept(this, arg); +// } +// printer.print(";"); +// } +// +// public void visit(MarkerAnnotationExpr n, Object arg) { +// printer.print("@"); +// n.getName().accept(this, arg); +// } +// +// public void visit(SingleMemberAnnotationExpr n, Object arg) { +// printer.print("@"); +// n.getName().accept(this, arg); +// printer.print("("); +// n.getMemberValue().accept(this, arg); +// printer.print(")"); +// } +// +// public void visit(NormalAnnotationExpr n, Object arg) { +// printer.print("@"); +// n.getName().accept(this, arg); +// printer.print("("); +// if (n.getPairs() != null) { +// for (Iterator i = n.getPairs().iterator(); i.hasNext();) { +// MemberValuePair m = i.next(); +// m.accept(this, arg); +// if (i.hasNext()) { +// printer.print(", "); +// } +// } +// } +// printer.print(")"); +// } + + public void visit(MemberValuePair n, Object arg) { + printer.print(n.getName()); + printer.print(" = "); + n.getValue().accept(this, arg); + } + + public void visit(LineComment n, Object arg) { + printer.print("//"); + 
printer.printLn(n.getContent()); + } + + public void visit(BlockComment n, Object arg) { + printer.print("/*"); + printer.print(n.getContent()); + printer.printLn("*/"); + } + +} \ No newline at end of file diff --git a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java new file mode 100644 index 0000000000..766b349cea --- /dev/null +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/rusttranslate/SwitchBreakAnalyzerVisitor.java @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2008 Júlio Vilmar Gesser. + * Copyright (C) 2012 Mozilla Foundation + * + * This file is part of Java 1.5 parser and Abstract Syntax Tree. + * + * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . 
+ */
+/*
+ * Created on 09/06/2008
+ */
+package nu.validator.htmlparser.rusttranslate;
+
+import japa.parser.ast.stmt.AssertStmt;
+import japa.parser.ast.stmt.BlockStmt;
+import japa.parser.ast.stmt.BreakStmt;
+import japa.parser.ast.stmt.CatchClause;
+import japa.parser.ast.stmt.ContinueStmt;
+import japa.parser.ast.stmt.DoStmt;
+import japa.parser.ast.stmt.EmptyStmt;
+import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt;
+import japa.parser.ast.stmt.ExpressionStmt;
+import japa.parser.ast.stmt.ForStmt;
+import japa.parser.ast.stmt.ForeachStmt;
+import japa.parser.ast.stmt.IfStmt;
+import japa.parser.ast.stmt.LabeledStmt;
+import japa.parser.ast.stmt.ReturnStmt;
+import japa.parser.ast.stmt.Statement;
+import japa.parser.ast.stmt.SwitchEntryStmt;
+import japa.parser.ast.stmt.SwitchStmt;
+import japa.parser.ast.stmt.SynchronizedStmt;
+import japa.parser.ast.stmt.ThrowStmt;
+import japa.parser.ast.stmt.TryStmt;
+import japa.parser.ast.stmt.TypeDeclarationStmt;
+import japa.parser.ast.stmt.WhileStmt;
+import japa.parser.ast.type.WildcardType;
+import japa.parser.ast.visitor.GenericVisitorAdapter;
+
+import java.util.List;
+
+/**
+ * Statement visitor that answers a yes/no question about each statement it
+ * is given. From the code below: {@code return} and {@code throw} answer
+ * {@code true}; {@code break} and {@code continue} answer {@code true} when
+ * they carry a label or when the argument is {@code true}; plain statements
+ * answer {@code false}; compound statements answer for their last statement.
+ * NOTE(review): this reads as "control always leaves through this statement
+ * instead of running off its end" - confirm against the callers.
+ *
+ * @author Julio Vilmar Gesser
+ * @author Henri Sivonen
+ */
+public class SwitchBreakAnalyzerVisitor extends GenericVisitorAdapter<Boolean, Boolean> {
+
+    /** Analyzer that takes over for the body of a {@code for} loop. */
+    private static final LoopBreakAnalyzerVisitor ANALYZER_VISITOR = new LoopBreakAnalyzerVisitor();
+
+    public Boolean visit(AssertStmt n, Boolean arg) {
+        return false;
+    }
+
+    /**
+     * Answers for the last statement of the block; a block without any
+     * statements answers {@code false}.
+     */
+    public Boolean visit(BlockStmt n, Boolean arg) {
+        // Bogus in the loop case
+        List<Statement> stms = n.getStmts();
+        // An empty list has no last statement; get(size() - 1) would throw.
+        if (stms != null && !stms.isEmpty()) {
+            return stms.get(stms.size() - 1).accept(this, arg);
+        }
+        return false;
+    }
+
+    /**
+     * Answers {@code true} when {@code arg} is true; otherwise only a
+     * labeled break answers {@code true}.
+     */
+    public Boolean visit(BreakStmt n, Boolean arg) {
+        // Bogus in the general case
+        if (arg) {
+            return true;
+        }
+        return n.getId() != null;
+    }
+
+    public Boolean visit(CatchClause n, Boolean arg) {
+        return n.getCatchBlock().accept(this, arg);
+    }
+
+    /**
+     * Answers {@code true} when {@code arg} is true; otherwise only a
+     * labeled continue answers {@code true}.
+     */
+    public Boolean visit(ContinueStmt n, Boolean arg) {
+        // Bogus in the general case
+        if (arg) {
+            return true;
+        }
+        return n.getId() != null;
+    }
+
+    public Boolean visit(DoStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    public Boolean visit(EmptyStmt n, Boolean arg) {
+        return false;
+    }
+
+    public Boolean visit(ExplicitConstructorInvocationStmt n, Boolean arg) {
+        return false;
+    }
+
+    public Boolean visit(ExpressionStmt n, Boolean arg) {
+        return false;
+    }
+
+    public Boolean visit(ForeachStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    /**
+     * Unlike the other loops, the body of a {@code for} loop is analyzed by
+     * the shared {@link LoopBreakAnalyzerVisitor} instead of this visitor.
+     */
+    public Boolean visit(ForStmt n, Boolean arg) {
+        return n.getBody().accept(ANALYZER_VISITOR, arg);
+    }
+
+    /**
+     * Answers {@code true} only when there is an else branch and both
+     * branches answer {@code true}; an if without else answers {@code false}.
+     */
+    public Boolean visit(IfStmt n, Boolean arg) {
+        if (n.getElseStmt() != null) {
+            return n.getThenStmt().accept(this, arg) && n.getElseStmt().accept(this, arg);
+        }
+        return false;
+    }
+
+    public Boolean visit(LabeledStmt n, Boolean arg) {
+        return n.getStmt().accept(this, arg);
+    }
+
+    public Boolean visit(ReturnStmt n, Boolean arg) {
+        return true;
+    }
+
+    /**
+     * Answers for the last statement of the entry; an entry without any
+     * statements answers {@code false}.
+     */
+    public Boolean visit(SwitchEntryStmt n, Boolean arg) {
+        List<Statement> stms = n.getStmts();
+        // An empty list has no last statement; get(size() - 1) would throw.
+        if (stms != null && !stms.isEmpty()) {
+            return stms.get(stms.size() - 1).accept(this, arg);
+        }
+        return false;
+    }
+
+    /**
+     * Always answers {@code true}; the entries of a nested switch are not
+     * inspected (the analysis below was left as a commented-out stub).
+     */
+    public Boolean visit(SwitchStmt n, Boolean arg) {
+        /*
+        List entries = n.getEntries();
+        for (int i = 0; i < array.length; i++) {
+            array_type array_element = array[i];
+
+        }
+         */
+        return true;
+    }
+
+    public Boolean visit(SynchronizedStmt n, Boolean arg) {
+        return n.getBlock().accept(this, arg);
+    }
+
+    public Boolean visit(ThrowStmt n, Boolean arg) {
+        return true;
+    }
+
+    /**
+     * A finally block, when present, decides the answer on its own.
+     * Otherwise every catch clause and the try block must all answer
+     * {@code true}.
+     */
+    public Boolean visit(TryStmt n, Boolean arg) {
+        if (n.getFinallyBlock() != null) {
+            return n.getFinallyBlock().accept(this, arg);
+        }
+        if (n.getCatchs() != null) {
+            for (CatchClause c : n.getCatchs()) {
+                boolean brk = c.accept(this, arg);
+                if (!brk) {
+                    return false;
+                }
+            }
+        }
+        return n.getTryBlock().accept(this, arg);
+    }
+
+    public Boolean visit(TypeDeclarationStmt n, Boolean arg) {
+        return false;
+    }
+
+    public Boolean visit(WhileStmt n, Boolean arg) {
+        return n.getBody().accept(this, arg);
+    }
+
+    /**
+     * Visits the bounds of the wildcard and returns {@code null}; this is
+     * the only method here that does not produce a yes/no answer.
+     */
+    public Boolean visit(WildcardType n, Boolean arg) {
+        if (n.getExtends() != null) {
+            n.getExtends().accept(this, arg);
+        }
+        if (n.getSuper() != null) {
+            n.getSuper().accept(this, arg);
+        }
+        return null;
+    }
+}
\ No newline at end of file
-- 
cgit v1.2.3