diff options
Diffstat (limited to 'parser/html')
8 files changed, 109 insertions, 90 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java index 06c924393e..b3ca30286c 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java @@ -30,7 +30,6 @@ import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; import nu.validator.htmlparser.annotation.NoLength; import nu.validator.htmlparser.annotation.Unsigned; -import nu.validator.htmlparser.annotation.Virtual; import nu.validator.htmlparser.common.Interner; public final class ElementName @@ -47,7 +46,7 @@ public final class ElementName * Indicates that the element is not a pre-interned element. Forbidden * on preinterned elements. */ - public static final int CUSTOM = (1 << 30); + public static final int NOT_INTERNED = (1 << 30); /** * Indicates that the element is in the "special" category. This bit @@ -87,17 +86,23 @@ public final class ElementName */ public static final int OPTIONAL_END_TAG = (1 << 23); - public static final ElementName NULL_ELEMENT_NAME = new ElementName(null); + private @Local String name; - public final @Local String name; - - public final @Local String camelCaseName; + private @Local String camelCaseName; /** * The lowest 7 bits are the dispatch group. The high bits are flags. 
*/ public final int flags; + @Inline public @Local String getName() { + return name; + } + + @Inline public @Local String getCamelCaseName() { + return camelCaseName; + } + @Inline public int getFlags() { return flags; } @@ -106,21 +111,20 @@ public final class ElementName return flags & GROUP_MASK; } - public boolean isCustom() { - return (flags & CUSTOM) != 0; + public boolean isInterned() { + return (flags & NOT_INTERNED) == 0; } static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) { @Unsigned int hash = ElementName.bufToHash(buf, length); int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash); if (index < 0) { - return new ElementName(Portability.newLocalNameFromBuffer(buf, offset, length, interner)); + return null; } else { ElementName elementName = ElementName.ELEMENT_NAMES[index]; @Local String name = elementName.name; if (!Portability.localEqualsBuffer(name, buf, offset, length)) { - return new ElementName(Portability.newLocalNameFromBuffer(buf, - offset, length, interner)); + return null; } return elementName; } @@ -170,23 +174,22 @@ public final class ElementName this.flags = flags; } - protected ElementName(@Local String name) { - this.name = name; - this.camelCaseName = name; - this.flags = TreeBuilder.OTHER | CUSTOM; - } - - @Virtual void release() { - // No-op in Java. - // Implement as delete this in subclass. - // Be sure to release the local name + public ElementName() { + this.name = null; + this.camelCaseName = null; + this.flags = TreeBuilder.OTHER | NOT_INTERNED; } - @SuppressWarnings("unused") @Virtual private void destructor() { + public void destructor() { + // The translator adds refcount debug code here. } - @Virtual public ElementName cloneElementName(Interner interner) { - return this; + public void setNameForNonInterned(@Local String name) { + // No need to worry about refcounting the local name, because in the + // C++ case the scoped atom table remembers its own atoms. 
+ this.name = name; + this.camelCaseName = name; + assert this.flags == (TreeBuilder.OTHER | NOT_INTERNED); } // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index f1749e0b36..437e830318 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -22,17 +22,17 @@ package nu.validator.htmlparser.impl; +import java.util.HashMap; + +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.NoLength; import nu.validator.htmlparser.common.TokenHandler; import nu.validator.htmlparser.common.TransitionHandler; import nu.validator.htmlparser.common.XmlViolationPolicy; -import java.util.HashMap; - -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - public class ErrorReportingTokenizer extends Tokenizer { /** @@ -388,7 +388,7 @@ public class ErrorReportingTokenizer extends Tokenizer { && ElementName.IFRAME != endTagExpectation) { err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + " element \u201C" - + endTagExpectation.name + + endTagExpectation.getName() + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. 
(HTML4-only error)"); } } diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java index b671bc903c..3be6859202 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java @@ -129,13 +129,13 @@ final class StackNode<T> { // ]NOCPP] ) { this.flags = elementName.getFlags(); - this.name = elementName.name; - this.popName = elementName.name; + this.name = elementName.getName(); + this.popName = elementName.getName(); this.ns = "http://www.w3.org/1999/xhtml"; this.node = node; this.attributes = null; this.refcount = 1; - assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + assert elementName.isInterned() : "Don't use this constructor for custom elements."; // [NOCPP[ this.locator = locator; // ]NOCPP] @@ -154,13 +154,13 @@ final class StackNode<T> { // ]NOCPP] ) { this.flags = elementName.getFlags(); - this.name = elementName.name; - this.popName = elementName.name; + this.name = elementName.getName(); + this.popName = elementName.getName(); this.ns = "http://www.w3.org/1999/xhtml"; this.node = node; this.attributes = attributes; this.refcount = 1; - assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + assert elementName.isInterned() : "Don't use this constructor for custom elements."; // [NOCPP[ this.locator = locator; // ]NOCPP] @@ -179,7 +179,7 @@ final class StackNode<T> { // ]NOCPP] ) { this.flags = elementName.getFlags(); - this.name = elementName.name; + this.name = elementName.getName(); this.popName = popName; this.ns = "http://www.w3.org/1999/xhtml"; this.node = node; @@ -206,7 +206,7 @@ final class StackNode<T> { // ]NOCPP] ) { this.flags = prepareSvgFlags(elementName.getFlags()); - this.name = elementName.name; + this.name = elementName.getName(); this.popName = popName; 
this.ns = "http://www.w3.org/2000/svg"; this.node = node; @@ -233,7 +233,7 @@ final class StackNode<T> { ) { this.flags = prepareMathFlags(elementName.getFlags(), markAsIntegrationPoint); - this.name = elementName.name; + this.name = elementName.getName(); this.popName = popName; this.ns = "http://www.w3.org/1998/Math/MathML"; this.node = node; diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java index 125ef32663..59ff3bd7e2 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -417,11 +417,19 @@ public class Tokenizer implements Locator { protected boolean endTag; /** - * The current tag token name. + * The current tag token name. One of + * 1) null, + * 2) non-owning reference to nonInternedTagName + * 3) non-owning reference to a pre-interned ElementName */ private ElementName tagName = null; /** + * The recycled ElementName instance for the non-pre-interned cases. + */ + private ElementName nonInternedTagName = null; + + /** * The current attribute name. 
*/ protected AttributeName attributeName = null; @@ -520,6 +528,7 @@ public class Tokenizer implements Locator { this.bmpChar = new char[1]; this.astralChar = new char[2]; this.tagName = null; + this.nonInternedTagName = new ElementName(); this.attributeName = null; this.doctypeName = null; this.publicIdentifier = null; @@ -549,6 +558,7 @@ public class Tokenizer implements Locator { this.bmpChar = new char[1]; this.astralChar = new char[2]; this.tagName = null; + this.nonInternedTagName = new ElementName(); this.attributeName = null; this.doctypeName = null; this.publicIdentifier = null; @@ -710,6 +720,7 @@ public class Tokenizer implements Locator { @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation); this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0, asArray.length, interner); + assert this.endTagExpectation != null; endTagExpectationToArray(); } @@ -1112,6 +1123,11 @@ public class Tokenizer implements Locator { private void strBufToElementNameString() { tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen, interner); + if (tagName == null) { + nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen, + interner)); + tagName = nonInternedTagName; + } clearStrBufAfterUse(); } @@ -1144,7 +1160,6 @@ public class Tokenizer implements Locator { tokenHandler.startTag(tagName, attrs, selfClosing); // CPPONLY: } } - tagName.release(); tagName = null; if (newAttributesEachTime) { attributes = null; @@ -6650,10 +6665,8 @@ public class Tokenizer implements Locator { Portability.releaseString(publicIdentifier); publicIdentifier = null; } - if (tagName != null) { - tagName.release(); - tagName = null; - } + tagName = null; + nonInternedTagName.setNameForNonInterned(null); if (attributeName != null) { attributeName.release(); attributeName = null; @@ -6735,7 +6748,6 @@ public class Tokenizer implements Locator { shouldSuspend = false; initDoctypeFields(); if (tagName != null) { - 
tagName.release(); tagName = null; } if (attributeName != null) { @@ -6801,13 +6813,17 @@ public class Tokenizer implements Locator { publicIdentifier = Portability.newStringFromString(other.publicIdentifier); } - if (tagName != null) { - tagName.release(); - } if (other.tagName == null) { tagName = null; + } else if (other.tagName.isInterned()) { + tagName = other.tagName; } else { - tagName = other.tagName.cloneElementName(interner); + // In the C++ case, we might be loading state from another + // tokenizer that has atoms from a different tokenizer-scoped + // atom table. Therefore, we have to obtain the corresponding + // atom from our own atom table. + nonInternedTagName.setNameForNonInterned(Portability.newLocalFromLocal(other.tagName.getName(), interner)); + tagName = nonInternedTagName; } if (attributeName != null) { @@ -7058,6 +7074,8 @@ public class Tokenizer implements Locator { } void destructor() { + Portability.delete(nonInternedTagName); + nonInternedTagName = null; // The translator will write refcount tracing stuff here Portability.delete(attributes); attributes = null; diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java index de7d8478d6..db0775e180 100644 --- a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -634,7 +634,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, } // This is the SVG variant of the StackNode constructor. StackNode<T> node = new StackNode<T>(elementName, - elementName.camelCaseName, elt + elementName.getCamelCaseName(), elt // [NOCPP[ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) @@ -664,7 +664,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, } // This is the MathML variant of the StackNode constructor. 
StackNode<T> node = new StackNode<T>(elementName, elt, - elementName.name, false + elementName.getName(), false // [NOCPP[ , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) @@ -1665,7 +1665,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, needToDropLF = false; starttagloop: for (;;) { int group = elementName.getGroup(); - @Local String name = elementName.name; + @Local String name = elementName.getName(); if (isInForeign()) { StackNode<T> currentNode = stack[currentPtr]; @NsUri String currNs = currentNode.ns; @@ -2224,7 +2224,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: case FONT: reconstructTheActiveFormattingElements(); - maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes); + maybeForgetEarlierDuplicateFormattingElement(elementName.getName(), attributes); appendToCurrentNodeAndPushFormattingElementMayFoster( elementName, attributes); @@ -3376,7 +3376,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, needToDropLF = false; int eltPos; int group = elementName.getGroup(); - @Local String name = elementName.name; + @Local String name = elementName.getName(); endtagloop: for (;;) { if (isInForeign()) { if (stack[currentPtr].name != name) { @@ -5301,9 +5301,9 @@ public abstract class TreeBuilder<T> implements TokenHandler, StackNode<T> current = stack[currentPtr]; if (current.isFosterParenting()) { fatal(); - elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, attributes); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.getName(), attributes); } else { - elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, current.node); + elt = createElement("http://www.w3.org/1999/xhtml", elementName.getName(), attributes, current.node); appendElement(elt, current.node); } StackNode<T> node = new 
StackNode<T>(elementName, elt, clone @@ -5324,7 +5324,7 @@ public abstract class TreeBuilder<T> implements TokenHandler, // ]NOCPP] // This method can't be called for custom elements T currentNode = stack[currentPtr].node; - T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, currentNode); + T elt = createElement("http://www.w3.org/1999/xhtml", elementName.getName(), attributes, currentNode); appendElement(elt, currentNode); if (ElementName.TEMPLATE == elementName) { elt = getDocumentFragmentForTemplate(elt); @@ -5340,10 +5340,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.name; + @Local String popName = elementName.getName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); - if (elementName.isCustom()) { + if (!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] @@ -5367,10 +5367,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendToCurrentNodeAndPushElementMayFosterMathML( ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.name; + @Local String popName = elementName.getName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); - if (elementName.isCustom()) { + if (!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] @@ -5423,10 +5423,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendToCurrentNodeAndPushElementMayFosterSVG( ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.camelCaseName; + @Local String popName = elementName.getCamelCaseName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/2000/svg"); - if (elementName.isCustom()) { + if 
(!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] @@ -5459,10 +5459,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, StackNode<T> current = stack[currentPtr]; if (current.isFosterParenting()) { fatal(); - elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.getName(), attributes, formOwner); } else { - elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, + elt = createElement("http://www.w3.org/1999/xhtml", elementName.getName(), attributes, formOwner, current.node); appendElement(elt, current.node); } @@ -5499,10 +5499,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendVoidElementToCurrentMayFoster( ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.name; + @Local String popName = elementName.getName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); - if (elementName.isCustom()) { + if (!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] @@ -5522,10 +5522,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendVoidElementToCurrentMayFosterSVG( ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.camelCaseName; + @Local String popName = elementName.getCamelCaseName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/2000/svg"); - if (elementName.isCustom()) { + if (!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] @@ -5545,10 +5545,10 @@ public abstract class TreeBuilder<T> implements TokenHandler, private void appendVoidElementToCurrentMayFosterMathML( ElementName elementName, HtmlAttributes attributes) throws SAXException { - @Local String popName = elementName.name; + @Local String popName = 
elementName.getName(); // [NOCPP[ checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); - if (elementName.isCustom()) { + if (!elementName.isInterned()) { popName = checkPopName(popName); } // ]NOCPP] diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java index 2fcfc4960e..430bbdc44e 100644 --- a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java @@ -23,11 +23,6 @@ package nu.validator.htmlparser.test; -import nu.validator.htmlparser.common.TokenHandler; -import nu.validator.htmlparser.impl.ElementName; -import nu.validator.htmlparser.impl.HtmlAttributes; -import nu.validator.htmlparser.impl.Tokenizer; - import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -38,6 +33,11 @@ import com.sdicons.json.model.JSONNull; import com.sdicons.json.model.JSONObject; import com.sdicons.json.model.JSONString; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.impl.ElementName; +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.Tokenizer; + public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler { private static final JSONString DOCTYPE = new JSONString("DOCTYPE"); @@ -102,7 +102,7 @@ public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler { } public void endTag(ElementName eltName) throws SAXException { - String name = eltName.name; + String name = eltName.getName(); flushCharacters(); JSONArray token = new JSONArray(); token.getValue().add(END_TAG); @@ -123,7 +123,7 @@ public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler { public void startTag(ElementName eltName, HtmlAttributes attributes, boolean 
selfClosing) throws SAXException { - String name = eltName.name; + String name = eltName.getName(); flushCharacters(); JSONArray token = new JSONArray(); token.getValue().add(START_TAG); diff --git a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java index 0fa5972c8a..03b8c85975 100644 --- a/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java +++ b/parser/html/java/htmlparser/test-src/nu/validator/htmlparser/test/TokenPrinter.java @@ -29,6 +29,11 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + import nu.validator.htmlparser.common.TokenHandler; import nu.validator.htmlparser.impl.ElementName; import nu.validator.htmlparser.impl.ErrorReportingTokenizer; @@ -36,11 +41,6 @@ import nu.validator.htmlparser.impl.HtmlAttributes; import nu.validator.htmlparser.impl.Tokenizer; import nu.validator.htmlparser.io.Driver; -import org.xml.sax.ErrorHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - public class TokenPrinter implements TokenHandler, ErrorHandler { private final Writer writer; @@ -94,7 +94,7 @@ public class TokenPrinter implements TokenHandler, ErrorHandler { public void endTag(ElementName eltName) throws SAXException { try { writer.write(')'); - writer.write(eltName.name); + writer.write(eltName.getName()); writer.write('\n'); } catch (IOException e) { throw new SAXException(e); @@ -117,7 +117,7 @@ public class TokenPrinter implements TokenHandler, ErrorHandler { throws SAXException { try { writer.write('('); - writer.write(eltName.name); + writer.write(eltName.getName()); writer.write('\n'); for (int i = 0; i < attributes.getLength(); i++) { writer.write('A'); diff --git 
a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java index bf5775eb61..af042ce960 100644 --- a/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java +++ b/parser/html/java/htmlparser/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java @@ -448,8 +448,7 @@ public class CppVisitor extends AnnotationHelperVisitor<LocalSymbolTable> { printer.print("#include \""); printer.print(className); printer.printLn(".h\""); - if ("AttributeName".equals(javaClassName) - || "ElementName".equals(javaClassName)) { + if ("AttributeName".equals(javaClassName)) { printer.print("#include \""); printer.print(cppTypes.classPrefix()); printer.print("Releasable"); @@ -1387,8 +1386,7 @@ public class CppVisitor extends AnnotationHelperVisitor<LocalSymbolTable> { suppressPointer = true; printTypeArgs(n.getTypeArgs(), arg); - if ("createAttributeName".equals(currentMethod) - || "elementNameByBuffer".equals(currentMethod)) { + if ("createAttributeName".equals(currentMethod)) { printer.print(cppTypes.classPrefix()); printer.print("Releasable"); printer.print(n.getType().getName()); |