9 files changed, 461 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/ruby-gcj/DomUtils.java b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
new file mode 100644
index 0000000000..dc43da83d3
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
@@ -0,0 +1,36 @@
+import java.util.HashSet;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+
+public class DomUtils {
+  // documents in this set stay strongly referenced until unpin() removes them
+  private static HashSet<Document> pinned_list = new HashSet<Document>();
+  // add d to the pinned set; synchronized because the set is shared static state
+  public static synchronized void pin(Document d) {
+    pinned_list.add(d);
+  }
+  // remove d from the pinned set; a document that was never pinned is ignored
+  public static synchronized void unpin(Document d) {
+    pinned_list.remove(d);
+  }
+
+  // append to b, in document order, the text of every text node beneath e
+  public static void getElementContent(Element e, StringBuffer b) {
+    for (Node n = e.getFirstChild(); n!=null; n=n.getNextSibling()) {
+      if (n.getNodeType() == Node.TEXT_NODE) {
+        b.append(n.getNodeValue());
+      } else if (n.getNodeType() == Node.ELEMENT_NODE) {
+        getElementContent((Element) n, b);  // descend into the child, not e again
+      }
+    }
+  }
+
+  // replace all child nodes of a given element with a single text node
+  public static void setElementContent(Element e, String s) {
+    while (e.hasChildNodes()) {
+      e.removeChild(e.getFirstChild());
+    }
+    e.appendChild(e.getOwnerDocument().createTextNode(s));
+  }
+}
diff --git a/parser/html/java/htmlparser/ruby-gcj/README b/parser/html/java/htmlparser/ruby-gcj/README
new file mode 100644
index 0000000000..b368437f77
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/README
@@ -0,0 +1,65 @@
+Disclaimer:
+
+  This code is experimental.
+
+  When some people say experimental, they mean "it may not do what it is
+  intended to do; in fact, it might even wipe out your hard drive".  I mean
+  that too.  But I mean something more than that.
+
+  In this case, experimental means that I don't even know what it is intended
+  to do.  I just have a vague vision, and I am trying out various things in
+  the hopes that one of them will work out.
+
+Vision:
+
+  My vague vision is that I would like to see HTML 5 be a success.  For me to
+  consider it to be a success, it needs to be a standard, be interoperable,
+  and be ubiquitous.
+
+  I believe that the Validator.nu parser can be used to bootstrap that
+  process.  It is written in Java.  It has been compiled into JavaScript.  It
+  has been translated into C++ based on the Mozilla libraries with the intent
+  of being included in Firefox.  It tracks the standard very closely.
+
+  For the moment, the effort is focused on extending that to another language
+  (Ruby) in a single environment (i.e., Linux).  Once that is complete, the
+  intent is to evaluate the results, decide what needs to be changed, and
+  determine what needs to be done to support other languages and environments.
+
+  The bar I'm setting for myself isn't just another SWIG generated low level
+  interface to a DOM, but rather a best of breed interface; which for Ruby
+  seems to be the one pioneered by Hpricot and adopted by Nokogiri.  Success
+  will mean passing all of the tests from one of those two parsers as well as
+  all of the HTML5 tests.
+
+Build instructions:
+
+  You'll need icu4j and chardet jars.  If you checked out and ran dldeps you
+  are already all set:
+
+    svn co http://svn.versiondude.net/whattf/build/trunk/ build
+    python build/build.py checkout dldeps
+
+  Fedora 11:
+
+    yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++ 
+
+  Ubuntu 9.04:
+
+    apt-get install ruby ruby1.8-dev rake gcj g++
+
+    Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply
+    because the javac that comes with gcj doesn't support -sourcepath, and
+    I haven't spent the time to find a replacement.
+
+    Finally, make sure that libjaxp1.3-java is *not* installed.
+
+      http://gcc.gnu.org/ml/java/2009-06/msg00055.html
+
+  If this is done, you should be all set.
+ 
+    cd htmlparser/ruby-gcj
+    rake test
+
+  If things are successful, the last lines of the output will list the
+  font attributes and values found in the test/google.html file.
diff --git a/parser/html/java/htmlparser/ruby-gcj/Rakefile b/parser/html/java/htmlparser/ruby-gcj/Rakefile
new file mode 100644
index 0000000000..7b51802539
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/Rakefile
@@ -0,0 +1,77 @@
+deps    = ENV['deps'] || '../../dependencies'  # dependency root; the deps environment variable overrides it
+icu4j   = "#{deps}/icu4j-4_0.jar"  # ICU4J jar, used on the javac/gcj classpath below
+chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar"  # chardet jar, likewise
+libgcj  = Dir['/usr/share/java/libgcj*.jar'].grep(/gcj[-\d.]*jar$/).sort.last  # last matching libgcj jar in sort order
+
+task :default => %w(headers libs Makefile validator.so)  # default build: headers, shared libs, Makefile, extension
+
+# headers: compile the Java sources with javac, then generate CNI C++ headers with gcjh
+
+hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder'
+task :headers => %W(headers/DomUtils.h headers/#{hdb}.h)
+
+file 'headers/DomUtils.h' => 'DomUtils.java' do |t|
+  mkdir_p %w(classes headers), :verbose => false
+  sh "javac -d classes #{t.prerequisites.first}"
+  sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils"
+end
+# NOTE(review): the gcjh call below passes -cp twice; presumably the second one wins -- confirm
+file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t|
+  mkdir_p %w(classes headers), :verbose => false
+  sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " +
+    t.prerequisites.first
+  sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " + 
+    hdb.gsub('/','.')
+end
+
+# libs: use gcj to build one shared library each for the parser sources, chardet.jar and the ICU4J jar
+
+task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"}
+# all parser sources except those under a /xom/ directory, compiled together with DomUtils.java
+htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? '/xom/'}
+file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t|
+  mkdir_p 'lib', :verbose => false
+  sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " +
+    "-o #{t.name} #{t.prerequisites.join(' ')}"
+end
+# the chardet jar is handed to gcj directly as the only input
+file 'lib/libnu-chardet.so' => chardet do |t|
+  mkdir_p 'lib', :verbose => false
+  sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+# same for the ICU4J jar
+file 'lib/libnu-icu.so' => icu4j do |t|
+  mkdir_p 'lib', :verbose => false
+  sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+# module: generate the Makefile with extconf.rb, then build the validator.so extension
+
+file 'Makefile' do
+  sh "ruby extconf.rb --with-gcj=#{libgcj}"
+end
+
+file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do
+  sh 'make'  # sh (unlike system) aborts rake when the build fails
+end
+# symlink the extension into nu/ (the tests load it with require 'nu/validator')
+file 'nu/validator.so' do
+  mkdir_p 'nu', :verbose => false
+  system 'ln -s -t nu ../validator.so'
+end
+
+# tasks: test runs the fonts script against the sample page; clean/clobber remove build output
+
+task :test => [:default, 'nu/validator.so'] do
+  ENV['LD_LIBRARY_PATH']='lib'  # the libnu-*.so libraries are built into lib/
+  sh 'ruby test/fonts.rb test/google.html'
+end
+# clean: remove compiled classes, libraries, the nu/ link dir, and the two generated project headers
+task :clean do
+  rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) + 
+    Dir['*.o'] + Dir['*.so']
+end
+# clobber: clean, plus the whole headers directory and the generated Makefile
+task :clobber => :clean do
+  rm_rf %w(headers Makefile)
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/extconf.rb b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
new file mode 100644
index 0000000000..415cf430af
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
@@ -0,0 +1,45 @@
+require 'mkmf'
+
+# system dependencies: path of the libgcj jar, overridable with --with-gcj=PATH
+gcj  = with_config('gcj', '/usr/share/java/libgcj.jar')
+
+# headers for JAXP: run the header checks as C++ and generate missing CNI headers
+CONFIG['CC'] = 'g++'
+with_cppflags('-xc++') do
+
+  unless find_header('org/w3c/dom/Document.h', 'headers')
+    # walk the jar listing; only top-level javax/org w3c/xml classes get a header
+    `jar tf #{gcj}`.split.each do |file|
+      next unless file =~ /\.class$/
+      next unless file =~ /^(javax|org)\/(w3c|xml)/
+      next if file.include? '$'
+      # header path under headers/ and the dotted class name passed to gcjh
+      dest = 'headers/' + file.sub(/\.class$/,'.h')
+      name = file.sub(/\.class$/,'').gsub('/','.')
+      # keep headers generated by an earlier run
+      next if File.exist? dest
+      # generate the header, then rename the identifier `namespace` (a C++ keyword) to `namespace$`
+      cmd = "gcjh -cp #{gcj} -o #{dest} #{name}"
+      puts cmd
+      break unless system cmd
+      system "ruby -pi -e '$_.sub!(/namespace namespace$/," +
+        "\"namespace namespace$\")' #{dest}"
+      system "ruby -pi -e '$_.sub!(/::namespace::/," +
+        "\"::namespace$::\")' #{dest}"
+    end
+    # still missing: stop with a failure status so the calling build notices
+    exit 1 unless find_header('org/w3c/dom/Document.h', 'headers')
+  end
+
+  find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers'
+end
+
+# Java libraries: the gcj-built shared libraries, looked up in lib/
+Config::CONFIG['CC'] = 'g++ -shared'
+dir_config('nu-htmlparser', nil, 'lib')
+have_library 'nu-htmlparser'
+have_library 'nu-icu'
+have_library 'nu-chardet'
+
+# Ruby library: write the Makefile for the nu/validator extension
+create_makefile 'nu/validator'
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
new file mode 100644
index 0000000000..1beb94c10e
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
@@ -0,0 +1,5 @@
+require 'nu/validator'
+# for each file named on the command line, parse it and print the root element's name
+ARGV.each do |arg|
+  puts Nu::Validator::parse(open(arg)).root.name
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
new file mode 100644
index 0000000000..595e3ae062
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
@@ -0,0 +1,11 @@
+require 'nu/validator'
+require 'open-uri'
+# for each argument, parse it and print "name => value" for every attribute of every font element
+ARGV.each do |arg|
+  doc = Nu::Validator::parse(open(arg))
+  doc.xpath("//*[local-name()='font']").each do |font|
+    font.attributes.each do |name, attr|
+      puts "#{name} => #{attr.value}"
+    end
+  end
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/google.html b/parser/html/java/htmlparser/ruby-gcj/test/google.html
new file mode 100644
index 0000000000..8d2183b295
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/google.html
@@ -0,0 +1,10 @@
+<!doctype html><html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><script>window.google={kEI:"vLhASujeGpTU9QT2iOnWAQ",kEXPI:"17259",kCSIE:"17259",kHL:"en"};
+window.google.sn="webhp";window.google.timers={load:{t:{start:(new Date).getTime()}}};try{window.google.pt=window.gtbExternal&&window.gtbExternal.pageT()||window.external&&window.external.pageT}catch(b){}
+window.google.jsrt_kill=1;
+var _gjwl=location;function _gjuc(){var e=_gjwl.href.indexOf("#");if(e>=0){var a=_gjwl.href.substring(e);if(a.indexOf("&q=")>0||a.indexOf("#q=")>=0){a=a.substring(1);if(a.indexOf("#")==-1){for(var c=0;c<a.length;){var d=c;if(a.charAt(d)=="&")++d;var b=a.indexOf("&",d);if(b==-1)b=a.length;var f=a.substring(d,b);if(f.indexOf("fp=")==0){a=a.substring(0,c)+a.substring(b,a.length);b=c}else if(f=="cad=h")return 0;c=b}_gjwl.href="/search?"+a+"&cad=h";return 1}}}return 0}function _gjp(){!(window._gjwl.hash&&
+window._gjuc())&&setTimeout(_gjp,500)};
+window._gjp && _gjp();</script><style>td{line-height:.8em;}.gac_c{line-height:normal;}form{margin-bottom:20px;}body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:0px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#guser{padding-bottom:7px !important;text-align:right}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb3{color:#00c !important}.gb3{text-decoration:none}</style><script>google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="document.f.q.focus();if(document.images)new Image().src='/images/nav_logo4.png'" topmargin=3 marginheight=3><textarea id=csi style=display:none></textarea><iframe name=wgjf style="display:none"></iframe><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" class=gb1>Images</a> <a href="http://video.google.com/?hl=en&tab=wv" class=gb1>Video</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" class=gb3><u>more</u> &raquo;</a></nobr></div><div id=guser width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?hl=en&continue=http://www.google.com/">Sign in</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><center><br clear=all id=lgpd><img alt="Google" 
height=110 src="/intl/en_ALL/images/logo.gif" width=276 id=logo onload="window.lol&&lol()"><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25% align=left><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><a href="/aclk?sa=L&ai=CqVchLbNASrv7IZa68gS13KTwAc3__IMB29PoogzB2ZzZExABIMFUUK_O0JX______wFgyQaqBAlP0BcDOBRYhqw&num=1&sig=AGiWqty21CD7ixNXZILwCnH7c_3n9v2-tg&q=http://www.allforgood.org#source=hpp">Find an opportunity to volunteer</a> in your community today.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2009 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center><div id=xjsd></div><div id=xjsi><script>if(google.y)google.y.first=[];if(google.y)google.y.first=[];google.dstr=[];google.rein=[];window.setTimeout(function(){var a=document.createElement("script");a.src="/extern_js/f/CgJlbhICdXMgACswCjggQAgsKzAOOAUsKzAYOAQsKzAlOMmIASwrMCY4BCwrMCc4ACw/1t0T7hspHT4.js";(document.getElementById("xjsd")||document.body).appendChild(a)},0);
+;google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')});google.xjs&&google.j&&google.j.xi&&google.j.xi()</script></div><script>(function(){
+function a(){google.timers.load.t.ol=(new Date).getTime();google.report&&google.report(google.timers.load,{ei:google.kEI,e:google.kCSIE})}if(window.addEventListener)window.addEventListener("load",a,false);else if(window.attachEvent)window.attachEvent("onload",a);google.timers.load.t.prt=(new Date).getTime();
+})();
+</script>
+\ No newline at end of file
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/greek.xml b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
new file mode 100644
index 0000000000..a14d23eb1a
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='iso-8859-7'?>
+<root/>
diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
new file mode 100644
index 0000000000..aadd24abe6
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
@@ -0,0 +1,210 @@
+#include <gcj/cni.h>
+
+#include <java/io/ByteArrayInputStream.h>
+#include <java/lang/System.h>
+#include <java/lang/Throwable.h>
+#include <java/util/ArrayList.h>
+#include <javax/xml/xpath/XPath.h>
+#include <javax/xml/xpath/XPathFactory.h>
+#include <javax/xml/xpath/XPathExpression.h>
+#include <javax/xml/xpath/XPathConstants.h>
+#include <javax/xml/parsers/DocumentBuilderFactory.h>
+#include <javax/xml/parsers/DocumentBuilder.h>
+#include <org/w3c/dom/Attr.h>
+#include <org/w3c/dom/Document.h>
+#include <org/w3c/dom/Element.h>
+#include <org/w3c/dom/NodeList.h>
+#include <org/w3c/dom/NamedNodeMap.h>
+#include <org/xml/sax/InputSource.h>
+
+#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h"
+
+#include "DomUtils.h"
+
+#include "ruby.h"
+
+using namespace java::io;
+using namespace java::lang;
+using namespace java::util;
+using namespace javax::xml::parsers;
+using namespace javax::xml::xpath;
+using namespace nu::validator::htmlparser::dom;
+using namespace org::w3c::dom;
+using namespace org::xml::sax;
+
+static VALUE jaxp_Document;  // Ruby class Jaxp::Document, set in Init_validator
+static VALUE jaxp_Attr;  // Ruby class Jaxp::Attr, set in Init_validator
+static VALUE jaxp_Element;  // Ruby class Jaxp::Element, set in Init_validator
+static ID ID_read;  // interned "read"
+static ID ID_doc;  // interned "@doc"
+static ID ID_element;  // interned "@element"
+
+// convert a Java string into a Ruby string holding its UTF-8 bytes (nil for a NULL string)
+static VALUE j2r(String *string) {
+  if (string == NULL) return Qnil;
+  jint len = JvGetStringUTFLength(string);  // size of the UTF-8 encoding, in bytes
+  char buf[len];  // not NUL-terminated; rb_str_new below copies exactly len bytes
+  JvGetStringUTFRegion(string, 0, string->length(), buf);  // region is counted in Java chars, not bytes
+  return rb_str_new(buf, len);
+}
+
+// convert a Ruby string into a Java string; a non-String argument is converted or raises TypeError
+static String *r2j(VALUE string) {
+  return JvNewStringUTF(StringValuePtr(string));  // checked access instead of a blind RSTRING cast
+}
+
+// free callback given to Data_Wrap_Struct for Documents: unpin the Java Document that was pinned at parse time
+static void vnu_document_free(Document *doc) {
+  DomUtils::unpin(doc);
+}
+
+// Nu::Validator::parse( string|file ): parse with HtmlDocumentBuilder into a pinned Document; nil if Java throws
+static VALUE vnu_parse(VALUE self, VALUE input) {
+  if (rb_respond_to(input, ID_read))  // read file-like objects into memory.  TODO: buffer such objects
+    input = rb_funcall(input, ID_read, 0);
+  StringValue(input);  // convert or raise TypeError before RSTRING() is applied
+  try {  // as in jaxp_parse: report Java exceptions instead of letting them escape into Ruby
+    HtmlDocumentBuilder *parser = new HtmlDocumentBuilder();
+    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);  // copy the input into a Java byte array
+    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);
+    InputSource *source = new InputSource(new ByteArrayInputStream(bytes));
+    Document *doc = parser->parse(source);
+    DomUtils::pin(doc);  // stays pinned until vnu_document_free unpins it
+    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+}
+
+// Jaxp::parse( string|file ): parse with the default JAXP DocumentBuilder into a pinned Document; nil if Java throws
+static VALUE jaxp_parse(VALUE self, VALUE input) {
+  // read file-like objects into memory.  TODO: buffer such objects
+  if (rb_respond_to(input, ID_read))
+    input = rb_funcall(input, ID_read, 0);
+  StringValue(input);  // convert or raise TypeError before RSTRING() is applied
+  try {
+    // created inside the try so that factory/builder exceptions are caught as well
+    DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance();
+    DocumentBuilder *parser = factory->newDocumentBuilder();
+    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);  // copy the input into a Java byte array
+    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);
+    Document *doc = parser->parse(new ByteArrayInputStream(bytes));
+    DomUtils::pin(doc);  // stays pinned until vnu_document_free unpins it
+    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+}
+
+
+// Jaxp::Document#encoding: the document's XML encoding as a Ruby string, or nil when Java returns null
+static VALUE jaxp_document_encoding(VALUE rdoc) {
+  Document *jdoc;
+  Data_Get_Struct(rdoc, Document, jdoc);  // unwrap the Java Document stored in rdoc
+  return j2r(jdoc->getXmlEncoding());
+}
+
+// Jaxp::Document#root: the document element wrapped as a Jaxp::Element, or nil if there is none
+static VALUE jaxp_document_root(VALUE rdoc) {
+  Document *jdoc;
+  Data_Get_Struct(rdoc, Document, jdoc);
+
+  Element *jelement = jdoc->getDocumentElement();
+  if (jelement==NULL) return Qnil;
+
+  VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement);  // no mark or free callback
+  rb_ivar_set(relement, ID_doc, rdoc);  // @doc: the wrapper keeps a reference to its Ruby document
+  return relement;
+}
+
+// Jaxp::Document#xpath: evaluate an XPath string against the document; returns an Array of wrapped result nodes
+static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) {
+  Document *jdoc;
+  Data_Get_Struct(rdoc, Document, jdoc);
+
+  Element *jelement = jdoc->getDocumentElement();
+  if (jelement==NULL) return Qnil;  // no document element: nil rather than an empty Array
+  // NOTE(review): unlike jaxp_parse there is no try/catch here; presumably a bad expression throws -- confirm
+  XPath *xpath = XPathFactory::newInstance()->newXPath();
+  XPathExpression *expr = xpath->compile(r2j(path));
+  NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET);
+
+  VALUE result = rb_ary_new();
+  for (int i=0; i<list->getLength(); i++) {
+    VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i));  // NOTE(review): wrapped as Element without a node-type check
+    rb_ivar_set(relement, ID_doc, rdoc);  // @doc: each wrapper keeps a reference to the Ruby document
+    rb_ary_push(result, relement);
+  }
+  return result;
+}
+
+// Jaxp::Element#name: the wrapped node's getNodeName() as a Ruby string
+static VALUE jaxp_element_name(VALUE relement) {
+  Element *jelement;
+  Data_Get_Struct(relement, Element, jelement);  // unwrap the Java Element stored in relement
+  return j2r(jelement->getNodeName());
+}
+
+// Jaxp::Element#attributes: a Hash mapping each attribute name to a wrapped Jaxp::Attr
+static VALUE jaxp_element_attributes(VALUE relement) {
+  Element *jelement;
+  Data_Get_Struct(relement, Element, jelement);
+  VALUE result = rb_hash_new();
+  NamedNodeMap *map = jelement->getAttributes();
+  for (int i=0; i<map->getLength(); i++) {
+    Attr *jattr = (Attr *) map->item(i);
+    VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr);  // no mark or free callback
+    rb_ivar_set(rattr, ID_element, relement);  // @element: the attr keeps a reference to its Ruby element
+    rb_hash_aset(result, j2r(jattr->getName()), rattr);
+  }
+  return result;
+}
+
+// Jaxp::Attr#value: the wrapped attribute's getValue() as a Ruby string
+static VALUE jaxp_attribute_value(VALUE rattribute) {
+  Attr *jattribute;
+  Data_Get_Struct(rattribute, Attr, jattribute);  // unwrap the Java Attr stored in rattribute
+  return j2r(jattribute->getValue());
+}
+
+typedef VALUE (ruby_method)(...);  // function type the callbacks are cast to when registered in Init_validator
+
+// extension initialization: start the GCJ runtime, then define Jaxp, Nu::Validator and the Jaxp::* classes
+extern "C" void Init_validator() {
+  JvCreateJavaVM(NULL);
+  JvAttachCurrentThread(NULL, NULL);
+  JvInitClass(&DomUtils::class$);
+  JvInitClass(&XPathFactory::class$);
+  JvInitClass(&XPathConstants::class$);
+  // Jaxp.parse: the JAXP DocumentBuilder entry point
+  VALUE jaxp = rb_define_module("Jaxp");
+  rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1);
+  // Nu::Validator.parse: the HtmlDocumentBuilder entry point
+  VALUE nu = rb_define_module("Nu");
+  VALUE validator = rb_define_module_under(nu, "Validator");
+  rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1);
+  // both parse functions wrap their result as Jaxp::Document
+  jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject);
+  rb_define_method(jaxp_Document, "encoding", 
+    (ruby_method*)&jaxp_document_encoding, 0);
+  rb_define_method(jaxp_Document, "root", 
+    (ruby_method*)&jaxp_document_root, 0);
+  rb_define_method(jaxp_Document, "xpath", 
+    (ruby_method*)&jaxp_document_xpath, 1);
+
+  jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject);
+  rb_define_method(jaxp_Element, "name", 
+    (ruby_method*)&jaxp_element_name, 0);
+  rb_define_method(jaxp_Element, "attributes", 
+    (ruby_method*)&jaxp_element_attributes, 0);
+
+  jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject);
+  rb_define_method(jaxp_Attr, "value", 
+    (ruby_method*)&jaxp_attribute_value, 0);
+  // symbols used by the callbacks above
+  ID_read = rb_intern("read");
+  ID_doc  = rb_intern("@doc");
+  ID_element = rb_intern("@element");
+}