summaryrefslogtreecommitdiff
path: root/parser/html/java/htmlparser/ruby-gcj
diff options
context:
space:
mode:
Diffstat (limited to 'parser/html/java/htmlparser/ruby-gcj')
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/DomUtils.java36
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/README65
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/Rakefile77
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/extconf.rb45
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb5
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/fonts.rb11
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/google.html10
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/test/greek.xml2
-rw-r--r--parser/html/java/htmlparser/ruby-gcj/validator.cpp210
9 files changed, 461 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/ruby-gcj/DomUtils.java b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
new file mode 100644
index 0000000000..dc43da83d3
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/DomUtils.java
@@ -0,0 +1,36 @@
+import java.util.HashSet;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+
+/**
+ * Static DOM helpers for the Ruby/GCJ binding: a registry of pinned
+ * documents plus text-content accessors for elements.
+ */
+public class DomUtils {
+
+    // Documents registered through pin(); this static set holds a strong
+    // reference to each one until unpin() is called.
+    // NOTE(review): presumably this keeps a document alive while only the
+    // Ruby side still refers to it -- confirm against the native callers.
+    private static HashSet<Document> pinned_list = new HashSet<Document>();
+
+    /** Adds d to the pinned set. */
+    public static synchronized void pin(Document d) {
+        pinned_list.add(d);
+    }
+
+    /** Removes d from the pinned set. */
+    public static synchronized void unpin(Document d) {
+        pinned_list.remove(d);
+    }
+
+    /**
+     * Appends to b the value of every text node found beneath e, visiting
+     * children in sibling order and descending through nested elements.
+     *
+     * @param e element whose text content is collected
+     * @param b buffer that receives the text
+     */
+    public static void getElementContent(Element e, StringBuffer b) {
+        for (Node n = e.getFirstChild(); n != null; n = n.getNextSibling()) {
+            if (n.getNodeType() == Node.TEXT_NODE) {
+                b.append(n.getNodeValue());
+            } else if (n.getNodeType() == Node.ELEMENT_NODE) {
+                // Descend into the child element n, not e: recursing on e
+                // would restart the same walk and never terminate.
+                getElementContent((Element) n, b);
+            }
+        }
+    }
+
+    /**
+     * Replaces all child nodes of e with a single text node holding s.
+     *
+     * @param e element whose children are replaced
+     * @param s text for the new, only child
+     */
+    public static void setElementContent(Element e, String s) {
+        while (e.hasChildNodes()) {
+            e.removeChild(e.getFirstChild());
+        }
+        e.appendChild(e.getOwnerDocument().createTextNode(s));
+    }
+}
diff --git a/parser/html/java/htmlparser/ruby-gcj/README b/parser/html/java/htmlparser/ruby-gcj/README
new file mode 100644
index 0000000000..b368437f77
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/README
@@ -0,0 +1,65 @@
+Disclaimer:
+
+ This code is experimental.
+
+ When some people say experimental, they mean "it may not do what it is
+ intended to do; in fact, it might even wipe out your hard drive". I mean
+ that too. But I mean something more than that.
+
+ In this case, experimental means that I don't even know what it is intended
+ to do. I just have a vague vision, and I am trying out various things in
+ the hopes that one of them will work out.
+
+Vision:
+
+ My vague vision is that I would like to see HTML 5 be a success. For me to
+ consider it to be a success, it needs to be a standard, be interoperable,
+ and be ubiquitous.
+
+ I believe that the Validator.nu parser can be used to bootstrap that
+ process. It is written in Java. It has been compiled into JavaScript. It
+ has been translated into C++ based on the Mozilla libraries with the intent
+ of being included in Firefox. It tracks the standard very closely.
+
+ For the moment, the effort is focused on extending that to another language
+ (Ruby) on a single environment (i.e., Linux). Once that is complete, the
+ intent is to evaluate the results, decide what needs to be changed, and
+ determine what needs to be done to support other languages and environments.
+
+ The bar I'm setting for myself isn't just another SWIG generated low level
+ interface to a DOM, but rather a best of breed interface, which for Ruby
+ seems to be the one pioneered by Hpricot and adopted by Nokogiri. Success
+ will mean passing all of the tests from one of those two parsers as well as
+ all of the HTML5 tests.
+
+Build instructions:
+
+ You'll need icu4j and chardet jars. If you checked out and ran dldeps you
+ are already all set:
+
+ svn co http://svn.versiondude.net/whattf/build/trunk/ build
+ python build/build.py checkout dldeps
+
+ Fedora 11:
+
+ yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++
+
+ Ubuntu 9.04:
+
+ apt-get install ruby ruby1.8-dev rake gcj g++
+
+ Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply
+ because the javac that comes with gcj doesn't support -sourcepath, and
+ I haven't spent the time to find a replacement.
+
+ Finally, make sure that libjaxp1.3-java is *not* installed.
+
+ http://gcc.gnu.org/ml/java/2009-06/msg00055.html
+
+ If this is done, you should be all set.
+
+ cd htmlparser/ruby-gcj
+ rake test
+
+ If things are successful, the last lines of the output will list the
+ font attributes and values found in the test/google.html file.
diff --git a/parser/html/java/htmlparser/ruby-gcj/Rakefile b/parser/html/java/htmlparser/ruby-gcj/Rakefile
new file mode 100644
index 0000000000..7b51802539
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/Rakefile
@@ -0,0 +1,77 @@
+# Jar locations used throughout the build.  The dependency directory can be
+# overridden through the `deps` environment variable.
+deps = ENV.fetch('deps') { '../../dependencies' }
+icu4j = "#{deps}/icu4j-4_0.jar"
+chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar"
+
+# Of the libgcj jars installed under /usr/share/java, use the one whose
+# name sorts last.
+gcj_jars = Dir['/usr/share/java/libgcj*.jar'].select { |jar| jar =~ /gcj[-\d.]*jar$/ }
+libgcj = gcj_jars.max
+
+task :default => ['headers', 'libs', 'Makefile', 'validator.so']
+
+# headers
+
+hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder'
+task :headers => %W(headers/DomUtils.h headers/#{hdb}.h)
+
+file 'headers/DomUtils.h' => 'DomUtils.java' do |t|
+ mkdir_p %w(classes headers), :verbose => false
+ sh "javac -d classes #{t.prerequisites.first}"
+ sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils"
+end
+
+file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t|
+ mkdir_p %w(classes headers), :verbose => false
+ sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " +
+ t.prerequisites.first
+ sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " +
+ hdb.gsub('/','.')
+end
+
+# libs
+
+task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"}
+
+htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? '/xom/'}
+file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " +
+ "-o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+file 'lib/libnu-chardet.so' => chardet do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+file 'lib/libnu-icu.so' => icu4j do |t|
+ mkdir_p 'lib', :verbose => false
+ sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}"
+end
+
+# module
+
+file 'Makefile' do
+ sh "ruby extconf.rb --with-gcj=#{libgcj}"
+end
+
+file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do
+ system 'make'
+end
+
+file 'nu/validator.so' do
+ mkdir_p 'nu', :verbose => false
+ system 'ln -s -t nu ../validator.so'
+end
+
+# tasks
+
+task :test => [:default, 'nu/validator.so'] do
+ ENV['LD_LIBRARY_PATH']='lib'
+ sh 'ruby test/fonts.rb test/google.html'
+end
+
+task :clean do
+ rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) +
+ Dir['*.o'] + Dir['*.so']
+end
+
+task :clobber => :clean do
+ rm_rf %w(headers Makefile)
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/extconf.rb b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
new file mode 100644
index 0000000000..415cf430af
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/extconf.rb
@@ -0,0 +1,45 @@
+require 'mkmf'
+
+# system dependencies
+gcj = with_config('gcj', '/usr/share/java/libgcj.jar')
+
+# headers for JAXP
+CONFIG['CC'] = 'g++'
+with_cppflags('-xc++') do
+
+ unless find_header('org/w3c/dom/Document.h', 'headers')
+
+ `jar tf #{gcj}`.split.each do |file|
+ next unless file =~ /\.class$/
+ next unless file =~ /^(javax|org)\/(w3c|xml)/
+ next if file.include? '$'
+
+ dest = 'headers/' + file.sub(/\.class$/,'.h')
+ name = file.sub(/\.class$/,'').gsub('/','.')
+
+ next if File.exist? dest
+
+ cmd = "gcjh -cp #{gcj} -o #{dest} #{name}"
+ puts cmd
+ break unless system cmd
+ system "ruby -pi -e '$_.sub!(/namespace namespace$/," +
+ "\"namespace namespace$\")' #{dest}"
+ system "ruby -pi -e '$_.sub!(/::namespace::/," +
+ "\"::namespace$::\")' #{dest}"
+ end
+
+ exit unless find_header('org/w3c/dom/Document.h', 'headers')
+ end
+
+ find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers'
+end
+
+# Java libraries
+Config::CONFIG['CC'] = 'g++ -shared'
+dir_config('nu-htmlparser', nil, 'lib')
+have_library 'nu-htmlparser'
+have_library 'nu-icu'
+have_library 'nu-chardet'
+
+# Ruby library
+create_makefile 'nu/validator'
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
new file mode 100644
index 0000000000..1beb94c10e
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/domencoding.rb
@@ -0,0 +1,5 @@
+require 'nu/validator'
+
+ARGV.each do |arg|
+ puts Nu::Validator::parse(open(arg)).root.name
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
new file mode 100644
index 0000000000..595e3ae062
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/fonts.rb
@@ -0,0 +1,11 @@
+require 'nu/validator'
+require 'open-uri'
+
+ARGV.each do |arg|
+ doc = Nu::Validator::parse(open(arg))
+ doc.xpath("//*[local-name()='font']").each do |font|
+ font.attributes.each do |name, attr|
+ puts "#{name} => #{attr.value}"
+ end
+ end
+end
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/google.html b/parser/html/java/htmlparser/ruby-gcj/test/google.html
new file mode 100644
index 0000000000..8d2183b295
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/google.html
@@ -0,0 +1,10 @@
+<!doctype html><html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><script>window.google={kEI:"vLhASujeGpTU9QT2iOnWAQ",kEXPI:"17259",kCSIE:"17259",kHL:"en"};
+window.google.sn="webhp";window.google.timers={load:{t:{start:(new Date).getTime()}}};try{window.google.pt=window.gtbExternal&&window.gtbExternal.pageT()||window.external&&window.external.pageT}catch(b){}
+window.google.jsrt_kill=1;
+var _gjwl=location;function _gjuc(){var e=_gjwl.href.indexOf("#");if(e>=0){var a=_gjwl.href.substring(e);if(a.indexOf("&q=")>0||a.indexOf("#q=")>=0){a=a.substring(1);if(a.indexOf("#")==-1){for(var c=0;c<a.length;){var d=c;if(a.charAt(d)=="&")++d;var b=a.indexOf("&",d);if(b==-1)b=a.length;var f=a.substring(d,b);if(f.indexOf("fp=")==0){a=a.substring(0,c)+a.substring(b,a.length);b=c}else if(f=="cad=h")return 0;c=b}_gjwl.href="/search?"+a+"&cad=h";return 1}}}return 0}function _gjp(){!(window._gjwl.hash&&
+window._gjuc())&&setTimeout(_gjp,500)};
+window._gjp && _gjp();</script><style>td{line-height:.8em;}.gac_c{line-height:normal;}form{margin-bottom:20px;}body,td,a,p,.h{font-family:arial,sans-serif}.h{color:#36c;font-size:20px}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:0px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#guser{padding-bottom:7px !important;text-align:right}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb3{color:#00c !important}.gb3{text-decoration:none}</style><script>google.y={};google.x=function(e,g){google.y[e.id]=[e,g];return false};</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="document.f.q.focus();if(document.images)new Image().src='/images/nav_logo4.png'" topmargin=3 marginheight=3><textarea id=csi style=display:none></textarea><iframe name=wgjf style="display:none"></iframe><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" class=gb1>Images</a> <a href="http://video.google.com/?hl=en&tab=wv" class=gb1>Video</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" class=gb3><u>more</u> &raquo;</a></nobr></div><div id=guser width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?hl=en&continue=http://www.google.com/">Sign in</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><center><br clear=all id=lgpd><img alt="Google" 
height=110 src="/intl/en_ALL/images/logo.gif" width=276 id=logo onload="window.lol&&lol()"><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25% align=left><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><a href="/aclk?sa=L&ai=CqVchLbNASrv7IZa68gS13KTwAc3__IMB29PoogzB2ZzZExABIMFUUK_O0JX______wFgyQaqBAlP0BcDOBRYhqw&num=1&sig=AGiWqty21CD7ixNXZILwCnH7c_3n9v2-tg&q=http://www.allforgood.org#source=hpp">Find an opportunity to volunteer</a> in your community today.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2009 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center><div id=xjsd></div><div id=xjsi><script>if(google.y)google.y.first=[];if(google.y)google.y.first=[];google.dstr=[];google.rein=[];window.setTimeout(function(){var a=document.createElement("script");a.src="/extern_js/f/CgJlbhICdXMgACswCjggQAgsKzAOOAUsKzAYOAQsKzAlOMmIASwrMCY4BCwrMCc4ACw/1t0T7hspHT4.js";(document.getElementById("xjsd")||document.body).appendChild(a)},0);
+;google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')});google.xjs&&google.j&&google.j.xi&&google.j.xi()</script></div><script>(function(){
+function a(){google.timers.load.t.ol=(new Date).getTime();google.report&&google.report(google.timers.load,{ei:google.kEI,e:google.kCSIE})}if(window.addEventListener)window.addEventListener("load",a,false);else if(window.attachEvent)window.attachEvent("onload",a);google.timers.load.t.prt=(new Date).getTime();
+})();
+</script> \ No newline at end of file
diff --git a/parser/html/java/htmlparser/ruby-gcj/test/greek.xml b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
new file mode 100644
index 0000000000..a14d23eb1a
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/test/greek.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='iso-8859-7'?>
+<root/>
diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
new file mode 100644
index 0000000000..aadd24abe6
--- /dev/null
+++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp
@@ -0,0 +1,210 @@
+#include <gcj/cni.h>
+
+#include <java/io/ByteArrayInputStream.h>
+#include <java/lang/System.h>
+#include <java/lang/Throwable.h>
+#include <java/util/ArrayList.h>
+#include <javax/xml/xpath/XPath.h>
+#include <javax/xml/xpath/XPathFactory.h>
+#include <javax/xml/xpath/XPathExpression.h>
+#include <javax/xml/xpath/XPathConstants.h>
+#include <javax/xml/parsers/DocumentBuilderFactory.h>
+#include <javax/xml/parsers/DocumentBuilder.h>
+#include <org/w3c/dom/Attr.h>
+#include <org/w3c/dom/Document.h>
+#include <org/w3c/dom/Element.h>
+#include <org/w3c/dom/NodeList.h>
+#include <org/w3c/dom/NamedNodeMap.h>
+#include <org/xml/sax/InputSource.h>
+
+#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h"
+
+#include "DomUtils.h"
+
+#include "ruby.h"
+
+using namespace java::io;
+using namespace java::lang;
+using namespace java::util;
+using namespace javax::xml::parsers;
+using namespace javax::xml::xpath;
+using namespace nu::validator::htmlparser::dom;
+using namespace org::w3c::dom;
+using namespace org::xml::sax;
+
+// Ruby class handles and interned IDs shared by the wrapper functions in
+// this file; each one is assigned in Init_validator.
+static VALUE jaxp_Document;
+static VALUE jaxp_Attr;
+static VALUE jaxp_Element;
+// "read": used to detect and read file-like parse inputs
+static ID ID_read;
+// "@doc": ivar set on wrapped elements, holding the Ruby document
+static ID ID_doc;
+// "@element": ivar set on wrapped attributes, holding the Ruby element
+static ID ID_element;
+
+// convert a Java string into a Ruby string (nil for a null reference)
+static VALUE j2r(String *string) {
+  if (string == NULL) return Qnil;
+
+  // The buffer is sized in UTF-8 bytes, but the region handed to
+  // JvGetStringUTFRegion is measured in Java chars: passing the byte count
+  // there would read past the end of any string containing non-ASCII
+  // characters.
+  jint utf_len = JvGetStringUTFLength(string);
+  char buf[utf_len > 0 ? utf_len : 1];  // never declare a zero-length array
+  JvGetStringUTFRegion(string, 0, string->length(), buf);
+
+  // buf is not NUL-terminated, so pass the length explicitly
+  return rb_str_new(buf, utf_len);
+}
+
+// convert a Ruby string into a Java string
+//
+// StringValueCStr raises a Ruby TypeError when the argument cannot be
+// converted to a String (and an ArgumentError when it contains a NUL byte)
+// instead of reading RSTRING fields out of an arbitrary object.
+static String *r2j(VALUE string) {
+  return JvNewStringUTF(StringValueCStr(string));
+}
+
+// free hook registered with each wrapped Document: removes the Java
+// Document from DomUtils' pinned set
+static void vnu_document_free(Document *pinned) {
+  DomUtils::unpin(pinned);
+}
+
+// Nu::Validator::parse( string|file )
+//
+// Parses the input with HtmlDocumentBuilder and returns it wrapped as a
+// Jaxp::Document.  Objects that respond to `read` are read into memory
+// first.  Raises TypeError if the input is not string-like; returns nil
+// (after printing the Java stack trace) if the parser throws.
+static VALUE vnu_parse(VALUE self, VALUE input) {
+  // read file-like objects into memory.  TODO: buffer such objects
+  if (rb_respond_to(input, ID_read))
+    input = rb_funcall(input, ID_read, 0);
+
+  // make sure we really hold a String before touching its RSTRING fields;
+  // done outside the try block because it may raise a Ruby exception
+  StringValue(input);
+
+  // Java exceptions must not propagate into Ruby's C frames, so handle
+  // them here the same way jaxp_parse does.
+  try {
+    HtmlDocumentBuilder *parser = new HtmlDocumentBuilder();
+
+    // convert input in to a ByteArrayInputStream
+    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);
+    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);
+    InputSource *source = new InputSource(new ByteArrayInputStream(bytes));
+
+    // parse, pin, and wrap
+    Document *doc = parser->parse(source);
+    DomUtils::pin(doc);
+    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+}
+
+// Jaxp::parse( string|file )
+//
+// Parses the input with the platform's default JAXP DocumentBuilder and
+// returns it wrapped as a Jaxp::Document.  Objects that respond to `read`
+// are read into memory first.  Raises TypeError if the input is not
+// string-like; returns nil (after printing the Java stack trace) if
+// anything on the Java side throws.
+static VALUE jaxp_parse(VALUE self, VALUE input) {
+  // read file-like objects into memory.  TODO: buffer such objects
+  if (rb_respond_to(input, ID_read))
+    input = rb_funcall(input, ID_read, 0);
+
+  // make sure we really hold a String before touching its RSTRING fields;
+  // done outside the try block because it may raise a Ruby exception
+  StringValue(input);
+
+  try {
+    // creating the factory or the builder can throw as well, so both
+    // happen inside the try block
+    DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance();
+    DocumentBuilder *parser = factory->newDocumentBuilder();
+
+    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);
+    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);
+    Document *doc = parser->parse(new ByteArrayInputStream(bytes));
+    DomUtils::pin(doc);
+    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+}
+
+
+// Jaxp::Document#encoding -- the result of Document.getXmlEncoding() as a
+// Ruby string, or nil when Java returns null
+static VALUE jaxp_document_encoding(VALUE rdoc) {
+  Document *document;
+  Data_Get_Struct(rdoc, Document, document);
+
+  String *encoding = document->getXmlEncoding();
+  return j2r(encoding);
+}
+
+// Jaxp::Document#root -- the document element wrapped as a Jaxp::Element,
+// or nil when the document has none
+static VALUE jaxp_document_root(VALUE rdoc) {
+  Document *document;
+  Data_Get_Struct(rdoc, Document, document);
+
+  Element *root = document->getDocumentElement();
+  if (root != NULL) {
+    VALUE wrapped = Data_Wrap_Struct(jaxp_Element, NULL, NULL, root);
+    // remember the owning Ruby document in the element's @doc ivar
+    rb_ivar_set(wrapped, ID_doc, rdoc);
+    return wrapped;
+  }
+  return Qnil;
+}
+
+// Jaxp::Document#xpath( expression )
+//
+// Evaluates the XPath expression against the document and returns an array
+// with one Jaxp::Element wrapper per node in the resulting node set.
+// Returns nil when the document has no document element, or (after
+// printing the Java stack trace) when compiling or evaluating the
+// expression throws.
+// NOTE(review): every selected node is wrapped as Jaxp::Element, even if
+// the expression selects non-element nodes -- confirm that is intended.
+static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) {
+  Document *jdoc;
+  Data_Get_Struct(rdoc, Document, jdoc);
+
+  if (jdoc->getDocumentElement() == NULL) return Qnil;
+
+  // convert the path before entering the try block
+  String *jpath = r2j(path);
+
+  // An invalid expression makes compile() throw; Java exceptions must not
+  // propagate into Ruby's C frames, so handle them as jaxp_parse does.
+  try {
+    XPath *xpath = XPathFactory::newInstance()->newXPath();
+    XPathExpression *expr = xpath->compile(jpath);
+    NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET);
+
+    VALUE result = rb_ary_new();
+    jint count = list->getLength();
+    for (jint i=0; i<count; i++) {
+      VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i));
+      // remember the owning Ruby document in the element's @doc ivar
+      rb_ivar_set(relement, ID_doc, rdoc);
+      rb_ary_push(result, relement);
+    }
+    return result;
+  } catch (java::lang::Throwable *ex) {
+    ex->printStackTrace();
+    return Qnil;
+  }
+}
+
+// Jaxp::Element#name -- the node name of the wrapped element as a Ruby
+// string
+static VALUE jaxp_element_name(VALUE relement) {
+  Element *element;
+  Data_Get_Struct(relement, Element, element);
+
+  String *node_name = element->getNodeName();
+  return j2r(node_name);
+}
+
+// Jaxp::Element#attributes -- hash mapping each attribute name to a
+// Jaxp::Attr wrapper
+static VALUE jaxp_element_attributes(VALUE relement) {
+  Element *element;
+  Data_Get_Struct(relement, Element, element);
+
+  VALUE hash = rb_hash_new();
+  NamedNodeMap *attrs = element->getAttributes();
+  int index = 0;
+  while (index < attrs->getLength()) {
+    Attr *attr = (Attr *) attrs->item(index);
+    VALUE wrapped = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, attr);
+    // remember the owning Ruby element in the attribute's @element ivar
+    rb_ivar_set(wrapped, ID_element, relement);
+    rb_hash_aset(hash, j2r(attr->getName()), wrapped);
+    index++;
+  }
+  return hash;
+}
+
+// Jaxp::Attr#value -- the value of the wrapped attribute as a Ruby string
+static VALUE jaxp_attribute_value(VALUE rattribute) {
+  Attr *attr;
+  Data_Get_Struct(rattribute, Attr, attr);
+
+  String *value = attr->getValue();
+  return j2r(value);
+}
+
+// Variadic function type; the wrapper functions are cast to ruby_method*
+// when they are registered in Init_validator.
+typedef VALUE (ruby_method)(...);
+
+// Extension entry point: starts the Java runtime, then defines the Jaxp and
+// Nu::Validator modules and the Jaxp::Document, Jaxp::Element and
+// Jaxp::Attr classes
+extern "C" void Init_validator() {
+  // create the Java VM, attach this thread, and initialize the Java
+  // classes named below
+  JvCreateJavaVM(NULL);
+  JvAttachCurrentThread(NULL, NULL);
+  JvInitClass(&DomUtils::class$);
+  JvInitClass(&XPathFactory::class$);
+  JvInitClass(&XPathConstants::class$);
+
+  // IDs used by the wrapper functions
+  ID_read = rb_intern("read");
+  ID_doc = rb_intern("@doc");
+  ID_element = rb_intern("@element");
+
+  // Jaxp.parse
+  VALUE jaxp_module = rb_define_module("Jaxp");
+  rb_define_singleton_method(jaxp_module, "parse",
+    (ruby_method*)&jaxp_parse, 1);
+
+  // Nu::Validator.parse
+  VALUE nu_module = rb_define_module("Nu");
+  VALUE validator_module = rb_define_module_under(nu_module, "Validator");
+  rb_define_singleton_method(validator_module, "parse",
+    (ruby_method*)&vnu_parse, 1);
+
+  // Jaxp::Document
+  jaxp_Document = rb_define_class_under(jaxp_module, "Document", rb_cObject);
+  rb_define_method(jaxp_Document, "encoding",
+    (ruby_method*)&jaxp_document_encoding, 0);
+  rb_define_method(jaxp_Document, "root",
+    (ruby_method*)&jaxp_document_root, 0);
+  rb_define_method(jaxp_Document, "xpath",
+    (ruby_method*)&jaxp_document_xpath, 1);
+
+  // Jaxp::Element
+  jaxp_Element = rb_define_class_under(jaxp_module, "Element", rb_cObject);
+  rb_define_method(jaxp_Element, "name",
+    (ruby_method*)&jaxp_element_name, 0);
+  rb_define_method(jaxp_Element, "attributes",
+    (ruby_method*)&jaxp_element_attributes, 0);
+
+  // Jaxp::Attr
+  jaxp_Attr = rb_define_class_under(jaxp_module, "Attr", rb_cObject);
+  rb_define_method(jaxp_Attr, "value",
+    (ruby_method*)&jaxp_attribute_value, 0);
+}