summaryrefslogtreecommitdiff
path: root/parser/html/java/htmlparser/ruby-gcj/validator.cpp
blob: aadd24abe60df8d806ca5ed9df2f74c95037fdbe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#include <gcj/cni.h>

#include <java/io/ByteArrayInputStream.h>
#include <java/lang/String.h>
#include <java/lang/System.h>
#include <java/lang/Throwable.h>
#include <java/util/ArrayList.h>
#include <javax/xml/xpath/XPath.h>
#include <javax/xml/xpath/XPathFactory.h>
#include <javax/xml/xpath/XPathExpression.h>
#include <javax/xml/xpath/XPathConstants.h>
#include <javax/xml/parsers/DocumentBuilderFactory.h>
#include <javax/xml/parsers/DocumentBuilder.h>
#include <org/w3c/dom/Attr.h>
#include <org/w3c/dom/Document.h>
#include <org/w3c/dom/Element.h>
#include <org/w3c/dom/NodeList.h>
#include <org/w3c/dom/NamedNodeMap.h>
#include <org/xml/sax/InputSource.h>

#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h"

#include "DomUtils.h"

#include "ruby.h"

using namespace java::io;
using namespace java::lang;
using namespace java::util;
using namespace javax::xml::parsers;
using namespace javax::xml::xpath;
using namespace nu::validator::htmlparser::dom;
using namespace org::w3c::dom;
using namespace org::xml::sax;

// Ruby classes used to wrap Java DOM objects; assigned in Init_validator.
static VALUE jaxp_Document;
static VALUE jaxp_Attr;
static VALUE jaxp_Element;
// Interned Ruby identifiers; assigned in Init_validator.
static ID ID_read;     // "read": used to detect and drain file-like inputs
static ID ID_doc;      // "@doc": ivar set on element wrappers, holds the Ruby Document
static ID ID_element;  // "@element": ivar set on attribute wrappers, holds the Ruby Element

// Convert a Java string into a Ruby string.
//
// Returns Qnil when the Java string is NULL; otherwise returns a new Ruby
// String filled with the UTF-8 bytes produced by JvGetStringUTFRegion.
static VALUE j2r(String *string) {
  if (string == NULL) return Qnil;

  // Size of the encoded form in bytes; this becomes the Ruby string length.
  jint utf_len = JvGetStringUTFLength(string);

  // Have Ruby allocate the destination buffer instead of using a
  // variable-length stack array, which is a compiler extension and can
  // overflow the stack for large strings.
  VALUE result = rb_str_new(NULL, utf_len);

  // The region length is counted in Java chars, not encoded bytes, so pass
  // the string's own length; passing the byte count would over-read any
  // string containing non-ASCII characters.
  JvGetStringUTFRegion(string, 0, string->length(), RSTRING(result)->ptr);
  return result;
}

// Convert a Ruby string into a Java string.
//
// StringValueCStr checks the argument before its bytes are read: a value
// that is not (and cannot be converted to) a String raises a Ruby TypeError,
// and a string with an embedded NUL raises ArgumentError, instead of
// dereferencing a non-String VALUE or silently truncating at the first NUL.
static String *r2j(VALUE string) {
  return JvNewStringUTF(StringValueCStr(string));
}

// Free callback handed to Data_Wrap_Struct for Document wrappers: undoes the
// DomUtils::pin performed when the document was parsed.
static void vnu_document_free(Document *doc) {
  DomUtils::unpin(doc);
}

// Nu::Validator::parse( string|file )
//
// Parses `input` with HtmlDocumentBuilder and returns the resulting document
// wrapped as a Jaxp::Document.  Objects responding to `read` are read fully
// into memory first.  If the Java side throws, the stack trace is printed and
// nil is returned, matching the behaviour of Jaxp::parse.
static VALUE vnu_parse(VALUE self, VALUE input) {
  // read file-like objects into memory.  TODO: buffer such objects
  if (rb_respond_to(input, ID_read))
    input = rb_funcall(input, ID_read, 0);

  // Raise a Ruby TypeError for non-string input rather than reading
  // RSTRING fields out of an arbitrary VALUE.
  StringValue(input);

  try {
    HtmlDocumentBuilder *parser = new HtmlDocumentBuilder();

    // convert input in to a ByteArrayInputStream
    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);
    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);
    InputSource *source = new InputSource(new ByteArrayInputStream(bytes));

    // parse, pin, and wrap; vnu_document_free unpins when Ruby frees the
    // wrapper
    Document *doc = parser->parse(source);
    DomUtils::pin(doc);
    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
  } catch (java::lang::Throwable *ex) {
    // Do not let a Java exception propagate into the Ruby interpreter.
    ex->printStackTrace();
    return Qnil;
  }
}

// Jaxp::parse( string|file )
//
// Parses `input` with the default JAXP DocumentBuilder and returns the
// resulting document wrapped as a Jaxp::Document.  Objects responding to
// `read` are read fully into memory first.  If the Java side throws, the
// stack trace is printed and nil is returned.
static VALUE jaxp_parse(VALUE self, VALUE input) {
  // read file-like objects into memory.  TODO: buffer such objects
  if (rb_respond_to(input, ID_read))
    input = rb_funcall(input, ID_read, 0);

  // Raise a Ruby TypeError for non-string input rather than reading
  // RSTRING fields out of an arbitrary VALUE.
  StringValue(input);

  try {
    // Factory and builder creation are inside the try block so that a
    // configuration failure is handled the same way as a parse failure.
    DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance();
    DocumentBuilder *parser = factory->newDocumentBuilder();

    jbyteArray bytes = JvNewByteArray(RSTRING(input)->len);
    memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len);

    // parse, pin, and wrap; vnu_document_free unpins when Ruby frees the
    // wrapper
    Document *doc = parser->parse(new ByteArrayInputStream(bytes));
    DomUtils::pin(doc);
    return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc);
  } catch (java::lang::Throwable *ex) {
    ex->printStackTrace();
    return Qnil;
  }
}


// Jaxp::Document#encoding
//
// Returns the document's XML encoding (Document::getXmlEncoding) as a Ruby
// string, or nil when Java reports no encoding.
static VALUE jaxp_document_encoding(VALUE rdoc) {
  Document *document;
  Data_Get_Struct(rdoc, Document, document);

  String *encoding = document->getXmlEncoding();
  return j2r(encoding);
}

// Jaxp::Document#root
//
// Returns the document element wrapped as a Jaxp::Element, or nil when the
// document has no document element.
static VALUE jaxp_document_root(VALUE rdoc) {
  Document *document;
  Data_Get_Struct(rdoc, Document, document);

  Element *root = document->getDocumentElement();
  if (root != NULL) {
    VALUE wrapped = Data_Wrap_Struct(jaxp_Element, NULL, NULL, root);
    // The wrapper stores its owning Ruby Document in @doc.
    rb_ivar_set(wrapped, ID_doc, rdoc);
    return wrapped;
  }
  return Qnil;
}

// Jaxp::Document#xpath( path )
//
// Compiles `path` as an XPath expression, evaluates it against the document
// as a node set, and returns a Ruby Array holding one Jaxp::Element wrapper
// per resulting node.  Returns nil when the document has no document element.
// If compiling or evaluating the expression throws on the Java side, the
// stack trace is printed and nil is returned, matching Jaxp::parse.
//
// NOTE(review): every node in the result is wrapped as Jaxp::Element, whatever
// kind of node the expression selected -- confirm callers only select
// elements.
static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) {
  Document *jdoc;
  Data_Get_Struct(rdoc, Document, jdoc);

  if (jdoc->getDocumentElement() == NULL) return Qnil;

  // Do the Ruby-side conversion before entering the Java try block.
  String *jpath = r2j(path);

  try {
    XPath *xpath = XPathFactory::newInstance()->newXPath();
    XPathExpression *expr = xpath->compile(jpath);
    NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET);

    VALUE result = rb_ary_new();
    jint count = list->getLength();  // fetch the length once, not per iteration
    for (jint i=0; i<count; i++) {
      VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i));
      // Each wrapper stores its owning Ruby Document in @doc.
      rb_ivar_set(relement, ID_doc, rdoc);
      rb_ary_push(result, relement);
    }
    return result;
  } catch (java::lang::Throwable *ex) {
    // Do not let a Java exception propagate into the Ruby interpreter.
    ex->printStackTrace();
    return Qnil;
  }
}

// Jaxp::Element#name
//
// Returns the wrapped element's node name as a Ruby string.
static VALUE jaxp_element_name(VALUE relement) {
  Element *element;
  Data_Get_Struct(relement, Element, element);

  String *node_name = element->getNodeName();
  return j2r(node_name);
}

// Jaxp::Element#attributes
//
// Returns a Ruby Hash mapping each attribute name (Ruby string) to a
// Jaxp::Attr wrapper around the corresponding Java Attr.
static VALUE jaxp_element_attributes(VALUE relement) {
  Element *element;
  Data_Get_Struct(relement, Element, element);

  VALUE by_name = rb_hash_new();
  NamedNodeMap *attrs = element->getAttributes();

  int index = 0;
  while (index < attrs->getLength()) {
    Attr *attr = (Attr *) attrs->item(index);
    VALUE wrapped = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, attr);
    // Each attribute wrapper stores its owning Ruby Element in @element.
    rb_ivar_set(wrapped, ID_element, relement);
    rb_hash_aset(by_name, j2r(attr->getName()), wrapped);
    index++;
  }
  return by_name;
}

// Jaxp::Attr#value
//
// Returns the wrapped attribute's value as a Ruby string.
static VALUE jaxp_attribute_value(VALUE rattribute) {
  Attr *attr;
  Data_Get_Struct(rattribute, Attr, attr);

  String *value = attr->getValue();
  return j2r(value);
}

// Variadic function type that the method implementations are cast to when
// they are registered with rb_define_method / rb_define_singleton_method.
typedef VALUE (ruby_method)(...);

// Extension entry point: starts the Java VM, attaches the current thread,
// initializes the Java classes whose static members are used from this file,
// and registers the Jaxp and Nu::Validator modules with their classes and
// methods.
extern "C" void Init_validator() {
  // Bring up the Java runtime before any Java class is touched.
  JvCreateJavaVM(NULL);
  JvAttachCurrentThread(NULL, NULL);
  JvInitClass(&DomUtils::class$);
  JvInitClass(&XPathFactory::class$);
  JvInitClass(&XPathConstants::class$);

  // Identifiers used by the method implementations.
  ID_read    = rb_intern("read");
  ID_doc     = rb_intern("@doc");
  ID_element = rb_intern("@element");

  // Module-level entry points.
  VALUE jaxp = rb_define_module("Jaxp");
  rb_define_singleton_method(jaxp, "parse", (ruby_method*)jaxp_parse, 1);

  VALUE nu = rb_define_module("Nu");
  VALUE validator = rb_define_module_under(nu, "Validator");
  rb_define_singleton_method(validator, "parse", (ruby_method*)vnu_parse, 1);

  // Jaxp::Document
  jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject);
  rb_define_method(jaxp_Document, "encoding",
                   (ruby_method*)jaxp_document_encoding, 0);
  rb_define_method(jaxp_Document, "root",
                   (ruby_method*)jaxp_document_root, 0);
  rb_define_method(jaxp_Document, "xpath",
                   (ruby_method*)jaxp_document_xpath, 1);

  // Jaxp::Element
  jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject);
  rb_define_method(jaxp_Element, "name",
                   (ruby_method*)jaxp_element_name, 0);
  rb_define_method(jaxp_Element, "attributes",
                   (ruby_method*)jaxp_element_attributes, 0);

  // Jaxp::Attr
  jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject);
  rb_define_method(jaxp_Attr, "value",
                   (ruby_method*)jaxp_attribute_value, 0);
}