Skip to content

Commit df15251

Browse files
author
Kyle Maxwell
committed
remote html works, some function aliases work
1 parent fc71b61 commit df15251

File tree

10 files changed

+65
-95
lines changed

10 files changed

+65
-95
lines changed

TODO

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,21 @@
1010
# - p/br support needs multicase handling
1111
# - reorganize project (at least tests, makefile.am src?!)
1212
# - unfilter inside of nested explicit scope
13+
# - functions inside magic groups?!?!
14+
- shortcut functions (need c hash table)
15+
- fix position()
1316
- memory leaks
14-
- windows, linux builds
15-
- flags?!
16-
^ - force group-before
17-
$ - force group-after
18-
! - don't group?!
19-
? - optional field
20-
- fix not()/set-difference
2117
- debugging options
22-
- fix position()
23-
- shortcut functions
18+
- check linux build
19+
- python
20+
- fix not()/set-difference
2421
- complain if empty / runtime no-match errors
2522
- CSS equations
2623
- saxon compatibility?!
27-
- functions inside magic groups?!?!
28-
- XML input converter?!
24+
- XML input converter?!
25+
- check windows build
26+
- flags?!
27+
^ - force group-before
28+
$ - force group-after
29+
! - don't group?!
30+
? - optional field

configure.ac

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,17 @@ AC_PROG_CC_C99
1111
AM_PROG_LEX
1212
AC_PROG_YACC
1313
AC_PROG_LIBTOOL
14-
# AC_CHECK_HEADER(argp.h, , AC_MSG_ERROR([could not find argp.h]))
15-
# AC_CHECK_HEADER(json/json.h, , AC_MSG_ERROR([could not find json.h]))
14+
AC_CHECK_HEADER(pcre.h, , AC_MSG_ERROR([could not find pcre.h]))
15+
AC_CHECK_HEADER(argp.h, , AC_MSG_ERROR([could not find argp.h]))
16+
AC_CHECK_HEADER(json/json.h, , AC_MSG_ERROR([could not find json.h]))
1617

17-
# AC_CHECK_HEADER(/usr/include/libxml2/libxml/HTMLtree.h, , AC_MSG_ERROR([could not find libxml2/HTMLtree.h]))
18-
# AC_CHECK_HEADER(libexslt/exslt.h, , AC_MSG_ERROR([could not find exslt.h]))
18+
AC_PATH_PROG(XML2_CONFIG, xml2-config, , [$PATH])
19+
if test x$XML2_CONFIG = x ; then
20+
AC_MSG_ERROR([libxml2 not present or not configured])
21+
else
22+
XML2_CFLAGS="`$XML2_CONFIG --cflags`"
23+
XML2_LIBS="`$XML2_CONFIG --libs`"
24+
fi
1925

2026
AC_PATH_PROG(XSLT_CONFIG, xslt-config, , [$PATH])
2127
if test x$XSLT_CONFIG = x ; then
@@ -25,16 +31,16 @@ else
2531
XSLT_LIBS="`$XSLT_CONFIG --libs`"
2632
fi
2733

28-
CPPFLAGS="$CPPFLAGS $XSLT_CFLAGS"
29-
LIBS="$LIBS $XSLT_LIBS"
34+
CPPFLAGS="$CPPFLAGS $XML2_CFLAGS $XSLT_CFLAGS"
35+
LIBS="$LIBS $XML2_LIBS $XSLT_LIBS"
3036

31-
AC_CHECK_LIB(pcre, pcre_compile)
32-
AC_CHECK_LIB(argp, argp_parse)
37+
AC_CHECK_LIB(pcre, pcre_compile, , AC_MSG_ERROR([could not find pcre]))
38+
AC_CHECK_LIB(argp, argp_parse, , AC_MSG_ERROR([could not find argp]))
3339
AC_CHECK_LIB(json, json_object_new_string, , AC_MSG_ERROR([could not find the json library]))
3440

35-
AC_CHECK_LIB(xslt, xsltApplyStylesheet)
36-
AC_CHECK_LIB(xml, htmlParseFile)
37-
AC_CHECK_LIB(exslt, exsltRegisterAll)
41+
AC_CHECK_LIB(xslt, xsltApplyStylesheet, , AC_MSG_ERROR([could not find libxslt]))
42+
AC_CHECK_LIB(xml2, htmlParseFile, , AC_MSG_ERROR([could not find libxml2 with html parsing]))
43+
AC_CHECK_LIB(exslt, exsltRegisterAll, , AC_MSG_ERROR([could not find libexslt]))
3844

3945
AC_CONFIG_FILES([Makefile])
4046
AC_OUTPUT

dexter.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ dexPtr dex_compile(char* dex_str, char* incl) {
8080
if(!dex_exslt_registered) {
8181
exsltRegisterAll();
8282
dex_register_all();
83+
init_xpath_alias();
8384
exslt_org_regular_expressions_init();
8485
dex_exslt_registered = true;
8586
}

functions.c

Lines changed: 2 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,7 @@ xsltLoadHtmlDocument(xsltTransformContextPtr ctxt, const xmlChar *URI) {
287287
ret = ret->next;
288288
}
289289

290-
doc = xsltHtmlDocLoader(URI, ctxt->dict, ctxt->parserOptions,
291-
(void *) ctxt, XSLT_LOAD_DOCUMENT);
290+
doc = htmlReadFile(URI, NULL, ctxt->parserOptions | HTML_PARSE_RECOVER);
292291

293292
if (doc == NULL)
294293
return(NULL);
@@ -316,71 +315,4 @@ xsltLoadHtmlDocument(xsltTransformContextPtr ctxt, const xmlChar *URI) {
316315

317316
ret = xsltNewDocument(ctxt, doc);
318317
return(ret);
319-
}
320-
321-
/**
322-
* xsltHtmlDocLoader:
323-
* @URI: the URI of the document to load
324-
* @dict: the dictionary to use when parsing that document
325-
* @options: parsing options, a set of xmlParserOption
326-
* @ctxt: the context, either a stylesheet or a transformation context
327-
* @type: the xsltLoadType indicating the kind of loading required
328-
*
329-
* Default function to load document not provided by the compilation or
330-
* transformation API themselves, for example when an xsl:import,
331-
* xsl:include is found at compilation time or when a document()
332-
* call is made at runtime.
333-
*
334-
* This variant builds the parser context with htmlNewParserCtxt() and
335-
* always adds HTML_PARSE_RECOVER to @options. @ctxt and @type are
336-
* marked ATTRIBUTE_UNUSED and are not read.
337-
*
334-
* Returns the pointer to the document (which will be modified and
335-
* freed by the engine later), or NULL in case of error.
336-
*/
337-
static xmlDocPtr
338-
xsltHtmlDocLoader(const xmlChar * URI, xmlDictPtr dict, int options,
339-
void *ctxt ATTRIBUTE_UNUSED,
340-
xsltLoadType type ATTRIBUTE_UNUSED)
341-
{
342-
xmlParserCtxtPtr pctxt;
343-
xmlParserInputPtr inputStream;
344-
xmlDocPtr doc;
345-
346-
347-
348-
pctxt = htmlNewParserCtxt();
349-
if (pctxt == NULL)
350-
return(NULL);
351-
/* When the caller supplies a dictionary, drop the one the new context
   already holds so it can be replaced below. */
if ((dict != NULL) && (pctxt->dict != NULL)) {
352-
xmlDictFree(pctxt->dict);
353-
pctxt->dict = NULL;
354-
}
355-
/* Install the caller's dictionary and take a reference on it for the
   parser context. */
if (dict != NULL) {
356-
pctxt->dict = dict;
357-
xmlDictReference(pctxt->dict);
358-
#ifdef WITH_XSLT_DEBUG
359-
xsltGenericDebug(xsltGenericDebugContext,
360-
"Reusing dictionary for document\n");
361-
#endif
362-
}
363-
// htmlCtxtUseOptions(pctxt, options | HTML_PARSE_RECOVER | HTML_PARSE_NOERROR );
364-
/* Obtain an input stream for URI; on failure release the context and
   report the error as NULL. */
inputStream = xmlLoadExternalEntity((const char *) URI, NULL, pctxt);
365-
if (inputStream == NULL) {
366-
xmlFreeParserCtxt(pctxt);
367-
return(NULL);
368-
}
369-
inputPush(pctxt, inputStream);
370-
/* Only fill in the directory when the context does not have one yet. */
if (pctxt->directory == NULL)
371-
pctxt->directory = xmlParserGetDirectory((const char *) URI);
372-
373-
/* NOTE(review): htmlDoRead looks like a helper internal to libxml2's
   HTMLparser.c rather than a public entry point -- confirm it is
   declared/linkable here. Its return value is ignored; the result is
   taken from pctxt->myDoc below. */
htmlDoRead(pctxt, NULL, NULL, options | HTML_PARSE_RECOVER, 1);
374-
375-
/* The parsed tree is kept only when the parser flags the input as
   well-formed; otherwise the tree is freed and NULL is returned.
   NOTE(review): presumably real-world HTML is often not well-formed even
   with HTML_PARSE_RECOVER, which would discard recovered documents --
   verify. */
if (pctxt->wellFormed) {
376-
doc = pctxt->myDoc;
377-
}
378-
else {
379-
doc = NULL;
380-
xmlFreeDoc(pctxt->myDoc);
381-
pctxt->myDoc = NULL;
382-
}
383-
/* The context is released on both paths; the returned document (if any)
   is handed to the caller. */
xmlFreeParserCtxt(pctxt);
384-
385-
return(doc);
386-
}
318+
}

functions.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
#ifndef DEX_FUNCTIONS_H_INCLUDED
2+
#define DEX_FUNCTIONS_H_INCLUDED
3+
14
#include <libxml/xpath.h>
5+
#include <libxml/HTMLparser.h>
26
#include <libxslt/xslt.h>
37
#include <libxslt/xsltInternals.h>
48
#include <libxslt/transform.h>
@@ -8,6 +12,6 @@ void dex_register_all();
812

913
static void xsltHtmlDocumentFunction(xmlXPathParserContextPtr, int);
1014
static void xsltHtmlDocumentFunctionLoadDocument(xmlXPathParserContextPtr, xmlChar*);
11-
static xsltDocumentPtr xsltLoadHtmlDocument(xsltTransformContextPtr, const xmlChar *);
12-
static xmlDocPtr xsltHtmlDocLoader(const xmlChar *, xmlDictPtr, int, void *, xsltLoadType type);
15+
xsltDocumentPtr xsltLoadHtmlDocument(xsltTransformContextPtr, const xmlChar *);
1316

17+
#endif

parser.y

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <stdlib.h>
55
#include <string.h>
66
#include "kstring.h"
7+
#include <libxml/hash.h>
78

89
#ifndef PARSER_Y_H_INCLUDED
910
#define PARSER_Y_H_INCLUDED
@@ -19,6 +20,11 @@ void prepare_parse(char*);
1920
void cleanup_parse(void);
2021
void start_debugging(void);
2122

23+
static xmlHashTablePtr alias_hash;
24+
25+
char* xpath_alias(char*);
26+
void init_xpath_alias();
27+
2228
int yyparse(void);
2329
char* myparse(char*);
2430
void answer(char*);
@@ -223,7 +229,7 @@ PrimaryExpr
223229
;
224230

225231
FunctionCall
226-
: FunctionName LPAREN Arguments RPAREN { $$ = astrcat4($1, $2, $3, $4); }
232+
: FunctionName LPAREN Arguments RPAREN { $$ = astrcat4(xpath_alias($1), $2, $3, $4); }
227233
| FunctionName LPAREN Arguments { yyerror("unclosed parenthesis"); }
228234
| FunctionName LPAREN Arguments RPAREN RPAREN { yyerror("too many parenthesis"); }
229235
;
@@ -548,6 +554,16 @@ OptS
548554

549555
%%
550556

557+
/*
 * Translate a function name through the alias table.
 *
 * Looks `key` up in alias_hash. Returns the value stored for it when an
 * entry exists, otherwise returns `key` itself. No copy is made in either
 * case: the result is either the caller's own pointer or the pointer held
 * by the table.
 *
 * NOTE(review): alias_hash is only assigned in init_xpath_alias(); confirm
 * that it has run (or that xmlHashLookup tolerates a NULL table) before the
 * parser reaches a FunctionCall.
 */
char* xpath_alias(char* key) {
558+
char* value = (char*) xmlHashLookup(alias_hash, key);
559+
/* No alias registered: hand the original name back unchanged. */
return value == NULL ? key : value;
560+
}
561+
562+
/*
 * Create the alias table and register the built-in function shortcuts.
 *
 * Assigns a freshly created hash table to alias_hash and adds one entry,
 * mapping "html" to "dex:html-document".
 *
 * NOTE(review): the result of xmlHashCreate is not checked, and a second
 * call would overwrite alias_hash without freeing the previous table --
 * confirm callers invoke this only once.
 */
void init_xpath_alias() {
563+
/* 100 is the size argument handed to xmlHashCreate. */
alias_hash = xmlHashCreate(100);
564+
xmlHashAddEntry(alias_hash, "html", "dex:html-document");
565+
}
566+
551567
char* myparse(char* string){
552568
// start_debugging();
553569
prepare_parse(string);

test/remote.dex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"title": "html('http://www.google.com/')//title"
3+
}
4+

test/remote.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<html>
2+
<body>Doesn't matter</body>
3+
</html>

test/remote.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{ "title": "Google" }

xml2json.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <libxml/parser.h>
22
#include <json/json.h>
33
#include "xml2json.h"
4+
#include <string.h>
45

56
/**
67
* Handles a simplified xml

0 commit comments

Comments
 (0)