View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2018, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(sgml,
   37          [ load_html/3,                % +Input, -DOM, +Options
   38            load_xml/3,                 % +Input, -DOM, +Options
   39            load_sgml/3,                % +Input, -DOM, +Options
   40
   41            load_sgml_file/2,           % +File, -ListOfContent
   42            load_xml_file/2,            % +File, -ListOfContent
   43            load_html_file/2,           % +File, -Document
   44
   45            load_structure/3,           % +File, -Term, +Options
   46
   47            load_dtd/2,                 % +DTD, +File
   48            load_dtd/3,                 % +DTD, +File, +Options
   49            dtd/2,                      % +Type, -DTD
   50            dtd_property/2,             % +DTD, ?Property
   51
   52            new_dtd/2,                  % +Doctype, -DTD
   53            free_dtd/1,                 % +DTD
   54            open_dtd/3,                 % +DTD, +Options, -Stream
   55
   56            new_sgml_parser/2,          % -Parser, +Options
   57            free_sgml_parser/1,         % +Parser
   58            set_sgml_parser/2,          % +Parser, +Options
   59            get_sgml_parser/2,          % +Parser, +Options
   60            sgml_parse/2,               % +Parser, +Options
   61
   62            sgml_register_catalog_file/2, % +File, +StartOrEnd
   63
   64            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   65            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   66            xml_quote_attribute/2,      % +In, -Quoted
   67            xml_quote_cdata/2,          % +In, -Quoted
   68            xml_name/1,                 % +In
   69            xml_name/2,                 % +In, +Encoding
   70
   71            xsd_number_string/2,        % ?Number, ?String
   72            xsd_time_string/3,          % ?Term, ?Type, ?String
   73
   74            xml_basechar/1,             % +Code
   75            xml_ideographic/1,          % +Code
   76            xml_combining_char/1,       % +Code
   77            xml_digit/1,                % +Code
   78            xml_extender/1,             % +Code
   79
   80            iri_xml_namespace/2,        % +IRI, -Namespace
   81            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   82            xml_is_dom/1                % +Term
   83          ]).   84:- use_module(library(lists)).   85:- use_module(library(option)).   86:- use_module(library(error)).   87:- use_module(library(iostream)).   88
   89:- meta_predicate
   90    load_structure(+, -, :),
   91    load_html(+, -, :),
   92    load_xml(+, -, :),
   93    load_sgml(+, -, :).   94
   95:- predicate_options(load_structure/3, 3,
   96                     [ charpos(integer),
   97                       cdata(oneof([atom,string])),
   98                       defaults(boolean),
   99                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
  100                       doctype(atom),
  101                       dtd(any),
  102                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  103                       entity(atom,atom),
  104                       keep_prefix(boolean),
  105                       file(atom),
  106                       line(integer),
  107                       offset(integer),
  108                       number(oneof([token,integer])),
  109                       qualify_attributes(boolean),
  110                       shorttag(boolean),
  111                       case_sensitive_attributes(boolean),
  112                       case_preserving_attributes(boolean),
  113                       system_entities(boolean),
  114                       max_memory(integer),
  115                       space(oneof([sgml,preserve,default,remove])),
  116                       xmlns(atom),
  117                       xmlns(atom,atom),
  118                       pass_to(sgml_parse/2, 2)
  119                     ]).  120:- predicate_options(load_html/3, 3,
  121                     [ pass_to(load_structure/3, 3)
  122                     ]).  123:- predicate_options(load_xml/3, 3,
  124                     [ pass_to(load_structure/3, 3)
  125                     ]).  126:- predicate_options(load_sgml/3, 3,
  127                     [ pass_to(load_structure/3, 3)
  128                     ]).  129:- predicate_options(load_dtd/3, 3,
  130                     [ dialect(oneof([sgml,xml,xmlns])),
  131                       pass_to(open/4, 4)
  132                     ]).  133:- predicate_options(sgml_parse/2, 2,
  134                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  135                            callable),
  136                       cdata(oneof([atom,string])),
  137                       content_length(integer),
  138                       document(-any),
  139                       max_errors(integer),
  140                       parse(oneof([file,element,content,declaration,input])),
  141                       source(any),
  142                       syntax_errors(oneof([quiet,print,style])),
  143                       xml_no_ns(oneof([error,quiet]))
  144                     ]).  145:- predicate_options(new_sgml_parser/2, 2,
  146                     [ dtd(any)
  147                     ]).

SGML, XML and HTML parser

This library allows you to parse SGML, XML and HTML data into a Prolog data structure. The library defines several families of predicates:

High-level predicates
Most users will only use load_html/3, load_xml/3 or load_sgml/3 to parse arbitrary input into a DOM structure. These predicates all call load_structure/3, which provides more options and may be used for processing non-standard documents.

The DOM structure can be used by library(xpath) to extract information from the document.

The low-level parser
The actual parser is written in C and consists of two parts: one for processing DTD (Document Type Definitions) and one for parsing data. The data can either be parsed to a Prolog (DOM) term or the parser can perform callbacks for the DOM events.
Utility predicates
Finally, this library provides prmitives for classifying characters and strings according to the XML specification such as xml_name/1 to verify whether an atom is a valid XML name (identifier). It also provides primitives to quote attributes and CDATA elements. */
  177:- multifile user:file_search_path/2.  178:- dynamic   user:file_search_path/2.  179
  180user:file_search_path(dtd, '.').
  181user:file_search_path(dtd, swi('library/DTD')).
  182
  183sgml_register_catalog_file(File, Location) :-
  184    prolog_to_os_filename(File, OsFile),
  185    '_sgml_register_catalog_file'(OsFile, Location).
  186
  187:- use_foreign_library(foreign(sgml2pl)).  188
  189register_catalog(Base) :-
  190    absolute_file_name(dtd(Base),
  191                           [ extensions([soc]),
  192                             access(read),
  193                             file_errors(fail)
  194                           ],
  195                           SocFile),
  196    sgml_register_catalog_file(SocFile, end).
  197
  198:- initialization
  199    ignore(register_catalog('HTML4')).  200
  201
  202                 /*******************************
  203                 *         DTD HANDLING         *
  204                 *******************************/
  205
  206/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  207Note that concurrent access to DTD objects  is not allowed, and hence we
  208will allocate and destroy them in each   thread.  Possibibly it would be
  209nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  210diagnosed to mess with the entity resolution by Fabien Todescato.
  211- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  212
  213:- thread_local
  214    current_dtd/2.  215:- volatile
  216    current_dtd/2.  217:- thread_local
  218    registered_cleanup/0.  219:- volatile
  220    registered_cleanup/0.  221
  222:- multifile
  223    dtd_alias/2.  224
  225:- create_prolog_flag(html_dialect, html5, [type(atom)]).  226
  227dtd_alias(html4, 'HTML4').
  228dtd_alias(html5, 'HTML5').
  229dtd_alias(html,  DTD) :-
  230    current_prolog_flag(html_dialect, Dialect),
  231    dtd_alias(Dialect, DTD).
 dtd(+Type, -DTD) is det
DTD is a DTD object created from the file dtd(Type). Loaded DTD objects are cached. Note that DTD objects may not be shared between threads. Therefore, dtd/2 maintains the pool of DTD objects using a thread_local predicate. DTD objects are destroyed if a thread terminates.
Errors
- existence_error(source_sink, dtd(Type))
  243dtd(Type, DTD) :-
  244    current_dtd(Type, DTD),
  245    !.
  246dtd(Type, DTD) :-
  247    new_dtd(Type, DTD),
  248    (   dtd_alias(Type, Base)
  249    ->  true
  250    ;   Base = Type
  251    ),
  252    absolute_file_name(dtd(Base),
  253                       [ extensions([dtd]),
  254                         access(read)
  255                       ], DtdFile),
  256    load_dtd(DTD, DtdFile),
  257    register_cleanup,
  258    asserta(current_dtd(Type, DTD)).
 load_dtd(+DTD, +DtdFile, +Options)
Load DtdFile into a DTD. Defined options are:
dialect(+Dialect)
Dialect to use (xml, xmlns, sgml)
encoding(+Encoding)
Encoding of DTD file
Arguments:
DTD- is a fresh DTD object, normally created using new_dtd/1.
  273load_dtd(DTD, DtdFile) :-
  274    load_dtd(DTD, DtdFile, []).
  275load_dtd(DTD, DtdFile, Options) :-
  276    sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
  277    setup_call_cleanup(
  278        open_dtd(DTD, DTDOptions, DtdOut),
  279        setup_call_cleanup(
  280            open(DtdFile, read, DtdIn, OpenOptions),
  281            copy_stream_data(DtdIn, DtdOut),
  282            close(DtdIn)),
  283        close(DtdOut)).
  284
  285split_dtd_options([], [], []).
  286split_dtd_options([H|T], [H|TD], S) :-
  287    dtd_option(H),
  288    !,
  289    split_dtd_options(T, TD, S).
  290split_dtd_options([H|T], TD, [H|S]) :-
  291    split_dtd_options(T, TD, S).
  292
  293dtd_option(dialect(_)).
 destroy_dtds
Destroy DTDs cached by this thread as they will become unreachable anyway.
  301destroy_dtds :-
  302    (   current_dtd(_Type, DTD),
  303        free_dtd(DTD),
  304        fail
  305    ;   true
  306    ).
 register_cleanup
Register cleanup of DTDs created for this thread.
  312register_cleanup :-
  313    registered_cleanup,
  314    !.
  315register_cleanup :-
  316    catch(thread_at_exit(destroy_dtds), _, true),
  317    assert(registered_cleanup).
  318
  319
  320                 /*******************************
  321                 *          EXAMINE DTD         *
  322                 *******************************/
  323
  324prop(doctype(_), _).
  325prop(elements(_), _).
  326prop(entities(_), _).
  327prop(notations(_), _).
  328prop(entity(E, _), DTD) :-
  329    (   nonvar(E)
  330    ->  true
  331    ;   '$dtd_property'(DTD, entities(EL)),
  332        member(E, EL)
  333    ).
  334prop(element(E, _, _), DTD) :-
  335    (   nonvar(E)
  336    ->  true
  337    ;   '$dtd_property'(DTD, elements(EL)),
  338        member(E, EL)
  339    ).
  340prop(attributes(E, _), DTD) :-
  341    (   nonvar(E)
  342    ->  true
  343    ;   '$dtd_property'(DTD, elements(EL)),
  344        member(E, EL)
  345    ).
  346prop(attribute(E, A, _, _), DTD) :-
  347    (   nonvar(E)
  348    ->  true
  349    ;   '$dtd_property'(DTD, elements(EL)),
  350        member(E, EL)
  351    ),
  352    (   nonvar(A)
  353    ->  true
  354    ;   '$dtd_property'(DTD, attributes(E, AL)),
  355        member(A, AL)
  356    ).
  357prop(notation(N, _), DTD) :-
  358    (   nonvar(N)
  359    ->  true
  360    ;   '$dtd_property'(DTD, notations(NL)),
  361        member(N, NL)
  362    ).
  363
  364dtd_property(DTD, Prop) :-
  365    prop(Prop, DTD),
  366    '$dtd_property'(DTD, Prop).
  367
  368
  369                 /*******************************
  370                 *             SGML             *
  371                 *******************************/
 load_structure(+Source, -ListOfContent, :Options) is det
Parse Source and return the resulting structure in ListOfContent. Source is handed to open_any/5, which allows for processing an extensible set of input sources.

A proper XML document contains only a single toplevel element whose name matches the document type. Nevertheless, a list is returned for consistency with the representation of element content.

The encoding(+Encoding) option is treated special for compatibility reasons:

  395load_structure(Spec, DOM, Options) :-
  396    sgml_open_options(Options, OpenOptions, SGMLOptions),
  397    setup_call_cleanup(
  398        open_any(Spec, read, In, Close, OpenOptions),
  399        load_structure_from_stream(In, DOM, SGMLOptions),
  400        close_any(Close)).
  401
  402sgml_open_options(Options, OpenOptions, SGMLOptions) :-
  403    Options = M:Plain,
  404    (   select_option(encoding(Encoding), Plain, NoEnc)
  405    ->  (   sgml_encoding(Encoding)
  406        ->  merge_options(NoEnc, [type(binary)], OpenOptions),
  407            SGMLOptions = Options
  408        ;   OpenOptions = Plain,
  409            SGMLOptions = M:NoEnc
  410        )
  411    ;   merge_options(Plain, [type(binary)], OpenOptions),
  412        SGMLOptions = Options
  413    ).
  414
  415sgml_encoding(Enc) :-
  416    downcase_atom(Enc, Enc1),
  417    sgml_encoding_l(Enc1).
  418
  419sgml_encoding_l('iso-8859-1').
  420sgml_encoding_l('us-ascii').
  421sgml_encoding_l('utf-8').
  422sgml_encoding_l('utf8').
  423sgml_encoding_l('iso_latin_1').
  424sgml_encoding_l('ascii').
  425
  426load_structure_from_stream(In, Term, M:Options) :-
  427    (   select_option(dtd(DTD), Options, Options1)
  428    ->  ExplicitDTD = true
  429    ;   ExplicitDTD = false,
  430        Options1 = Options
  431    ),
  432    move_front(Options1, dialect(_), Options2), % dialect sets defaults
  433    setup_call_cleanup(
  434        new_sgml_parser(Parser,
  435                        [ dtd(DTD)
  436                        ]),
  437        parse(Parser, M:Options2, TermRead, In),
  438        free_sgml_parser(Parser)),
  439    (   ExplicitDTD == true
  440    ->  (   DTD = dtd(_, DocType),
  441            dtd_property(DTD, doctype(DocType))
  442        ->  true
  443        ;   true
  444        )
  445    ;   free_dtd(DTD)
  446    ),
  447    Term = TermRead.
  448
  449move_front(Options0, Opt, Options) :-
  450    selectchk(Opt, Options0, Options1),
  451    !,
  452    Options = [Opt|Options1].
  453move_front(Options, _, Options).
  454
  455
  456parse(Parser, M:Options, Document, In) :-
  457    set_parser_options(Options, Parser, In, Options1),
  458    parser_meta_options(Options1, M, Options2),
  459    set_input_location(Parser, In),
  460    sgml_parse(Parser,
  461               [ document(Document),
  462                 source(In)
  463               | Options2
  464               ]).
  465
  466set_parser_options([], _, _, []).
  467set_parser_options([H|T], Parser, In, Rest) :-
  468    (   set_parser_option(H, Parser, In)
  469    ->  set_parser_options(T, Parser, In, Rest)
  470    ;   Rest = [H|R2],
  471        set_parser_options(T, Parser, In, R2)
  472    ).
  473
  474set_parser_option(Var, _Parser, _In) :-
  475    var(Var),
  476    !,
  477    instantiation_error(Var).
  478set_parser_option(Option, Parser, _) :-
  479    def_entity(Option, Parser),
  480    !.
  481set_parser_option(offset(Offset), _Parser, In) :-
  482    !,
  483    seek(In, Offset, bof, _).
  484set_parser_option(Option, Parser, _In) :-
  485    parser_option(Option),
  486    !,
  487    set_sgml_parser(Parser, Option).
  488set_parser_option(Name=Value, Parser, In) :-
  489    Option =.. [Name,Value],
  490    set_parser_option(Option, Parser, In).
  491
  492
  493parser_option(dialect(_)).
  494parser_option(shorttag(_)).
  495parser_option(case_sensitive_attributes(_)).
  496parser_option(case_preserving_attributes(_)).
  497parser_option(system_entities(_)).
  498parser_option(max_memory(_)).
  499parser_option(file(_)).
  500parser_option(line(_)).
  501parser_option(space(_)).
  502parser_option(number(_)).
  503parser_option(defaults(_)).
  504parser_option(doctype(_)).
  505parser_option(qualify_attributes(_)).
  506parser_option(encoding(_)).
  507parser_option(keep_prefix(_)).
  508
  509
  510def_entity(entity(Name, Value), Parser) :-
  511    get_sgml_parser(Parser, dtd(DTD)),
  512    xml_quote_attribute(Value, QValue),
  513    setup_call_cleanup(open_dtd(DTD, [], Stream),
  514                       format(Stream, '<!ENTITY ~w "~w">~n',
  515                              [Name, QValue]),
  516                       close(Stream)).
  517def_entity(xmlns(URI), Parser) :-
  518    set_sgml_parser(Parser, xmlns(URI)).
  519def_entity(xmlns(NS, URI), Parser) :-
  520    set_sgml_parser(Parser, xmlns(NS, URI)).
 parser_meta_options(+Options0, +Module, -Options)
Qualify meta-calling options to the parser.
  526parser_meta_options([], _, []).
  527parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :-
  528    !,
  529    parser_meta_options(T0, M, T).
  530parser_meta_options([H|T0], M, [H|T]) :-
  531    parser_meta_options(T0, M, T).
 set_input_location(+Parser, +In:stream) is det
Set the input location if this was not set explicitly
  538set_input_location(Parser, _In) :-
  539    get_sgml_parser(Parser, file(_)),
  540    !.
  541set_input_location(Parser, In) :-
  542    stream_property(In, file_name(File)),
  543    !,
  544    set_sgml_parser(Parser, file(File)),
  545    stream_property(In, position(Pos)),
  546    set_sgml_parser(Parser, position(Pos)).
  547set_input_location(_, _).
  548
  549                 /*******************************
  550                 *           UTILITIES          *
  551                 *******************************/
 load_sgml_file(+File, -DOM) is det
Load SGML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_sgml/3.
  560load_sgml_file(File, Term) :-
  561    load_sgml(File, Term, []).
 load_xml_file(+File, -DOM) is det
Load XML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_xml/3.
  570load_xml_file(File, Term) :-
  571    load_xml(File, Term, []).
 load_html_file(+File, -DOM) is det
Load HTML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_html/3.
  580load_html_file(File, DOM) :-
  581    load_html(File, DOM, []).
 load_html(+Input, -DOM, +Options) is det
Load HTML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
dtd(DTD)
Pass the DTD for HTML as obtained using dtd(html, DTD).
dialect(Dialect)
Current dialect from the Prolog flag html_dialect
max_errors(-1)
syntax_errors(quiet)
Most HTML encountered in the wild contains errors. Even in the context of errors, the resulting DOM term is often a reasonable guess at the intent of the author.

You may also want to use the library(http/http_open) to support loading from HTTP and HTTPS URLs. For example:

:- use_module(library(http/http_open)).
:- use_module(library(sgml)).

load_html_url(URL, DOM) :-
    load_html(URL, DOM, []).
  610load_html(File, Term, M:Options) :-
  611    current_prolog_flag(html_dialect, Dialect),
  612    dtd(Dialect, DTD),
  613    merge_options(Options,
  614                  [ dtd(DTD),
  615                    dialect(Dialect),
  616                    max_errors(-1),
  617                    syntax_errors(quiet)
  618                  ], Options1),
  619    load_structure(File, Term, M:Options1).
 load_xml(+Input, -DOM, +Options) is det
Load XML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
  629load_xml(Input, DOM, M:Options) :-
  630    merge_options(Options,
  631                  [ dialect(xml)
  632                  ], Options1),
  633    load_structure(Input, DOM, M:Options1).
 load_sgml(+Input, -DOM, +Options) is det
Load SGML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
  643load_sgml(Input, DOM, M:Options) :-
  644    merge_options(Options,
  645                  [ dialect(sgml)
  646                  ], Options1),
  647    load_structure(Input, DOM, M:Options1).
  648
  649
  650
  651                 /*******************************
  652                 *            ENCODING          *
  653                 *******************************/
 xml_quote_attribute(+In, -Quoted) is det
 xml_quote_cdata(+In, -Quoted) is det
Backward compatibility for versions that allow to specify encoding. All characters that cannot fit the encoding are mapped to XML character entities (&#dd;). Using ASCII is the safest value.
  663xml_quote_attribute(In, Quoted) :-
  664    xml_quote_attribute(In, Quoted, ascii).
  665
  666xml_quote_cdata(In, Quoted) :-
  667    xml_quote_cdata(In, Quoted, ascii).
 xml_name(+Atom) is semidet
True if Atom is a valid XML name.
  673xml_name(In) :-
  674    xml_name(In, ascii).
  675
  676
  677                 /*******************************
  678                 *    XML CHARACTER CLASSES     *
  679                 *******************************/
 xml_basechar(+CodeOrChar) is semidet
 xml_ideographic(+CodeOrChar) is semidet
 xml_combining_char(+CodeOrChar) is semidet
 xml_digit(+CodeOrChar) is semidet
 xml_extender(+CodeOrChar) is semidet
XML character classification predicates. Each of these predicates accept both a character (one-character atom) and a code (integer).
See also
- http://www.w3.org/TR/2006/REC-xml-20060816
  694                 /*******************************
  695                 *         TYPE CHECKING        *
  696                 *******************************/
 xml_is_dom(@Term) is semidet
True if term statisfies the structure as returned by load_structure/3 and friends.
  703xml_is_dom(0) :- !, fail.               % catch variables
  704xml_is_dom(List) :-
  705    is_list(List),
  706    !,
  707    xml_is_content_list(List).
  708xml_is_dom(Term) :-
  709    xml_is_element(Term).
  710
  711xml_is_content_list([]).
  712xml_is_content_list([H|T]) :-
  713    xml_is_content(H),
  714    xml_is_content_list(T).
  715
  716xml_is_content(0) :- !, fail.
  717xml_is_content(pi(Pi)) :-
  718    !,
  719    atom(Pi).
  720xml_is_content(CDATA) :-
  721    atom(CDATA),
  722    !.
  723xml_is_content(CDATA) :-
  724    string(CDATA),
  725    !.
  726xml_is_content(Term) :-
  727    xml_is_element(Term).
  728
  729xml_is_element(element(Name, Attributes, Content)) :-
  730    dom_name(Name),
  731    dom_attributes(Attributes),
  732    xml_is_content_list(Content).
  733
  734dom_name(NS:Local) :-
  735    atom(NS),
  736    atom(Local),
  737    !.
  738dom_name(Local) :-
  739    atom(Local).
  740
  741dom_attributes(0) :- !, fail.
  742dom_attributes([]).
  743dom_attributes([H|T]) :-
  744    dom_attribute(H),
  745    dom_attributes(T).
  746
  747dom_attribute(Name=Value) :-
  748    dom_name(Name),
  749    atomic(Value).
  750
  751
  752                 /*******************************
  753                 *            MESSAGES          *
  754                 *******************************/
  755:- multifile
  756    prolog:message/3.  757
  758%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  759
  760prolog:message(sgml(Parser, File, Line, Message)) -->
  761    { get_sgml_parser(Parser, dialect(Dialect))
  762    },
  763    [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
  764
  765
  766                 /*******************************
  767                 *         XREF SUPPORT         *
  768                 *******************************/
  769
  770:- multifile
  771    prolog:called_by/2.  772
  773prolog:called_by(sgml_parse(_, Options), Called) :-
  774    findall(Meta, meta_call_term(_, Meta, Options), Called).
  775
  776meta_call_term(T, G+N, Options) :-
  777    T = call(Event, G),
  778    pmember(T, Options),
  779    call_params(Event, Term),
  780    functor(Term, _, N).
  781
  782pmember(X, List) :-                     % member for partial lists
  783    nonvar(List),
  784    List = [H|T],
  785    (   X = H
  786    ;   pmember(X, T)
  787    ).
  788
  789call_params(begin, begin(tag,attributes,parser)).
  790call_params(end,   end(tag,parser)).
  791call_params(cdata, cdata(cdata,parser)).
  792call_params(pi,    pi(cdata,parser)).
  793call_params(decl,  decl(cdata,parser)).
  794call_params(error, error(severity,message,parser)).
  795call_params(xmlns, xmlns(namespace,url,parser)).
  796call_params(urlns, urlns(url,url,parser)).
  797
  798                 /*******************************
  799                 *           SANDBOX            *
  800                 *******************************/
  801
  802:- multifile
  803    sandbox:safe_primitive/1,
  804    sandbox:safe_meta_predicate/1.  805
  806sandbox:safe_meta_predicate(sgml:load_structure/3).
  807sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
  808    dtd_alias(Dialect, _).
  809sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  810sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  811sandbox:safe_primitive(sgml:xml_name(_,_)).
  812sandbox:safe_primitive(sgml:xml_basechar(_)).
  813sandbox:safe_primitive(sgml:xml_ideographic(_)).
  814sandbox:safe_primitive(sgml:xml_combining_char(_)).
  815sandbox:safe_primitive(sgml:xml_digit(_)).
  816sandbox:safe_primitive(sgml:xml_extender(_)).
  817sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  818sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  819sandbox:safe_primitive(sgml:xsd_time_string(_,_,_))