View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2003-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_db,
   37          [ rdf_version/1,              % -Version
   38
   39            rdf/3,                      % ?Subject, ?Predicate, ?Object
   40            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
   41            rdf_has/3,                  % ?Subject, +Pred, ?Obj
   42            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
   43            rdf_reachable/3,            % ?Subject, +Pred, ?Object
   44            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
   45            rdf_resource/1,             % ?Resource
   46            rdf_subject/1,              % ?Subject
   47
   48            rdf_member_property/2,      % ?Property, ?Index
   49
   50            rdf_assert/3,               % +Subject, +Predicate, +Object
   51            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
   52            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
   53            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
   54            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
   55            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
   56            rdf_set_predicate/2,        % +Predicate, +Property
   57            rdf_predicate_property/2,   % +Predicate, ?Property
   58            rdf_current_predicate/1,    % -Predicate
   59            rdf_current_literal/1,      % -Literal
   60            rdf_transaction/1,          % :Goal
   61            rdf_transaction/2,          % :Goal, +Id
   62            rdf_transaction/3,          % :Goal, +Id, +Options
   63            rdf_active_transaction/1,   % ?Id
   64
   65            rdf_monitor/2,              % :Goal, +Options
   66
   67            rdf_save_db/1,              % +File
   68            rdf_save_db/2,              % +File, +DB
   69            rdf_load_db/1,              % +File
   70            rdf_reset_db/0,
   71
   72            rdf_node/1,                 % -Id
   73            rdf_bnode/1,                % -Id
   74            rdf_is_bnode/1,             % +Id
   75
   76            rdf_is_resource/1,          % +Term
   77            rdf_is_literal/1,           % +Term
   78            rdf_literal_value/2,        % +Term, -Value
   79
   80            rdf_load/1,                 % +File
   81            rdf_load/2,                 % +File, +Options
   82            rdf_save/1,                 % +File
   83            rdf_save/2,                 % +File, +Options
   84            rdf_unload/1,               % +File
   85            rdf_unload_graph/1,         % +Graph
   86
   87            rdf_md5/2,                  % +DB, -MD5
   88            rdf_atom_md5/3,             % +Text, +Times, -MD5
   89
   90            rdf_create_graph/1,         % ?Graph
   91            rdf_graph_property/2,       % ?Graph, ?Property
   92            rdf_set_graph/2,            % +Graph, +Property
   93            rdf_graph/1,                % ?Graph
   94            rdf_source/1,               % ?File
   95            rdf_source/2,               % ?DB, ?SourceURL
   96            rdf_make/0,                 % Reload modified databases
   97            rdf_gc/0,                   % Garbage collection
   98
   99            rdf_source_location/2,      % +Subject, -Source
  100            rdf_statistics/1,           % -Key
  101            rdf_set/1,                  % +Term
  102            rdf_generation/1,           % -Generation
  103            rdf_snapshot/1,             % -Snapshot
  104            rdf_delete_snapshot/1,      % +Snapshot
  105            rdf_current_snapshot/1,     % +Snapshot
  106            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
  107
  108            rdf_save_subject/3,         % +Stream, +Subject, +DB
  109            rdf_save_header/2,          % +Out, +Options
  110            rdf_save_footer/1,          % +Out
  111
  112            rdf_equal/2,                % ?Resource, ?Resource
  113            lang_equal/2,               % +Lang1, +Lang2
  114            lang_matches/2,             % +Lang, +Pattern
  115
  116            rdf_prefix/2,               % :Alias, +URI
  117            rdf_current_prefix/2,       % :Alias, ?URI
  118            rdf_register_prefix/2,      % +Alias, +URI
  119            rdf_register_prefix/3,      % +Alias, +URI, +Options
  120            rdf_unregister_prefix/1,    % +Alias
  121            rdf_current_ns/2,           % :Alias, ?URI
  122            rdf_register_ns/2,          % +Alias, +URI
  123            rdf_register_ns/3,          % +Alias, +URI, +Options
  124            rdf_global_id/2,            % ?NS:Name, :Global
  125            rdf_global_object/2,        % +Object, :NSExpandedObject
  126            rdf_global_term/2,          % +Term, :WithExpandedNS
  127
  128            rdf_compare/3,              % -Dif, +Object1, +Object2
  129            rdf_match_label/3,          % +How, +String, +Label
  130            rdf_split_url/3,            % ?Base, ?Local, ?URL
  131            rdf_url_namespace/2,        % +URL, ?Base
  132
  133            rdf_warm_indexes/0,
  134            rdf_warm_indexes/1,         % +Indexed
  135            rdf_update_duplicates/0,
  136
  137            rdf_debug/1,                % Set verbosity
  138
  139            rdf_new_literal_map/1,      % -Handle
  140            rdf_destroy_literal_map/1,  % +Handle
  141            rdf_reset_literal_map/1,    % +Handle
  142            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
  143            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
  144            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
  145            rdf_delete_literal_map/2,   % +Handle, +Key
  146            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
  147            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
  148            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
  149
  150            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
  151            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
  152
  153            (rdf_meta)/1,               % +Heads
  154            op(1150, fx, (rdf_meta))
  155          ]).  156:- use_module(library(rdf)).  157:- use_module(library(lists)).  158:- use_module(library(shlib)).  159:- use_module(library(gensym)).  160:- use_module(library(sgml)).  161:- use_module(library(sgml_write)).  162:- use_module(library(option)).  163:- use_module(library(error)).  164:- use_module(library(uri)).  165:- use_module(library(debug)).  166:- use_module(library(apply)).  167:- use_module(library(xsdp_types)).  168:- if(exists_source(library(thread))).  169:- use_module(library(thread)).  170:- endif.  171:- use_module(library(semweb/rdf_cache)).  172:- use_module(library(semweb/rdf_prefixes)).  173
  174:- use_foreign_library(foreign(rdf_db)).  175:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
  176
  177:- meta_predicate
  178    rdf_transaction(0),
  179    rdf_transaction(0, +),
  180    rdf_transaction(0, +, +),
  181    rdf_monitor(1, +),
  182    rdf_save(+, :),
  183    rdf_load(+, :).  184
  185:- predicate_options(rdf_graph_prefixes/3, 3,
  186                     [expand(callable), filter(callable), min_count(nonneg)]).  187:- predicate_options(rdf_load/2, 2,
  188                     [ base_uri(atom),
  189                       cache(boolean),
  190                       concurrent(positive_integer),
  191                       db(atom),
  192                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
  193                       graph(atom),
  194                       if(oneof([true,changed,not_loaded])),
  195                       modified(-float),
  196                       prefixes(-list),
  197                       silent(boolean),
  198                       register_namespaces(boolean)
  199                     ]).  200:- predicate_options(rdf_save/2, 2,
  201                     [ graph(atom),
  202                       db(atom),
  203                       anon(boolean),
  204                       base_uri(atom),
  205                       write_xml_base(boolean),
  206                       convert_typed_literal(callable),
  207                       encoding(encoding),
  208                       document_language(atom),
  209                       namespaces(list(atom)),
  210                       xml_attributes(boolean),
  211                       inline(boolean)
  212                     ]).  213:- predicate_options(rdf_save_header/2, 2,
  214                     [ graph(atom),
  215                       db(atom),
  216                       namespaces(list(atom))
  217                     ]).  218:- predicate_options(rdf_save_subject/3, 3,
  219                     [ graph(atom),
  220                       base_uri(atom),
  221                       convert_typed_literal(callable),
  222                       document_language(atom)
  223                     ]).  224:- predicate_options(rdf_transaction/3, 3,
  225                     [ snapshot(any)
  226                     ]).  227
  228:- discontiguous
  229    term_expansion/2.  230
  231/** <module> Core RDF database
  232
  233The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
  234store.
  235
  236@deprecated     New applications should use library(semweb/rdf11), which
  237                provides a much more intuitive API to the RDF store, notably
  238                for handling literals.  The library(semweb/rdf11) runs
  239                currently on top of this library and both can run side-by-side
  240                in the same application.  Terms retrieved from the database
  241                however have a different shape and can not be exchanged without
  242                precautions.
  243*/
  244
  245		 /*******************************
  246		 *            PREFIXES		*
  247		 *******************************/
  248
  249% the ns/2 predicate is historically defined  in this module. We'll keep
  250% that for compatibility reasons.
  251
  252:- multifile ns/2.  253:- dynamic   ns/2.                      % ID, URL
  254
  255:- multifile
  256    rdf_prefixes:rdf_empty_prefix_cache/2.  257
  258rdf_prefixes:rdf_empty_prefix_cache(_Prefix, _IRI) :-
  259    rdf_empty_prefix_cache.
  260
  261:- rdf_meta
  262    rdf(r,r,o),
  263    rdf_has(r,r,o,r),
  264    rdf_has(r,r,o),
  265    rdf_assert(r,r,o),
  266    rdf_retractall(r,r,o),
  267    rdf(r,r,o,?),
  268    rdf_assert(r,r,o,+),
  269    rdf_retractall(r,r,o,?),
  270    rdf_reachable(r,r,o),
  271    rdf_reachable(r,r,o,+,?),
  272    rdf_update(r,r,o,t),
  273    rdf_update(r,r,o,+,t),
  274    rdf_equal(o,o),
  275    rdf_source_location(r,-),
  276    rdf_resource(r),
  277    rdf_subject(r),
  278    rdf_create_graph(r),
  279    rdf_graph(r),
  280    rdf_graph_property(r,?),
  281    rdf_set_graph(r,+),
  282    rdf_unload_graph(r),
  283    rdf_set_predicate(r, t),
  284    rdf_predicate_property(r, -),
  285    rdf_estimate_complexity(r,r,r,-),
  286    rdf_print_predicate_cloud(r,+).  287
  288%!  rdf_equal(?Resource1, ?Resource2)
  289%
  290%   Simple equality test to exploit goal-expansion
  291
  292rdf_equal(Resource, Resource).
  293
  294%!  lang_equal(+Lang1, +Lang2) is semidet.
  295%
  296%   True if two RFC language specifiers denote the same language
  297%
  298%   @see lang_matches/2.
  299
  300lang_equal(Lang, Lang) :- !.
  301lang_equal(Lang1, Lang2) :-
  302    downcase_atom(Lang1, LangCannon),
  303    downcase_atom(Lang2, LangCannon).
  304
  305%!  lang_matches(+Lang, +Pattern) is semidet.
  306%
  307%   True if Lang  matches  Pattern.   This  implements  XML language
  308%   matching  conform  RFC  4647.   Both    Lang   and  Pattern  are
  309%   dash-separated strings of  identifiers  or   (for  Pattern)  the
  310%   wildcart *. Identifiers are  matched   case-insensitive  and a *
  311%   matches any number of identifiers. A   short pattern is the same
  312%   as *.
  313
  314
  315                 /*******************************
  316                 *     BASIC TRIPLE QUERIES     *
  317                 *******************************/
  318
  319%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
  320%
  321%   Elementary query for triples. Subject   and  Predicate are atoms
  322%   representing the fully qualified URL of  the resource. Object is
  323%   either an atom representing a resource  or literal(Value) if the
  324%   object  is  a  literal  value.   If    a   value   of  the  form
  325%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
  326%   atom  using  expand_goal/2.  This  implies   you  can  use  this
  327%   construct in compiled code without paying a performance penalty.
  328%   Literal values take one of the following forms:
  329%
  330%     * Atom
  331%     If the value is a simple atom it is the textual representation
  332%     of a string literal without explicit type or language
  333%     qualifier.
  334%
  335%     * lang(LangID, Atom)
  336%     Atom represents the text of a string literal qualified with
  337%     the given language.
  338%
  339%     * type(TypeID, Value)
  340%     Used for attributes qualified using the =|rdf:datatype|=
  341%     TypeID. The Value is either the textual representation or a
  342%     natural Prolog representation. See the option
  343%     convert_typed_literal(:Convertor) of the parser. The storage
  344%     layer provides efficient handling of atoms, integers (64-bit)
  345%     and floats (native C-doubles). All other data is represented
  346%     as a Prolog record.
  347%
  348%   For literal querying purposes, Object can be of the form
  349%   literal(+Query, -Value), where Query is one of the terms below.
  350%   If the Query takes a literal argument and the value has a
  351%   numeric type numerical comparison is performed.
  352%
  353%     * plain(+Text)
  354%     Perform exact match and demand the language or type qualifiers
  355%     to match. This query is fully indexed.
  356%
  357%     * icase(+Text)
  358%     Perform a full but case-insensitive match. This query is
  359%     fully indexed.
  360%
  361%     * exact(+Text)
  362%     Same as icase(Text).  Backward compatibility.
  363%
  364%     * substring(+Text)
  365%     Match any literal that contains Text as a case-insensitive
  366%     substring. The query is not indexed on Object.
  367%
  368%     * word(+Text)
  369%     Match any literal that contains Text delimited by a non
  370%     alpha-numeric character, the start or end of the string. The
  371%     query is not indexed on Object.
  372%
  373%     * prefix(+Text)
  374%     Match any literal that starts with Text. This call is intended
  375%     for completion. The query is indexed using the skip list of
  376%     literals.
  377%
  378%     * ge(+Literal)
  379%     Match any literal that is equal or larger then Literal in the
  380%     ordered set of literals.
  381%
  382%     * gt(+Literal)
  383%     Match any literal that is larger then Literal in the ordered set
  384%     of literals.
  385%
  386%     * eq(+Literal)
  387%     Match any literal that is equal to Literal in the ordered set
  388%     of literals.
  389%
  390%     * le(+Literal)
  391%     Match any literal that is equal or smaller then Literal in the
  392%     ordered set of literals.
  393%
  394%     * lt(+Literal)
  395%     Match any literal that is smaller then Literal in the ordered set
  396%     of literals.
  397%
  398%     * between(+Literal1, +Literal2)
  399%     Match any literal that is between Literal1 and Literal2 in the
  400%     ordered set of literals. This may include both Literal1 and
  401%     Literal2.
  402%
  403%     * like(+Pattern)
  404%     Match any literal that matches Pattern case insensitively,
  405%     where the `*' character in Pattern matches zero or more
  406%     characters.
  407%
  408%   Backtracking never returns duplicate triples.  Duplicates can be
  409%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
  410%   if called with improper arguments.  If   rdf/3  is called with a
  411%   term  literal(_)  as  Subject  or   Predicate  object  it  fails
  412%   silently.  This  allows   for   graph    matching   goals   like
  413%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
  414
  415%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
  416%
  417%   As rdf/3 but in addition query  the   graph  to which the triple
  418%   belongs. Unlike rdf/3, this predicate does not remove duplicates
  419%   from the result set.
  420%
  421%   @param Source is a term Graph:Line.  If Source is instatiated,
  422%   passing an atom is the same as passing Atom:_.
  423
  424
  425%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
  426%
  427%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
  428%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
  429%   predicates   declared   using   rdf_set_predicate/2   with   the
  430%   =inverse_of= property.
  431
  432%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
  433%
  434%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
  435%   predicate that makes this relation   true. RealPredicate must be
  436%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
  437%   match is found, RealPredicate is the term inverse_of(Pred).
  438
  439%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
  440%
  441%   Is true if Object can  be   reached  from  Subject following the
  442%   transitive predicate Predicate or a  sub-property thereof, while
  443%   repecting the symetric(true) or inverse_of(P2) properties.
  444%
  445%   If used with either Subject or  Object unbound, it first returns
  446%   the origin, followed by  the   reachable  nodes  in breath-first
  447%   search-order. The implementation internally   looks one solution
  448%   ahead and succeeds deterministically on  the last solution. This
  449%   predicate never generates the same  node   twice  and  is robust
  450%   against cycles in the transitive relation.
  451%
  452%   With all arguments instantiated,   it succeeds deterministically
  453%   if a path can be found from  Subject to Object. Searching starts
  454%   at Subject, assuming the branching factor   is normally lower. A
  455%   call  with  both  Subject   and    Object   unbound   raises  an
  456%   instantiation  error.  The  following    example  generates  all
  457%   subclasses of rdfs:Resource:
  458%
  459%     ==
  460%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
  461%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
  462%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
  463%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
  464%     ...
  465%     ==
  466
  467
  468%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
  469%
  470%   Same as rdf_reachable/3, but in addition, MaxD limits the number
  471%   of edges expanded and D is   unified with the `distance' between
  472%   Subject and Object. Distance 0 means  Subject and Object are the
  473%   same resource. MaxD can be the  constant =infinite= to impose no
  474%   distance-limit.
  475
  476%!  rdf_subject(?Resource) is nondet.
  477%
  478%   True if Resource appears as a   subject. This query respects the
  479%   visibility rules implied by the logical update view.
  480%
  481%   @see rdf_resource/1.
  482
  483rdf_subject(Resource) :-
  484    rdf_resource(Resource),
  485    ( rdf(Resource, _, _) -> true ).
  486
  487%!  rdf_resource(?Resource) is nondet.
  488%
  489%   True when Resource is a resource used as a subject or object in
  490%   a triple.
  491%
  492%   This predicate is primarily intended  as   a  way to process all
  493%   resources without processing resources twice.   The user must be
  494%   aware that some of the returned resources  may not appear in any
  495%   _visible_ triple.
  496
  497
  498                 /*******************************
  499                 *     TRIPLE MODIFICATIONS     *
  500                 *******************************/
  501
  502%!  rdf_assert(+Subject, +Predicate, +Object) is det.
  503%
  504%   Assert a new triple into  the   database.  This is equivalent to
  505%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
  506%   resources. Object is either a resource or a term literal(Value).
  507%   See rdf/3 for an explanation  of   Value  for typed and language
  508%   qualified literals. All arguments  are   subject  to  name-space
  509%   expansion. Complete duplicates (including  the   same  graph and
  510%   `line' and with a compatible `lifespan')   are  not added to the
  511%   database.
  512
  513%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
  514%
  515%   As rdf_assert/3, adding the  predicate   to  the indicated named
  516%   graph.
  517%
  518%   @param Graph is either the name of a   graph (an atom) or a term
  519%   Graph:Line, where Line is an integer that denotes a line number.
  520
  521%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
  522%
  523%   Remove   all   matching   triples   from    the   database.   As
  524%   rdf_retractall/4 using an unbound graph.
  525
  526%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
  527%
  528%   As rdf_retractall/3, also matching Graph.   This  is particulary
  529%   useful to remove all triples coming from a loaded file. See also
  530%   rdf_unload/1.
  531
  532%!  rdf_update(+Subject, +Predicate, +Object, +Action) is det.
  533%
  534%   Replaces one of  the  three  fields   on  the  matching  triples
  535%   depending on Action:
  536%
  537%     * subject(Resource)
  538%     Changes the first field of the triple.
  539%     * predicate(Resource)
  540%     Changes the second field of the triple.
  541%     * object(Object)
  542%     Changes the last field of the triple to the given resource or
  543%     literal(Value).
  544%     * graph(Graph)
  545%     Moves the triple from its current named graph to Graph.
  546
  547%!  rdf_update(+Subject, +Predicate, +Object, +Graph, +Action) is det
  548%
  549%   As rdf_update/4 but allows for specifying the graph.
  550
  551
  552                 /*******************************
  553                 *          COLLECTIONS         *
  554                 *******************************/
  555
  556%!  rdf_member_property(?Prop, ?Index)
  557%
  558%   Deal with the rdf:_1, ... properties.
  559
  560term_expansion(member_prefix(x),
  561               member_prefix(Prefix)) :-
  562    rdf_db:ns(rdf, NS),
  563    atom_concat(NS, '_', Prefix).
  564member_prefix(x).
  565
  566rdf_member_property(P, N) :-
  567    integer(N),
  568    !,
  569    member_prefix(Prefix),
  570    atom_concat(Prefix, N, P).
  571rdf_member_property(P, N) :-
  572    member_prefix(Prefix),
  573    atom_concat(Prefix, Sub, P),
  574    atom_number(Sub, N).
  575
  576
  577                 /*******************************
  578                 *      ANONYMOUS SUBJECTS      *
  579                 *******************************/
  580
  581%!  rdf_node(-Id)
  582%
  583%   Generate a unique blank node identifier for a subject.
  584%
  585%   @deprecated     New code should use rdf_bnode/1.
  586
  587rdf_node(Resource) :-
  588    rdf_bnode(Resource).
  589
  590%!  rdf_bnode(-Id)
  591%
  592%   Generate a unique anonymous identifier for a subject.
  593
  594rdf_bnode(Value) :-
  595    repeat,
  596    gensym('_:genid', Value),
  597    \+ rdf(Value, _, _),
  598    \+ rdf(_, _, Value),
  599    \+ rdf(_, Value, _),
  600    !.
  601
  602
  603
  604                 /*******************************
  605                 *             TYPES            *
  606                 *******************************/
  607
  608%!  rdf_is_bnode(+Id)
  609%
  610%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
  611%   resource). A blank node is represented   as  an atom that starts
  612%   with =|_:|=. For backward compatibility   reason, =|__|= is also
  613%   considered to be a blank node.
  614%
  615%   @see rdf_bnode/1.
  616
  617%!  rdf_is_resource(@Term) is semidet.
  618%
  619%   True if Term is an RDF  resource.   Note  that  this is merely a
  620%   type-test; it does not mean  this   resource  is involved in any
  621%   triple.  Blank nodes are also considered resources.
  622%
  623%   @see rdf_is_bnode/1
  624
  625rdf_is_resource(Term) :-
  626    atom(Term).
  627
  628%!  rdf_is_literal(@Term) is semidet.
  629%
  630%   True if Term is an RDF literal object. Currently only checks for
  631%   groundness and the literal functor.
  632
  633rdf_is_literal(literal(Value)) :-
  634    ground(Value).
  635
  636                 /*******************************
  637                 *             LITERALS         *
  638                 *******************************/
  639
  640%!  rdf_current_literal(-Literal) is nondet.
  641%
  642%   True when Literal is a currently  known literal. Enumerates each
  643%   unique literal exactly once. Note that   it is possible that the
  644%   literal only appears in already deleted triples. Deleted triples
  645%   may be locked due to active   queries, transactions or snapshots
  646%   or may not yet be reclaimed by the garbage collector.
  647
  648
  649%!  rdf_literal_value(+Literal, -Value) is semidet.
  650%
  651%   True when value is  the   appropriate  Prolog  representation of
  652%   Literal in the RDF _|value space|_.  Current mapping:
  653%
  654%     | Plain literals              | Atom                    |
  655%     | Language tagged literal     | Atom holding plain text |
  656%     | xsd:string                  | Atom                    |
  657%     | rdf:XMLLiteral              | XML DOM Tree            |
  658%     | Numeric XSD type            | Number                  |
  659%
  660%   @tbd    Well, this is the long-term idea.
  661%   @tbd    Add mode (-,+)
  662
  663:- rdf_meta
  664    rdf_literal_value(o, -),
  665    typed_value(r, +, -),
  666    numeric_value(r, +, -).  667
  668rdf_literal_value(literal(String), Value) :-
  669    atom(String),
  670    !,
  671    Value = String.
  672rdf_literal_value(literal(lang(_Lang, String)), String).
  673rdf_literal_value(literal(type(Type, String)), Value) :-
  674    typed_value(Type, String, Value).
  675
  676typed_value(Numeric, String, Value) :-
  677    xsdp_numeric_uri(Numeric, NumType),
  678    !,
  679    numeric_value(NumType, String, Value).
  680typed_value(xsd:string, String, String).
  681typed_value(rdf:'XMLLiteral', Value, DOM) :-
  682    (   atom(Value)
  683    ->  setup_call_cleanup(
  684            ( atom_to_memory_file(Value, MF),
  685              open_memory_file(MF, read, In, [free_on_close(true)])
  686            ),
  687            load_structure(stream(In), DOM, [dialect(xml)]),
  688            close(In))
  689    ;   DOM = Value
  690    ).
  691
  692numeric_value(xsd:integer, String, Value) :-
  693    atom_number(String, Value),
  694    integer(Value).
  695numeric_value(xsd:float, String, Value) :-
  696    atom_number(String, Number),
  697    Value is float(Number).
  698numeric_value(xsd:double, String, Value) :-
  699    atom_number(String, Number),
  700    Value is float(Number).
  701numeric_value(xsd:decimal, String, Value) :-
  702    atom_number(String, Value).
  703
  704
  705                 /*******************************
  706                 *            SOURCE            *
  707                 *******************************/
  708
  709%!  rdf_source_location(+Subject, -Location) is nondet.
  710%
  711%   True when triples for Subject are loaded from Location.
  712%
  713%   @param Location is a term File:Line.
  714
  715rdf_source_location(Subject, Source) :-
  716    findall(Source, rdf(Subject, _, _, Source), Sources),
  717    sort(Sources, Unique),
  718    member(Source, Unique).
  719
  720
  721                 /*******************************
  722                 *       GARBAGE COLLECT        *
  723                 *******************************/
  724
  725%!  rdf_create_gc_thread
  726%
  727%   Create the garbage collection thread.
  728
  729:- public
  730    rdf_create_gc_thread/0.  731
  732rdf_create_gc_thread :-
  733    thread_create(rdf_gc_loop, _,
  734                  [ alias('__rdf_GC')
  735                  ]).
  736
  737%!  rdf_gc_loop
  738%
  739%   Take care of running the RDF garbage collection.  This predicate
  740%   is called from a thread started by creating the RDF DB.
  741
  742rdf_gc_loop :-
  743    catch(rdf_gc_loop(0), E, recover_gc(E)).
  744
  745recover_gc('$aborted') :-
  746    !,
  747    thread_self(Me),
  748    thread_detach(Me).
  749recover_gc(Error) :-
  750    print_message(error, Error),
  751    rdf_gc_loop.
  752
  753rdf_gc_loop(CPU) :-
  754    repeat,
  755    (   consider_gc(CPU)
  756    ->  rdf_gc(CPU1),
  757        sleep(CPU1)
  758    ;   sleep(0.1)
  759    ),
  760    fail.
  761
  762%!  rdf_gc(-CPU) is det.
  763%
  764%   Run RDF GC one time. CPU is  the   amount  of CPU time spent. We
  765%   update this in Prolog because portable access to thread specific
  766%   CPU is really hard in C.
  767
  768rdf_gc(CPU) :-
  769    statistics(cputime, CPU0),
  770    (   rdf_gc_
  771    ->  statistics(cputime, CPU1),
  772        CPU is CPU1-CPU0,
  773        rdf_add_gc_time(CPU)
  774    ;   CPU = 0.0
  775    ).
  776
  777%!  rdf_gc is det.
  778%
  779%   Run the RDF-DB garbage collector until no   garbage  is left and all
  780%   tables are fully optimized. Under normal operation a seperate thread
  781%   with identifier =|__rdf_GC|= performs garbage  collection as long as
  782%   it is considered `useful'.
  783%
  784%   Using rdf_gc/0 should  only  be  needed   to  ensure  a  fully clean
  785%   database for analysis purposes such as leak detection.
  786
  787rdf_gc :-
  788    has_garbage,
  789    !,
  790    rdf_gc(_),
  791    rdf_gc.
  792rdf_gc.
  793
  794%!  has_garbage is semidet.
  795%
  796%   True if there is something to gain using GC.
  797
  798has_garbage :-
  799    rdf_gc_info_(Info),
  800    has_garbage(Info),
  801    !.
  802
  803has_garbage(Info) :- arg(2, Info, Garbage),     Garbage > 0.
  804has_garbage(Info) :- arg(3, Info, Reindexed),   Reindexed > 0.
  805has_garbage(Info) :- arg(4, Info, Optimizable), Optimizable > 0.
  806
  807%!  consider_gc(+CPU) is semidet.
  808%
  809%   @param CPU is the amount of CPU time spent in the most recent
  810%   GC.
  811
  812consider_gc(_CPU) :-
  813    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
  814                             Garbage,       % Garbage triples in DB
  815                             Reindexed,     % Reindexed & not reclaimed
  816                             Optimizable,   % Non-optimized tables
  817                             _KeepGen,      % Oldest active generation
  818                             _LastGCGen,    % Oldest active gen at last GC
  819                             _ReindexGen,
  820                             _LastGCReindexGen))
  821    ->  (   (Garbage+Reindexed) * 5 > Triples
  822        ;   Optimizable > 4
  823        )
  824    ;   print_message(error, rdf(invalid_gc_info)),
  825        sleep(10)
  826    ),
  827    !.
  828
  829
  830                 /*******************************
  831                 *           STATISTICS         *
  832                 *******************************/
  833
  834%!  rdf_statistics(?KeyValue) is nondet.
  835%
  836%   Obtain statistics on the RDF database.  Defined statistics are:
  837%
  838%     * graphs(-Count)
  839%     Number of named graphs
  840%
  841%     * triples(-Count)
  842%     Total number of triples in the database.  This is the number
  843%     of asserted triples minus the number of retracted ones.  The
  844%     number of _visible_ triples in a particular context may be
  845%     different due to visibility rules defined by the logical
  846%     update view and transaction isolation.
  847%
  848%     * resources(-Count)
  849%     Number of resources that appear as subject or object in a
  850%     triple.  See rdf_resource/1.
  851%
  852%     * properties(-Count)
  853%     Number of current predicates.  See rdf_current_predicate/1.
  854%
  855%     * literals(-Count)
  856%     Number of current literals.  See rdf_current_literal/1.
  857%
  858%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
  859%     Information about the garbage collector.
  860%
  861%     * searched_nodes(-Count)
  862%     Number of nodes expanded by rdf_reachable/3 and
  863%     rdf_reachable/5.
  864%
  865%     * lookup(rdf(S,P,O,G), Count)
  866%     Number of queries that have been performed for this particular
  867%     instantiation pattern.  Each of S,P,O,G is either + or -.
  868%     Fails in case the number of performed queries is zero.
  869%
  870%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
  871%     Statistics on the index for this pattern.  Indices are created
  872%     lazily on the first relevant query.
  873%
  874%     * triples_by_graph(Graph, Count)
  875%     This statistics is produced for each named graph. See
  876%     =triples= for the interpretation of this value.
  877
  878rdf_statistics(graphs(Count)) :-
  879    rdf_statistics_(graphs(Count)).
  880rdf_statistics(triples(Count)) :-
  881    rdf_statistics_(triples(Count)).
  882rdf_statistics(duplicates(Count)) :-
  883    rdf_statistics_(duplicates(Count)).
  884rdf_statistics(lingering(Count)) :-
  885    rdf_statistics_(lingering(Count)).
  886rdf_statistics(resources(Count)) :-
  887    rdf_statistics_(resources(Count)).
  888rdf_statistics(properties(Count)) :-
  889    rdf_statistics_(predicates(Count)).
  890rdf_statistics(literals(Count)) :-
  891    rdf_statistics_(literals(Count)).
  892rdf_statistics(gc(Count, Reclaimed, Reindexed, Time)) :-
  893    rdf_statistics_(gc(Count, Reclaimed, Reindexed, Time)).
  894rdf_statistics(searched_nodes(Count)) :-
  895    rdf_statistics_(searched_nodes(Count)).
  896rdf_statistics(lookup(Index, Count)) :-
  897    functor(Indexed, indexed, 16),
  898    rdf_statistics_(Indexed),
  899    index(Index, I),
  900    Arg is I + 1,
  901    arg(Arg, Indexed, Count),
  902    Count \== 0.
  903rdf_statistics(hash_quality(Index, Size, Quality,Optimize)) :-
  904    rdf_statistics_(hash_quality(List)),
  905    member(hash(Place,Size,Quality,Optimize), List),
  906    index(Index, Place).
  907rdf_statistics(triples_by_graph(Graph, Count)) :-
  908    rdf_graph_(Graph, Count).
  909
  910index(rdf(-,-,-,-), 0).
  911index(rdf(+,-,-,-), 1).
  912index(rdf(-,+,-,-), 2).
  913index(rdf(+,+,-,-), 3).
  914index(rdf(-,-,+,-), 4).
  915index(rdf(+,-,+,-), 5).
  916index(rdf(-,+,+,-), 6).
  917index(rdf(+,+,+,-), 7).
  918
  919index(rdf(-,-,-,+), 8).
  920index(rdf(+,-,-,+), 9).
  921index(rdf(-,+,-,+), 10).
  922index(rdf(+,+,-,+), 11).
  923index(rdf(-,-,+,+), 12).
  924index(rdf(+,-,+,+), 13).
  925index(rdf(-,+,+,+), 14).
  926index(rdf(+,+,+,+), 15).
  927
  928
  929                 /*******************************
  930                 *           PREDICATES         *
  931                 *******************************/
  932
  933%!  rdf_current_predicate(?Predicate) is nondet.
  934%
  935%   True when Predicate is a   currently known predicate. Predicates
  936%   are created if a triples is created  that uses this predicate or
  937%   a property of the predicate   is  set using rdf_set_predicate/2.
  938%   The predicate may (no longer) have triples associated with it.
  939%
  940%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
  941%   not automatically included in the  result-set of this predicate,
  942%   while _all_ resources that appear as   the  second argument of a
  943%   triple _are_ included.
  944%
  945%   @see rdf_predicate_property/2.
  946
  947rdf_current_predicate(P, DB) :-
  948    rdf_current_predicate(P),
  949    (   rdf(_,P,_,DB)
  950    ->  true
  951    ).
  952
  953%!  rdf_predicate_property(?Predicate, ?Property)
  954%
  955%   Query properties of  a  defined   predicate.  Currently  defined
  956%   properties are given below.
  957%
  958%     * symmetric(Bool)
  959%     True if the predicate is defined to be symetric. I.e., {A} P
  960%     {B} implies {B} P {A}. Setting symmetric is equivalent to
  961%     inverse_of(Self).
  962%
  963%     * inverse_of(Inverse)
  964%     True if this predicate is the inverse of Inverse. This
  965%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
  966%     rdf_reachable/5.
  967%
  968%     * transitive(Bool)
  969%     True if this predicate is transitive. This predicate is
  970%     currently not used. It might be used to make rdf_has/3 imply
  971%     rdf_reachable/3 for transitive predicates.
  972%
  973%     * triples(Triples)
  974%     Unify Triples with the number of existing triples using this
  975%     predicate as second argument. Reporting the number of triples
  976%     is intended to support query optimization.
  977%
  978%     * rdf_subject_branch_factor(-Float)
  979%     Unify Float with the average number of triples associated with
  980%     each unique value for the subject-side of this relation. If
  981%     there are no triples the value 0.0 is returned. This value is
  982%     cached with the predicate and recomputed only after
  983%     substantial changes to the triple set associated to this
  984%     relation. This property is intended for path optimalisation
  985%     when solving conjunctions of rdf/3 goals.
  986%
  987%     * rdf_object_branch_factor(-Float)
  988%     Unify Float with the average number of triples associated with
  989%     each unique value for the object-side of this relation. In
  990%     addition to the comments with the subject_branch_factor
  991%     property, uniqueness of the object value is computed from the
  992%     hash key rather than the actual values.
  993%
  994%     * rdfs_subject_branch_factor(-Float)
  995%     Same as =rdf_subject_branch_factor=, but also considering
  996%     triples of `subPropertyOf' this relation. See also rdf_has/3.
  997%
  998%     * rdfs_object_branch_factor(-Float)
  999%     Same as =rdf_object_branch_factor=, but also considering
 1000%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1001%
 1002%   @see rdf_set_predicate/2.
 1003
 1004rdf_predicate_property(P, Prop) :-
 1005    var(P),
 1006    !,
 1007    rdf_current_predicate(P),
 1008    rdf_predicate_property_(P, Prop).
 1009rdf_predicate_property(P, Prop) :-
 1010    rdf_predicate_property_(P, Prop).
 1011
 1012%!  rdf_set_predicate(+Predicate, +Property) is det.
 1013%
 1014%   Define a property of  the   predicate.  This predicate currently
 1015%   supports the following properties:
 1016%
 1017%       - symmetric(+Boolean)
 1018%       Set/unset the predicate as being symmetric.  Using
 1019%       symmetric(true) is the same as inverse_of(Predicate),
 1020%       i.e., creating a predicate that is the inverse of
 1021%       itself.
 1022%       - transitive(+Boolean)
 1023%       Sets the transitive property.
 1024%       - inverse_of(+Predicate2)
 1025%       Define Predicate as the inverse of Predicate2. An inverse
 1026%       relation is deleted using inverse_of([]).
 1027%
 1028%   The `transitive` property is currently not used. The `symmetric`
 1029%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
 1030%   rdf_reachable/3.
 1031%
 1032%   @tbd    Maintain these properties based on OWL triples.
 1033
 1034
 1035                 /*******************************
 1036                 *            SNAPSHOTS         *
 1037                 *******************************/
 1038
 1039%!  rdf_snapshot(-Snapshot) is det.
 1040%
 1041%   Take a snapshot of the current state   of  the RDF store. Later,
 1042%   goals may be executed in the  context   of  the database at this
 1043%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
 1044%   snapshot created outside  a  transaction   exists  until  it  is
 1045%   deleted. Snapshots taken inside a transaction   can only be used
 1046%   inside this transaction.
 1047
 1048%!  rdf_delete_snapshot(+Snapshot) is det.
 1049%
 1050%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
 1051%   call, resources used for maintaining the snapshot become subject
 1052%   to garbage collection.
 1053
 1054%!  rdf_current_snapshot(?Term) is nondet.
 1055%
 1056%   True when Term is a currently known snapshot.
 1057%
 1058%   @bug    Enumeration of snapshots is slow.
 1059
 1060rdf_current_snapshot(Term) :-
 1061    current_blob(Term, rdf_snapshot).
 1062
 1063
 1064                 /*******************************
 1065                 *          TRANSACTION         *
 1066                 *******************************/
 1067
 1068%!  rdf_transaction(:Goal) is semidet.
 1069%
 1070%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
 1071
 1072%!  rdf_transaction(:Goal, +Id) is semidet.
 1073%
 1074%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
 1075
 1076%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
 1077%
 1078%   Run Goal in an RDF  transaction.   Compared to the ACID model,
 1079%   RDF transactions have the following properties:
 1080%
 1081%     1. Modifications inside the transactions become all atomically
 1082%        visible to the outside world if Goal succeeds or remain
 1083%        invisible if Goal fails or throws an exception.  I.e.,
 1084%        the _atomicy_ property is fully supported.
 1085%     2. _Consistency_ is not guaranteed. Later versions may
 1086%        implement consistency constraints that will be checked
 1087%        serialized just before the actual commit of a transaction.
 1088%     3. Concurrently executing transactions do not infuence each
 1089%        other.  I.e., the _isolation_ property is fully supported.
 1090%     4. _Durability_ can be activated by loading
 1091%        library(semweb/rdf_persistency).
 1092%
 1093%   Processed options are:
 1094%
 1095%     * snapshot(+Snapshot)
 1096%     Execute Goal using the state of the RDF store as stored in
 1097%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
 1098%     atom =true=, which implies that an anonymous snapshot is
 1099%     created at the current state of the store.  Modifications
 1100%     due to executing Goal are only visible to Goal.
 1101
 1102rdf_transaction(Goal) :-
 1103    rdf_transaction(Goal, user, []).
 1104rdf_transaction(Goal, Id) :-
 1105    rdf_transaction(Goal, Id, []).
 1106
 1107%!  rdf_active_transaction(?Id) is nondet.
 1108%
 1109%   True if Id is the identifier of  a transaction in the context of
 1110%   which  this  call  is  executed.  If  Id  is  not  instantiated,
 1111%   backtracking yields transaction identifiers   starting  with the
 1112%   innermost nested transaction. Transaction   identifier terms are
 1113%   not copied, need not be ground   and  can be instantiated during
 1114%   the transaction.
 1115
 1116rdf_active_transaction(Id) :-
 1117    rdf_active_transactions_(List),
 1118    member(Id, List).
 1119
 1120%!  rdf_monitor(:Goal, +Options)
 1121%
 1122%   Call Goal if specified actions occur on the database.
 1123
 1124rdf_monitor(Goal, Options) :-
 1125    monitor_mask(Options, 0xffff, Mask),
 1126    rdf_monitor_(Goal, Mask).
 1127
 1128monitor_mask([], Mask, Mask).
 1129monitor_mask([H|T], Mask0, Mask) :-
 1130    update_mask(H, Mask0, Mask1),
 1131    monitor_mask(T, Mask1, Mask).
 1132
 1133update_mask(-X, Mask0, Mask) :-
 1134    !,
 1135    monitor_mask(X, M),
 1136    Mask is Mask0 /\ \M.
 1137update_mask(+X, Mask0, Mask) :-
 1138    !,
 1139    monitor_mask(X, M),
 1140    Mask is Mask0 \/ M.
 1141update_mask(X, Mask0, Mask) :-
 1142    monitor_mask(X, M),
 1143    Mask is Mask0 \/ M.
 1144
 1145%!  monitor_mask(Name, Mask)
 1146%
 1147%   Mask bit for the monitor events.  Note that this must be kept
 1148%   consistent with the enum broadcast_id defined in rdf_db.c
 1149
 1150                                        % C-defined broadcasts
 1151monitor_mask(assert,       0x0001).
 1152monitor_mask(assert(load), 0x0002).
 1153monitor_mask(retract,      0x0004).
 1154monitor_mask(update,       0x0008).
 1155monitor_mask(new_literal,  0x0010).
 1156monitor_mask(old_literal,  0x0020).
 1157monitor_mask(transaction,  0x0040).
 1158monitor_mask(load,         0x0080).
 1159monitor_mask(create_graph, 0x0100).
 1160monitor_mask(reset,        0x0200).
 1161                                        % prolog defined broadcasts
 1162monitor_mask(parse,        0x1000).
 1163monitor_mask(unload,       0x1000).     % FIXME: Duplicate
 1164                                        % mask for all
 1165monitor_mask(all,          0xffff).
 1166
 1167%rdf_broadcast(Term, MaskName) :-
 1168%%      monitor_mask(MaskName, Mask),
 1169%%      rdf_broadcast_(Term, Mask).
 1170
 1171
 1172                 /*******************************
 1173                 *            WARM              *
 1174                 *******************************/
 1175
 1176%!  rdf_warm_indexes
 1177%
 1178%   Warm all indexes.  See rdf_warm_indexes/1.
 1179
 1180rdf_warm_indexes :-
 1181    findall(Index, rdf_index(Index), Indexes),
 1182    rdf_warm_indexes(Indexes).
 1183
 1184rdf_index(s).
 1185rdf_index(p).
 1186rdf_index(o).
 1187rdf_index(sp).
 1188rdf_index(o).
 1189rdf_index(po).
 1190rdf_index(spo).
 1191rdf_index(g).
 1192rdf_index(sg).
 1193rdf_index(pg).
 1194
 1195%!  rdf_warm_indexes(+Indexes) is det.
 1196%
 1197%   Create the named indexes.  Normally,   the  RDF database creates
 1198%   indexes on lazily the first time they are needed. This predicate
 1199%   serves two purposes: it provides an   explicit  way to make sure
 1200%   that the required indexes  are   present  and  creating multiple
 1201%   indexes at the same time is more efficient.
 1202
 1203
 1204                 /*******************************
 1205                 *          DUPLICATES          *
 1206                 *******************************/
 1207
 1208%!  rdf_update_duplicates is det.
 1209%
 1210%   Update the duplicate administration of the RDF store. This marks
 1211%   every triple that is potentionally  a   duplicate  of another as
 1212%   duplicate. Being potentially a  duplicate   means  that subject,
 1213%   predicate and object are equivalent and   the  life-times of the
 1214%   two triples overlap.
 1215%
 1216%   The duplicates marks are used to  reduce the administrative load
 1217%   of avoiding duplicate answers.  Normally,   the  duplicates  are
 1218%   marked using a background thread that   is  started on the first
 1219%   query that produces a substantial amount of duplicates.
 1220
 1221:- public
 1222    rdf_update_duplicates_thread/0. 1223
 1224%!  rdf_update_duplicates_thread
 1225%
 1226%   Start a thread to initialize the duplicate administration.
 1227
 1228rdf_update_duplicates_thread :-
 1229    thread_create(rdf_update_duplicates, _,
 1230                  [ detached(true),
 1231                    alias('__rdf_duplicate_detecter')
 1232                  ]).
 1233
 1234%!  rdf_update_duplicates is det.
 1235%
 1236%   Update the duplicate administration. If   this  adminstration is
 1237%   up-to-date, each triples that _may_ have a duplicate is flagged.
 1238%   The predicate rdf/3 uses this administration to speedup checking
 1239%   for duplicate answers.
 1240%
 1241%   This predicate is normally  executed   from  a background thread
 1242%   named =__rdf_duplicate_detecter= which is created   when a query
 1243%   discovers that checking for duplicates becomes too expensive.
 1244
 1245
 1246                 /*******************************
 1247                 *    QUICK BINARY LOAD/SAVE    *
 1248                 *******************************/
 1249
 1250%!  rdf_save_db(+File) is det.
 1251%!  rdf_save_db(+File, +Graph) is det.
 1252%
 1253%   Save triples into File in a   quick-to-load binary format. If Graph
 1254%   is supplied only triples flagged to originate from that database
 1255%   are  added.  Files  created  this  way    can  be  loaded  using
 1256%   rdf_load_db/1.
 1257
 1258:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]). 1259
 1260rdf_save_db(File) :-
 1261    current_prolog_flag(rdf_triple_format, Version),
 1262    setup_call_cleanup(
 1263        open(File, write, Out, [type(binary)]),
 1264        ( set_stream(Out, record_position(false)),
 1265          rdf_save_db_(Out, _, Version)
 1266        ),
 1267        close(Out)).
 1268
 1269
 1270rdf_save_db(File, Graph) :-
 1271    current_prolog_flag(rdf_triple_format, Version),
 1272    setup_call_cleanup(
 1273        open(File, write, Out, [type(binary)]),
 1274        ( set_stream(Out, record_position(false)),
 1275          rdf_save_db_(Out, Graph, Version)
 1276        ),
 1277        close(Out)).
 1278
 1279
 1280%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
 1281%
 1282%   Load triples from a  .trp  file   without  updating  the  source
 1283%   administration. Id is  handled  to   monitor  action.  Graphs is
 1284%   a list of graph-names encountered in File.
 1285
 1286rdf_load_db_no_admin(File, Id, Graphs) :-
 1287    open(File, read, In, [type(binary)]),
 1288    set_stream(In, record_position(false)),
 1289    call_cleanup(rdf_load_db_(In, Id, Graphs), close(In)).
 1290
 1291
 1292%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
 1293%
 1294%   Verify the loaded cache file and optionally fix the modification
 1295%   time (new versions save this along with the snapshot).
 1296%
 1297%   @tbd    What to do if there is a cache mismatch? Delete the loaded
 1298%           graphs and fail?
 1299
 1300check_loaded_cache(DB, [DB], _Modified) :- !.
 1301check_loaded_cache(DB, Graphs, _) :-
 1302    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).
 1303
 1304
 1305%!  rdf_load_db(+File) is det.
 1306%
 1307%   Load triples from a file created using rdf_save_db/2.
 1308
 1309rdf_load_db(File) :-
 1310    uri_file_name(URL, File),
 1311    rdf_load_db_no_admin(File, URL, _Graphs).
 1312
 1313
 1314                 /*******************************
 1315                 *          LOADING RDF         *
 1316                 *******************************/
 1317
 1318:- multifile
 1319    rdf_open_hook/8,
 1320    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
 1321    rdf_load_stream/3,              % +Format, +Stream, +Options
 1322    rdf_file_type/2,                % ?Extension, ?Format
 1323    rdf_storage_encoding/2,         % ?Extension, ?Encoding
 1324    url_protocol/1.                 % ?Protocol
 1325
 1326%!  rdf_load(+FileOrList) is det.
 1327%
 1328%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
 1329
 1330%!  rdf_load(+FileOrList, :Options) is det.
 1331%
 1332%   Load RDF data. Options provides   additional processing options.
 1333%   Defined options are:
 1334%
 1335%       * blank_nodes(+ShareMode)
 1336%       How to handle equivalent blank nodes.  If =share= (default),
 1337%       equivalent blank nodes are shared in the same resource.
 1338%
 1339%       * base_uri(+URI)
 1340%       URI that is used for rdf:about="" and other RDF constructs
 1341%       that are relative to the base uri.  Default is the source
 1342%       URL.
 1343%
 1344%       * concurrent(+Jobs)
 1345%       If FileOrList is a list of files, process the input files
 1346%       using Jobs threads concurrently.  Default is the mininum
 1347%       of the number of cores and the number of inputs.  Higher
 1348%       values can be useful when loading inputs from (slow)
 1349%       network connections.  Using 1 (one) does not use
 1350%       separate worker threads.
 1351%
 1352%       * format(+Format)
 1353%       Specify the source format explicitly. Normally this is
 1354%       deduced from the filename extension or the mime-type. The
 1355%       core library understands the formats xml (RDF/XML) and
 1356%       triples (internal quick load and cache format).  Plugins,
 1357%       such as library(semweb/turtle) extend the set of recognised
 1358%       extensions.
 1359%
 1360%       * graph(?Graph)
 1361%       Named graph in which to load the data.  It is *not* allowed
 1362%       to load two sources into the same named graph.  If Graph is
 1363%       unbound, it is unified to the graph into which the data is
 1364%       loaded.  The default graph is a =file://= URL when loading
 1365%       a file or, if the specification is a URL, its normalized
 1366%       version without the optional _|#fragment|_.
 1367%
 1368%       * if(Condition)
 1369%       When to load the file. One of =true=, =changed= (default) or
 1370%       =not_loaded=.
 1371%
 1372%       * modified(-Modified)
 1373%       Unify Modified with one of =not_modified=, cached(File),
 1374%       last_modified(Stamp) or =unknown=.
 1375%
 1376%       * cache(Bool)
 1377%       If =false=, do not use or create a cache file.
 1378%
 1379%       * register_namespaces(Bool)
 1380%       If =true= (default =false=), register =xmlns= namespace
 1381%       declarations or Turtle =|@prefix|= prefixes using
 1382%       rdf_register_prefix/3 if there is no conflict.
 1383%
 1384%       * silent(+Bool)
 1385%       If =true=, the message reporting completion is printed using
 1386%       level =silent=. Otherwise the level is =informational=. See
 1387%       also print_message/2.
 1388%
 1389%       * prefixes(-Prefixes)
 1390%       Returns the prefixes defined in the source   data file as a list
 1391%       of pairs.
 1392%
 1393%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
 1394%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
 1395%   load data from other formats and   locations  using plugins. The
 1396%   full set of plugins relevant to   support  different formats and
 1397%   locations is below:
 1398%
 1399%     ==
 1400%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
 1401%     :- use_module(library(semweb/rdf_ntriples)).
 1402%     :- use_module(library(semweb/rdf_zlib_plugin)).
 1403%     :- use_module(library(semweb/rdf_http_plugin)).
 1404%     :- use_module(library(http/http_ssl_plugin)).
 1405%     ==
 1406%
 1407%   @see    rdf_db:rdf_open_hook/3, library(semweb/rdf_persistency) and
 1408%           library(semweb/rdf_cache)
 1409
 1410:- dynamic
 1411    rdf_loading/3.                          % Graph, Queue, Thread
 1412
 1413rdf_load(Spec) :-
 1414    rdf_load(Spec, []).
 1415
 1416:- if(\+current_predicate(concurrent/3)). 1417concurrent(_, Goals, _) :-
 1418    forall(member(G, Goals), call(G)).
 1419:- endif. 1420
 1421% Note that we kill atom garbage collection.  This improves performance
 1422% with about 15% loading the LUBM Univ_50 benchmark.
 1423
 1424rdf_load(Spec, M:Options) :-
 1425    must_be(list, Options),
 1426    current_prolog_flag(agc_margin, Old),
 1427    setup_call_cleanup(
 1428        set_prolog_flag(agc_margin, 0),
 1429        rdf_load_noagc(Spec, M, Options),
 1430        set_prolog_flag(agc_margin, Old)).
 1431
 1432rdf_load_noagc(List, M, Options) :-
 1433    is_list(List),
 1434    !,
 1435    flatten(List, Inputs),          % Compatibility: allow nested lists
 1436    maplist(must_be(ground), Inputs),
 1437    length(Inputs, Count),
 1438    load_jobs(Count, Jobs, Options),
 1439    (   Jobs =:= 1
 1440    ->  forall(member(Spec, Inputs),
 1441               rdf_load_one(Spec, M, Options))
 1442    ;   maplist(load_goal(Options, M), Inputs, Goals),
 1443        concurrent(Jobs, Goals, [])
 1444    ).
 1445rdf_load_noagc(One, M, Options) :-
 1446    must_be(ground, One),
 1447    rdf_load_one(One, M, Options).
 1448
 1449load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).
 1450
 1451load_jobs(_, Jobs, Options) :-
 1452    option(concurrent(Jobs), Options),
 1453    !,
 1454    must_be(positive_integer, Jobs).
 1455load_jobs(Count, Jobs, _) :-
 1456    current_prolog_flag(cpu_count, CPUs),
 1457    CPUs > 0,
 1458    !,
 1459    Jobs is max(1, min(CPUs, Count)).
 1460load_jobs(_, 1, _).
 1461
 1462
 1463rdf_load_one(Spec, M, Options) :-
 1464    source_url(Spec, Protocol, SourceURL),
 1465    load_graph(SourceURL, Graph, Options),
 1466    setup_call_cleanup(
 1467        with_mutex(rdf_load_file,
 1468                   rdf_start_load(SourceURL, Loading)),
 1469        rdf_load_file(Loading, Spec, SourceURL, Protocol,
 1470                      Graph, M, Options),
 1471        rdf_end_load(Loading)).
 1472
 1473%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
 1474%!  rdf_end_load(+WhatToDo) is det.
 1475%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
 1476%!                +Module, +Options) is det.
 1477%
 1478%   Of these three predicates, rdf_load_file/7   does the real work.
 1479%   The others deal with the  possibility   that  the graph is being
 1480%   loaded by another thread. In that case,   we  wait for the other
 1481%   thread to complete the work.
 1482%
 1483%   @tbd    What if both threads disagree on what is loaded into the
 1484%           graph?
 1485%   @see    Code is modelled closely after how concurrent loading
 1486%           is handled in SWI-Prolog's boot/init.pl
 1487
 1488rdf_start_load(SourceURL, queue(Queue)) :-
 1489    rdf_loading(SourceURL, Queue, LoadThread),
 1490    \+ thread_self(LoadThread),
 1491    !,
 1492    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
 1493          [ SourceURL, LoadThread]).
 1494rdf_start_load(SourceURL, Ref) :-
 1495    thread_self(Me),
 1496    message_queue_create(Queue),
 1497    assertz(rdf_loading(SourceURL, Queue, Me), Ref).
 1498
 1499rdf_end_load(queue(_)) :- !.
 1500rdf_end_load(Ref) :-
 1501    clause(rdf_loading(_, Queue, _), _, Ref),
 1502    erase(Ref),
 1503    thread_send_message(Queue, done),
 1504    message_queue_destroy(Queue).
 1505
 1506rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
 1507    !,
 1508    catch(thread_get_message(Queue, _), _, true).
 1509rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
 1510    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
 1511    statistics(cputime, T0),
 1512    rdf_open_input(SourceURL, Protocol, Graph,
 1513                   In, Cleanup, Modified, Format, Options),
 1514    supported_format(Format, Cleanup),
 1515    return_modified(Modified, Options),
 1516    (   Modified == not_modified
 1517    ->  Action = none
 1518    ;   Modified = cached(CacheFile)
 1519    ->  do_unload(Graph),
 1520        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
 1521        check_loaded_cache(Graph, Graphs, Modified),
 1522        Action = load
 1523    ;   option(base_uri(BaseURI), Options, Graph),
 1524        (   var(BaseURI)
 1525        ->  BaseURI = SourceURL
 1526        ;   true
 1527        ),
 1528        once(phrase(derived_options(Options, NSList), Extra)),
 1529        merge_options([ base_uri(BaseURI),
 1530                        graph(Graph),
 1531                        format(Format)
 1532                      | Extra
 1533                      ], Options, RDFOptions),
 1534        do_unload(Graph),
 1535        graph_modified(Modified, ModifiedStamp),
 1536        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
 1537        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
 1538                     Cleanup),
 1539        save_cache(Graph, SourceURL, Options),
 1540        register_file_prefixes(NSList),
 1541        format_action(Format, Action)
 1542    ),
 1543    rdf_statistics_(triples(Graph, Triples)),
 1544    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
 1545
 1546supported_format(Format, _Cleanup) :-
 1547    rdf_file_type(_, Format),
 1548    !.
 1549supported_format(Format, Cleanup) :-
 1550    call(Cleanup),
 1551    existence_error(rdf_format_plugin, Format).
 1552
 1553format_action(triples, load) :- !.
 1554format_action(_, parsed).
 1555
 1556save_cache(Graph, SourceURL, Options) :-
 1557    option(cache(true), Options, true),
 1558    rdf_cache_file(SourceURL, write, CacheFile),
 1559    !,
 1560    catch(save_cache(Graph, CacheFile), E,
 1561          print_message(warning, E)).
 1562save_cache(_, _, _).
 1563
 1564derived_options([], _) -->
 1565    [].
 1566derived_options([H|T], NSList) -->
 1567    (   {   H == register_namespaces(true)
 1568        ;   H == (register_namespaces = true)
 1569        }
 1570    ->  [ namespaces(NSList) ]
 1571    ;   []
 1572    ),
 1573    derived_options(T, NSList).
 1574
 1575graph_modified(last_modified(Stamp), Stamp).
 1576graph_modified(unknown, Stamp) :-
 1577    get_time(Stamp).
 1578
 1579return_modified(Modified, Options) :-
 1580    option(modified(M0), Options),
 1581    !,
 1582    M0 = Modified.
 1583return_modified(_, _).
 1584
 1585
 1586                 /*******************************
 1587                 *        INPUT HANDLING        *
 1588                 *******************************/
 1589
 1590/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1591This section deals with pluggable input sources.  The task of the input
 1592layer is
 1593
 1594    * Decide on the graph-name
 1595    * Decide on the source-location
 1596    * Decide whether loading is needed (if-modified)
 1597    * Decide on the serialization in the input
 1598
 1599The protocol must ensure minimal  overhead,   in  particular for network
 1600protocols. E.g. for HTTP we want to make a single call on the server and
 1601use If-modified-since to verify that we need not reloading this file.
 1602- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1603
 1604%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
 1605%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
 1606%
 1607%   Open an input source.
 1608%
 1609%   Options processed:
 1610%
 1611%       * graph(Graph)
 1612%       * db(Graph)
 1613%       * if(Condition)
 1614%       * cache(Cache)
 1615%       * format(Format)
 1616%
 1617%   @param  Modified is one of =not_modified=, last_modified(Time),
 1618%           cached(CacheFile) or =unknown=
 1619
 1620rdf_open_input(SourceURL, Protocol, Graph,
 1621               Stream, Cleanup, Modified, Format, Options) :-
 1622    option(if(If), Options, changed),
 1623    (   If == true
 1624    ->  true
 1625    ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
 1626    ->  true
 1627    ;   option(cache(true), Options, true),
 1628        rdf_cache_file(SourceURL, read, CacheFile)
 1629    ->  time_file(CacheFile, HaveModified)
 1630    ;   true
 1631    ),
 1632    option(format(Format), Options, _),
 1633    open_input_if_modified(Protocol, SourceURL, HaveModified,
 1634                           Stream, Cleanup, Modified0, Format, Options),
 1635    (   Modified0 == not_modified
 1636    ->  (   nonvar(CacheFile)
 1637        ->  Modified = cached(CacheFile)
 1638        ;   Modified = not_modified
 1639        )
 1640    ;   Modified = Modified0
 1641    ).
 1642
 1643
 1644%!  source_url(+Spec, -Class, -SourceURL) is det.
 1645%
 1646%   Determine class and url of the source.  Class is one of
 1647%
 1648%       * stream(Stream)
 1649%       * file
 1650%       * a url-protocol (e.g., =http=)
 1651
 1652source_url(stream(In), stream(In), SourceURL) :-
 1653    !,
 1654    (   stream_property(In, file_name(File))
 1655    ->  to_url(File, SourceURL)
 1656    ;   gensym('stream://', SourceURL)
 1657    ).
 1658source_url(Stream, Class, SourceURL) :-
 1659    is_stream(Stream),
 1660    !,
 1661    source_url(stream(Stream), Class, SourceURL).
 1662source_url(Spec, Protocol, SourceURL) :-
 1663    compound(Spec),
 1664    !,
 1665    source_file(Spec, Protocol, SourceURL).
 1666source_url(FileURL, Protocol, SourceURL) :-             % or return FileURL?
 1667    uri_file_name(FileURL, File),
 1668    !,
 1669    source_file(File, Protocol, SourceURL).
 1670source_url(SourceURL0, Protocol, SourceURL) :-
 1671    is_url(SourceURL0, Protocol, SourceURL),
 1672    !.
 1673source_url(File, Protocol, SourceURL) :-
 1674    source_file(File, Protocol, SourceURL).
 1675
 1676source_file(Spec, file(SExt), SourceURL) :-
 1677    findall(Ext, valid_extension(Ext), Exts),
 1678    absolute_file_name(Spec, File, [access(read), extensions([''|Exts])]),
 1679    storage_extension(_Plain, SExt, File),
 1680    uri_file_name(SourceURL, File).
 1681
 1682to_url(URL, URL) :-
 1683    uri_is_global(URL),
 1684    !.
 1685to_url(File, URL) :-
 1686    absolute_file_name(File, Path),
 1687    uri_file_name(URL, Path).
 1688
 1689storage_extension(Plain, SExt, File) :-
 1690    file_name_extension(Plain, SExt, File),
 1691    SExt \== '',
 1692    rdf_storage_encoding(SExt, _),
 1693    !.
 1694storage_extension(File, '', File).
 1695
 1696%!  load_graph(+SourceURL, -Graph, +Options) is det.
 1697%
 1698%   Graph is the graph into which  we   load  the  data. Tries these
 1699%   options:
 1700%
 1701%     1. The graph(Graph) option
 1702%     2. The db(Graph) option (backward compatibility)
 1703%     3. The base_uri(BaseURI) option
 1704%     4. The source URL
 1705
 1706load_graph(Source, Graph, Options) :-
 1707    (   option(graph(Graph), Options)
 1708    ;   option(db(Graph), Options)
 1709    ),
 1710    !,
 1711    load_graph2(Source, Graph, Options).
 1712load_graph(Source, Graph, Options) :-
 1713    load_graph2(Source, Graph, Options).
 1714
 1715load_graph2(_, Graph, _) :-
 1716    ground(Graph),
 1717    !.
 1718load_graph2(_Source, Graph, Options) :-
 1719    option(base_uri(Graph), Options),
 1720    Graph \== [],
 1721    ground(Graph),
 1722    !.
 1723load_graph2(Source, Graph, _) :-
 1724    load_graph(Source, Graph).
 1725
 1726load_graph(SourceURL, BaseURI) :-
 1727    file_name_extension(BaseURI, Ext, SourceURL),
 1728    rdf_storage_encoding(Ext, _),
 1729    !.
 1730load_graph(SourceURL, SourceURL).
 1731
 1732
 1733open_input_if_modified(stream(In), SourceURL, _, In, true,
 1734                       unknown, Format, _) :-
 1735    !,
 1736    (   var(Format)
 1737    ->  guess_format(SourceURL, Format)
 1738    ;   true
 1739    ).
 1740open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
 1741                       Modified, Format, _) :-
 1742    !,
 1743    uri_file_name(SourceURL, File),
 1744    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
 1745    time_file(File, LastModified),
 1746    (   nonvar(HaveModified),
 1747        HaveModified >= LastModified
 1748    ->  Modified = not_modified,
 1749        Cleanup = true
 1750    ;   storage_open(SExt, File, Stream, Cleanup),
 1751        Modified = last_modified(LastModified),
 1752        (   var(Format)
 1753        ->  guess_format(Plain, Format)
 1754        ;   true
 1755        )
 1756    ).
 1757open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
 1758                       Modified, Format, Options) :-
 1759    !,
 1760    open_input_if_modified(file(''), SourceURL, HaveModified,
 1761                           Stream, Cleanup,
 1762                           Modified, Format, Options).
 1763open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1764                       Modified, Format, Options) :-
 1765    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1766                  Modified, Format, Options).
 1767
 1768guess_format(File, Format) :-
 1769    file_name_extension(_, Ext, File),
 1770    (   rdf_file_type(Ext, Format)
 1771    ->  true
 1772    ;   Format = xml,
 1773        print_message(warning, rdf(guess_format(Ext)))
 1774    ).
 1775
 1776%!  storage_open(+Extension, +File, -Stream, -Cleanup)
 1777%
 1778%   Open the low-level storage. Note  that   the  file  is opened as
 1779%   binary. This is the same  as   for  HTTP  resources. The correct
 1780%   encoding will be set by the XML parser or the Turtle parser.
 1781
 1782storage_open('', File, Stream, close(Stream)) :-
 1783    !,
 1784    open(File, read, Stream, [type(binary)]).
 1785storage_open(Ext, File, Stream, Cleanup) :-
 1786    rdf_storage_encoding(Ext, Encoding),
 1787    rdf_open_decode(Encoding, File, Stream, Cleanup).
 1788
 1789valid_extension(Ext) :-
 1790    rdf_file_type(Ext, _).
 1791valid_extension(Ext) :-
 1792    rdf_storage_encoding(Ext, _).
 1793
 1794%!  is_url(@Term, -Scheme, -URL) is semidet.
 1795%
 1796%   True if Term is an atom denoting URL of the given Scheme. URL is
 1797%   normalized  (see  uri_normalized/2)  and   a  possible  fragment
 1798%   identifier (#fragment) is removed. This  predicate only succeeds
 1799%   if  the  scheme  is   registered    using   the  multifile  hook
 1800%   url_protocol/1.
 1801
 1802is_url(URL, Scheme, FetchURL) :-
 1803    atom(URL),
 1804    uri_is_global(URL),
 1805    uri_normalized(URL, URL1),              % case normalization
 1806    uri_components(URL1, Components),
 1807    uri_data(scheme, Components, Scheme0),
 1808    url_protocol(Scheme0),
 1809    !,
 1810    Scheme = Scheme0,
 1811    uri_data(fragment, Components, _, Components1),
 1812    uri_components(FetchURL, Components1).
 1813
 1814url_protocol(file).                     % built-in
 1815
 1816%!  rdf_file_type(+Extension, -Format) is semidet.
 1817%
 1818%   True if Format  is  the  format   belonging  to  the  given file
 1819%   extension.  This predicate is multifile and can thus be extended
 1820%   by plugins.
 1821
 1822rdf_file_type(xml,   xml).
 1823rdf_file_type(rdf,   xml).
 1824rdf_file_type(rdfs,  xml).
 1825rdf_file_type(owl,   xml).
 1826rdf_file_type(htm,   xhtml).
 1827rdf_file_type(html,  xhtml).
 1828rdf_file_type(xhtml, xhtml).
 1829rdf_file_type(trp,   triples).
 1830
 1831
 1832%!  rdf_file_encoding(+Extension, -Format) is semidet.
 1833%
 1834%   True if Format describes the storage encoding of file.
 1835
 1836rdf_storage_encoding('', plain).
 1837
 1838
 1839%!  rdf_load_stream(+Format, +Stream, :Options)
 1840%
 1841%   Load RDF data from Stream.
 1842%
 1843%   @tbd    Handle mime-types?
 1844
 1845rdf_load_stream(xml, Stream, Options) :-
 1846    !,
 1847    graph(Options, Graph),
 1848    rdf_transaction(load_stream(Stream, Options),
 1849                    parse(Graph)).
 1850rdf_load_stream(xhtml, Stream, M:Options) :-
 1851    !,
 1852    graph(Options, Graph),
 1853    rdf_transaction(load_stream(Stream, M:[embedded(true)|Options]),
 1854                    parse(Graph)).
 1855rdf_load_stream(triples, Stream, Options) :-
 1856    !,
 1857    graph(Options, Graph),
 1858    rdf_load_db_(Stream, Graph, _Graphs).
 1859
 1860load_stream(Stream, M:Options) :-
 1861    process_rdf(Stream, assert_triples, M:Options),
 1862    option(graph(Graph), Options),
 1863    rdf_graph_clear_modified_(Graph).
 1864
 1865
 1866%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
 1867
 1868report_loaded(none, _, _, _, _, _) :- !.
 1869report_loaded(Action, Source, DB, Triples, T0, Options) :-
 1870    statistics(cputime, T1),
 1871    Time is T1 - T0,
 1872    (   option(silent(true), Options)
 1873    ->  Level = silent
 1874    ;   Level = informational
 1875    ),
 1876    print_message(Level,
 1877                  rdf(loaded(Action, Source, DB, Triples, Time))).
 1878
 1879
 1880%!  rdf_unload(+Source) is det.
 1881%
 1882%   Identify the graph loaded from Source and use rdf_unload_graph/1
 1883%   to erase this graph.
 1884%
 1885%   @deprecated     For compatibility, this predicate also accepts a
 1886%                   graph name instead of a source specification.
 1887%                   Please update your code to use
 1888%                   rdf_unload_graph/1.
 1889
 1890rdf_unload(Spec) :-
 1891    source_url(Spec, _Protocol, SourceURL),
 1892    rdf_graph_source_(Graph, SourceURL, _),
 1893    !,
 1894    rdf_unload_graph(Graph).
 1895rdf_unload(Graph) :-
 1896    atom(Graph),
 1897    rdf_graph(Graph),
 1898    !,
 1899    warn_deprecated_unload(Graph),
 1900    rdf_unload_graph(Graph).
 1901rdf_unload(_).
 1902
 1903:- dynamic
 1904    warned/0. 1905
 1906warn_deprecated_unload(_) :-
 1907    warned,
 1908    !.
 1909warn_deprecated_unload(Graph) :-
 1910    assertz(warned),
 1911    print_message(warning, rdf(deprecated(rdf_unload(Graph)))).
 1912
 1913
 1914%!  rdf_unload_graph(+Graph) is det.
 1915%
 1916%   Remove Graph from the RDF store.  Succeeds silently if the named
 1917%   graph does not exist.
 1918
 1919rdf_unload_graph(Graph) :-
 1920    must_be(atom, Graph),
 1921    (   rdf_graph(Graph)
 1922    ->  rdf_transaction(do_unload(Graph), unload(Graph))
 1923    ;   true
 1924    ).
 1925
 1926do_unload(Graph) :-
 1927    (   rdf_graph_(Graph, Triples),
 1928        Triples > 0
 1929    ->  rdf_retractall(_,_,_,Graph)
 1930    ;   true
 1931    ),
 1932    rdf_destroy_graph(Graph).
 1933
 1934                 /*******************************
 1935                 *         GRAPH QUERIES        *
 1936                 *******************************/
 1937
 1938%!  rdf_create_graph(+Graph) is det.
 1939%
 1940%   Create an RDF graph without triples.   Succeeds  silently if the
 1941%   graph already exists.
 1942
 1943
 1944%!  rdf_graph(?Graph) is nondet.
 1945%
 1946%   True when Graph is an existing graph.
 1947
 1948rdf_graph(Graph) :-
 1949    rdf_graph_(Graph, _Triples).
 1950
 1951%!  rdf_source(?Graph, ?SourceURL) is nondet.
 1952%
 1953%   True if named Graph is loaded from SourceURL.
 1954%
 1955%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).
 1956
 1957rdf_source(Graph, SourceURL) :-
 1958    rdf_graph(Graph),
 1959    rdf_graph_source_(Graph, SourceURL, _Modified).
 1960
 1961%!  rdf_source(?Source)
 1962%
 1963%   True if Source is a loaded source.
 1964%
 1965%   @deprecated     Use rdf_graph/1 or rdf_source/2.
 1966
 1967rdf_source(SourceURL) :-
 1968    rdf_source(_Graph, SourceURL).
 1969
 1970%!  rdf_make
 1971%
 1972%   Reload all loaded files that have been modified since the last
 1973%   time they were loaded.
 1974
 1975rdf_make :-
 1976    findall(Source-Graph, modified_graph(Source, Graph), Modified),
 1977    forall(member(Source-Graph, Modified),
 1978           catch(rdf_load(Source, [graph(Graph), if(changed)]), E,
 1979                 print_message(error, E))).
 1980
 1981modified_graph(SourceURL, Graph) :-
 1982    rdf_graph(Graph),
 1983    rdf_graph_source_(Graph, SourceURL, Modified),
 1984    \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
 1985    Modified > 0.
 1986
 1987%!  rdf_graph_property(?Graph, ?Property) is nondet.
 1988%
 1989%   True when Property is a property of Graph.  Defined properties
 1990%   are:
 1991%
 1992%       * hash(Hash)
 1993%       Hash is the (MD5-)hash for the content of Graph.
 1994%       * modified(Boolean)
 1995%       True if the graph is modified since it was loaded or
 1996%       rdf_set_graph/2 was called with modified(false).
 1997%       * source(Source)
 1998%       The graph is loaded from the Source (a URL)
 1999%       * source_last_modified(?Time)
 2000%       Time is the last-modified timestamp of Source at the moment
 2001%       that the graph was loaded from Source.
 2002%       * triples(Count)
 2003%       True when Count is the number of triples in Graph.
 2004%
 2005%    Additional graph properties can be added  by defining rules for
 2006%    the multifile predicate  property_of_graph/2.   Currently,  the
 2007%    following extensions are defined:
 2008%
 2009%       - library(semweb/rdf_persistency)
 2010%         - persistent(Boolean)
 2011%           Boolean is =true= if the graph is persistent.
 2012
 2013rdf_graph_property(Graph, Property) :-
 2014    rdf_graph(Graph),
 2015    property_of_graph(Property, Graph).
 2016
 2017:- multifile
 2018    property_of_graph/2. 2019
 2020property_of_graph(hash(Hash), Graph) :-
 2021    rdf_md5(Graph, Hash).
 2022property_of_graph(modified(Boolean), Graph) :-
 2023    rdf_graph_modified_(Graph, Boolean, _).
 2024property_of_graph(source(URL), Graph) :-
 2025    rdf_graph_source_(Graph, URL, _).
 2026property_of_graph(source_last_modified(Time), Graph) :-
 2027    rdf_graph_source_(Graph, _, Time),
 2028    Time > 0.0.
 2029property_of_graph(triples(Count), Graph) :-
 2030    rdf_graph_(Graph, Count).
 2031
 2032%!  rdf_set_graph(+Graph, +Property) is det.
 2033%
 2034%   Set properties of Graph.  Defined properties are:
 2035%
 2036%       * modified(false)
 2037%       Set the modified state of Graph to false.
 2038
 2039rdf_set_graph(Graph, modified(Modified)) :-
 2040    must_be(oneof([false]), Modified),
 2041    rdf_graph_clear_modified_(Graph).
 2042
 2043
 2044%!  save_cache(+DB, +Cache) is det.
 2045%
 2046%   Save triples belonging to DB in the file Cache.
 2047
 2048save_cache(DB, Cache) :-
 2049    current_prolog_flag(rdf_triple_format, Version),
 2050    setup_call_cleanup(
 2051        catch(open(Cache, write, CacheStream, [type(binary)]), _, fail),
 2052        rdf_save_db_(CacheStream, DB, Version),
 2053        close(CacheStream)).
 2054
 2055%!  assert_triples(+Triples, +Source)
 2056%
 2057%   Assert a list of triples into the database. Foir security
 2058%   reasons we check we aren't inserting anything but nice RDF
 2059%   triples.
 2060
 2061assert_triples([], _).
 2062assert_triples([rdf(S,P,O)|T], DB) :-
 2063    !,
 2064    rdf_assert(S, P, O, DB),
 2065    assert_triples(T, DB).
 2066assert_triples([H|_], _) :-
 2067    throw(error(type_error(rdf_triple, H), _)).
 2068
 2069
 2070                 /*******************************
 2071                 *             RESET            *
 2072                 *******************************/
 2073
 2074%!  rdf_reset_db
 2075%
 2076%   Remove all triples from the RDF database and reset all its
 2077%   statistics.
 2078%
 2079%   @bug    This predicate checks for active queries, but this check is
 2080%           not properly synchronized and therefore the use of this
 2081%           predicate is unsafe in multi-threaded contexts. It is
 2082%           mainly used to run functionality tests that need to
 2083%           start with an empty database.
 2084
 2085rdf_reset_db :-
 2086    reset_gensym('_:genid'),
 2087    rdf_reset_db_.
 2088
 2089
 2090                 /*******************************
 2091                 *           SAVE RDF           *
 2092                 *******************************/
 2093
 2094%!  rdf_save(+Out) is det.
 2095%
 2096%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
 2097
 2098%!  rdf_save(+Out, :Options) is det.
 2099%
 2100%   Write RDF data as RDF/XML. Options is a list of one or more of
 2101%   the following options:
 2102%
 2103%           * graph(+Graph)
 2104%           Save only triples associated to the given named Graph.
 2105%
 2106%           * anon(Bool)
 2107%           If false (default true) do not save blank nodes that do
 2108%           not appear (indirectly) as object of a named resource.
 2109%
 2110%           * base_uri(URI)
 2111%           BaseURI used. If present, all URIs that can be
 2112%           represented relative to this base are written using
 2113%           their shorthand.  See also =write_xml_base= option
 2114%
 2115%           * convert_typed_literal(:Convertor)
 2116%           Call Convertor(-Type, -Content, +RDFObject), providing
 2117%           the opposite for the convert_typed_literal option of
 2118%           the RDF parser.
 2119%
 2120%           * document_language(+Lang)
 2121%           Initial xml:lang saved with rdf:RDF element
 2122%
 2123%           * encoding(Encoding)
 2124%           Encoding for the output.  Either utf8 or iso_latin_1
 2125%
 2126%           * inline(+Bool)
 2127%           If =true= (default =false=), inline resources when
 2128%           encountered for the first time. Normally, only bnodes
 2129%           are handled this way.
 2130%
 2131%           * namespaces(+List)
 2132%           Explicitely specify saved namespace declarations. See
 2133%           rdf_save_header/2 option namespaces for details.
 2134%
 2135%           * sorted(+Boolean)
 2136%           If =true= (default =false=), emit subjects sorted on
 2137%           the full URI.  Useful to make file comparison easier.
 2138%
 2139%           * write_xml_base(Bool)
 2140%           If =false=, do _not_ include the =|xml:base|=
 2141%           declaration that is written normally when using the
 2142%           =base_uri= option.
 2143%
 2144%           * xml_attributes(+Bool)
 2145%           If =false= (default =true=), never use xml attributes to
 2146%           save plain literal attributes, i.e., always used an XML
 2147%           element as in =|<name>Joe</name>|=.
 2148%
 2149%   @param Out      Location to save the data.  This can also be a
 2150%                   file-url (=|file://path|=) or a stream wrapped
 2151%                   in a term stream(Out).
 2152%   @see rdf_save_db/1
 2153
 2154:- thread_local
 2155    named_anon/2,                   % +Resource, -Id
 2156    inlined/1.                      % +Resource
 2157
 2158rdf_save(File) :-
 2159    rdf_save2(File, []).
 2160
 2161rdf_save(Spec, M:Options0) :-
 2162    is_list(Options0),
 2163    !,
 2164    meta_options(save_meta_option, M:Options0, Options),
 2165    to_file(Spec, File),
 2166    rdf_save2(File, Options).
 2167rdf_save(Spec, _:DB) :-
 2168    atom(DB),                      % backward compatibility
 2169    !,
 2170    to_file(Spec, File),
 2171    rdf_save2(File, [graph(DB)]).
 2172
 2173save_meta_option(convert_typed_literal).
 2174
 2175to_file(URL, File) :-
 2176    atom(URL),
 2177    uri_file_name(URL, File),
 2178    !.
 2179to_file(File, File).
 2180
 2181rdf_save2(File, Options) :-
 2182    option(encoding(Encoding), Options, utf8),
 2183    valid_encoding(Encoding),
 2184    open_output(File, Encoding, Out, Close),
 2185    flag(rdf_db_saved_subjects, OSavedSubjects, 0),
 2186    flag(rdf_db_saved_triples, OSavedTriples, 0),
 2187    call_cleanup(rdf_do_save(Out, Options),
 2188                 Reason,
 2189                 cleanup_save(Reason,
 2190                              File,
 2191                              OSavedSubjects,
 2192                              OSavedTriples,
 2193                              Close)).
 2194
 2195open_output(stream(Out), Encoding, Out,
 2196            set_stream(Out, encoding(Old))) :-
 2197    !,
 2198    stream_property(Out, encoding(Old)),
 2199    set_stream(Out, encoding(Encoding)).
 2200open_output(File, Encoding, Out,
 2201            close(Out)) :-
 2202    open(File, write, Out, [encoding(Encoding)]).
 2203
 2204valid_encoding(Enc) :-
 2205    (   xml_encoding_name(Enc, _)
 2206    ->  true
 2207    ;   throw(error(domain_error(encoding, Enc), _))
 2208    ).
 2209
 2210
 2211cleanup_save(Reason,
 2212             File,
 2213             OSavedSubjects,
 2214             OSavedTriples,
 2215             Close) :-
 2216    call(Close),
 2217    flag(rdf_db_saved_subjects, SavedSubjects, OSavedSubjects),
 2218    flag(rdf_db_saved_triples, SavedTriples, OSavedTriples),
 2219    retractall(named_anon(_, _)),
 2220    retractall(inlined(_)),
 2221    (   Reason == exit
 2222    ->  print_message(informational,
 2223                      rdf(saved(File, SavedSubjects, SavedTriples)))
 2224    ;   format(user_error, 'Reason = ~w~n', [Reason])
 2225    ).
 2226
 2227rdf_do_save(Out, Options0) :-
 2228    rdf_save_header(Out, Options0, Options),
 2229    graph(Options, DB),
 2230    (   option(sorted(true), Options, false)
 2231    ->  (   var(DB)
 2232        ->  setof(Subject, rdf_subject(Subject), Subjects)
 2233        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2234            sort(SubjectList, Subjects)
 2235        ),
 2236        forall(member(Subject, Subjects),
 2237               rdf_save_non_anon_subject(Out, Subject, Options))
 2238    ;   forall(rdf_subject_in_graph(Subject, DB),
 2239               rdf_save_non_anon_subject(Out, Subject, Options))
 2240    ),
 2241    rdf_save_footer(Out),
 2242    !.                                  % dubious cut; without the
 2243                                        % cleanup handlers isn't called!?
 2244
 2245%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
 2246%
 2247%   True when Subject is a subject in the   graph  DB. If DB is unbound,
 2248%   all  subjects  are  enumerated.  Otherwise   we  have  two  options:
 2249%   enumerate all subjects and filter by graph or collect all triples of
 2250%   the graph and get the unique subjects.   The  first is attractive if
 2251%   the graph is big compared  to  the   DB,  also  because  it does not
 2252%   require memory, the second if the graph is small compared to the DB.
 2253
 2254rdf_subject_in_graph(Subject, DB) :-
 2255    var(DB),
 2256    !,
 2257    rdf_subject(Subject).
 2258rdf_subject_in_graph(Subject, DB) :-
 2259    rdf_statistics(triples(AllTriples)),
 2260    rdf_graph_property(DB, triples(DBTriples)),
 2261    DBTriples > AllTriples // 10,
 2262    !,
 2263    rdf_resource(Subject),
 2264    (   rdf(Subject, _, _, DB:_)
 2265    ->  true
 2266    ).
 2267rdf_subject_in_graph(Subject, DB) :-
 2268    findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2269    list_to_set(SubjectList, Subjects),
 2270    member(Subject, Subjects).
 2271
 2272
 2273graph(Options0, DB) :-
 2274    strip_module(Options0, _, Options),
 2275    (   memberchk(graph(DB0), Options)
 2276    ->  DB = DB0
 2277    ;   memberchk(db(DB0), Options)
 2278    ->  DB = DB0
 2279    ;   true                            % leave unbound
 2280    ).
 2281
 2282
 2283%!  rdf_save_header(+Fd, +Options)
 2284%
 2285%   Save XML document header, doctype and open the RDF environment.
 2286%   This predicate also sets up the namespace notation.
 2287%
 2288%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
 2289%   opening the rdf:RDF element with appropriate namespace
 2290%   declarations. It uses the primitives from section 3.5 to
 2291%   generate the required namespaces and desired short-name. Options
 2292%   is one of:
 2293%
 2294%     * graph(+URI)
 2295%     Only search for namespaces used in triples that belong to the
 2296%     given named graph.
 2297%
 2298%     * namespaces(+List)
 2299%     Where List is a list of namespace abbreviations. With this
 2300%     option, the expensive search for all namespaces that may be
 2301%     used by your data is omitted. The namespaces =rdf= and =rdfs=
 2302%     are added to the provided List. If a namespace is not
 2303%     declared, the resource is emitted in non-abreviated form.
 2304
 2305rdf_save_header(Out, Options) :-
 2306    rdf_save_header(Out, Options, _).
 2307
 2308rdf_save_header(Out, Options, OptionsOut) :-
 2309    is_list(Options),
 2310    !,
 2311    stream_property(Out, encoding(Enc)),
 2312    xml_encoding(Enc, Encoding),
 2313    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]),
 2314    format(Out, '<!DOCTYPE rdf:RDF [', []),
 2315    header_namespaces(Options, NSIdList),
 2316    nsmap(NSIdList, NsMap),
 2317    append(Options, [nsmap(NsMap)], OptionsOut),
 2318    forall(member(Id=URI, NsMap),
 2319           (   xml_quote_attribute(URI, NSText0, Enc),
 2320               xml_escape_parameter_entity(NSText0, NSText),
 2321               format(Out, '~N    <!ENTITY ~w \'~w\'>', [Id, NSText])
 2322           )),
 2323    format(Out, '~N]>~n~n', []),
 2324    format(Out, '<rdf:RDF', []),
 2325    (   member(Id, NSIdList),
 2326        format(Out, '~N    xmlns:~w="&~w;"~n', [Id, Id]),
 2327        fail
 2328    ;   true
 2329    ),
 2330    (   option(base_uri(Base), Options),
 2331        option(write_xml_base(true), Options, true)
 2332    ->  xml_quote_attribute(Base, BaseText, Enc),
 2333        format(Out, '~N    xml:base="~w"~n', [BaseText])
 2334    ;   true
 2335    ),
 2336    (   memberchk(document_language(Lang), Options)
 2337    ->  format(Out, '~N    xml:lang="~w"', [Lang])
 2338    ;   true
 2339    ),
 2340    format(Out, '>~n', []).
 2341rdf_save_header(Out, FileRef, OptionsOut) :-    % compatibility
 2342    atom(FileRef),
 2343    rdf_save_header(Out, [graph(FileRef)], OptionsOut).
 2344
 2345xml_encoding(Enc, Encoding) :-
 2346    (   xml_encoding_name(Enc, Encoding)
 2347    ->  true
 2348    ;   throw(error(domain_error(rdf_encoding, Enc), _))
 2349    ).
 2350
 2351xml_encoding_name(ascii,       'US-ASCII').
 2352xml_encoding_name(iso_latin_1, 'ISO-8859-1').
 2353xml_encoding_name(utf8,        'UTF-8').
 2354
 2355%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
 2356%
 2357%   Create a namespace-map that is compatible to xml_write/2
 2358%   for dealing with XML-Literals
 2359
 2360nsmap([], []).
 2361nsmap([Id|T0], [Id=URI|T]) :-
 2362    ns(Id, URI),
 2363    nsmap(T0, T).
 2364
 2365%!  xml_escape_parameter_entity(+In, -Out) is det.
 2366%
 2367%   Escape % as &#37; for entity declarations.
 2368
 2369xml_escape_parameter_entity(In, Out) :-
 2370    sub_atom(In, _, _, _, '%'),
 2371    !,
 2372    atom_codes(In, Codes),
 2373    phrase(escape_parent(Codes), OutCodes),
 2374    atom_codes(Out, OutCodes).
 2375xml_escape_parameter_entity(In, In).
 2376
 2377escape_parent([]) --> [].
 2378escape_parent([H|T]) -->
 2379    (   { H == 37 }
 2380    ->  "&#37;"
 2381    ;   [H]
 2382    ),
 2383    escape_parent(T).
 2384
 2385
 2386%!  header_namespaces(Options, -List)
 2387%
 2388%   Get namespaces we will define as entities
 2389
 2390header_namespaces(Options, List) :-
 2391    memberchk(namespaces(NSL0), Options),
 2392    !,
 2393    sort([rdf,rdfs|NSL0], List).
 2394header_namespaces(Options, List) :-
 2395    graph(Options, DB),
 2396    used_namespace_entities(List, DB).
 2397
 2398%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
 2399%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
 2400%
 2401%   List is a sorted list of  prefixes (namepaces) in Graph. Options
 2402%   defined are:
 2403%
 2404%       * filter(:Filter)
 2405%       optional Filter argument is used to filter the results. It
 2406%       is called with 3 additional arguments:
 2407%
 2408%           ==
 2409%           call(Filter, Where, Prefix, URI)
 2410%           ==
 2411%
 2412%       The Where argument gives the location of the prefix ans is
 2413%       one of =subject=, =predicate=, =object= or =type=. The
 2414%       Prefix argument is the potentionally new prefix and URI is
 2415%       the full URI that is being processed.
 2416%
 2417%       * expand(:Goal)
 2418%       Hook to generate the graph.  Called using
 2419%
 2420%           ==
 2421%           call(Goal,S,P,O,Graph)
 2422%           ==
 2423%
 2424%       * min_count(+Count)
 2425%       Only include prefixes that appear at least N times.  Default
 2426%       is 1. Declared prefixes are always returned if found at
 2427%       least one time.
 2428%
 2429%       * get_prefix(:GetPrefix)
 2430%       Predicate to extract the candidate prefix from an IRI.  Default
 2431%       is iri_xml_namespace/2.
 2432
 2433
 2434:- thread_local
 2435    graph_prefix/3. 2436:- meta_predicate
 2437    rdf_graph_prefixes(?, -, :). 2438
 2439rdf_graph_prefixes(Graph, List) :-
 2440    rdf_graph_prefixes(Graph, List, []).
 2441
 2442rdf_graph_prefixes(Graph, List, M:QOptions) :-
 2443    is_list(QOptions),
 2444    !,
 2445    meta_options(is_meta, M:QOptions, Options),
 2446    option(filter(Filter), Options, true),
 2447    option(expand(Expand), Options, rdf_db),
 2448    option(min_count(MinCount), Options, 1),
 2449    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
 2450    call_cleanup(prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix),
 2451                 retractall(graph_prefix(_,_,_))),
 2452    sort(Prefixes, List).
 2453rdf_graph_prefixes(Graph, List, M:Filter) :-
 2454    rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).
 2455
 2456is_meta(filter).
 2457is_meta(expand).
 2458is_meta(get_prefix).
 2459
 2460
 2461prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
 2462    (   call(Expand, S, P, O, Graph),
 2463        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
 2464        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
 2465        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
 2466        fail
 2467    ;   true
 2468    ),
 2469    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).
 2470
 2471add_ns(Where, GetPrefix, Filter, S, MinCount, Context) :-
 2472    \+ rdf_is_bnode(S),
 2473    call(GetPrefix, S, Full),
 2474    Full \== '',
 2475    !,
 2476    (   graph_prefix(Full, MinCount, _)
 2477    ->  true
 2478    ;   Filter == true
 2479    ->  add_ns(Full, Context)
 2480    ;   call(Filter, Where, Full, S)
 2481    ->  add_ns(Full, Context)
 2482    ;   true
 2483    ).
 2484add_ns(_, _, _, _, _, _).
 2485
 2486add_ns(Full, Context) :-
 2487    graph_prefix(Full, _, Contexts),
 2488    memberchk(Context, Contexts),
 2489    !.
 2490add_ns(Full, Context) :-
 2491    retract(graph_prefix(Full, C0, Contexts)),
 2492    !,
 2493    C1 is C0+1,
 2494    asserta(graph_prefix(Full, C1, [Context|Contexts])).
 2495add_ns(Full, _) :-
 2496    ns(_, Full),
 2497    !,
 2498    asserta(graph_prefix(Full, _, _)).
 2499add_ns(Full, Context) :-
 2500    asserta(graph_prefix(Full, 1, [Context])).
 2501
 2502
 2503add_ns_obj(GetPrefix, Filter, O, MinCount, Context) :-
 2504    atom(O),
 2505    !,
 2506    add_ns(object, GetPrefix, Filter, O, MinCount, Context).
 2507add_ns_obj(GetPrefix, Filter, literal(type(Type, _)), MinCount, _) :-
 2508    atom(Type),
 2509    !,
 2510    add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type)).
 2511add_ns_obj(_, _, _, _, _).
 2512
 2513
 2514%!  used_namespace_entities(-List, ?Graph) is det.
 2515%
 2516%   Return the namespace aliases that are actually used in Graph. In
 2517%   addition, this predicate creates ns<N>   aliases  for namespaces
 2518%   used in predicates because RDF/XML cannot write predicates other
 2519%   than as an XML name.
 2520
 2521used_namespace_entities(List, Graph) :-
 2522    decl_used_predicate_ns(Graph),
 2523    used_namespaces(List, Graph).
 2524
 2525used_namespaces(List, DB) :-
 2526    rdf_graph_prefixes(DB, FullList),
 2527    ns_abbreviations(FullList, List0),
 2528    sort([rdf|List0], List).
 2529
 2530ns_abbreviations([], []).
 2531ns_abbreviations([H0|T0], [H|T]) :-
 2532    ns(H, H0),
 2533    !,
 2534    ns_abbreviations(T0, T).
 2535ns_abbreviations([_|T0], T) :-
 2536    ns_abbreviations(T0, T).
 2537
 2538
 2539/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 2540For every URL used as a predicate  we   *MUST*  define a namespace as we
 2541cannot use names holding /, :, etc. as XML identifiers.
 2542- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 2543
 2544:- thread_local
 2545    predicate_ns/2. 2546
 2547decl_used_predicate_ns(DB) :-
 2548    retractall(predicate_ns(_,_)),
 2549    (   rdf_current_predicate(P, DB),
 2550        decl_predicate_ns(P),
 2551        fail
 2552    ;   true
 2553    ).
 2554
 2555decl_predicate_ns(Pred) :-
 2556    predicate_ns(Pred, _),
 2557    !.
 2558decl_predicate_ns(Pred) :-
 2559    rdf_global_id(NS:Local, Pred),
 2560    xml_name(Local),
 2561    !,
 2562    assert(predicate_ns(Pred, NS)).
 2563decl_predicate_ns(Pred) :-
 2564    atom_codes(Pred, Codes),
 2565    append(NSCodes, LocalCodes, Codes),
 2566    xml_codes(LocalCodes),
 2567    !,
 2568    (   NSCodes \== []
 2569    ->  atom_codes(NS, NSCodes),
 2570        (   ns(Id, NS)
 2571        ->  assert(predicate_ns(Pred, Id))
 2572        ;   between(1, infinite, N),
 2573            atom_concat(ns, N, Id),
 2574            \+ ns(Id, _)
 2575        ->  rdf_register_ns(Id, NS),
 2576            print_message(informational,
 2577                          rdf(using_namespace(Id, NS)))
 2578        ),
 2579        assert(predicate_ns(Pred, Id))
 2580    ;   assert(predicate_ns(Pred, -)) % no namespace used
 2581    ).
 2582
 2583xml_codes([]).
 2584xml_codes([H|T]) :-
 2585    xml_code(H),
 2586    xml_codes(T).
 2587
 2588xml_code(X) :-
 2589    code_type(X, csym),
 2590    !.
 2591xml_code(0'-).                          % Match 0'-
 2592
 2593
 2594%!  rdf_save_footer(Out:stream) is det.
 2595%
 2596%   Finish XML generation and write the document footer.
 2597%
 2598%   @see rdf_save_header/2, rdf_save_subject/3.
 2599
 2600rdf_save_footer(Out) :-
 2601    retractall(named_anon(_, _)),
 2602    retractall(inlined(_)),
 2603    format(Out, '</rdf:RDF>~n', []).
 2604
 2605%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
 2606%
 2607%   Save an object.  Anonymous objects not saved if anon(false)
 2608%   is present in the Options list.
 2609
 2610rdf_save_non_anon_subject(_Out, Subject, Options) :-
 2611    rdf_is_bnode(Subject),
 2612    (   memberchk(anon(false), Options)
 2613    ;   graph(Options, DB),
 2614        rdf_db(_, _, Subject, DB)
 2615    ),
 2616    !.
 2617rdf_save_non_anon_subject(Out, Subject, Options) :-
 2618    rdf_save_subject(Out, Subject, Options),
 2619    flag(rdf_db_saved_subjects, X, X+1).
 2620
 2621
 2622%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
 2623%
 2624%   Save the triples associated to Subject to Out. Options:
 2625%
 2626%     * graph(+Graph)
 2627%     Only save properties from Graph.
 2628%     * base_uri(+URI)
 2629%     * convert_typed_literal(:Goal)
 2630%     * document_language(+XMLLang)
 2631%
 2632%   @see rdf_save/2 for a description of these options.
 2633
 2634rdf_save_subject(Out, Subject, Options) :-
 2635    is_list(Options),
 2636    !,
 2637    option(base_uri(BaseURI), Options, '-'),
 2638    (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
 2639    ->  format(Out, '~n', [])
 2640    ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
 2641    ).
 2642rdf_save_subject(Out, Subject, DB) :-
 2643    (   var(DB)
 2644    ->  rdf_save_subject(Out, Subject, [])
 2645    ;   rdf_save_subject(Out, Subject, [graph(DB)])
 2646    ).
 2647
 2648
 2649%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2650%!                   +Indent:int, +Options) is det.
 2651%
 2652%   Save properties of Subject.
 2653%
 2654%   @param Indent   Current indentation
 2655
 2656rdf_save_subject(_, Subject, _, _, _) :-
 2657    inlined(Subject),
 2658    !.
 2659rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2660    do_save_subject(Out, Subject, BaseURI, Indent, Options).
 2661
 2662do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2663    graph(Options, DB),
 2664    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Atts0),
 2665    sort(Atts0, Atts),              % remove duplicates
 2666    length(Atts, L),
 2667    (   length(Atts0, L0),
 2668        Del is L0-L,
 2669        Del > 0
 2670    ->  print_message(informational,
 2671                      rdf(save_removed_duplicates(Del, Subject)))
 2672    ;   true
 2673    ),
 2674    rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options),
 2675    flag(rdf_db_saved_triples, X, X+L).
 2676
 2677rdf_db(Subject, Pred, Object, DB) :-
 2678    var(DB),
 2679    !,
 2680    rdf(Subject, Pred, Object).
 2681rdf_db(Subject, Pred, Object, DB) :-
 2682    rdf(Subject, Pred, Object, DB:_).
 2683
 2684%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2685%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
 2686%
 2687%   Save triples defined by Atts on Subject.
 2688
 2689rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2690    rdf_equal(rdf:type, RdfType),
 2691    select(RdfType=Type, Atts, Atts1),
 2692    \+ rdf_is_bnode(Type),
 2693    rdf_id(Type, BaseURI, TypeId),
 2694    xml_is_name(TypeId),
 2695    !,
 2696    format(Out, '~*|<', [Indent]),
 2697    rdf_write_id(Out, TypeId),
 2698    save_about(Out, BaseURI, Subject),
 2699    save_attributes(Atts1, BaseURI, Out, TypeId, Indent, Options).
 2700rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2701    format(Out, '~*|<rdf:Description', [Indent]),
 2702    save_about(Out, BaseURI, Subject),
 2703    save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).
 2704
 2705xml_is_name(_NS:Atom) :-
 2706    !,
 2707    xml_name(Atom).
 2708xml_is_name(Atom) :-
 2709    xml_name(Atom).
 2710
 2711%!  save_about(+Out, +BaseURI, +Subject) is det.
 2712%
 2713%   Save the rdf:about. If Subject is a  blank node, save the nodeID
 2714%   if any.
 2715
 2716save_about(Out, _, Subject) :-
 2717    rdf_is_bnode(Subject),
 2718    !,
 2719    (   named_anon(Subject, NodeID)
 2720    ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
 2721    ;   true
 2722    ).
 2723save_about(Out, BaseURI, Subject) :-
 2724    stream_property(Out, encoding(Encoding)),
 2725    rdf_value(Subject, BaseURI, QSubject, Encoding),
 2726    format(Out, ' rdf:about="~w"', [QSubject]).
 2727
 2728%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
 2729%
 2730%   Save the attributes.  Short literal attributes are saved in the
 2731%   tag.  Others as the content of the description element.  The
 2732%   begin tag has already been filled.
 2733
 2734save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
 2735    split_attributes(Atts, InTag, InBody, Options),
 2736    SubIndent is Indent + 2,
 2737    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
 2738    (   InBody == []
 2739    ->  format(Out, '/>~n', [])
 2740    ;   format(Out, '>~n', []),
 2741        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
 2742        format(Out, '~N~*|</', [Indent]),
 2743        rdf_write_id(Out, Element),
 2744        format(Out, '>~n', [])
 2745    ).
 2746
 2747%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
 2748%
 2749%   Split attribute (Name=Value) list into attributes for the head
 2750%   and body. Attributes can only be in the head if they are literal
 2751%   and appear only one time in the attribute list.
 2752
 2753split_attributes(Atts, [], Atts, Options) :-
 2754    option(xml_attributes(false), Options),
 2755    !.
 2756split_attributes(Atts, HeadAttr, BodyAttr, _) :-
 2757    duplicate_attributes(Atts, Dupls, Singles),
 2758    simple_literal_attributes(Singles, HeadAttr, Rest),
 2759    append(Dupls, Rest, BodyAttr).
 2760
 2761%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
 2762%
 2763%   Extract attributes that appear more than onces as we cannot
 2764%   dublicate an attribute in the head according to the XML rules.
 2765
 2766duplicate_attributes([], [], []).
 2767duplicate_attributes([H|T], Dupls, Singles) :-
 2768    H = (Name=_),
 2769    named_attributes(Name, T, D, R),
 2770    D \== [],
 2771    append([H|D], Dupls2, Dupls),
 2772    !,
 2773    duplicate_attributes(R, Dupls2, Singles).
 2774duplicate_attributes([H|T], Dupls2, [H|Singles]) :-
 2775    duplicate_attributes(T, Dupls2, Singles).
 2776
 2777named_attributes(_, [], [], []) :- !.
 2778named_attributes(Name, [H|T], D, R) :-
 2779    (   H = (Name=_)
 2780    ->  D = [H|DT],
 2781        named_attributes(Name, T, DT, R)
 2782    ;   R = [H|RT],
 2783        named_attributes(Name, T, D, RT)
 2784    ).
 2785
 2786%!  simple_literal_attributes(+Attributes, -Inline, -Body)
 2787%
 2788%   Split attributes for (literal) attributes to be used in the
 2789%   begin-tag and ones that have to go into the body of the description.
 2790
 2791simple_literal_attributes([], [], []).
 2792simple_literal_attributes([H|TA], [H|TI], B) :-
 2793    in_tag_attribute(H),
 2794    !,
 2795    simple_literal_attributes(TA, TI, B).
 2796simple_literal_attributes([H|TA], I, [H|TB]) :-
 2797    simple_literal_attributes(TA, I, TB).
 2798
 2799in_tag_attribute(_=literal(Text)) :-
 2800    atom(Text),                     % may not have lang qualifier
 2801    atom_length(Text, Len),
 2802    Len < 60.
 2803
 2804%!  save_attributes(+List, +BaseURI, +TagOrBody, +Stream)
 2805%
 2806%   Save a list of attributes.
 2807
 2808save_attributes2([], _, _, _, _, _).
 2809save_attributes2([H|T], BaseURI, Where, Out, Indent, Options) :-
 2810    save_attribute(Where, H, BaseURI, Out, Indent, Options),
 2811    save_attributes2(T, BaseURI, Where, Out, Indent, Options).
 2812
 2813save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, _DB) :-
 2814    AttIndent is Indent + 2,
 2815    rdf_id(Name, BaseURI, NameText),
 2816    stream_property(Out, encoding(Encoding)),
 2817    xml_quote_attribute(Value, QVal, Encoding),
 2818    format(Out, '~N~*|', [AttIndent]),
 2819    rdf_write_id(Out, NameText),
 2820    format(Out, '="~w"', [QVal]).
 2821save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
 2822    !,
 2823    rdf_id(Name, BaseURI, NameText),
 2824    (   memberchk(convert_typed_literal(Converter), Options),
 2825        call(Converter, Type, Content, Literal0)
 2826    ->  Literal = type(Type, Content)
 2827    ;   Literal = Literal0
 2828    ),
 2829    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
 2830save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 2831    rdf_is_bnode(Value),
 2832    !,
 2833    rdf_id(Name, BaseURI, NameText),
 2834    format(Out, '~N~*|<', [Indent]),
 2835    rdf_write_id(Out, NameText),
 2836    (   named_anon(Value, NodeID)
 2837    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
 2838    ;   (   rdf(S1, Name, Value),
 2839            rdf(S2, P2, Value),
 2840            (S1 \== S2 ; Name \== P2)
 2841        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
 2842            atom_concat('bn', N, NodeID),
 2843            assertz(named_anon(Value, NodeID))
 2844        ;   true
 2845        ),
 2846        SubIndent is Indent + 2,
 2847        (   rdf_collection(Value)
 2848        ->  save_about(Out, BaseURI, Value),
 2849            format(Out, ' rdf:parseType="Collection">~n', []),
 2850            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 2851        ;   format(Out, '>~n', []),
 2852            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
 2853        ),
 2854        format(Out, '~N~*|</', [Indent]),
 2855        rdf_write_id(Out, NameText),
 2856        format(Out, '>~n', [])
 2857    ).
 2858save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 2859    option(inline(true), Options),
 2860    has_attributes(Value, Options),
 2861    \+ inlined(Value),
 2862    !,
 2863    assertz(inlined(Value)),
 2864    rdf_id(Name, BaseURI, NameText),
 2865    format(Out, '~N~*|<', [Indent]),
 2866    rdf_write_id(Out, NameText),
 2867    SubIndent is Indent + 2,
 2868    (   rdf_collection(Value)
 2869    ->  save_about(Out, BaseURI, Value),
 2870        format(Out, ' rdf:parseType="Collection">~n', []),
 2871        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 2872    ;   format(Out, '>~n', []),
 2873        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
 2874    ),
 2875    format(Out, '~N~*|</', [Indent]),
 2876    rdf_write_id(Out, NameText),
 2877    format(Out, '>~n', []).
 2878save_attribute(body, Name=Value, BaseURI, Out, Indent, _DB) :-
 2879    stream_property(Out, encoding(Encoding)),
 2880    rdf_value(Value, BaseURI, QVal, Encoding),
 2881    rdf_id(Name, BaseURI, NameText),
 2882    format(Out, '~N~*|<', [Indent]),
 2883    rdf_write_id(Out, NameText),
 2884    format(Out, ' rdf:resource="~w"/>', [QVal]).
 2885
 2886has_attributes(URI, Options) :-
 2887    graph(Options, DB),
 2888    rdf_db(URI, _, _, DB),
 2889    !.
 2890
 2891%!  save_body_literal(+Literal, +NameText, +BaseURI,
 2892%!                    +Out, +Indent, +Options).
 2893
 2894save_body_literal(lang(Lang, Value),
 2895                  NameText, BaseURI, Out, Indent, Options) :-
 2896    !,
 2897    format(Out, '~N~*|<', [Indent]),
 2898    rdf_write_id(Out, NameText),
 2899    (   memberchk(document_language(Lang), Options)
 2900    ->  write(Out, '>')
 2901    ;   rdf_id(Lang, BaseURI, LangText),
 2902        format(Out, ' xml:lang="~w">', [LangText])
 2903    ),
 2904    save_attribute_value(Value, Out, Indent),
 2905    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2906save_body_literal(type(Type, DOM),
 2907                  NameText, _BaseURI, Out, Indent, Options) :-
 2908    rdf_equal(Type, rdf:'XMLLiteral'),
 2909    !,
 2910    (   atom(DOM)
 2911    ->  format(Out, '~N~*|<', [Indent]),
 2912        rdf_write_id(Out, NameText),
 2913        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
 2914        rdf_write_id(Out, NameText), write(Out, '>')
 2915    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
 2916    ).
 2917save_body_literal(type(Type, Value),
 2918                  NameText, BaseURI, Out, Indent, _) :-
 2919    !,
 2920    format(Out, '~N~*|<', [Indent]),
 2921    rdf_write_id(Out, NameText),
 2922    stream_property(Out, encoding(Encoding)),
 2923    rdf_value(Type, BaseURI, QVal, Encoding),
 2924    format(Out, ' rdf:datatype="~w">', [QVal]),
 2925    save_attribute_value(Value, Out, Indent),
 2926    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2927save_body_literal(Literal,
 2928                  NameText, _, Out, Indent, _) :-
 2929    atomic(Literal),
 2930    !,
 2931    format(Out, '~N~*|<', [Indent]),
 2932    rdf_write_id(Out, NameText),
 2933    write(Out, '>'),
 2934    save_attribute_value(Literal, Out, Indent),
 2935    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2936save_body_literal(DOM,
 2937                  NameText, BaseURI, Out, Indent, Options) :-
 2938    rdf_equal(Type, rdf:'XMLLiteral'),
 2939    save_body_literal(type(Type, DOM),
 2940                      NameText, BaseURI, Out, Indent, Options).
 2941
 2942save_attribute_value(Value, Out, _) :-  % strings
 2943    atom(Value),
 2944    !,
 2945    stream_property(Out, encoding(Encoding)),
 2946    xml_quote_cdata(Value, QVal, Encoding),
 2947    write(Out, QVal).
 2948save_attribute_value(Value, Out, _) :-  % numbers
 2949    number(Value),
 2950    !,
 2951    writeq(Out, Value).             % quoted: preserve floats
 2952save_attribute_value(Value, _Out, _) :-
 2953    throw(error(save_attribute_value(Value), _)).
 2954
 2955%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
 2956%
 2957%   Save an XMLLiteral value. We already emitted
 2958%
 2959%           ==
 2960%           <prop parseType="literal"
 2961%           ==
 2962%
 2963%   but  not  the  terminating  =|>|=.  We  need  to  establish  the
 2964%   namespaces used in the DOM. The   namespaces in the rdf document
 2965%   are in the nsmap-option of Options.
 2966
 2967save_xml_literal(DOM, Attr, Out, Indent, Options) :-
 2968    xml_is_dom(DOM),
 2969    !,
 2970    memberchk(nsmap(NsMap), Options),
 2971    id_to_atom(Attr, Atom),
 2972    xml_write(Out,
 2973              element(Atom, ['rdf:parseType'='Literal'], DOM),
 2974              [ header(false),
 2975                indent(Indent),
 2976                nsmap(NsMap)
 2977              ]).
 2978save_xml_literal(NoDOM, _, _, _, _) :-
 2979    must_be(xml_dom, NoDOM).
 2980
 2981id_to_atom(NS:Local, Atom) :-
 2982    !,
 2983    atomic_list_concat([NS,Local], :, Atom).
 2984id_to_atom(ID, ID).
 2985
 2986
 2987%!  rdf_collection(+URI) is semidet.
 2988%
 2989%   True  if  URI  represents  an  RDF    list  that  fits  the  RDF
 2990%   parseType=collection syntax. This means it is   a linked list of
 2991%   bnode-cells with a rdf:first that is   a  resource, optionally a
 2992%   rdf:type that is an rdf:list and the list ends in an rdf:nil.
 2993
 2994:- rdf_meta
 2995    rdf_collection(r),
 2996    collection_p(r,r). 2997
 2998rdf_collection(rdf:nil) :- !.
 2999rdf_collection(Cell) :-
 3000    rdf_is_bnode(Cell),
 3001    findall(F, rdf(Cell, rdf:first, F), [_]),
 3002    findall(F, rdf(Cell, rdf:rest, F), [Rest]),
 3003    forall(rdf(Cell, P, V),
 3004           collection_p(P, V)),
 3005    rdf_collection(Rest).
 3006
 3007collection_p(rdf:first, V) :- atom(V).
 3008collection_p(rdf:rest, _).
 3009collection_p(rdf:type, rdf:'List').
 3010
 3011
 3012%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
 3013
 3014rdf_save_list(_, List, _, _, _) :-
 3015    rdf_equal(List, rdf:nil),
 3016    !.
 3017rdf_save_list(Out, List, BaseURI, Indent, Options) :-
 3018    rdf_has(List, rdf:first, First),
 3019    (   rdf_is_bnode(First)
 3020    ->  nl(Out),
 3021        rdf_save_subject(Out, First, BaseURI, Indent, Options)
 3022    ;   stream_property(Out, encoding(Encoding)),
 3023        rdf_value(First, BaseURI, QVal, Encoding),
 3024        format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
 3025               [Indent, QVal])
 3026    ),
 3027    flag(rdf_db_saved_triples, X, X+3),
 3028    (   rdf_has(List, rdf:rest, List2),
 3029        \+ rdf_equal(List2, rdf:nil)
 3030    ->  rdf_save_list(Out, List2, BaseURI, Indent, Options)
 3031    ;   true
 3032    ).
 3033
 3034
 3035%!  rdf_id(+Resource, +BaseURI, -NSLocal)
 3036%
 3037%   Generate a NS:Local  name  for   Resource  given  the  indicated
 3038%   default namespace. This call is used for elements.
 3039
 3040rdf_id(Id, BaseURI, Local) :-
 3041    assertion(atom(BaseURI)),
 3042    atom_concat(BaseURI, Local, Id),
 3043    sub_atom(Local, 0, 1, _, #),
 3044    !.
 3045rdf_id(Id, _, NS:Local) :-
 3046    iri_xml_namespace(Id, Full, Local),
 3047    ns(NS, Full),
 3048    !.
 3049rdf_id(Id, _, NS:Local) :-
 3050    ns(NS, Full),
 3051    Full \== '',
 3052    atom_concat(Full, Local, Id),
 3053    !.
 3054rdf_id(Id, _, Id).
 3055
 3056
 3057%!  rdf_write_id(+Out, +NSLocal) is det.
 3058%
 3059%   Write an identifier. We cannot use native write on it as both NS
 3060%   and Local can be operators.
 3061
 3062rdf_write_id(Out, NS:Local) :-
 3063    !,
 3064    format(Out, '~w:~w', [NS, Local]).
 3065rdf_write_id(Out, Atom) :-
 3066    write(Out, Atom).
 3067
 3068%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
 3069%
 3070%   According  to  "6.4  RDF  URI  References"  of  the  RDF  Syntax
 3071%   specification, a URI reference is  UNICODE string not containing
 3072%   control sequences, represented as  UTF-8   and  then  as escaped
 3073%   US-ASCII.
 3074
 3075rdf_value(Base, Base, '', _) :- !.
 3076rdf_value(V, Base, Text, Encoding) :-
 3077    atom_concat(Base, Local, V),
 3078    sub_atom(Local, 0, _, _, #),
 3079    !,
 3080    xml_quote_attribute(Local, Text, Encoding).
 3081rdf_value(V, _, Text, Encoding) :-
 3082    ns(NS, Full),
 3083    atom_concat(Full, Local, V),
 3084    xml_is_name(Local),
 3085    !,
 3086    xml_quote_attribute(Local, QLocal, Encoding),
 3087    atomic_list_concat(['&', NS, (';'), QLocal], Text).
 3088rdf_value(V, _, Q, Encoding) :-
 3089    xml_quote_attribute(V, Q, Encoding).
 3090
 3091
 3092                 /*******************************
 3093                 *       MATCH AND COMPARE      *
 3094                 *******************************/
 3095
 3096%!  rdf_compare(-Dif, +Object1, +Object2) is det.
 3097%
 3098%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
 3099%   ordering, we define a complete ordering   of terms. The ordering
 3100%   is defines as:
 3101%
 3102%     - Blank nodes < IRIs < Literals
 3103%     - Numeric literals < other literals
 3104%     - Numeric literals are compared by value and then by type,
 3105%       where Integer < Decimal < Double
 3106%     - Other literals are compare lexically, case insensitive.
 3107%       If equal, uppercase preceeds lowercase.  If still equal,
 3108%       the types are compared lexically.
 3109
 3110%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
 3111%
 3112%   True if Label matches Pattern according to   How.  How is one of
 3113%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
 3114%   compatibility, `exact` is a synonym for `icase`.
 3115
 3116
 3117                 /*******************************
 3118                 *      DEPRECATED MATERIAL     *
 3119                 *******************************/
 3120
 3121%!  rdf_split_url(+Prefix, +Local, -URL) is det.
 3122%!  rdf_split_url(-Prefix, -Local, +URL) is det.
 3123%
 3124%   Split/join a URL.  This functionality is moved to library(sgml).
 3125%
 3126%   @deprecated Use iri_xml_namespace/3. Note that the argument
 3127%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).
 3128
 3129rdf_split_url(Prefix, Local, URL) :-
 3130    atomic(URL),
 3131    !,
 3132    iri_xml_namespace(URL, Prefix, Local).
 3133rdf_split_url(Prefix, Local, URL) :-
 3134    atom_concat(Prefix, Local, URL).
 3135
 3136%!  rdf_url_namespace(+URL, -Namespace)
 3137%
 3138%   Namespace is the namespace of URL.
 3139%
 3140%   @deprecated Use iri_xml_namespace/2
 3141
 3142rdf_url_namespace(URL, Prefix) :-
 3143    iri_xml_namespace(URL, Prefix).
 3144
 3145
 3146                 /*******************************
 3147                 *            LITERALS          *
 3148                 *******************************/
 3149
 3150%!  rdf_new_literal_map(-Map) is det.
 3151%
 3152%   Create a new literal map, returning an opaque handle.
 3153
 3154%!  rdf_destroy_literal_map(+Map) is det.
 3155%
 3156%   Destroy a literal map. After this call,   further use of the Map
 3157%   handle is illegal. Additional synchronisation  is needed if maps
 3158%   that are shared between threads are   destroyed to guarantee the
 3159%   handle    is    no    longer    used.    In    some    scenarios
 3160%   rdf_reset_literal_map/1 provides a safe alternative.
 3161
 3162%!  rdf_reset_literal_map(+Map) is det.
 3163%
 3164%   Delete all content from the literal map.
 3165
 3166%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
 3167%
 3168%   Add a relation between  Key  and  Value   to  the  map.  If this
 3169%   relation already exists no action is performed.
 3170
 3171%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
 3172%
 3173%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
 3174%   Map, unify KeyCount with the number of  keys in Map. This serves
 3175%   two purposes. Derived maps, such as  the stem and metaphone maps
 3176%   need to know about new  keys   and  it avoids additional foreign
 3177%   calls for doing the progress in rdf_litindex.pl.
 3178
 3179%!  rdf_delete_literal_map(+Map, +Key) is det.
 3180%
 3181%   Delete Key and all associated values from the map.
 3182
 3183%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
 3184%
 3185%   Delete the association between Key and Value from the map.
 3186
 3187%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
 3188%
 3189%   Unify ValueList with an ordered set  of values associated to all
 3190%   keys from KeyList. Each key in  KeyList   is  either an atom, an
 3191%   integer or a term not(Key).  If   not-terms  are provided, there
 3192%   must be at least one positive keywords. The negations are tested
 3193%   after establishing the positive matches.
 3194
 3195%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
 3196%
 3197%   Realises various queries on the key-set:
 3198%
 3199%     * all
 3200%
 3201%     Unify Answer with an ordered list of all keys.
 3202%     * key(+Key)
 3203%
 3204%     Succeeds if Key is a key in the map and unify Answer with the
 3205%     number of values associated with the key. This provides a fast
 3206%     test of existence without fetching the possibly large
 3207%     associated value set as with rdf_find_literal_map/3.
 3208%
 3209%     * prefix(+Prefix)
 3210%     Unify Answer with an ordered set of all keys that have the
 3211%     given prefix. See section 3.1 for details on prefix matching.
 3212%     Prefix must be an atom. This call is intended for
 3213%     auto-completion in user interfaces.
 3214%
 3215%     * ge(+Min)
 3216%     Unify Answer with all keys that are larger or equal to the
 3217%     integer Min.
 3218%
 3219%     * le(+Max)
 3220%     Unify Answer with all keys that are smaller or equal to the integer
 3221%     Max.
 3222%
 3223%     * between(+Min, +Max) Unify
 3224%     Answer with all keys between Min and Max (including).
 3225
 3226%!  rdf_statistics_literal_map(+Map, -KeyValue)
 3227%
 3228%   Query some statistics of the map. Provides KeyValue are:
 3229%
 3230%     * size(-Keys, -Relations)
 3231%     Unify Keys with the total key-count of the index and Relation
 3232%     with the total Key-Value count.
 3233
 3234
 3235
 3236                 /*******************************
 3237                 *             MISC             *
 3238                 *******************************/
 3239
 3240%!  rdf_version(-Version) is det.
 3241%
 3242%   True when Version is the numerical version-id of this library.
 3243%   The version is computed as
 3244%
 3245%           Major*10000 + Minor*100 + Patch.
 3246
 3247%!  rdf_set(+Term) is det.
 3248%
 3249%   Set properties of the RDF store.  Currently defines:
 3250%
 3251%     * hash(+Hash, +Parameter, +Value)
 3252%     Set properties for a triple index.  Hash is one of =s=,
 3253%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
 3254%     is one of:
 3255%
 3256%       - size
 3257%       Value defines the number of entries in the hash-table.
 3258%       Value is rounded _down_ to a power of 2.  After setting
 3259%       the size explicitly, auto-sizing for this table is
 3260%       disabled.  Setting the size smaller than the current
 3261%       size results in a =permission_error= exception.
 3262%
 3263%       - average_chain_len
 3264%       Set maximum average collision number for the hash.
 3265%
 3266%       - optimize_threshold
 3267%       Related to resizing hash-tables.  If 0, all triples are
 3268%       moved to the new size by the garbage collector.  If more
 3269%       then zero, those of the last Value resize steps remain at
 3270%       their current location.  Leaving cells at their current
 3271%       location reduces memory fragmentation and slows down
 3272%       access.
 3273
 3274%!  rdf_md5(+Graph, -MD5) is det.
 3275%
 3276%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
 3277%   digest itself is represented as an   atom holding a 32-character
 3278%   hexadecimal   string.   The   library   maintains   the   digest
 3279%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
 3280%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
 3281%   changed since the last rdf_load/[1,2]  call provides a practical
 3282%   means for checking whether the file needs to be saved.
 3283%
 3284%   @deprecated New code should use rdf_graph_property(Graph,
 3285%   hash(Hash)).
 3286
 3287%!  rdf_generation(-Generation) is det.
 3288%
 3289%   True when Generation is the current  generation of the database.
 3290%   Each modification to the database  increments the generation. It
 3291%   can be used to check the validity of cached results deduced from
 3292%   the database. Committing a non-empty  transaction increments the
 3293%   generation by one.
 3294%
 3295%   When inside a transaction,  Generation  is   unified  to  a term
 3296%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
 3297%   that the transaction was started at   generation 4 of the global
 3298%   database and we have  created  3   new  generations  inside  the
 3299%   transaction. Note that this choice  of representation allows for
 3300%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
 3301%   generation in one  transaction  with   a  generation  in another
 3302%   transaction is meaningless.
 3303
 3304%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
 3305%
 3306%   Return the number of alternatives as   indicated by the database
 3307%   internal hashed indexing. This is a rough measure for the number
 3308%   of alternatives we can expect for   an  rdf_has/3 call using the
 3309%   given three arguments. When  called   with  three variables, the
 3310%   total number of triples is returned.   This  estimate is used in
 3311%   query  optimisation.  See  also    rdf_predicate_property/2  and
 3312%   rdf_statistics/1 for additional information to help optimizers.
 3313
 3314%!  rdf_debug(+Level) is det.
 3315%
 3316%   Set debugging to Level.  Level is an integer 0..9.  Default is
 3317%   0 no debugging.
 3318
 3319%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
 3320%
 3321%   Computes the MD5 hash from Text, which is an atom, string or list of
 3322%   character codes. Times is  an  integer  >=   1.  When  >  0, the MD5
 3323%   algorithm is repeated Times times on the generated hash. This can be
 3324%   used for password encryption algorithms   to  make generate-and-test
 3325%   loops slow.
 3326%
 3327%   @deprecated Obviously, password hash  primitives   do  not belong in
 3328%   this library. The  library(crypto)  from   the  \const{ssl}  package
 3329%   provides extensive support for  hashes.   The  \const{clib}  package
 3330%   provides library(crypt) to  access  the   OS  (Unix)  password  hash
 3331%   implementation as well as  lightweight   implementations  of several
 3332%   popular hashes.
 3333
 3334
 3335                 /*******************************
 3336                 *             MESSAGES         *
 3337                 *******************************/
 3338
 3339:- multifile
 3340    prolog:message//1. 3341
 3342prolog:message(rdf(Term)) -->
 3343    message(Term).
 3344
 3345message(loaded(How, What, BaseURI, Triples, Time)) -->
 3346    how(How),
 3347    source(What),
 3348    into(What, BaseURI),
 3349    in_time(Triples, Time).
 3350message(save_removed_duplicates(N, Subject)) -->
 3351    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
 3352message(saved(File, SavedSubjects, SavedTriples)) -->
 3353    [ 'Saved ~D triples about ~D subjects into ~p'-
 3354      [SavedTriples, SavedSubjects, File]
 3355    ].
 3356message(using_namespace(Id, NS)) -->
 3357    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
 3358message(inconsistent_cache(DB, Graphs)) -->
 3359    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
 3360      '~t~8|~p'-[Graphs]
 3361    ].
 3362message(guess_format(Ext)) -->
 3363    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
 3364message(meta(not_expanded(G))) -->
 3365    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
 3366message(deprecated(rdf_unload(Graph))) -->
 3367    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
 3368
 3369
 3370how(load)   --> [ 'Loaded' ].
 3371how(parsed) --> [ 'Parsed' ].
 3372
 3373source(SourceURL) -->
 3374    { uri_file_name(SourceURL, File),
 3375      !,
 3376      file_base_name(File, Base)    % TBD: relative file?
 3377    },
 3378    [ ' "~w"'-[Base] ].
 3379source(SourceURL) -->
 3380    [ ' "~w"'-[SourceURL] ].
 3381
 3382into(_, _) --> [].                      % TBD
 3383
 3384in_time(Triples, ParseTime) -->
 3385    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
 3386    ]