View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2009-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_turtle_write,
   37          [ rdf_save_turtle/2,                  % +File, +Options
   38            rdf_save_canonical_turtle/2,        % +File, +Options
   39            rdf_save_trig/2,                    % +File, +Options
   40            rdf_save_canonical_trig/2,          % +File, +Options
   41            rdf_save_ntriples/2                 % +File, +Options
   42          ]).   43:- use_module(library(semweb/rdf_db)).   44:- use_module(library(semweb/turtle), []). % we make calls to public preds here
   45:- use_module(library(option)).   46:- use_module(library(record)).   47:- use_module(library(error)).   48:- use_module(library(lists)).   49:- use_module(library(rbtrees)).   50:- use_module(library(apply)).   51:- use_module(library(url)).   52:- use_module(library(pairs)).   53:- use_module(library(debug)).   54:- use_module(library(sgml_write)).   55:- use_module(library(sgml)).   56
   57:- predicate_options(rdf_save_turtle/2, 2,
   58                     [ graph(atom),
   59                       base(atom),
   60                       encoding(oneof([utf8])),
   61                       indent(nonneg),
   62                       tab_distance(nonneg),
   63                       silent(boolean),
   64                       subject_white_lines(nonneg),
   65                       align_prefixes(boolean),
   66                       user_prefixes(boolean),
   67                       prefixes(list),
   68                       only_known_prefixes(boolean),
   69                       comment(boolean),
   70                       group(boolean),
   71                       inline_bnodes(boolean),
   72                       single_line_bnodes(boolean),
   73                       abbreviate_literals(boolean),
   74                       canonize_numbers(boolean),
   75                       canonical(boolean),
   76                       a(boolean),
   77                       expand(any)
   78                     ]).   79:- predicate_options(rdf_save_canonical_turtle/2, 2,
   80                     [ pass_to(rdf_save_turtle/2, 2)
   81                     ]).   82
   83/** <module> Turtle - Terse RDF Triple Language writer
   84
   85This module implements the Turtle  language   for  representing  the RDF
   86triple model as defined by Dave Beckett  from the Institute for Learning
   87and Research Technology University of Bristol in the document:
   88
   89  * http://www.w3.org/TeamSubmission/turtle/
   90  * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance
   91
   92The Turtle format is designed as an   RDF  serialization that is easy to
   93read and write by both machines and  humans. Due to the latter property,
   94this library goes a long way in trying to produce human-readable output.
   95
   96In addition to the  human-readable  format,   this  library  can write a
   97_canonical_ representation of RDF graphs.   The canonical representation
   98has the following properties:
   99
  100  * Equivalent graphs result in the same document.  Graphs are
  101  considered equivalent iff they contain the same _set_ of
  102  triples, regardless of the labeling of blank nodes in the
  103  graph.
  104
  105  * Changes to the graph are diff-friendly.  This means
  106
  107    - Prefixes are combined in the header and thus changes
  108    to the namespaces only result in changes in the header.
  109    - Blank nodes that are used only once (including collections)
  110    are written in-line with the object they belong to.
  111    - For other blank nodes we to realise stable labeling that
  112    is based on property-values.
  113
  114@tbd    Low-level string output takes 28% of the time.  Move to C?
  115*/
  116
  117:- record
  118    tw_state(graph,                 % graph being saved
  119             graphs:list(atom),     % TriG graphs being saved
  120             base,                  % The base-URI
  121             encoding=utf8,         % Desired encoding
  122             indent:nonneg=8,       % Indent for ; and ,-lists
  123             tab_distance:nonneg=8, % Tab distance
  124             silent:boolean=false,  % If true, do not print a message
  125             subject_white_lines:nonneg=1,%Extra lines between subjects
  126             a:boolean=true,        % Use 'a' for rdf:type
  127             align_prefixes:boolean=true,%Align prefix declarations
  128             prefixes:list,         % Provide prefixes
  129             user_prefixes:boolean=true,% Use rdf_current_ns/2?
  130             only_known_prefixes:boolean=false,% Only use known prefixes
  131             comment:boolean=true,  % write some comments into the file
  132             group:boolean=true,    % Group using ; and ,
  133             inline_bnodes:boolean=true, % Inline single-used bnodes
  134             single_line_bnodes:boolean=false, % No newline after ;
  135             abbreviate_literals:boolean=true, % Abbreviate known types
  136             canonize_numbers:boolean=false, % How to write numbers
  137             canonical:boolean=false,
  138             expand:any=lookup,     % Access to the triples
  139                                    % Private fields
  140             bnode_id=0,            % Incrementing bnode-id
  141             nodeid_map,            % RBTree mapping NodeIDs to Refs
  142             bnode_hash,            % RBTree holding reuse-count of hashes
  143             subject_count=0,       % # subjects saved
  144             triple_count=0,        % # triples saved
  145             base_root,             % Root URL of base
  146             base_dir,              % Directory
  147             base_path,             % Path of base
  148             prefix_map).           % List of Prefix-Map
  149
  150
  151:- meta_predicate
  152    rdf_save_turtle(+, :),
  153    rdf_save_canonical_turtle(+, :),
  154    rdf_save_canonical_trig(+, :),
  155    rdf_save_trig(+, :).  156
  157%!  rdf_save_turtle(+Out, :Options) is det.
  158%
  159%   Save an RDF graph as Turtle.  Options processed are:
  160%
  161%       * a(+Boolean)
  162%       If =true= (default), use =a= for the predicate =rdf:type=.
  163%       Otherwise use the full resource.
  164%       * align_prefixes(+Boolean)
  165%       Nicely align the @prefix declarations
  166%       * base(+Base)
  167%       Save relative to the given Base
  168%       * canonize_numbers(+Boolean)
  169%       If =true= (default =false=), emit numeric datatypes using
  170%       Prolog's write to achieve canonical output.
  171%       * comment(+Boolean)
  172%       It =true= (default), write some informative comments
  173%       between the output segments
  174%       * encoding(+Encoding)
  175%       Encoding used for the output stream.  Default is UTF-8.
  176%       * expand(:Goal)
  177%       Query an alternative graph-representation.  See below.
  178%       * indent(+Column)
  179%       Indentation for ; -lists.  `0' does not indent, but
  180%       writes on the same line.  Default is 8.
  181%       * graph(+Graph)
  182%       Save only the named graph
  183%       * group(+Boolean)
  184%       If =true= (default), using P-O and O-grouping.
  185%       * inline_bnodes(+Boolean)
  186%       if =true= (default), inline bnodes that are used once.
  187%       * abbreviate_literals(+Boolean)
  188%       if =true= (default), omit the type if allowed by turtle.
  189%       * only_known_prefixes(+Boolean)
  190%       Only use prefix notation for known prefixes.  Without, some
  191%       documents produce _huge_ amounts of prefixes.
  192%       * prefixes(+List)
  193%       If provided, uses exactly these prefixes.  List is a list
  194%       of prefix specifications, where each specification is either
  195%       a term _Prefix_-_URI_ or a prefix that is known to
  196%       rdf_current_prefix/2.
  197%       * silent(+Boolean)
  198%       If =true= (default =false=), do not print the final
  199%       informational message.
  200%       * single_line_bnodes(+Bool)
  201%       If =true= (default =false=), write [...] and (...) on a
  202%       single line.
  203%       * subject_white_lines(+Count)
  204%       Extra white lines to insert between statements about a
  205%       different subject.  Default is 1.
  206%       * tab_distance(+Tab)
  207%       Distance between tab-stops.  `0' forces the library to
  208%       use only spaces for layout.  Default is 8.
  209%       * user_prefixes(+Boolean)
  210%       If =true= (default), use prefixes from rdf_current_prefix/2.
  211%
  212%   The option =expand= allows  for   serializing  alternative graph
  213%   representations. It is called through   call/5,  where the first
  214%   argument is the expand-option, followed  by   S,P,O,G.  G is the
  215%   graph-option (which is by  default   a  variable).  This notably
  216%   allows for writing RDF graphs   represented  as rdf(S,P,O) using
  217%   the following code fragment:
  218%
  219%       ==
  220%       triple_in(RDF, S,P,O,_G) :-
  221%           member(rdf(S,P,O), RDF).
  222%
  223%           ...,
  224%           rdf_save_turtle(Out, [ expand(triple_in(RDF)) ]),
  225%       ==
  226%
  227%   @param  Out is one of stream(Stream), a stream handle, a file-URL
  228%           or an atom that denotes a filename.
  229
  230rdf_save_turtle(Spec, QOptions) :-
  231    meta_options(is_meta, QOptions, Options),
  232    thread_self(Me),
  233    thread_statistics(Me, cputime, T0),
  234    must_be(list, Options),
  235    make_tw_state(Options, State0, _Rest),
  236    init_base(State0, State1),
  237    init_prefix_map(State1, State),
  238    tw_state_encoding(State, Enc),
  239    setup_call_cleanup(
  240        open_output(Spec, Enc, Stream, Cleanup),
  241        ( tw_prefix_map(State, Stream),
  242          tw_graph(State, Stream)
  243        ),
  244        Cleanup),
  245    thread_statistics(Me, cputime, T1),
  246    Time is T1-T0,
  247    tw_state_triple_count(State, SavedTriples),
  248    tw_state_subject_count(State, SavedSubjects),
  249    (   tw_state_silent(State, true)
  250    ->  true
  251    ;   print_message(informational,
  252                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples)))
  253    ).
  254
  255is_meta(expand).
  256
  257%!  rdf_save_canonical_turtle(+Spec, :Options) is det.
  258%
  259%   Save triples in  a  canonical  format.   This  is  the  same  as
  260%   rdf_save_turtle/2, but using different defaults. In particular:
  261%
  262%       * encoding(utf8),
  263%       * indent(0),
  264%       * tab_distance(0),
  265%       * subject_white_lines(1),
  266%       * align_prefixes(false),
  267%       * user_prefixes(false)
  268%       * comment(false),
  269%       * group(false),
  270%       * single_line_bnodes(true)
  271%
  272%   @tbd Work in progress. Notably blank-node handling is
  273%   incomplete.
  274
  275rdf_save_canonical_turtle(Spec, M:Options) :-
  276    canonical_options(CannonicalOptions, Options),
  277    rdf_save_turtle(Spec, M:CannonicalOptions).
  278
  279canonical_options([ encoding(utf8),
  280                    indent(0),
  281                    tab_distance(0),
  282                    subject_white_lines(1),
  283                    align_prefixes(false),
  284                    user_prefixes(false),
  285                    comment(false),
  286                    group(false),
  287                    single_line_bnodes(true),
  288                    canonical(true)
  289                  | Options
  290                  ],
  291                  Options).
  292
  293
  294%!  rdf_save_ntriples(+Spec, :Options) is det.
  295%
  296%   Save RDF using ntriples format. The  ntriples format is a subset
  297%   of Turtle, writing each triple fully qualified on its own line.
  298
  299rdf_save_ntriples(File, Options):-
  300    rdf_save_turtle(File,
  301                    [ comment(false),
  302                      encoding(utf8),
  303                      group(false),
  304                      prefixes([]),
  305                      subject_white_lines(0),
  306                      a(false),
  307                      inline_bnodes(false),
  308                      abbreviate_literals(false)
  309                    | Options
  310                    ]).
  311
  312
  313%!  rdf_save_trig(+Spec, :Options) is det.
  314%
  315%   Save multiple RDF graphs into a TriG  file. Options are the same
  316%   as   for   rdf_save_turtle/2.   rdf_save_trig/2    ignores   the
  317%   graph(+Graph)  option  and  instead   processes  one  additional
  318%   option:
  319%
  320%     - graphs(+ListOfGraphs)
  321%     List of graphs to save. When omitted, all graphs in the RDF
  322%     store are stored in the TriG file.
  323
  324rdf_save_trig(Spec, QOptions) :-
  325    meta_options(is_meta, QOptions, Options),
  326    thread_self(Me),
  327    thread_statistics(Me, cputime, T0),
  328    must_be(list, Options),
  329    make_tw_state(Options, State0, _Rest),
  330    init_base(State0, State1),
  331    trig_graphs(State1, Graphs),
  332    init_prefix_map(State1, Graphs, State2),
  333    tw_state_encoding(State2, Enc),
  334    setup_call_cleanup(
  335        open_output(Spec, Enc, Stream, Cleanup),
  336        ( tw_prefix_map(State2, Stream),
  337          tw_trig_graphs(Graphs, Stream, State2, State)
  338        ),
  339        Cleanup),
  340    thread_statistics(Me, cputime, T1),
  341    Time is T1-T0,
  342    tw_state_triple_count(State, SavedTriples),
  343    tw_state_subject_count(State, SavedSubjects),
  344    length(Graphs, SavedGraphs),
  345    (   tw_state_silent(State, true)
  346    ->  true
  347    ;   print_message(informational,
  348                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples, SavedGraphs)))
  349    ).
  350
  351%!  rdf_save_canonical_trig(+Spec, :Options) is det.
  352%
  353%   Save     triples     in     a      canonical     format.     See
  354%   rdf_save_canonical_turtle/2 foir details.
  355
  356
  357rdf_save_canonical_trig(Spec, M:Options) :-
  358    canonical_options(CannonicalOptions, Options),
  359    rdf_save_trig(Spec, M:CannonicalOptions).
  360
  361tw_trig_graphs([], _, State, State).
  362tw_trig_graphs([H|T], Stream, State0, State) :-
  363    set_graph_of_tw_state(H, State0, State1),
  364    nl(Stream),
  365    tw_resource(H, State1, Stream),
  366    format(Stream, ' {~n', []),
  367    tw_graph(State1, Stream),
  368    format(Stream, '~N}~n', []),
  369    set_bnode_id_of_tw_state(0, State1, State2),
  370    set_nodeid_map_of_tw_state(_, State2, State3),
  371    set_bnode_hash_of_tw_state(_, State3, State4),
  372    tw_trig_graphs(T, Stream, State4, State).
  373
  374
  375%!  trig_graphs(+State, -Graphs) is det.
  376%
  377%   True when Graphs is the (sorted) list of graphs we must save. If
  378%   the _expand_ argument is used and   no  graphs are specified, it
  379%   enumerates all triples and extracts the graphs.
  380
  381trig_graphs(State, Graphs) :-
  382    tw_state_graphs(State, Graphs),
  383    (   nonvar(Graphs)
  384    ->  true
  385    ;   tw_state_expand(State, Expand),
  386        (   Expand == lookup
  387        ->  findall(G, rdf_graph(G), Graphs0)
  388        ;   findall(G, call(Expand,_S,_P,_O,G), Graphs0)
  389        ),
  390        sort(Graphs0, Graphs)
  391    ).
  392
  393
  394%!  open_output(+Spec, +Encoding, -Stream, -Cleanup) is det.
  395%
  396%   Open output Spec, returning a stream using Encoding.
  397%
  398%   @param  Cleanup is a goal that must be used to revert the side
  399%           effects of open_output/4.
  400
  401open_output(stream(Out), Encoding, Out, Cleanup) :-
  402    !,
  403    stream_property(Out, encoding(Old)),
  404    (   (   Old == Encoding
  405        ;   Old == wchar_t          % Internal encoding
  406        )
  407    ->  Cleanup = true
  408    ;   set_stream(Out, encoding(Encoding)),
  409        Cleanup = set_stream(Out, encoding(Old))
  410    ).
  411open_output(Stream, Encoding, Out, Cleanup) :-
  412    \+ atom(Stream),
  413    is_stream(Stream),
  414    !,
  415    open_output(stream(Stream), Encoding, Out, Cleanup).
  416open_output(Spec, Encoding, Out,
  417            close(Out)) :-
  418    out_to_file(Spec, File),
  419    open(File, write, Out, [encoding(Encoding)]).
  420
  421out_to_file(URL, File) :-
  422    atom(URL),
  423    file_name_to_url(File, URL),
  424    !.
  425out_to_file(File, File).
  426
  427
  428                 /*******************************
  429                 *            PREFIXES          *
  430                 *******************************/
  431
  432%!  init_prefix_map(+State, -State) is det.
  433%
  434%   Set  the  prefix_map  of  State.  The  prefix  map  is  list  of
  435%   Prefix-URI of prefixes to use for   emitting the graph requested
  436%   in State. If multiple prefixes are present   where  the one is a
  437%   prefix of the other, the longer one appears first in the list.
  438
  439init_prefix_map(State0, State) :-
  440    tw_state_prefixes(State0, Prefixes),
  441    nonvar(Prefixes),
  442    !,
  443    user_prefix_map(Prefixes, PrefixMap),
  444    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  445init_prefix_map(State0, State) :-
  446    tw_state_graph(State0, Graph),
  447    graph_prefix_map(State0, Graph, PrefixMap),
  448    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  449
  450init_prefix_map(State0, _Graphs, State) :-      % TriG version
  451    tw_state_prefixes(State0, Prefixes),
  452    nonvar(Prefixes),
  453    !,
  454    user_prefix_map(Prefixes, PrefixMap),
  455    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  456init_prefix_map(State0, Graphs, State) :-       % TriG version
  457    maplist(graph_prefixes(State0), Graphs, NestedPrefixes),
  458    append(NestedPrefixes, Prefixes0),
  459    sort(Prefixes0, Prefixes),
  460    prefix_map(State0, Prefixes, PrefixMap),
  461    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  462
  463graph_prefix_map(State, Graph, PrefixMap) :-
  464    graph_prefixes(State, Graph, Prefixes),
  465    prefix_map(State, Prefixes, PrefixMap).
  466
  467graph_prefixes(State0, Graph, Prefixes) :-
  468    tw_state_expand(State0, Expand),
  469    tw_state_only_known_prefixes(State0, OnlyKnown),
  470    rdf_graph_prefixes(Graph, Prefixes,
  471                       [ filter(turtle_prefix(OnlyKnown)),
  472                         expand(Expand),
  473                         min_count(2),
  474                         get_prefix(turtle:iri_turtle_prefix)
  475                       ]).
  476
  477prefix_map(State, Prefixes, PrefixMap) :-
  478    remove_base(State, Prefixes, Prefixes2),
  479    prefix_names(Prefixes2, State, Pairs),
  480    transpose_pairs(Pairs, URI_Abrevs),
  481    reverse(URI_Abrevs, RURI_Abrevs),
  482    flip_pairs(RURI_Abrevs, PrefixMap).
  483
  484%!  user_prefix_map(+Prefixes, -PrefixMap) is det.
  485%
  486%   Convert a list of prefix specifications   to  a list Prefix-URI,
  487%   longest URI first.
  488
  489user_prefix_map(Prefixes, PrefixMap) :-
  490    must_be(list, Prefixes),
  491    maplist(prefix_pair, Prefixes, Pairs),
  492    map_list_to_pairs(prefix_length, Pairs, LenPairs),
  493    sort(LenPairs, LenPairs1),
  494    pairs_values(LenPairs1, RevPrefixMap),
  495    reverse(RevPrefixMap, PrefixMap).
  496
  497prefix_pair(Prefix-URI, Prefix-URI) :-
  498    !,
  499    must_be(atom, Prefix),
  500    must_be(atom, URI).
  501prefix_pair(Prefix, Prefix-URI) :-
  502    must_be(atom, Prefix),
  503    (   rdf_current_prefix(Prefix, URI)
  504    ->  true
  505    ;   existence_error(prefix, Prefix)
  506    ).
  507
  508prefix_length(_-URI, Len) :- atom_length(URI, Len).
  509
  510%!  turtle_prefix(+OnlyKnown, +Where, +Prefix, +URI) is semidet.
  511%
  512%   Test whether we want  to  include   the  proposed  prefix in the
  513%   @prefix declaration.
  514
  515:- public turtle_prefix/4.              % called through rdf_graph_prefixes/3.
  516
  517turtle_prefix(true, _, Prefix, _) :-
  518    !,
  519    rdf_current_prefix(_, Prefix),
  520    !.
  521turtle_prefix(_, _, Prefix, URI) :-
  522    sub_atom(Prefix, _, 1, 0, Last),
  523    turtle_prefix_char(Last),
  524    atom_concat(Prefix, Local, URI),
  525    \+ sub_atom(Local, _, _, _, '.').
  526
  527turtle_prefix_char('#').
  528turtle_prefix_char('/').
  529
  530
  531remove_base(State, Prefixes, PrefixesNoBase) :-
  532    tw_state_base_dir(State, BaseDir),
  533    atom(BaseDir),
  534    !,
  535    delete(Prefixes, BaseDir, PrefixesNoBase).
  536remove_base(_State, Prefixes, Prefixes).
  537
  538flip_pairs([], []).
  539flip_pairs([Key-Val|Pairs], [Val-Key|Flipped]) :-
  540    flip_pairs(Pairs, Flipped).
  541
  542prefix_names(URIs, State, Prefixes) :-
  543    prefix_names(URIs, State, 1, Prefixes, []).
  544
  545prefix_names([], _, _, List, List) :- !.
  546prefix_names(URIs, State, Len, Prefixes, Tail) :-
  547    prefix_names(URIs, State, Len, Prefixes, PTail, Rest),
  548    Len1 is Len + 1,
  549    prefix_names(Rest, State, Len1, PTail, Tail).
  550
  551prefix_names(URIs, State, Len, Prefixes, PTail, Rest) :-
  552    map_list_to_pairs(propose_abbrev(State, Len), URIs, Pairs),
  553    !,
  554    keysort(Pairs, Sorted),
  555    unique(Sorted, Prefixes, PTail, Rest).
  556prefix_names(URIs, _, _, Prefixes, PTail, []) :-
  557    number_prefixes(URIs, 1, Prefixes, PTail).
  558
  559number_prefixes([], _, PL, PL).
  560number_prefixes([H|T0], N, [P-H|PL], T) :-
  561    atomic_concat(ns, N, P),
  562    succ(N, N1),
  563    number_prefixes(T0, N1, PL, T).
  564
  565unique([], L, L, []).
  566unique([A-U|T0], [A-U|T], L, Rest) :-
  567    T0 \= [A-_|_],
  568    !,
  569    unique(T0, T, L, Rest).
  570unique([A-U|T0], Prefixes, L, [U|Rest0]) :-
  571    strip_keys(T0, A, T1, Rest0, Rest),
  572    unique(T1, Prefixes, L, Rest).
  573
  574strip_keys([A-U|T0], A, T, [U|R0], R) :-
  575    !,
  576    strip_keys(T0, A, T, R0, R).
  577strip_keys(L, _, L, R, R).
  578
  579
  580%!  propose_abbrev(+State, +Len, +URI, -Abbrev) is multi.
  581%
  582%   Propose an abbreviation for URI.  Backtracking yields longer
  583%   ones.
  584
  585propose_abbrev(_, _, URI, Abbrev) :-
  586    well_known_ns(Abbrev, URI),
  587    !.
  588propose_abbrev(State, _, URI, Abbrev) :-
  589    tw_state_user_prefixes(State, true),
  590    rdf_current_prefix(Abbrev, URI),
  591    !.
  592propose_abbrev(_, Len, URI, Abbrev) :-
  593    namespace_parts(URI, Parts),
  594    include(abbrev_part, Parts, Names),
  595    reverse(Names, RevNames),
  596    length(Use, Len),
  597    append(Use, _, RevNames),
  598    atomic_list_concat(Use, -, Abbrev).
  599
  600abbrev_part(X) :-
  601    xml_name(X),
  602    \+ well_known_ns(X, _),
  603    \+ well_known_extension(X).
  604
  605well_known_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  606well_known_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
  607well_known_ns(owl,  'http://www.w3.org/2002/07/owl#').
  608well_known_ns(xsd,  'http://www.w3.org/2001/XMLSchema#').
  609well_known_ns(dc,   'http://purl.org/dc/elements/1.1/').
  610
  611well_known_extension(ttl).
  612well_known_extension(nt).
  613well_known_extension(n3).
  614well_known_extension(xml).
  615well_known_extension(rdf).
  616well_known_extension(owl).
  617
  618%!  namespace_parts(+URL, -Parts)
  619
  620namespace_parts(URL, Parts) :-
  621    atom_codes(URL, Codes),
  622    phrase(parts(Parts), Codes),
  623    !.
  624namespace_parts(URL, _) :-
  625    format(user_error, 'Couldn\'t split ~q~n', [URL]),
  626    fail.
  627
  628parts(List) --> sep2, parts2(List).
  629
  630parts2([H|T]) -->
  631    string(Codes),  {Codes \== []},
  632    sep,
  633    !,
  634    {atom_codes(H, Codes)},
  635    parts2(T).
  636parts2([]) --> [].
  637
  638string([]) --> [].
  639string([H|T]) --> [H], string(T).
  640
  641sep --> sep_char, sep2.
  642sep([], []).
  643
  644sep2 --> sep_char, !, sep2.
  645sep2 --> [].
  646
  647sep_char --> "/".
  648sep_char --> ":".
  649sep_char --> ".".
  650sep_char --> "?".
  651sep_char --> "#".
  652
  653
  654%!  init_base(+State0, -State) is det.
  655%
  656%   Initialise dealing with the base URI.  It sets two attributes of
  657%   the state: base_root and base_path.
  658
  659init_base(State0, State) :-
  660    tw_state_base(State0, BaseURI),
  661    atom(BaseURI),
  662    !,
  663    parse_url(BaseURI, Attributes),
  664    include(root_part, Attributes, RootAttrs),
  665    parse_url(BaseRoot, RootAttrs),
  666    memberchk(path(BasePath), Attributes),
  667    file_directory_name(BasePath, BaseDir),
  668    atomic_list_concat([BaseRoot, BaseDir, /], BaseDirURI),
  669    set_base_root_of_tw_state(BaseRoot, State0, State1),
  670    set_base_path_of_tw_state(BasePath, State1, State2),
  671    set_base_dir_of_tw_state(BaseDirURI, State2, State).
  672init_base(State, State).
  673
  674root_part(protocol(_)).
  675root_part(host(_)).
  676root_part(port(_)).
  677
  678
  679                 /*******************************
  680                 *              SAVE            *
  681                 *******************************/
  682
  683%!  tw_graph(+State, +Out) is det.
  684%
  685%   Write an RDF graph as Turtle data.
  686%
  687%   @tbd Write unconnected and multi-connected blank-nodes.
  688
  689tw_graph(State, Out) :-
  690    subjects(State, Subjects),
  691    length(Subjects, SubjectCount),
  692    inc_subject_count(State, SubjectCount),
  693    partition(rdf_is_bnode, Subjects, BNodes, ProperSubjects),
  694    maplist(pair_var, BNodes, Pairs),
  695    ord_list_to_rbtree(Pairs, BNTree),
  696    tw_state_nodeid_map(State, BNTree),
  697    (   ProperSubjects == []
  698    ->  true
  699    ;   length(ProperSubjects, PSCount),
  700        comment(State, 'Named toplevel resources (~D)', [PSCount], Out),
  701        tw_proper_subjects(ProperSubjects, State, Out)
  702    ),
  703    tw_bnodes(Pairs, State, Out).
  704
  705pair_var(BNode, BNode-_).
  706
  707tw_prefix_map(State, Out) :-
  708    tw_state_prefix_map(State, PrefixMap),
  709    tw_prefix_map(PrefixMap, State, Out).
  710
  711%!  tw_prefix_map(+PrefixMap, +State, +Out) is det.
  712%
  713%   Write the @base and @prefix declarations
  714
  715tw_prefix_map(PrefixMap, State, Out) :-
  716    tw_state_align_prefixes(State, true),
  717    !,
  718    longest_prefix(PrefixMap, 0, Length),
  719    PrefixCol is Length+10,
  720    tw_base(PrefixCol, State, Out),
  721    tw_prefix_map(PrefixMap, PrefixCol, State, Out).
  722tw_prefix_map(PrefixMap, State, Out) :-
  723    tw_base(0, State, Out),
  724    tw_prefix_map(PrefixMap, 0, State, Out).
  725
  726longest_prefix([], L, L).
  727longest_prefix([Prefix-_|T], L0, L) :-
  728    atom_length(Prefix, L1),
  729    L2 is max(L0, L1),
  730    longest_prefix(T, L2, L).
  731
  732
  733tw_base(Col, State, Out) :-
  734    tw_state_base(State, Base),
  735    atom(Base),
  736    !,
  737    format(Out, '@base ~t~*|', [Col]),
  738    turtle:turtle_write_uri(Out, Base),
  739    format(Out, ' .~n', []).
  740tw_base(_, _, _).
  741
  742
  743tw_prefix_map([], _, _, _).
  744tw_prefix_map([Prefix-URI|T], Col, State, Out) :-
  745    format(Out, '@prefix ~t~w: ~*|', [Prefix, Col]),
  746    tw_relative_uri(URI, State, Out),
  747    format(Out, ' .~n', []),
  748    (   T == []
  749    ->  true
  750    ;   tw_prefix_map(T, Col, State, Out)
  751    ).
  752
  753
  754%!  tw_proper_subjects(+Subjects, +State, +Out) is det.
  755%
  756%   Write the subjects that are not Bnodes.
  757
  758tw_proper_subjects([], _, _).
  759tw_proper_subjects([H|T], State, Out) :-
  760    separate_subjects(State, Out),
  761    tw_subject(H, H, State, Out),
  762    tw_proper_subjects(T, State, Out).
  763
  764
  765separate_subjects(State, Out) :-
  766    tw_state_subject_white_lines(State, ExtraLines),
  767    put_n(ExtraLines, '\n', Out).
  768
  769%!  tw_subject(+URI, +State, +Out) is det.
  770%
  771%   Write a toplevel non-bnode subject.
  772
  773tw_subject(URI, Ref, State, Out) :-
  774    subject_triples(URI, State, Pairs),
  775    length(Pairs, Count),
  776    inc_triple_count(State, Count),
  777    group_po(Pairs, Grouped),
  778    tw_subject_triples(Grouped, Ref, State, Out).
  779
  780group_po(Pairs, Grouped) :-
  781    group_pairs_by_key(Pairs, Grouped0),
  782    rdf_equal(rdf:type, RDFType),
  783    (   select(RDFType-Types, Grouped0, Grouped1)
  784    ->  Grouped = [RDFType-Types|Grouped1]
  785    ;   Grouped = Grouped0
  786    ).
  787
  788%!  tw_bnodes(+Pairs, +State, +Out) is det.
  789%
  790%   Write the Bnodes. Pairs is a list   URI-Ref, where Ref is one of
  791%   =written= if the Bnode is already written;   an integer if it is
  792%   used multiple times or a variable if   it  has not been written.
  793%   The order in which we deal with bnodes is defined as follows:
  794%
  795%       * First, write the bnodes that are not referenced at all
  796%       as toplevel bnodes using [ ... ] notation.
  797%
  798%       * Next, write the bnodes that need written as toplevel
  799%       nodes using the _:XX notation because they are referenced
  800%       multiple times in the graph. Continue this process until it
  801%       is exhausted.
  802
  803tw_bnodes(Pairs, State, Out) :-
  804    tw_top_bnodes(Pairs, State, Out, Rest1),
  805    tw_numbered_bnodes(Rest1, State, Out, 1, Rest2),
  806    tw_cyclic_bnodes(Rest2, State, Out, 0).
  807
  808
  809tw_numbered_bnodes([], _, _, _, []) :- !.
  810tw_numbered_bnodes(Pairs, State, Out, Level, Rest) :-
  811    multi_referenced(Pairs, RefPairs, Rest0),
  812    (   RefPairs == []
  813    ->  Rest = Rest0
  814    ;   length(RefPairs, Count),
  815        comment(State, 'Level ~D multi-referenced blank-nodes (~D)',
  816                [ Level, Count ], Out),
  817        tw_ref_bnodes(RefPairs, State, Out),
  818        Level1 is Level + 1,
  819        tw_numbered_bnodes(Rest0, State, Out, Level1, Rest)
  820    ).
  821
  822multi_referenced([], [], []).
  823multi_referenced([H|T], RefPairs, Rest) :-
  824    H = _-Ref,
  825    (   Ref == written
  826    ->  multi_referenced(T, RefPairs, Rest)
  827    ;   var(Ref)
  828    ->  Rest = [H|TR],
  829        multi_referenced(T, RefPairs, TR)
  830    ;   assertion(Ref = bnode(_)),
  831        RefPairs = [H|TRP],         % assigned reference
  832        multi_referenced(T, TRP, Rest)
  833    ).
  834
  835tw_ref_bnodes([], _, _).
  836tw_ref_bnodes([BNode-Ref|T], State, Out) :-
  837    separate_subjects(State, Out),
  838    tw_subject(BNode, Ref, State, Out),
  839    tw_ref_bnodes(T, State, Out).
  840
  841
  842%!  tw_top_bnodes(+Pairs, +State, +Out, -Rest)
  843%
  844%   Write the top bnodes: those that  do   not  appear  as an object
  845%   anywhere.
  846
  847tw_top_bnodes(Pairs, State, Out, Rest) :-
  848    unreferenced(Pairs, State, TopBNodes, Rest),
  849    (   TopBNodes == []
  850    ->  true
  851    ;   length(TopBNodes, Count),
  852        comment(State, 'Toplevel blank-nodes (~D)', [Count], Out),
  853        sort_bnodes(TopBNodes, SortedTopBNodes, State),
  854        tw_top_bnodes(SortedTopBNodes, State, Out)
  855    ).
  856
  857unreferenced([], _, [], []).
  858unreferenced([H|T], State, UnrefPairs, Rest) :-
  859    H = BNode-Ref,
  860    (   Ref == written
  861    ->  unreferenced(T, State, UnrefPairs, Rest)
  862    ;   var(Ref),
  863        object_link_count(BNode, State, 0)
  864    ->  UnrefPairs = [H|URT],
  865        unreferenced(T, State, URT, Rest)
  866    ;   Rest = [H|TR],
  867        unreferenced(T, State, UnrefPairs, TR)
  868    ).
  869
  870tw_top_bnodes([], _, _).
  871tw_top_bnodes([BNode-_|T], State, Out) :-
  872    tw_bnode(BNode, State, Out),
  873    tw_top_bnodes(T, State, Out).
  874
  875
  876tw_bnode(BNode, State, Out) :-
  877    subject_triples(BNode, State, Pairs),
  878    length(Pairs, Count),
  879    inc_triple_count(State, Count),
  880    (   tw_state_inline_bnodes(State, true)
  881    ->  tw_bnode_triples(Pairs, State, Out),
  882        format(Out, ' .~n', [])
  883    ;   next_bnode_id(State, BNode, Ref),
  884        tw_bnode_ntriples(Pairs, Ref, State, Out)
  885    ).
  886
  887tw_bnode_triples(Pairs, State, Out) :-
  888    group_po(Pairs, Grouped),
  889    (   tw_state_single_line_bnodes(State, true)
  890    ->  format(Out, '[ ', []),
  891        tw_triples(Grouped, -1, State, Out),
  892        format(Out, ' ]', [])
  893    ;   line_position(Out, Indent),
  894        format(Out, '[ ', []),
  895        line_position(Out, AIndent),
  896        tw_triples(Grouped, AIndent, State, Out),
  897        nl_indent(Out, State, Indent),
  898        format(Out, ']', [])
  899    ).
  900
  901tw_bnode_ntriples([], _, _, _).
  902tw_bnode_ntriples([P-O|T], Ref, State, Out) :-
  903    tw_bnode_ref(Ref, Out),
  904    format(Out, ' ', []),
  905    tw_predicate(P, State, Out),
  906    format(Out, ' ', []),
  907    tw_object(O, State, Out),
  908    format(Out, ' .~n', []),
  909    tw_bnode_ntriples(T, Ref, State, Out).
  910
  911
  912%!  tw_cyclic_bnodes(+Pairs, +BNode, +State, +Out, +Cycle)
  913%
  914%   The rest. These are groups of bnodes  that are reachable, but we
  915%   cannot find a starting point, neither from a named resource, nor
  916%   from an unlinked bnode. As long as we are not considering stable
  917%   canonical output, we can break the cycle at any point.
  918
  919tw_cyclic_bnodes([], _State, _Out, _) :- !.
  920tw_cyclic_bnodes(Pairs, State, Out, Cycle0) :-
  921    (   tw_state_canonical(State, true)
  922    ->  sort_bnode_pairs(Pairs, BNodes, State)
  923    ;   BNodes = Pairs
  924    ),
  925    succ(Cycle0, Cycle),
  926    BNodes = [BNode-Ref|_],
  927    next_bnode_id(State, BNode, Ref),
  928    comment(State, 'Breaking cycle ~D', [Cycle], Out),
  929    tw_numbered_bnodes(Pairs, State, Out, 1, Rest),
  930    tw_cyclic_bnodes(Rest, State, Out, Cycle).
  931
  932
  933%!  tw_subject_triples(+Grouped, +Subject, +State, +Out)
  934%
  935%   Save triples on Subject.  Combine groups of triples with the
  936%   same subject (;) and same subject+predicate (,).
  937%
  938%   @param  Subject is either a URI or an integer.  The latter is
  939%           used for writing a named bnode.
  940
  941tw_subject_triples([], _, _, _) :- !.
  942tw_subject_triples(Grouped, URI, State, Out) :-
  943    tw_state_group(State, false),
  944    !,
  945    tw_ungrouped_triples(Grouped, URI, State, Out).
  946tw_subject_triples(Grouped, URI, State, Out) :-
  947    tw_resource(URI, State, Out),
  948    (   tw_state_indent(State, Indent),
  949        Indent > 0
  950    ->  nl_indent(Out, State, Indent)
  951    ;   put_char(Out, ' '),
  952        line_position(Out, Indent)
  953    ),
  954    tw_triples(Grouped, Indent, State, Out),
  955    format(Out, ' .~n', []).
  956
  957%!  tw_ungrouped_triples(+Grouped, +URI, +State, +Out)
  958%
  959%   Write triples for subject URI as one line per triple.  Used
  960%   for canonical output.
  961
  962tw_ungrouped_triples([], _, _, _).
  963tw_ungrouped_triples([P-Vs|Groups], URI, State, Out) :-
  964    partition(rdf_is_bnode, Vs, BNVs, ProperVs),
  965    tw_ungrouped_values(ProperVs, P, URI, State, Out),
  966    sort_bnodes(BNVs, SortedBNVs, State),
  967    tw_ungrouped_values(SortedBNVs, P, URI, State, Out),
  968    tw_ungrouped_triples(Groups, URI, State, Out).
  969
  970tw_ungrouped_values([], _, _, _, _).
  971tw_ungrouped_values([V|T], P, URI, State, Out) :-
  972    tw_resource(URI, State, Out),
  973    put_char(Out, ' '),
  974    tw_predicate(P, State, Out),
  975    put_char(Out, ' '),
  976    tw_object(V, State, Out),
  977    format(Out, ' .~n', []),
  978    tw_ungrouped_values(T, P, URI, State, Out).
  979
  980
  981%!  tw_triples(+Groups, +Indent, +State, +Out) is det.
  982%
  983%   Triple writer that uses ; and ,- grouping
  984
  985tw_triples([P-Vs|MoreGroups], Indent, State, Out) :-
  986    tw_write_pvs(Vs, P, State, Out),
  987    (   MoreGroups == []
  988    ->  true
  989    ;   format(Out, ' ;', []),
  990        nl_indent(Out, State, Indent),
  991        tw_triples(MoreGroups, Indent, State, Out)
  992    ).
  993
  994tw_write_pvs(Values, P, State, Out) :-
  995    tw_predicate(P, State, Out),
  996    put_char(Out, ' '),
  997    line_position(Out, Indent),
  998    tw_write_vs(Values, Indent, State, Out).
  999
 1000tw_predicate(P, State, Out) :-
 1001    (   rdf_equal(P, rdf:type),
 1002        tw_state_a(State, true)
 1003    ->  format(Out, 'a', [])
 1004    ;   tw_resource(P, State, Out)
 1005    ).
 1006
 1007tw_write_vs([H|T], Indent, State, Out) :-
 1008    tw_object(H, State, Out),
 1009    (   T == []
 1010    ->  true
 1011    ;   format(Out, ' ,', []),
 1012        nl_indent(Out, State, Indent),
 1013        tw_write_vs(T, Indent, State, Out)
 1014    ).
 1015
 1016%!  tw_object(+Value, +State, +Out) is det.
 1017%
 1018%   Write the object of a triple.
 1019
 1020tw_object(Value, State, Out) :-
 1021    rdf_is_bnode(Value),
 1022    !,
 1023    tw_bnode_object(Value, State, Out).
 1024tw_object(Value, State, Out) :-
 1025    atom(Value),
 1026    !,
 1027    tw_resource(Value, State, Out).
 1028tw_object(Literal, State, Out) :-
 1029    tw_literal(Literal, State, Out).
 1030
 1031%!  tw_bnode_object(+Value, +State, +Out) is det.
 1032%
 1033%   Write a Bnode value.  There are a number of cases:
 1034%
 1035%       * The BNode was already written.  Write the same ref.
 1036%       * The BNode is not shared.  Inline and set =written=
 1037%       * The BNode is shared.  Generate a NodeID and store it
 1038%       * The BNode is once as object: Generate a NodeID
 1039%       * The BNode is more than once object: Generate a NodeID
 1040%         and put in table.
 1041
 1042tw_bnode_object(BNode, State, Out) :-
 1043    tw_state_nodeid_map(State, BNTree),
 1044    rb_lookup(BNode, Ref, BNTree),
 1045    !,
 1046    (   var(Ref)
 1047    ->  (   tw_state_inline_bnodes(State, true),
 1048            tw_unshared_bnode(BNode, State, Out)
 1049        ->  Ref = written
 1050        ;   next_bnode_id(State, BNode, Ref),
 1051            tw_bnode_ref(Ref, Out)
 1052        )
 1053    ;   tw_bnode_ref(Ref, Out)
 1054    ).
 1055tw_bnode_object(BNode, State, Out) :-
 1056    object_link_count(BNode, State, N),
 1057    N > 1,
 1058    !,
 1059    tw_state_nodeid_map(State, BNTree0),
 1060    rb_insert(BNTree0, BNode, Ref, BNTree),
 1061    set_nodeid_map_of_tw_state(BNTree, State),
 1062    next_bnode_id(State, BNode, Ref),
 1063    tw_bnode_ref(Ref, Out).
 1064tw_bnode_object(BNode, State, Out) :-
 1065    next_bnode_id(State, BNode, Ref),
 1066    tw_bnode_ref(Ref, Out).
 1067
 1068tw_bnode_ref(bnode(Ref), Out) :-
 1069    (   integer(Ref)
 1070    ->  format(Out, '_:bn~w', [Ref])
 1071    ;   format(Out, '_:~w', [Ref])
 1072    ).
 1073
 1074%!  tw_unshared_bnode(+BNode, +State, +Out) is semidet.
 1075%
 1076%   Write a bnode if this is the only place it is used.
 1077
 1078tw_unshared_bnode(BNode, State, Out) :-
 1079    object_link_count(BNode, State, 1),
 1080    subject_triples(BNode, State, Pairs),
 1081    (   Pairs == []
 1082    ->  format(Out, '[]', [])
 1083    ;   pairs_unshared_collection(Pairs, State, Collection)
 1084    ->  (   Collection == []
 1085        ->  format(Out, '()', [])
 1086        ;   tw_state_nodeid_map(State, BNTree),
 1087            rb_lookup(BNode, written, BNTree),
 1088            length(Collection, NMembers),
 1089            Triples is 2*NMembers,
 1090            inc_triple_count(State, Triples),
 1091            (   tw_state_single_line_bnodes(State, true)
 1092            ->  format(Out, '( ', []),
 1093                tw_collection(Collection, -1, State, Out),
 1094                format(Out, ' )', [])
 1095            ;   line_position(Out, Indent),
 1096                format(Out, '( ', []),
 1097                line_position(Out, AIndent),
 1098                tw_collection(Collection, AIndent, State, Out),
 1099                nl_indent(Out, State, Indent),
 1100                format(Out, ')', [])
 1101            )
 1102        )
 1103    ;   tw_bnode_triples(Pairs, State, Out)
 1104    ).
 1105
 1106tw_collection([H|T], Indent, State, Out) :-
 1107    tw_object(H, State, Out),
 1108    (   T \== []
 1109    ->  nl_indent(Out, State, Indent),
 1110        tw_collection(T, Indent, State, Out)
 1111    ;   true
 1112    ).
 1113
 1114%!  unshared_collection(+URI, +State, -Members) is semidet.
 1115%
 1116%   True if URI denodes an RDF list that  is made up from bnodes, is
 1117%   linked exactly once  to  its  context   and  contains  no  extra
 1118%   triples.
 1119
 1120unshared_collection(C, _, []) :-
 1121    rdf_equal(C, rdf:nil),
 1122    !.
 1123unshared_collection(C, State, List) :-
 1124    rdf_is_bnode(C),
 1125    object_link_count(C, State, 1),
 1126    tw_state_nodeid_map(State, BNTree),
 1127    rb_lookup(C, written, BNTree),
 1128    subject_triples(C, State, Pairs),
 1129    pairs_unshared_collection(Pairs, State, List).
 1130
 1131pairs_unshared_collection(Pairs, State, [H|T]) :-
 1132    rdf_equal(rdf:first, RDFFirst),
 1133    rdf_equal(rdf:rest, RDFRest),
 1134    Pairs = [ RDFFirst-H,
 1135              RDFRest-Rest
 1136            | More
 1137            ],
 1138    (   More == []
 1139    ;   rdf_equal(rdf:type, RDFType),
 1140        rdf_equal(rdf:'List', RDFList),
 1141        More == [RDFType-RDFList]
 1142    ),
 1143    unshared_collection(Rest, State, T).
 1144
 1145
 1146%!  object_link_count(+BNode, +STate, -Count) is det.
 1147%
 1148%   Number of times BNode is used as an object in the graph
 1149
 1150object_link_count(BNode, State, Count) :-
 1151    tw_state_graph(State, Graph),
 1152    tw_state_expand(State, Expand),
 1153    findall(S-P, call(Expand,S,P,BNode,Graph), Pairs0),
 1154    sort(Pairs0, Pairs),            % remove duplicates
 1155    length(Pairs, Count).
 1156
 1157%!  nl_indent(+Out, +State, +Indent) is det.
 1158%
 1159%   Write a newline and indent to column Indent.
 1160
 1161nl_indent(Out, _, -1) :-
 1162    !,
 1163    put_char(Out, ' ').
 1164nl_indent(Out, State, Indent) :-
 1165    nl(Out),
 1166    tw_state_tab_distance(State, TD),
 1167    (   TD == 0
 1168    ->  tab(Out, Indent)
 1169    ;   Tabs is Indent//TD,
 1170        Spaces is Indent mod TD,
 1171        put_n(Tabs, '\t', Out),
 1172        put_n(Spaces, ' ', Out)
 1173    ).
 1174
 1175put_n(N, Char, Out) :-
 1176    N > 0,
 1177    !,
 1178    put_char(Out, Char),
 1179    N2 is N - 1,
 1180    put_n(N2, Char, Out).
 1181put_n(_, _, _).
 1182
 1183
 1184%!  subject_triples(+URI, +State, -Pairs) is det.
 1185%
 1186%   Pairs is a sorted list of P-O  pairs representing all triples on
 1187%   the subject URI.
 1188
 1189subject_triples(URI, State, Pairs) :-
 1190    tw_state_graph(State, Graph),
 1191    tw_state_expand(State, Expand),
 1192    findall(P-O, call(Expand, URI, P, O, Graph), Pairs0),
 1193    sort(Pairs0, Pairs).
 1194
 1195
 1196                 /*******************************
 1197                 *          GRAPH-LOGIC         *
 1198                 *******************************/
 1199
 1200%!  subjects(+State, -Subjects:ord_set) is det.
 1201%
 1202%   Subjects is a list of all subjects in the graph requested in
 1203%   State.
 1204
 1205subjects(State, Subjects) :-
 1206    tw_state_expand(State, Expand),
 1207    tw_state_graph(State, Graph),
 1208    (   Expand == lookup,
 1209        atom(Graph),
 1210        (   rdf_graph_property(Graph, triples(Count))
 1211        ->  true
 1212        ;   Count = 0                       % non-existing graph
 1213        ),
 1214        rdf_statistics(triples(Total)),
 1215        Count * 10 < Total
 1216    ->  findall(S, rdf(S,_,_,Graph), List),
 1217        sort(List, Subjects)
 1218    ;   Expand \== lookup
 1219    ->  findall(S, call(Expand, S,_,_,Graph), List),
 1220        sort(List, Subjects)
 1221    ;   findall(Subject, subject(State, Subject), AllSubjects),
 1222        sort(AllSubjects, Subjects)
 1223    ).
 1224
 1225
 1226subject(State, Subject) :-
 1227    tw_state_graph(State, Graph),
 1228    (   atom(Graph)
 1229    ->  rdf_resource(Subject),
 1230        (   rdf(Subject, _, _, Graph)
 1231        ->  true
 1232        )
 1233    ;   rdf_subject(Subject)
 1234    ).
 1235
 1236
 1237:- public lookup/4.                     % called from expand hook.
 1238
 1239lookup(S,P,O,G) :-
 1240    (   var(G)
 1241    ->  rdf(S,P,O)
 1242    ;   rdf(S,P,O,G)
 1243    ).
 1244
 1245
 1246                 /*******************************
 1247                 *        CANONICAL ORDERING    *
 1248                 *******************************/
 1249
 1250/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1251This section deals with the two problems of canonical graphs:
 1252
 1253    * Keep blank nodes in the same order
 1254    * Assign stable names to blank nodes that we need to
 1255      give a name.  There are two cases: (1) a blank nodes is
 1256      used in more than one place and (2) a blank node series
 1257      is cyclic.
 1258- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1259
 1260%!  sort_bnodes(+BNodes, -Sorted, +State) is det.
 1261%
 1262%   Sort a list of blank nodes.
 1263
 1264sort_bnodes(BNodes, Sorted, _State) :-
 1265    sort(BNodes, Sorted).
 1266
 1267%!  sort_bnode_pairs(+Pairs, -Sorted, +State) is det.
 1268%
 1269%   Sort a list of Pairs BNode-Ref
 1270
 1271sort_bnode_pairs(Pairs, Sorted, _State) :-
 1272    sort(Pairs, Sorted).
 1273
 1274%!  bnode_to_term(+BNode, -Term, +State)
 1275%
 1276%   Term is a canonical representation of the graph formed by BNode.
 1277%   The transformation of a bnode is
 1278%
 1279%           bnode(p-[o1,o2,..], ..)
 1280%
 1281%   The arguments are alphabetically sorted   on predicate (can't we
 1282%   leave the preds out them?) and   the  objects are alphabetically
 1283%   sorted.  Sorting multiple bnode values?
 1284
 1285
 1286%!  next_bnode_id(+State, +BNode, -Ref) is det.
 1287%
 1288%   Generate a node-id for BNode.   When writing non-canonically, we
 1289%   simply number the bnodes.  Otherwise  we   want  a  more  stable
 1290%   numbering. Our numbering is a hash of  the content of the bnode.
 1291%   It is not unlikely that we find muliple copies, and therefore we
 1292%   number the full id is bn_<hash>_<n>, <n> counting 0...
 1293
 1294next_bnode_id(State, _BNode, bnode(Ref)) :-
 1295    tw_state_canonical(State, false),
 1296    !,
 1297    tw_state_bnode_id(State, Ref0),
 1298    Ref is Ref0+1,
 1299    nb_set_bnode_id_of_tw_state(Ref, State).
 1300next_bnode_id(State, BNode, bnode(Ref)) :-
 1301    bnode_hash(BNode, Hash),
 1302    tw_state_bnode_hash(State, BNHash),
 1303    (   var(BNHash)
 1304    ->  rb_empty(BNHash)
 1305    ;   true
 1306    ),
 1307    (   rb_update(BNHash, Hash, C0, C, BNHash1)
 1308    ->  C is C0+1
 1309    ;   C = 0,
 1310        rb_insert(BNHash, Hash, C, BNHash1)
 1311    ),
 1312    set_bnode_hash_of_tw_state(BNHash1, State),
 1313    format(atom(Ref), 'bn_~w_~d', [Hash, C]).
 1314
 1315%!  bnode_hash(+BNode, -Hash) is det.
 1316%
 1317%   Hash is the hash-value for a bnode.
 1318%
 1319%   @tbd: Hash on content.
 1320
 1321bnode_hash(BNode, Hash) :-
 1322    term_hash(BNode, Hash).
 1323
 1324
 1325                 /*******************************
 1326                 *           PRIMITIVES         *
 1327                 *******************************/
 1328
 1329%!  tw_resource(+Resource, +State, +Out) is det.
 1330%
 1331%   Write a resource
 1332
 1333tw_resource(BNodeID, _, Out) :-
 1334    BNodeID = bnode(_),
 1335    !,
 1336    tw_bnode_ref(BNodeID, Out).
 1337tw_resource(Resource, State, Out) :-
 1338    tw_state_prefix_map(State, PrefixMap),
 1339    member(Prefix-Full, PrefixMap),
 1340    atom_concat(Full, Name, Resource),
 1341    (   turtle:turtle_pn_local(Name)
 1342    ->  true
 1343    ;   Name == ''
 1344    ),
 1345    !,
 1346    format(Out, '~w:', [Prefix]),
 1347    turtle:turtle_write_pn_local(Out, Name).
 1348tw_resource(Resource, State, Out) :-
 1349    tw_relative_uri(Resource, State, Out).
 1350
 1351
 1352tw_relative_uri(Resource, State, Out) :-
 1353    tw_state_base_root(State, Root),
 1354    atom(Root),
 1355    atom_concat(Root, ResPath, Resource),
 1356    sub_atom(ResPath, 0, _, _, /),
 1357    tw_state_base_path(State, BasePath),
 1358    relative_path(ResPath, BasePath, RelPath),
 1359    !,
 1360    turtle:turtle_write_uri(Out, RelPath).
 1361tw_relative_uri(Resource, _, Out) :-
 1362    turtle:turtle_write_uri(Out, Resource).
 1363
 1364relative_path(Path, RelTo, RelPath) :-
 1365    atomic_list_concat(PL, /, Path),
 1366    atomic_list_concat(RL, /, RelTo),
 1367    delete_common_prefix(PL, RL, PL1, PL2),
 1368    to_dot_dot(PL2, DotDot, PL1),
 1369    atomic_list_concat(DotDot, /, RelPath).
 1370
 1371delete_common_prefix([H|T01], [H|T02], T1, T2) :-
 1372    !,
 1373    delete_common_prefix(T01, T02, T1, T2).
 1374delete_common_prefix(T1, T2, T1, T2).
 1375
 1376to_dot_dot([], Tail, Tail).
 1377to_dot_dot([_], Tail, Tail) :- !.
 1378to_dot_dot([_|T0], ['..'|T], Tail) :-
 1379    to_dot_dot(T0, T, Tail).
 1380
 1381
 1382%!  tw_literal(+Literal, +State, +Out) is det.
 1383%
 1384%   Write a literal value to the stream Out.
 1385
 1386tw_literal(literal(type(Type, Value)), State, Out) :-
 1387    !,
 1388    tw_typed_literal(Type, Value, State, Out).
 1389tw_literal(literal(lang(Lang, Value)), State, Out) :-
 1390    !,
 1391    tw_quoted_string(Value, State, Out),
 1392    downcase_atom(Lang, TurtleLang),        % Turtle lang = [a-z]+('-'[a-z0-9]+)*
 1393    format(Out, '@~w', [TurtleLang]).
 1394tw_literal(literal(Value), State, Out) :-
 1395    atom(Value),
 1396    !,
 1397    rdf_equal(xsd:string, TypeString),
 1398    tw_typed_literal(TypeString, Value, State, Out).
 1399                                                % Add types automatically
 1400tw_literal(literal(Value), State, Out) :-
 1401    integer(Value),
 1402    !,
 1403    rdf_equal(Type, xsd:integer),
 1404    tw_typed_literal(Type, Value, State, Out).
 1405tw_literal(literal(Value), State, Out) :-
 1406    float(Value),
 1407    !,
 1408    rdf_equal(Type, xsd:double),
 1409    tw_typed_literal(Type, Value, State, Out).
 1410tw_literal(literal(Value), State, Out) :-
 1411    xml_is_dom(Value),
 1412    !,
 1413    rdf_equal(Type, rdf:'XMLLiteral'),
 1414    tw_typed_literal(Type, Value, State, Out).
 1415tw_literal(Literal, _State, _Out) :-
 1416    type_error(rdf_literal, Literal).
 1417
 1418
 1419tw_typed_literal(Type, Value, State, Out) :-
 1420    tw_state_abbreviate_literals(State, true),
 1421    tw_abbreviated_literal(Type, Value, State, Out),
 1422    !.
 1423tw_typed_literal(Type, Value, State, Out) :-
 1424    (atom(Value) ; string(Value)),
 1425    !,
 1426    tw_quoted_string(Value, State, Out),
 1427    write(Out, '^^'),
 1428    tw_resource(Type, State, Out).
 1429tw_typed_literal(Type, Value, State, Out) :-
 1430    rdf_equal(Type, rdf:'XMLLiteral'),
 1431    !,
 1432    with_output_to(string(Tmp),
 1433                   xml_write(Value, [header(false)])),
 1434    tw_quoted_string(Tmp, State, Out),
 1435    write(Out, '^^'),
 1436    tw_resource(Type, State, Out).
 1437tw_typed_literal(Type, Value, State, Out) :-
 1438    format(string(Tmp), '~q', [Value]),
 1439    tw_quoted_string(Tmp, State, Out),
 1440    write(Out, '^^'),
 1441    tw_resource(Type, State, Out).
 1442
 1443
 1444%!  tw_abbreviated_literal(+Type, +Value, +State, +Out) is semidet.
 1445%
 1446%   Turtle abbreviated typed literals.
 1447%
 1448%   @tbd:   Deal with canonical forms (or is this a task of the
 1449%           RDF parser?
 1450%   @tbd:   What if the value is not in the lexical space of the type?
 1451
 1452term_expansion((tw_abbreviated_literal(NS:Local, Value, State, Out) :- Body),
 1453               (tw_abbreviated_literal(Type, Value, State, Out) :- Body)) :-
 1454    atom(NS),
 1455    rdf_global_id(NS:Local, Type).
 1456
 1457tw_abbreviated_literal(xsd:integer, Value, State, Out) :-
 1458    (   tw_state_canonize_numbers(State, false)
 1459    ->  write(Out, Value)
 1460    ;   atom_number(Value, Int),
 1461        format(Out, '~d', [Int])
 1462    ).
 1463tw_abbreviated_literal(xsd:double, Value, State, Out) :-
 1464    (   tw_state_canonize_numbers(State, false)
 1465    ->  write(Out, Value)
 1466    ;   ValueF is float(Value),
 1467        xsd_number_string(ValueF, FloatS),
 1468        format(Out, '~s', [FloatS])
 1469    ).
 1470tw_abbreviated_literal(xsd:string, Value, State, Out) :-
 1471    tw_quoted_string(Value, State, Out).
 1472tw_abbreviated_literal(xsd:decimal, Value, _, Out) :-
 1473    format(Out, '~w', [Value]).
 1474tw_abbreviated_literal(xsd:boolean, Value, _, Out) :-
 1475    format(Out, '~w', [Value]).
 1476
 1477
 1478%!  tw_quoted_string(+Atom, +State, +Out) is det.
 1479%
 1480%   Write  Atom  to  Out  as  a  quoted  string.  We  only  use  the
 1481%   single-"..." representation.
 1482
 1483tw_quoted_string(Atom, _, Out) :-
 1484    turtle:turtle_write_quoted_string(Out, Atom).
 1485
 1486
 1487                 /*******************************
 1488                 *             COMMENT          *
 1489                 *******************************/
 1490
 1491comment(State, Format, Args, Out) :-
 1492    tw_state_comment(State, true),
 1493    !,
 1494    format(Out, '~n# ', []),
 1495    format(Out, Format, Args),
 1496    format(Out, '~n', []).
 1497comment(_, _, _, _).
 1498
 1499
 1500
 1501                 /*******************************
 1502                 *           STATISTICS         *
 1503                 *******************************/
 1504
 1505inc_triple_count(State, Count) :-
 1506    tw_state_triple_count(State, C0),
 1507    C1 is C0+Count,
 1508    nb_set_triple_count_of_tw_state(C1, State).
 1509
 1510inc_subject_count(State, Count) :-
 1511    tw_state_subject_count(State, C0),
 1512    C1 is C0+Count,
 1513    nb_set_subject_count_of_tw_state(C1, State).
 1514
 1515:- multifile
 1516    prolog:message//1. 1517
 1518prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples))) -->
 1519    [ 'Saved ~D triples about ~D subjects into '-[SavedTriples, SavedSubjects] ],
 1520    rdf_output(File),
 1521    [ ' (~3f sec)'-[Time] ].
 1522prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples,
 1523                         SavedGraphs))) -->
 1524    [ 'Saved ~D graphs, ~D triples about ~D subjects into '-
 1525      [SavedGraphs, SavedTriples, SavedSubjects] ],
 1526    rdf_output(File),
 1527    [ ' (~3f sec)'-[Time] ].
 1528
 1529rdf_output(StreamSpec) -->
 1530    { (   StreamSpec = stream(Stream)
 1531      ->  true
 1532      ;   Stream = StreamSpec
 1533      ),
 1534      is_stream(Stream),
 1535      stream_property(Stream, file_name(File))
 1536    },
 1537    !,
 1538    [ '~p'-[File] ].
 1539rdf_output(File) -->
 1540    [ '~p'-[File] ]