View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2018, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(http_open,
   38          [ http_open/3,                % +URL, -Stream, +Options
   39            http_set_authorization/2,   % +URL, +Authorization
   40            http_close_keep_alive/1     % +Address
   41          ]).   42:- use_module(library(uri)).   43:- use_module(library(readutil)).   44:- use_module(library(socket)).   45:- use_module(library(lists)).   46:- use_module(library(option)).   47:- use_module(library(error)).   48:- use_module(library(base64)).   49:- use_module(library(debug)).   50:- use_module(library(aggregate)).   51:- use_module(library(apply)).   52:- use_module(library(http/http_header), [http_parse_header/2]).   53:- use_module(library(http/http_stream)).   54
   55/** <module> HTTP client library
   56
   57This library defines http_open/3, which opens a  URL as a Prolog stream.
   58The functionality of the  library  can   be  extended  by  loading two
   59additional modules that act as plugins:
   60
   61    * library(http/http_ssl_plugin)
   62    Loading this library causes http_open/3 to handle HTTPS connections.
   63    Relevant options for SSL certificate handling are handed to
   64    ssl_context/3. This plugin is loaded automatically if the scheme
   65    `https` is requested using a default SSL context. See the plugin for
   66    additional information regarding security.
   67
   68    * library(http/http_cookie)
   69    Loading this library adds tracking cookies to http_open/3. Returned
   70    cookies are collected in the Prolog database and supplied for
   71    subsequent requests.
   72
   73Here is a simple example to fetch a web-page:
   74
   75  ==
   76  ?- http_open('http://www.google.com/search?q=prolog', In, []),
   77     copy_stream_data(In, user_output),
   78     close(In).
   79  <!doctype html><head><title>prolog - Google Search</title><script>
   80  ...
   81  ==
   82
   83The example below fetches the modification time of a web-page. Note that
   84Modified is '' (the empty atom)  if   the  web-server does not provide a
   85time-stamp for the resource. See also parse_time/2.
   86
   87  ==
   88  modified(URL, Stamp) :-
   89          http_open(URL, In,
   90                    [ method(head),
   91                      header(last_modified, Modified)
   92                    ]),
   93          close(In),
   94          Modified \== '',
   95          parse_time(Modified, Stamp).
   96  ==
   97
   98Then next example uses Google search. It exploits library(uri) to manage
   99URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  100navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  101queries if the data returned by Google changes.
  102
  103  ==
  104  :- use_module(library(http/http_open)).
  105  :- use_module(library(xpath)).
  106  :- use_module(library(sgml)).
  107  :- use_module(library(uri)).
  108
  109  google(For, Title, HREF) :-
  110          uri_encoded(query_value, For, Encoded),
  111          atom_concat('http://www.google.com/search?q=', Encoded, URL),
  112          http_open(URL, In, []),
  113          call_cleanup(
  114              load_html(In, DOM, []),
  115              close(In)),
  116          xpath(DOM, //h3(@class=r), Result),
  117          xpath(Result, //a(@href=HREF0, text), Title),
  118          uri_components(HREF0, Components),
  119          uri_data(search, Components, Query),
  120          uri_query_components(Query, Parts),
  121          memberchk(q=HREF, Parts).
  122  ==
  123
  124An example query is below:
  125
  126==
  127?- google(prolog, Title, HREF).
  128Title = 'SWI-Prolog',
  129HREF = 'http://www.swi-prolog.org/' ;
  130Title = 'Prolog - Wikipedia',
  131HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  132Title = 'Prolog - Wikipedia, the free encyclopedia',
  133HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  134Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  135HREF = 'http://www.pro-log.nl/' ;
  136Title = 'Learn Prolog Now!',
  137HREF = 'http://www.learnprolognow.org/' ;
  138Title = 'Free Online Version - Learn Prolog
  139...
  140==
  141
  142@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  143     documents.
  144@see http_get/3 and http_post/4 provide an alternative interface that
  145     convert the reply depending on the =|Content-Type|= header.
  146*/
  147
  148:- multifile
  149    http:encoding_filter/3,           % +Encoding, +In0, -In
  150    http:current_transfer_encoding/1, % ?Encoding
  151    http:disable_encoding_filter/1,   % +ContentType
  152    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  153                                      % -NewStreamPair, +Options
  154    http:open_options/2,              % +Parts, -Options
  155    http:write_cookies/3,             % +Out, +Parts, +Options
  156    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  157    http:authenticate_client/2,       % +URL, +Action
  158    http:http_connection_over_proxy/6.  159
  160:- meta_predicate
  161    http_open(+,-,:).  162
  163:- predicate_options(http_open/3, 3,
  164                     [ authorization(compound),
  165                       final_url(-atom),
  166                       header(+atom, -atom),
  167                       headers(-list),
  168                       connection(+atom),
  169                       method(oneof([delete,get,put,head,post,patch,options])),
  170                       size(-integer),
  171                       status_code(-integer),
  172                       output(-stream),
  173                       timeout(number),
  174                       proxy(atom, integer),
  175                       proxy_authorization(compound),
  176                       bypass_proxy(boolean),
  177                       request_header(any),
  178                       user_agent(atom),
  179                       version(-compound),
  180        % The option below applies if library(http/http_header) is loaded
  181                       post(any),
  182        % The options below apply if library(http/http_ssl_plugin)) is loaded
  183                       pem_password_hook(callable),
  184                       cacert_file(atom),
  185                       cert_verify_hook(callable)
  186                     ]).  187
  188%!  user_agent(-Agent) is det.
  189%
  190%   Default value for =|User-Agent|=,  can   be  overruled using the
  191%   option user_agent(Agent) of http_open/3.
  192
  193user_agent('SWI-Prolog').
  194
  195%!  http_open(+URL, -Stream, +Options) is det.
  196%
  197%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  198%   either an atom  specifying  a  URL   or  a  list  representing a
  199%   broken-down  URL  as  specified  below.   After  this  predicate
  200%   succeeds the data can be read from Stream. After completion this
  201%   stream must be  closed  using   the  built-in  Prolog  predicate
  202%   close/1. Options provides additional options:
  203%
  204%     * authenticate(+Boolean)
  205%     If `false` (default `true`), do _not_ try to automatically
  206%     authenticate the client if a 401 (Unauthorized) status code
  207%     is received.
  208%
  209%     * authorization(+Term)
  210%     Send authorization. See also http_set_authorization/2. Supported
  211%     schemes:
  212%
  213%       - basic(+User, +Password)
  214%       HTTP Basic authentication.
  215%       - bearer(+Token)
  216%       HTTP Bearer authentication.
  217%       - digest(+User, +Password)
  218%       HTTP Digest authentication.  This option is only provided
  219%       if the plugin library(http/http_digest) is also loaded.
  220%
  221%     * connection(+Connection)
  222%     Specify the =Connection= header.  Default is =close=.  The
  223%     alternative is =|Keep-alive|=.  This maintains a pool of
  224%     available connections as determined by keep_connection/1.
  225%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  226%     Keep-alive connections can be closed explicitly using
  227%     http_close_keep_alive/1. Keep-alive connections may
  228%     significantly improve repetitive requests on the same server,
  229%     especially if the IP route is long, HTTPS is used or the
  230%     connection uses a proxy.
  231%
  232%     * final_url(-FinalURL)
  233%     Unify FinalURL with the final   destination. This differs from
  234%     the  original  URL  if  the  returned  head  of  the  original
  235%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  236%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  237%     URL constructed from the parts.
  238%
  239%     * header(Name, -AtomValue)
  240%     If provided, AtomValue is  unified  with   the  value  of  the
  241%     indicated  field  in  the  reply    header.  Name  is  matched
  242%     case-insensitive and the underscore  (_)   matches  the hyphen
  243%     (-). Multiple of these options  may   be  provided  to extract
  244%     multiple  header  fields.  If  the  header  is  not  available
  245%     AtomValue is unified to the empty atom ('').
  246%
  247%     * headers(-List)
  248%     If provided, List is unified with  a list of Name(Value) pairs
  249%     corresponding to fields in the reply   header.  Name and Value
  250%     follow the same conventions  used   by  the header(Name,Value)
  251%     option.
  252%
  253%     * method(+Method)
  254%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  255%     =patch=.
  256%     The  =head= message can be
  257%     used in combination with  the   header(Name,  Value) option to
  258%     access information on the resource   without actually fetching
  259%     the resource itself.  The  returned   stream  must  be  closed
  260%     immediately.
  261%
  262%     If post(Data) is provided, the default is =post=.
  263%
  264%     * size(-Size)
  265%     Size is unified with the   integer value of =|Content-Length|=
  266%     in the reply header.
  267%
  268%     * version(-Version)
  269%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  270%     are integers representing the HTTP version in the reply header.
  271%
  272%     * range(+Range)
  273%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  274%     where `From` is an integer and `To`   is  either an integer or
  275%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  276%     to   ask   for    bytes    1000-1999,     use    the    option
  277%     range(bytes(1000,1999))
  278%
  279%     * redirect(+Boolean)
  280%     If `false` (default `true`), do _not_ automatically redirect
  281%     if a 3XX code is received.  Must be combined with
  282%     status_code(Code) and one of the header options to read the
  283%     redirect reply. In particular, without status_code(Code) a
  284%     redirect is mapped to an exception.
  285%
  286%     * status_code(-Code)
  287%     If this option is  present  and   Code  unifies  with the HTTP
  288%     status code, do *not* translate errors (4xx, 5xx) into an
  289%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  290%     returned, providing the application to read the error document
  291%     from the returned stream.
  292%
  293%     * output(-Out)
  294%     Unify the output stream with Out and do not close it. This can
  295%     be used to upgrade a connection.
  296%
  297%     * timeout(+Timeout)
  298%     If provided, set a timeout on   the stream using set_stream/2.
  299%     With this option if no new data arrives within Timeout seconds
  300%     the stream raises an exception.  Default   is  to wait forever
  301%     (=infinite=).
  302%
  303%     * post(+Data)
  304%     Issue a =POST= request on the HTTP server.  Data is
  305%     handed to http_post_data/3.
  306%
  307%     * proxy(+Host:Port)
  308%     Use an HTTP proxy to connect to the outside world.  See also
  309%     socket:proxy_for_url/3.  This option overrules the proxy
  310%     specification defined by socket:proxy_for_url/3.
  311%
  312%     * proxy(+Host, +Port)
  313%     Synonym for proxy(+Host:Port).  Deprecated.
  314%
  315%     * proxy_authorization(+Authorization)
  316%     Send authorization to the proxy.  Otherwise   the  same as the
  317%     =authorization= option.
  318%
  319%     * bypass_proxy(+Boolean)
  320%     If =true=, bypass proxy hooks.  Default is =false=.
  321%
  322%     * request_header(Name = Value)
  323%     Additional  name-value  parts  are  added   in  the  order  of
  324%     appearance to the HTTP request   header.  No interpretation is
  325%     done.
  326%
  327%     * max_redirect(+Max)
  328%     Sets the maximum length of a redirection chain.  This is needed
  329%     for some IRIs that redirect indefinitely to other IRIs without
  330%     looping (e.g., redirecting to IRIs with a random element in them).
  331%     Max must be either a non-negative integer or the atom `infinite`.
  332%     The default value is `10`.
  333%
  334%     * user_agent(+Agent)
  335%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  336%     header. Default is =SWI-Prolog=.
  337%
  338%   The hook http:open_options/2 can  be   used  to  provide default
  339%   options   based   on   the   broken-down     URL.   The   option
  340%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  341%   interfaces that commonly return status   codes  other than `200`
  342%   that need to be be processed by the client code.
  343%
  344%   @param URL is either an atom or string (url) or a list of _parts_.
  345%
  346%               When provided, this list may contain the fields
  347%               =scheme=, =user=, =password=, =host=, =port=, =path=
  348%               and either =query_string= (whose argument is an atom)
  349%               or =search= (whose argument is a list of
  350%               =|Name(Value)|= or =|Name=Value|= compound terms).
  351%               Only =host= is mandatory.  The example below opens the
  352%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  353%               Note that values must *not* be quoted because the
  354%               library inserts the required quotes.
  355%
  356%               ==
  357%               http_open([ host('www.example.com'),
  358%                           path('/my/path'),
  359%                           search([ q='Hello world',
  360%                                    lang=en
  361%                                  ])
  362%                         ])
  363%               ==
  364%
  365%   @throws error(existence_error(url, Id),Context) is raised if the
  366%   HTTP result code is not in the range 200..299. Context has the
  367%   shape context(Message, status(Code, TextCode)), where `Code` is the
  368%   numeric HTTP code and `TextCode` is the textual description thereof
  369%   provided by the server. `Message` may provide additional details or
  370%   may be unbound.
  371%
  372%   @see ssl_context/3 for SSL related options if
  373%   library(http/http_ssl_plugin) is loaded.
  374
  375:- multifile
  376    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  377
  378http_open(URL, Stream, QOptions) :-
  379    meta_options(is_meta, QOptions, Options),
  380    (   atomic(URL)
  381    ->  parse_url_ex(URL, Parts)
  382    ;   Parts = URL
  383    ),
  384    autoload_https(Parts),
  385    add_authorization(Parts, Options, Options1),
  386    findall(HostOptions,
  387            http:open_options(Parts, HostOptions),
  388            AllHostOptions),
  389    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  390    (   option(bypass_proxy(true), Options)
  391    ->  try_http_proxy(direct, Parts, Stream, Options2)
  392    ;   term_variables(Options2, Vars2),
  393        findall(Result-Vars2,
  394                try_a_proxy(Parts, Result, Options2),
  395                ResultList),
  396        last(ResultList, Status-Vars2)
  397    ->  (   Status = true(_Proxy, Stream)
  398        ->  true
  399        ;   throw(error(proxy_error(tried(ResultList)), _))
  400        )
  401    ;   try_http_proxy(direct, Parts, Stream, Options2)
  402    ).
  403
  404try_a_proxy(Parts, Result, Options) :-
  405    parts_uri(Parts, AtomicURL),
  406    option(host(Host), Parts),
  407    (   (   option(proxy(ProxyHost:ProxyPort), Options)
  408        ;   is_list(Options),
  409            memberchk(proxy(ProxyHost,ProxyPort), Options)
  410        )
  411    ->  Proxy = proxy(ProxyHost, ProxyPort)
  412    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  413    ),
  414    debug(http(proxy),
  415          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  416    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  417    ->  (   var(E)
  418        ->  !, Result = true(Proxy, Stream)
  419        ;   Result = error(Proxy, E)
  420        )
  421    ;   Result = false(Proxy)
  422    ),
  423    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  424
  425try_http_proxy(Method, Parts, Stream, Options0) :-
  426    option(host(Host), Parts),
  427    (   Method == direct
  428    ->  parts_request_uri(Parts, RequestURI)
  429    ;   parts_uri(Parts, RequestURI)
  430    ),
  431    select_option(visited(Visited0), Options0, OptionsV, []),
  432    Options = [visited([Parts|Visited0])|OptionsV],
  433    parts_scheme(Parts, Scheme),
  434    default_port(Scheme, DefPort),
  435    url_part(port(Port), Parts, DefPort),
  436    host_and_port(Host, DefPort, Port, HostPort),
  437    (   option(connection(Connection), Options0),
  438        keep_alive(Connection),
  439        get_from_pool(Host:Port, StreamPair),
  440        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  441              [ Host:Port, StreamPair ]),
  442        catch(send_rec_header(StreamPair, Stream, HostPort,
  443                              RequestURI, Parts, Options),
  444              error(E,_),
  445              keep_alive_error(E))
  446    ->  true
  447    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  448                                        SocketStreamPair, Options, Options1),
  449        (   catch(http:http_protocol_hook(Scheme, Parts,
  450                                          SocketStreamPair,
  451                                          StreamPair, Options),
  452                  Error,
  453                  ( close(SocketStreamPair, [force(true)]),
  454                    throw(Error)))
  455        ->  true
  456        ;   StreamPair = SocketStreamPair
  457        ),
  458        send_rec_header(StreamPair, Stream, HostPort,
  459                        RequestURI, Parts, Options1)
  460    ),
  461    return_final_url(Options).
  462
  463http:http_connection_over_proxy(direct, _, Host:Port,
  464                                StreamPair, Options, Options) :-
  465    !,
  466    open_socket(Host:Port, StreamPair, Options).
  467http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  468                                StreamPair, Options, Options) :-
  469    \+ ( memberchk(scheme(Scheme), Parts),
  470         secure_scheme(Scheme)
  471       ),
  472    !,
  473    % We do not want any /more/ proxy after this
  474    open_socket(ProxyHost:ProxyPort, StreamPair,
  475                [bypass_proxy(true)|Options]).
  476http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  477                                StreamPair, Options, Options) :-
  478    !,
  479    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  480    catch(negotiate_socks_connection(Host:Port, StreamPair),
  481          Error,
  482          ( close(StreamPair, [force(true)]),
  483            throw(Error)
  484          )).
  485
  486
  487merge_options_rev(Old, New, Merged) :-
  488    merge_options(New, Old, Merged).
  489
  490is_meta(pem_password_hook).             % SSL plugin callbacks
  491is_meta(cert_verify_hook).
  492
  493
  494http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  495
  496default_port(https, 443) :- !.
  497default_port(wss,   443) :- !.
  498default_port(_,     80).
  499
  500host_and_port(Host, DefPort, DefPort, Host) :- !.
  501host_and_port(Host, _,       Port,    Host:Port).
  502
  503%!  autoload_https(+Parts) is det.
  504%
  505%   If the requested scheme is https or wss, load the HTTPS plugin.
  506
  507autoload_https(Parts) :-
  508    memberchk(scheme(S), Parts),
  509    secure_scheme(S),
  510    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  511    exists_source(library(http/http_ssl_plugin)),
  512    !,
  513    use_module(library(http/http_ssl_plugin)).
  514autoload_https(_).
  515
  516secure_scheme(https).
  517secure_scheme(wss).
  518
  519%!  send_rec_header(+StreamPair, -Stream,
  520%!                  +Host, +RequestURI, +Parts, +Options) is det.
  521%
  522%   Send header to Out and process reply.  If there is an error or
  523%   failure, close In and Out and return the error or failure.
  524
  525send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  526    (   catch(guarded_send_rec_header(StreamPair, Stream,
  527                                      Host, RequestURI, Parts, Options),
  528              E, true)
  529    ->  (   var(E)
  530        ->  (   option(output(StreamPair), Options)
  531            ->  true
  532            ;   true
  533            )
  534        ;   close(StreamPair, [force(true)]),
  535            throw(E)
  536        )
  537    ;   close(StreamPair, [force(true)]),
  538        fail
  539    ).
  540
  541guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  542    user_agent(Agent, Options),
  543    method(Options, MNAME),
  544    http_version(Version),
  545    option(connection(Connection), Options, close),
  546    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  547    debug(http(send_request), "> Host: ~w", [Host]),
  548    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  549    debug(http(send_request), "> Connection: ~w", [Connection]),
  550    format(StreamPair,
  551           '~w ~w HTTP/~w\r\n\c
  552               Host: ~w\r\n\c
  553               User-Agent: ~w\r\n\c
  554               Connection: ~w\r\n',
  555           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  556    parts_uri(Parts, URI),
  557    x_headers(Options, URI, StreamPair),
  558    write_cookies(StreamPair, Parts, Options),
  559    (   option(post(PostData), Options)
  560    ->  http_header:http_post_data(PostData, StreamPair, [])
  561    ;   format(StreamPair, '\r\n', [])
  562    ),
  563    flush_output(StreamPair),
  564                                    % read the reply header
  565    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  566    update_cookies(Lines, Parts, Options),
  567    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  568            StreamPair, Stream).
  569
  570
  571%!  http_version(-Version:atom) is det.
  572%
  573%   HTTP version we publish. We  can  only   use  1.1  if we support
  574%   chunked encoding.
  575
  576http_version('1.1') :-
  577    http:current_transfer_encoding(chunked),
  578    !.
  579http_version('1.0').
  580
  581method(Options, MNAME) :-
  582    option(post(_), Options),
  583    !,
  584    option(method(M), Options, post),
  585    (   map_method(M, MNAME0)
  586    ->  MNAME = MNAME0
  587    ;   domain_error(method, M)
  588    ).
  589method(Options, MNAME) :-
  590    option(method(M), Options, get),
  591    (   map_method(M, MNAME0)
  592    ->  MNAME = MNAME0
  593    ;   map_method(_, M)
  594    ->  MNAME = M
  595    ;   domain_error(method, M)
  596    ).
  597
  598map_method(delete,  'DELETE').
  599map_method(get,     'GET').
  600map_method(head,    'HEAD').
  601map_method(post,    'POST').
  602map_method(put,     'PUT').
  603map_method(patch,   'PATCH').
  604map_method(options, 'OPTIONS').
  605
  606%!  x_headers(+Options, +URI, +Out) is det.
  607%
  608%   Emit extra headers from   request_header(Name=Value)  options in
  609%   Options.
  610%
  611%   @tbd Use user/password fields
  612
  613x_headers(Options, URI, Out) :-
  614    x_headers_(Options, [url(URI)|Options], Out).
  615
  616x_headers_([], _, _).
  617x_headers_([H|T], Options, Out) :-
  618    x_header(H, Options, Out),
  619    x_headers_(T, Options, Out).
  620
  621x_header(request_header(Name=Value), _, Out) :-
  622    !,
  623    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  624    format(Out, '~w: ~w\r\n', [Name, Value]).
  625x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  626    !,
  627    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  628x_header(authorization(Authorization), Options, Out) :-
  629    !,
  630    auth_header(Authorization, Options, 'Authorization', Out).
  631x_header(range(Spec), _, Out) :-
  632    !,
  633    Spec =.. [Unit, From, To],
  634    (   To == end
  635    ->  ToT = ''
  636    ;   must_be(integer, To),
  637        ToT = To
  638    ),
  639    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  640    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  641x_header(_, _, _).
  642
  643%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  644
  645auth_header(basic(User, Password), _, Header, Out) :-
  646    !,
  647    format(codes(Codes), '~w:~w', [User, Password]),
  648    phrase(base64(Codes), Base64Codes),
  649    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  650    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  651auth_header(bearer(Token), _, Header, Out) :-
  652    !,
  653    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  654    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  655auth_header(Auth, Options, _, Out) :-
  656    option(url(URL), Options),
  657    add_method(Options, Options1),
  658    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  659    !.
  660auth_header(Auth, _, _, _) :-
  661    domain_error(authorization, Auth).
  662
  663user_agent(Agent, Options) :-
  664    (   option(user_agent(Agent), Options)
  665    ->  true
  666    ;   user_agent(Agent)
  667    ).
  668
  669add_method(Options0, Options) :-
  670    option(method(_), Options0),
  671    !,
  672    Options = Options0.
  673add_method(Options0, Options) :-
  674    option(post(_), Options0),
  675    !,
  676    Options = [method(post)|Options0].
  677add_method(Options0, [method(get)|Options0]).
  678
  679%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  680%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  681%
  682%   Handle the HTTP status once available. If   200-299, we are ok. If a
  683%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  684%   error.
  685%
  686%   @error  existence_error(url, URL)
  687
  688                                        % Redirections
  689do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  690    redirect_code(Code),
  691    option(redirect(true), Options0, true),
  692    location(Lines, RequestURI),
  693    !,
  694    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  695    close(In),
  696    parts_uri(Parts, Base),
  697    uri_resolve(RequestURI, Base, Redirected),
  698    parse_url_ex(Redirected, RedirectedParts),
  699    (   redirect_limit_exceeded(Options0, Max)
  700    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  701        throw(error(permission_error(redirect, http, Redirected),
  702                    context(_, Comment)))
  703    ;   redirect_loop(RedirectedParts, Options0)
  704    ->  throw(error(permission_error(redirect, http, Redirected),
  705                    context(_, 'Redirection loop')))
  706    ;   true
  707    ),
  708    redirect_options(Options0, Options),
  709    http_open(RedirectedParts, Stream, Options).
  710                                        % Need authentication
  711do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  712    authenticate_code(Code),
  713    option(authenticate(true), Options0, true),
  714    parts_uri(Parts, URI),
  715    parse_headers(Lines, Headers),
  716    http:authenticate_client(
  717             URI,
  718             auth_reponse(Headers, Options0, Options)),
  719    !,
  720    close(In0),
  721    http_open(Parts, Stream, Options).
  722                                        % Accepted codes
  723do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  724    (   option(status_code(Code), Options),
  725        Lines \== []
  726    ->  true
  727    ;   successful_code(Code)
  728    ),
  729    !,
  730    parts_uri(Parts, URI),
  731    parse_headers(Lines, Headers),
  732    return_version(Options, Version),
  733    return_size(Options, Headers),
  734    return_fields(Options, Headers),
  735    return_headers(Options, Headers),
  736    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  737    transfer_encoding_filter(Lines, In1, In),
  738                                    % properly re-initialise the stream
  739    set_stream(In, file_name(URI)),
  740    set_stream(In, record_position(true)).
  741do_open(_, _, _, [], Options, _, _, _, _) :-
  742    option(connection(Connection), Options),
  743    keep_alive(Connection),
  744    !,
  745    throw(error(keep_alive(closed),_)).
  746                                        % report anything else as error
  747do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  748    parts_uri(Parts, URI),
  749    (   map_error_code(Code, Error)
  750    ->  Formal =.. [Error, url, URI]
  751    ;   Formal = existence_error(url, URI)
  752    ),
  753    throw(error(Formal, context(_, status(Code, Comment)))).
  754
  755
  756successful_code(Code) :-
  757    between(200, 299, Code).
  758
  759%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  760%
  761%   True if we have exceeded the maximum redirection length (default 10).
  762
  763redirect_limit_exceeded(Options, Max) :-
  764    option(visited(Visited), Options, []),
  765    length(Visited, N),
  766    option(max_redirect(Max), Options, 10),
  767    (Max == infinite -> fail ; N > Max).
  768
  769
  770%!  redirect_loop(+Parts, +Options) is semidet.
  771%
  772%   True if we are in  a  redirection   loop.  Note  that some sites
  773%   redirect once to the same place using  cookies or similar, so we
  774%   allow for two tries. In fact,   we  should probably test whether
  775%   authorization or cookie headers have changed.
  776
  777redirect_loop(Parts, Options) :-
  778    option(visited(Visited), Options, []),
  779    include(==(Parts), Visited, Same),
  780    length(Same, Count),
  781    Count > 2.
  782
  783
  784%!  redirect_options(+Options0, -Options) is det.
  785%
  786%   A redirect from a POST should do a GET on the returned URI. This
  787%   means we must remove  the   method(post)  and post(Data) options
  788%   from the original option-list.
  789
  790redirect_options(Options0, Options) :-
  791    (   select_option(post(_), Options0, Options1)
  792    ->  true
  793    ;   Options1 = Options0
  794    ),
  795    (   select_option(method(Method), Options1, Options),
  796        \+ redirect_method(Method)
  797    ->  true
  798    ;   Options = Options1
  799    ).
  800
  801redirect_method(delete).
  802redirect_method(get).
  803redirect_method(head).
  804
  805
  806%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  807%
  808%   Map HTTP error codes to Prolog errors.
  809%
  810%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  811%           counterpart.
  812
  813map_error_code(401, permission_error).
  814map_error_code(403, permission_error).
  815map_error_code(404, existence_error).
  816map_error_code(405, permission_error).
  817map_error_code(407, permission_error).
  818map_error_code(410, existence_error).
  819
  820redirect_code(301).                     % Moved Permanently
  821redirect_code(302).                     % Found (previously "Moved Temporary")
  822redirect_code(303).                     % See Other
  823redirect_code(307).                     % Temporary Redirect
  824
  825authenticate_code(401).
  826
  827%!  open_socket(+Address, -StreamPair, +Options) is det.
  828%
  829%   Create and connect a client socket to Address.  Options
  830%
  831%       * timeout(+Timeout)
  832%       Sets timeout on the stream, *after* connecting the
  833%       socket.
  834%
  835%   @tbd    Make timeout also work on tcp_connect/4.
  836%   @tbd    This is the same as do_connect/4 in http_client.pl
  837
  838open_socket(Address, StreamPair, Options) :-
  839    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  840    tcp_connect(Address, StreamPair, Options),
  841    stream_pair(StreamPair, In, Out),
  842    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  843    set_stream(In, record_position(false)),
  844    (   option(timeout(Timeout), Options)
  845    ->  set_stream(In, timeout(Timeout))
  846    ;   true
  847    ).
  848
  849
  850return_version(Options, Major-Minor) :-
  851    option(version(Major-Minor), Options, _).
  852
  853return_size(Options, Headers) :-
  854    (   memberchk(content_length(Size), Headers)
  855    ->  option(size(Size), Options, _)
  856    ;   true
  857    ).
  858
  859return_fields([], _).
  860return_fields([header(Name, Value)|T], Headers) :-
  861    !,
  862    (   Term =.. [Name,Value],
  863        memberchk(Term, Headers)
  864    ->  true
  865    ;   Value = ''
  866    ),
  867    return_fields(T, Headers).
  868return_fields([_|T], Lines) :-
  869    return_fields(T, Lines).
  870
  871return_headers(Options, Headers) :-
  872    option(headers(Headers), Options, _).
  873
  874%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  875%
  876%   Parse the header lines for   the  headers(-List) option. Invalid
  877%   header   lines   are   skipped,   printing   a   warning   using
  878%   pring_message/2.
  879
  880parse_headers([], []) :- !.
  881parse_headers([Line|Lines], Headers) :-
  882    catch(http_parse_header(Line, [Header]), Error, true),
  883    (   var(Error)
  884    ->  Headers = [Header|More]
  885    ;   print_message(warning, Error),
  886        Headers = More
  887    ),
  888    parse_headers(Lines, More).
  889
  890
  891%!  return_final_url(+Options) is semidet.
  892%
  893%   If Options contains final_url(URL), unify URL with the final
  894%   URL after redirections.
  895
  896return_final_url(Options) :-
  897    option(final_url(URL), Options),
  898    var(URL),
  899    !,
  900    option(visited([Parts|_]), Options),
  901    parts_uri(Parts, URL).
  902return_final_url(_).
  903
  904
  905%!  transfer_encoding_filter(+Lines, +In0, -In) is det.
  906%
  907%   Install filters depending on the transfer  encoding. If In0 is a
  908%   stream-pair, we close the output   side. If transfer-encoding is
  909%   not specified, the content-encoding is  interpreted as a synonym
  910%   for transfer-encoding, because many   servers incorrectly depend
  911%   on  this.  Exceptions  to  this   are  content-types  for  which
  912%   disable_encoding_filter/1 holds.
  913
  914transfer_encoding_filter(Lines, In0, In) :-
  915    transfer_encoding(Lines, Encoding),
  916    !,
  917    transfer_encoding_filter_(Encoding, In0, In).
  918transfer_encoding_filter(Lines, In0, In) :-
  919    content_encoding(Lines, Encoding),
  920    content_type(Lines, Type),
  921    \+ http:disable_encoding_filter(Type),
  922    !,
  923    transfer_encoding_filter_(Encoding, In0, In).
  924transfer_encoding_filter(_, In, In).
  925
  926transfer_encoding_filter_(Encoding, In0, In) :-
  927    stream_pair(In0, In1, Out),
  928    (   nonvar(Out)
  929    ->  close(Out)
  930    ;   true
  931    ),
  932    (   http:encoding_filter(Encoding, In1, In)
  933    ->  true
  934    ;   domain_error(http_encoding, Encoding)
  935    ).
  936
  937content_type(Lines, Type) :-
  938    member(Line, Lines),
  939    phrase(field('content-type'), Line, Rest),
  940    !,
  941    atom_codes(Type, Rest).
  942
  943%!  http:disable_encoding_filter(+ContentType) is semidet.
  944%
  945%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
  946%   encoding for specific values of   ContentType. This predicate is
  947%   multifile and can thus be extended by the user.
  948
  949http:disable_encoding_filter('application/x-gzip').
  950http:disable_encoding_filter('application/x-tar').
  951http:disable_encoding_filter('x-world/x-vrml').
  952http:disable_encoding_filter('application/zip').
  953http:disable_encoding_filter('application/x-gzip').
  954http:disable_encoding_filter('application/x-zip-compressed').
  955http:disable_encoding_filter('application/x-compress').
  956http:disable_encoding_filter('application/x-compressed').
  957http:disable_encoding_filter('application/x-spoon').
  958
  959%!  transfer_encoding(+Lines, -Encoding) is semidet.
  960%
  961%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
  962%   header.
  963
  964transfer_encoding(Lines, Encoding) :-
  965    what_encoding(transfer_encoding, Lines, Encoding).
  966
  967what_encoding(What, Lines, Encoding) :-
  968    member(Line, Lines),
  969    phrase(encoding_(What, Debug), Line, Rest),
  970    !,
  971    atom_codes(Encoding, Rest),
  972    debug(http(What), '~w: ~p', [Debug, Rest]).
  973
  974encoding_(content_encoding, 'Content-encoding') -->
  975    field('content-encoding').
  976encoding_(transfer_encoding, 'Transfer-encoding') -->
  977    field('transfer-encoding').
  978
  979%!  content_encoding(+Lines, -Encoding) is semidet.
  980%
  981%   True if Encoding is the value of the =|Content-encoding|=
  982%   header.
  983
  984content_encoding(Lines, Encoding) :-
  985    what_encoding(content_encoding, Lines, Encoding).
  986
  987%!  read_header(+In:stream, +Parts, -Version, -Code:int,
  988%!  -Comment:atom, -Lines:list) is det.
  989%
  990%   Read the HTTP reply-header.  If the reply is completely empty
  991%   an existence error is thrown.  If the replied header is
  992%   otherwise invalid a 500 HTTP error is simulated, having the
  993%   comment =|Invalid reply header|=.
  994%
  995%   @param Parts    A list of compound terms that describe the
  996%                   parsed request URI.
  997%   @param Version  HTTP reply version as Major-Minor pair
  998%   @param Code     Numeric HTTP reply-code
  999%   @param Comment  Comment of reply-code as atom
 1000%   @param Lines    Remaining header lines as code-lists.
 1001%
 1002%   @error existence_error(http_reply, Uri)
 1003
 1004read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1005    read_line_to_codes(In, Line),
 1006    (   Line == end_of_file
 1007    ->  parts_uri(Parts, Uri),
 1008        existence_error(http_reply,Uri)
 1009    ;   true
 1010    ),
 1011    Line \== end_of_file,
 1012    phrase(first_line(Major-Minor, Code, Comment), Line),
 1013    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1014    read_line_to_codes(In, Line2),
 1015    rest_header(Line2, In, Lines),
 1016    !,
 1017    (   debugging(http(open))
 1018    ->  forall(member(HL, Lines),
 1019               debug(http(open), '~s', [HL]))
 1020    ;   true
 1021    ).
 1022read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1023
 1024rest_header([], _, []) :- !.            % blank line: end of header
 1025rest_header(L0, In, [L0|L]) :-
 1026    read_line_to_codes(In, L1),
 1027    rest_header(L1, In, L).
 1028
 1029%!  content_length(+Header, -Length:int) is semidet.
 1030%
 1031%   Find the Content-Length in an HTTP reply-header.
 1032
 1033content_length(Lines, Length) :-
 1034    member(Line, Lines),
 1035    phrase(content_length(Length0), Line),
 1036    !,
 1037    Length = Length0.
 1038
 1039location(Lines, RequestURI) :-
 1040    member(Line, Lines),
 1041    phrase(atom_field(location, RequestURI), Line),
 1042    !.
 1043
 1044connection(Lines, Connection) :-
 1045    member(Line, Lines),
 1046    phrase(atom_field(connection, Connection0), Line),
 1047    !,
 1048    Connection = Connection0.
 1049
 1050first_line(Major-Minor, Code, Comment) -->
 1051    "HTTP/", integer(Major), ".", integer(Minor),
 1052    skip_blanks,
 1053    integer(Code),
 1054    skip_blanks,
 1055    rest(Comment).
 1056
 1057atom_field(Name, Value) -->
 1058    field(Name),
 1059    rest(Value).
 1060
 1061content_length(Len) -->
 1062    field('content-length'),
 1063    integer(Len).
 1064
 1065field(Name) -->
 1066    { atom_codes(Name, Codes) },
 1067    field_codes(Codes).
 1068
 1069field_codes([]) -->
 1070    ":",
 1071    skip_blanks.
 1072field_codes([H|T]) -->
 1073    [C],
 1074    { match_header_char(H, C)
 1075    },
 1076    field_codes(T).
 1077
 1078match_header_char(C, C) :- !.
 1079match_header_char(C, U) :-
 1080    code_type(C, to_lower(U)),
 1081    !.
 1082match_header_char(0'_, 0'-).
 1083
 1084
 1085skip_blanks -->
 1086    [C],
 1087    { code_type(C, white)
 1088    },
 1089    !,
 1090    skip_blanks.
 1091skip_blanks -->
 1092    [].
 1093
 1094%!  integer(-Int)//
 1095%
 1096%   Read 1 or more digits and return as integer.
 1097
 1098integer(Code) -->
 1099    digit(D0),
 1100    digits(D),
 1101    { number_codes(Code, [D0|D])
 1102    }.
 1103
 1104digit(C) -->
 1105    [C],
 1106    { code_type(C, digit)
 1107    }.
 1108
 1109digits([D0|D]) -->
 1110    digit(D0),
 1111    !,
 1112    digits(D).
 1113digits([]) -->
 1114    [].
 1115
 1116%!  rest(-Atom:atom)//
 1117%
 1118%   Get rest of input as an atom.
 1119
 1120rest(Atom) --> call(rest_(Atom)).
 1121
 1122rest_(Atom, L, []) :-
 1123    atom_codes(Atom, L).
 1124
 1125
 1126                 /*******************************
 1127                 *   AUTHORIZATION MANAGEMENT   *
 1128                 *******************************/
 1129
 1130%!  http_set_authorization(+URL, +Authorization) is det.
 1131%
 1132%   Set user/password to supply with URLs   that have URL as prefix.
 1133%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1134%   authorization is cleared.  For example:
 1135%
 1136%   ==
 1137%   ?- http_set_authorization('http://www.example.com/private/',
 1138%                             basic('John', 'Secret'))
 1139%   ==
 1140%
 1141%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1142%           too.
 1143
 1144:- dynamic
 1145    stored_authorization/2,
 1146    cached_authorization/2. 1147
 1148http_set_authorization(URL, Authorization) :-
 1149    must_be(atom, URL),
 1150    retractall(stored_authorization(URL, _)),
 1151    (   Authorization = (-)
 1152    ->  true
 1153    ;   check_authorization(Authorization),
 1154        assert(stored_authorization(URL, Authorization))
 1155    ),
 1156    retractall(cached_authorization(_,_)).
 1157
 1158check_authorization(Var) :-
 1159    var(Var),
 1160    !,
 1161    instantiation_error(Var).
 1162check_authorization(basic(User, Password)) :-
 1163    must_be(atom, User),
 1164    must_be(text, Password).
 1165check_authorization(digest(User, Password)) :-
 1166    must_be(atom, User),
 1167    must_be(text, Password).
 1168
 1169%!  authorization(+URL, -Authorization) is semidet.
 1170%
 1171%   True if Authorization must be supplied for URL.
 1172%
 1173%   @tbd    Cleanup cache if it gets too big.
 1174
 1175authorization(_, _) :-
 1176    \+ stored_authorization(_, _),
 1177    !,
 1178    fail.
 1179authorization(URL, Authorization) :-
 1180    cached_authorization(URL, Authorization),
 1181    !,
 1182    Authorization \== (-).
 1183authorization(URL, Authorization) :-
 1184    (   stored_authorization(Prefix, Authorization),
 1185        sub_atom(URL, 0, _, _, Prefix)
 1186    ->  assert(cached_authorization(URL, Authorization))
 1187    ;   assert(cached_authorization(URL, -)),
 1188        fail
 1189    ).
 1190
 1191add_authorization(_, Options, Options) :-
 1192    option(authorization(_), Options),
 1193    !.
 1194add_authorization(Parts, Options0, Options) :-
 1195    url_part(user(User), Parts),
 1196    url_part(password(Passwd), Parts),
 1197    Options = [authorization(basic(User,Passwd))|Options0].
 1198add_authorization(Parts, Options0, Options) :-
 1199    stored_authorization(_, _) ->   % quick test to avoid work
 1200    parts_uri(Parts, URL),
 1201    authorization(URL, Auth),
 1202    !,
 1203    Options = [authorization(Auth)|Options0].
 1204add_authorization(_, Options, Options).
 1205
 1206
 1207%!  parse_url_ex(+URL, -Parts)
 1208%
 1209%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1210%   fragment).
 1211
 1212parse_url_ex(URL, [uri(URL)|Parts]) :-
 1213    uri_components(URL, Components),
 1214    phrase(components(Components), Parts),
 1215    (   option(host(_), Parts)
 1216    ->  true
 1217    ;   domain_error(url, URL)
 1218    ).
 1219
 1220components(Components) -->
 1221    uri_scheme(Components),
 1222    uri_authority(Components),
 1223    uri_request_uri(Components).
 1224
 1225uri_scheme(Components) -->
 1226    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1227    !,
 1228    [ scheme(Scheme)
 1229    ].
 1230uri_scheme(_) --> [].
 1231
 1232uri_authority(Components) -->
 1233    { uri_data(authority, Components, Auth), nonvar(Auth),
 1234      !,
 1235      uri_authority_components(Auth, Data)
 1236    },
 1237    [ authority(Auth) ],
 1238    auth_field(user, Data),
 1239    auth_field(password, Data),
 1240    auth_field(host, Data),
 1241    auth_field(port, Data).
 1242uri_authority(_) --> [].
 1243
 1244auth_field(Field, Data) -->
 1245    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1246      !,
 1247      (   atom(EncValue)
 1248      ->  uri_encoded(query_value, Value, EncValue)
 1249      ;   Value = EncValue
 1250      ),
 1251      Part =.. [Field,Value]
 1252    },
 1253    [ Part ].
 1254auth_field(_, _) --> [].
 1255
 1256uri_request_uri(Components) -->
 1257    { uri_data(path, Components, Path0),
 1258      uri_data(search, Components, Search),
 1259      (   Path0 == ''
 1260      ->  Path = (/)
 1261      ;   Path = Path0
 1262      ),
 1263      uri_data(path, Components2, Path),
 1264      uri_data(search, Components2, Search),
 1265      uri_components(RequestURI, Components2)
 1266    },
 1267    [ request_uri(RequestURI)
 1268    ].
 1269
 1270%!  parts_scheme(+Parts, -Scheme) is det.
 1271%!  parts_uri(+Parts, -URI) is det.
 1272%!  parts_request_uri(+Parts, -RequestURI) is det.
 1273%!  parts_search(+Parts, -Search) is det.
 1274%!  parts_authority(+Parts, -Authority) is semidet.
 1275
 1276parts_scheme(Parts, Scheme) :-
 1277    url_part(scheme(Scheme), Parts),
 1278    !.
 1279parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1280    url_part(protocol(Scheme), Parts),
 1281    !.
 1282parts_scheme(_, http).
 1283
 1284parts_authority(Parts, Auth) :-
 1285    url_part(authority(Auth), Parts),
 1286    !.
 1287parts_authority(Parts, Auth) :-
 1288    url_part(host(Host), Parts, _),
 1289    url_part(port(Port), Parts, _),
 1290    url_part(user(User), Parts, _),
 1291    url_part(password(Password), Parts, _),
 1292    uri_authority_components(Auth,
 1293                             uri_authority(User, Password, Host, Port)).
 1294
 1295parts_request_uri(Parts, RequestURI) :-
 1296    option(request_uri(RequestURI), Parts),
 1297    !.
 1298parts_request_uri(Parts, RequestURI) :-
 1299    url_part(path(Path), Parts, /),
 1300    ignore(parts_search(Parts, Search)),
 1301    uri_data(path, Data, Path),
 1302    uri_data(search, Data, Search),
 1303    uri_components(RequestURI, Data).
 1304
 1305parts_search(Parts, Search) :-
 1306    option(query_string(Search), Parts),
 1307    !.
 1308parts_search(Parts, Search) :-
 1309    option(search(Fields), Parts),
 1310    !,
 1311    uri_query_components(Search, Fields).
 1312
 1313
 1314parts_uri(Parts, URI) :-
 1315    option(uri(URI), Parts),
 1316    !.
 1317parts_uri(Parts, URI) :-
 1318    parts_scheme(Parts, Scheme),
 1319    ignore(parts_authority(Parts, Auth)),
 1320    parts_request_uri(Parts, RequestURI),
 1321    uri_components(RequestURI, Data),
 1322    uri_data(scheme, Data, Scheme),
 1323    uri_data(authority, Data, Auth),
 1324    uri_components(URI, Data).
 1325
 1326parts_port(Parts, Port) :-
 1327    parts_scheme(Parts, Scheme),
 1328    default_port(Scheme, DefPort),
 1329    url_part(port(Port), Parts, DefPort).
 1330
 1331url_part(Part, Parts) :-
 1332    Part =.. [Name,Value],
 1333    Gen =.. [Name,RawValue],
 1334    option(Gen, Parts),
 1335    !,
 1336    Value = RawValue.
 1337
 1338url_part(Part, Parts, Default) :-
 1339    Part =.. [Name,Value],
 1340    Gen =.. [Name,RawValue],
 1341    (   option(Gen, Parts)
 1342    ->  Value = RawValue
 1343    ;   Value = Default
 1344    ).
 1345
 1346
 1347                 /*******************************
 1348                 *            COOKIES           *
 1349                 *******************************/
 1350
 1351write_cookies(Out, Parts, Options) :-
 1352    http:write_cookies(Out, Parts, Options),
 1353    !.
 1354write_cookies(_, _, _).
 1355
 1356update_cookies(_, _, _) :-
 1357    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1358    !.
 1359update_cookies(Lines, Parts, Options) :-
 1360    (   member(Line, Lines),
 1361        phrase(atom_field('set_cookie', CookieData), Line),
 1362        http:update_cookies(CookieData, Parts, Options),
 1363        fail
 1364    ;   true
 1365    ).
 1366
 1367
 1368                 /*******************************
 1369                 *           OPEN ANY           *
 1370                 *******************************/
 1371
 1372:- multifile iostream:open_hook/6. 1373
 1374%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1375%!                     +Options0, -Options) is semidet.
 1376%
 1377%   Hook implementation that makes  open_any/5   support  =http= and
 1378%   =https= URLs for `Mode == read`.
 1379
 1380iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1381    (atom(URL) -> true ; string(URL)),
 1382    uri_is_global(URL),
 1383    uri_components(URL, Components),
 1384    uri_data(scheme, Components, Scheme),
 1385    http_scheme(Scheme),
 1386    !,
 1387    Options = Options0,
 1388    Close = close(Stream),
 1389    http_open(URL, Stream, Options0).
 1390
 1391http_scheme(http).
 1392http_scheme(https).
 1393
 1394
 1395                 /*******************************
 1396                 *          KEEP-ALIVE          *
 1397                 *******************************/
 1398
 1399%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1400%!                      +Stream0, -Stream,
 1401%!                      +Options) is det.
 1402
 1403consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1404    option(connection(Asked), Options),
 1405    keep_alive(Asked),
 1406    connection(Lines, Given),
 1407    keep_alive(Given),
 1408    content_length(Lines, Bytes),
 1409    !,
 1410    stream_pair(StreamPair, In0, _),
 1411    connection_address(Host, Parts, HostPort),
 1412    debug(http(connection),
 1413          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1414    stream_range_open(In0, In,
 1415                      [ size(Bytes),
 1416                        onclose(keep_alive(StreamPair, HostPort))
 1417                      ]).
 1418consider_keep_alive(_, _, _, Stream, Stream, _).
 1419
 1420connection_address(Host, _, Host) :-
 1421    Host = _:_,
 1422    !.
 1423connection_address(Host, Parts, Host:Port) :-
 1424    parts_port(Parts, Port).
 1425
 1426keep_alive(keep_alive) :- !.
 1427keep_alive(Connection) :-
 1428    downcase_atom(Connection, 'keep-alive').
 1429
 1430:- public keep_alive/4. 1431
 1432keep_alive(StreamPair, Host, _In, 0) :-
 1433    !,
 1434    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1435    add_to_pool(Host, StreamPair).
 1436keep_alive(StreamPair, Host, In, Left) :-
 1437    Left < 100,
 1438    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1439    read_incomplete(In, Left),
 1440    add_to_pool(Host, StreamPair),
 1441    !.
 1442keep_alive(StreamPair, _, _, _) :-
 1443    debug(http(connection),
 1444          'Closing connection due to excessive unprocessed input', []),
 1445    (   debugging(http(connection))
 1446    ->  catch(close(StreamPair), E,
 1447              print_message(warning, E))
 1448    ;   close(StreamPair, [force(true)])
 1449    ).
 1450
 1451%!  read_incomplete(+In, +Left) is semidet.
 1452%
 1453%   If we have not all input from  a Keep-alive connection, read the
 1454%   remainder if it is short. Else, we fail and close the stream.
 1455
 1456read_incomplete(In, Left) :-
 1457    catch(setup_call_cleanup(
 1458              open_null_stream(Null),
 1459              copy_stream_data(In, Null, Left),
 1460              close(Null)),
 1461          _,
 1462          fail).
 1463
 1464:- dynamic
 1465    connection_pool/4,              % Hash, Address, Stream, Time
 1466    connection_gc_time/1. 1467
 1468add_to_pool(Address, StreamPair) :-
 1469    keep_connection(Address),
 1470    get_time(Now),
 1471    term_hash(Address, Hash),
 1472    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1473
 1474get_from_pool(Address, StreamPair) :-
 1475    term_hash(Address, Hash),
 1476    retract(connection_pool(Hash, Address, StreamPair, _)).
 1477
 1478%!  keep_connection(+Address) is semidet.
 1479%
 1480%   Succeeds if we want to keep   the  connection open. We currently
 1481%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1482%   waiting for the same address. Connections   older than 2 seconds
 1483%   are closed.
 1484
 1485keep_connection(Address) :-
 1486    close_old_connections(2),
 1487    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1488    C =< 10,
 1489    term_hash(Address, Hash),
 1490    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1491    Count =< 2.
 1492
 1493close_old_connections(Timeout) :-
 1494    get_time(Now),
 1495    Before is Now - Timeout,
 1496    (   connection_gc_time(GC),
 1497        GC > Before
 1498    ->  true
 1499    ;   (   retractall(connection_gc_time(_)),
 1500            asserta(connection_gc_time(Now)),
 1501            connection_pool(Hash, Address, StreamPair, Added),
 1502            Added < Before,
 1503            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1504            debug(http(connection),
 1505                  'Closing inactive keep-alive to ~p', [Address]),
 1506            close(StreamPair, [force(true)]),
 1507            fail
 1508        ;   true
 1509        )
 1510    ).
 1511
 1512
 1513%!  http_close_keep_alive(+Address) is det.
 1514%
 1515%   Close all keep-alive connections matching Address. Address is of
 1516%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1517%   closes all currently known keep-alive connections.
 1518
 1519http_close_keep_alive(Address) :-
 1520    forall(get_from_pool(Address, StreamPair),
 1521           close(StreamPair, [force(true)])).
 1522
 1523%!  keep_alive_error(+Error)
 1524%
 1525%   Deal with an error from reusing  a keep-alive connection. If the
 1526%   error is due to an I/O error   or end-of-file, fail to backtrack
 1527%   over get_from_pool/2. Otherwise it is a   real error and we thus
 1528%   re-raise it.
 1529
 1530keep_alive_error(keep_alive(closed)) :-
 1531    !,
 1532    debug(http(connection), 'Keep-alive connection was closed', []),
 1533    fail.
 1534keep_alive_error(io_error(_,_)) :-
 1535    !,
 1536    debug(http(connection), 'IO error on Keep-alive connection', []),
 1537    fail.
 1538keep_alive_error(Error) :-
 1539    throw(Error).
 1540
 1541
 1542                 /*******************************
 1543                 *     HOOK DOCUMENTATION       *
 1544                 *******************************/
 1545
 1546%!  http:open_options(+Parts, -Options) is nondet.
 1547%
 1548%   This hook is used by the HTTP   client library to define default
 1549%   options based on the the broken-down request-URL.  The following
 1550%   example redirects all trafic, except for localhost over a proxy:
 1551%
 1552%       ==
 1553%       :- multifile
 1554%           http:open_options/2.
 1555%
 1556%       http:open_options(Parts, Options) :-
 1557%           option(host(Host), Parts),
 1558%           Host \== localhost,
 1559%           Options = [proxy('proxy.local', 3128)].
 1560%       ==
 1561%
 1562%   This hook may return multiple   solutions.  The returned options
 1563%   are  combined  using  merge_options/3  where  earlier  solutions
 1564%   overrule later solutions.
 1565
 1566%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1567%
 1568%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1569%   open stream to the HTTP server, Parts is the broken-down request
 1570%   (see uri_components/2) and Options is the list of options passed
 1571%   to http_open.  The predicate is called as if using ignore/1.
 1572%
 1573%   @see complements http:update_cookies/3.
 1574%   @see library(http/http_cookie) implements cookie handling on
 1575%   top of these hooks.
 1576
 1577%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1578%
 1579%   Update the cookie database.  CookieData  is   the  value  of the
 1580%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1581%   uri_components/2) and Options is the list   of options passed to
 1582%   http_open.
 1583%
 1584%   @see complements http:write_cookies
 1585%   @see library(http/http_cookies) implements cookie handling on
 1586%   top of these hooks.