View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jeffrey Rosenwald
    4    E-mail:        jeffrose@acm.org
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2013, Jeffrey Rosenwald
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(protobufs,
   36          [
   37           protobuf_message/2,   % ?Template ?Codes
   38           protobuf_message/3    % ?Template ?Codes ?Rest
   39          ]).

Google's Protocol Buffers

Protocol buffers are Google's language-neutral, platform-neutral, extensible mechanism for serializing structured data -- think XML, but smaller, faster, and simpler. You define how you want your data to be structured once. This takes the form of a template that describes the data structure. You use this template to encode and decode your data structure into wire-streams that may be sent-to or read-from your peers. The underlying wire stream is platform independent, lossless, and may be used to interwork with a variety of languages and systems regardless of word size or endianness. Techniques exist to safely extend your data structure without breaking deployed programs that are compiled against the "old" format.

The idea behind Google's Protocol Buffers is that you define your structured messages using a domain-specific language and tool set. In SWI-Prolog, you define your message template as a list of predefined Prolog terms that correspond to production rules in the Definite Clause Grammar (DCG) that realizes the interpreter. Each production rule has an equivalent rule in the protobuf grammar. The process is not unlike specifiying the format of a regular expression. To encode a template to a wire-stream, you pass a grounded template, X, and variable, Y, to protobuf_message/2. To decode a wire-stream, Y, you pass an ungrounded template, X, along with a grounded wire-stream, Y, to protobuf_message/2. The interpreter will unify the unbound variables in the template with values decoded from the wire-stream.

For an overview and tutorial with examples, see protobufs_overview.txt. Examples of usage may also be found by inspecting test_protobufs.pl.

author
- : Jeffrey Rosenwald (JeffRose@acm.org)
See also
- http://code.google.com/apis/protocolbuffers
Compatibility
- : SWI-Prolog */
   76:- require([ use_foreign_library/1
   77           , atom_codes/2
   78           , call/2
   79           , float32_codes/2
   80           , float64_codes/2
   81           , int32_codes/2
   82           , int64_codes/2
   83           , integer_zigzag/2
   84           , string_codes/2
   85           , succ/2
   86           , between/3
   87           ]).   88
   89:- use_foreign_library(foreign(protobufs)).   90:- use_module(library(utf8)).   91:- use_module(library(error)).   92:- use_module(library(lists)).   93
   94wire_type(varint, 0).
   95wire_type(fixed64, 1).
   96wire_type(length_delimited, 2).
   97wire_type(start_group, 3).
   98wire_type(end_group, 4).
   99wire_type(fixed32, 5).
  100
  101%
  102% deal with Google's method of encoding 2's complement integers
  103% such that packed length is proportional to magnitude. We can handle up
  104% to 63 bits, plus sign. Essentially moves sign-bit from MSB to LSB.
  105%
  106:- if(false).  % now done in the C-support code
  107zig_zag(Int, X) :-
  108    integer(Int),
  109    !,
  110    X is (Int << 1) xor (Int >> 63).
  111zig_zag(Int, X) :-
  112    integer(X),
  113    Y is -1 * (X /\ 1),
  114    Int is (X >> 1) xor Y.
  115:- endif.  116%
  117%  basic wire-type processing handled by C-support code
  118%
  119
  120fixed_int32(X, [A0, A1, A2, A3 | Rest], Rest) :-
  121    int32_codes(X, [A0, A1, A2, A3]).
  122
  123fixed_int64(X, [A0, A1, A2, A3, A4, A5, A6, A7 | Rest], Rest) :-
  124    int64_codes(X, [A0, A1, A2, A3, A4, A5, A6, A7]).
  125
  126fixed_float64(X, [A0, A1, A2, A3, A4, A5, A6, A7 | Rest], Rest) :-
  127    float64_codes(X, [A0, A1, A2, A3, A4, A5, A6, A7]).
  128
  129fixed_float32(X, [A0, A1, A2, A3 | Rest], Rest) :-
  130    float32_codes(X, [A0, A1, A2, A3]).
  131
  132%
  133%   Start of the DCG
  134%
  135
  136code_string(N, Codes, Rest, Rest1) :-
  137    length(Codes, N),
  138    append(Codes, Rest1, Rest),
  139    !.
  140/*
  141code_string(N, Codes) -->
  142        { length(Codes, N)},
  143        Codes, !.
  144*/
  145%
  146% deal with Google's method of packing unsigned integers in variable
  147% length, modulo 128 strings.
  148%
  149% var_int and tag_type productions were rewritten in straight Prolog for
  150% speed's sake.
  151%
  152
  153var_int(A, [A | Rest], Rest) :-
  154    A < 128,
  155    !.
  156var_int(X, [A | Rest], Rest1) :-
  157    nonvar(X),
  158    X1 is X >> 7,
  159    A is 128 + (X /\ 0x7f),
  160    var_int(X1, Rest, Rest1),
  161    !.
  162var_int(X, [A | Rest], Rest1) :-
  163    var_int(X1, Rest, Rest1),
  164    X is (X1 << 7) + A - 128,
  165    !.
  166%
  167%
  168
  169tag_type(Tag, Type, Rest, Rest1) :-
  170    nonvar(Tag), nonvar(Type),
  171    wire_type(Type, X),
  172    A is Tag << 3 \/ X,
  173    var_int(A, Rest, Rest1),
  174    !.
  175tag_type(Tag, Type, Rest, Rest1) :-
  176    var_int(A, Rest, Rest1),
  177    X is A /\ 0x07,
  178    wire_type(Type, X),
  179    Tag is A >> 3.
  180%
  181prolog_type(Tag, double) -->     tag_type(Tag, fixed64).
  182prolog_type(Tag, integer64) -->  tag_type(Tag, fixed64).
  183prolog_type(Tag, float) -->      tag_type(Tag, fixed32).
  184prolog_type(Tag, integer32) -->  tag_type(Tag, fixed32).
  185prolog_type(Tag, integer) -->    tag_type(Tag, varint).
  186prolog_type(Tag, unsigned) -->   tag_type(Tag, varint).
  187prolog_type(Tag, boolean) -->    tag_type(Tag, varint).
  188prolog_type(Tag, enum) -->       tag_type(Tag, varint).
  189prolog_type(Tag, atom) -->       tag_type(Tag, length_delimited).
  190prolog_type(Tag, codes) -->      tag_type(Tag, length_delimited).
  191prolog_type(Tag, utf8_codes) --> tag_type(Tag, length_delimited).
  192prolog_type(Tag, string) -->     tag_type(Tag, length_delimited).
  193prolog_type(Tag, embedded) -->   tag_type(Tag, length_delimited).
  194%
  195%   The protobuf-2.1.0 grammar allows negative values in enums.
  196%   But they are encoded as unsigned in the  golden message.
  197%   Encode as integer and lose. Encode as unsigned and win.
  198%
  199:- meta_predicate enumeration(1,*,*).  200
  201enumeration(Type) -->
  202    { call(Type, Value) },
  203    payload(unsigned, Value).
  204
  205payload(enum, A) -->
  206    enumeration(A).
  207payload(double,  A) -->
  208    fixed_float64(A).
  209payload(integer64, A) -->
  210    fixed_int64(A).
  211payload(float, A) -->
  212    fixed_float32(A).
  213payload(integer32, A) -->
  214    fixed_int32(A).
  215payload(integer, A) -->
  216    { nonvar(A), integer_zigzag(A,X) },
  217    !,
  218    var_int(X).
  219payload(integer, A) -->
  220    var_int(X),
  221    { integer_zigzag(A, X) }.
  222payload(unsigned, A) -->
  223    {   nonvar(A)
  224    ->  A >= 0
  225    ;   true
  226    },
  227    var_int(A).
  228payload(codes, A) -->
  229    { nonvar(A), !, length(A, Len)},
  230    var_int(Len),
  231    code_string(Len, A).
  232payload(codes, A) -->
  233    var_int(Len),
  234    code_string(Len, A).
  235payload(utf8_codes, A) -->
  236    { nonvar(A),
  237      !,
  238      phrase(utf8_codes(A), B)
  239    },
  240    payload(codes, B).
  241payload(utf8_codes, A) -->
  242    payload(codes, B),
  243    { phrase(utf8_codes(A), B) }.
  244payload(atom, A) -->
  245    { nonvar(A),
  246      atom_codes(A, Codes)
  247    },
  248    payload(utf8_codes, Codes),
  249    !.
  250payload(atom, A) -->
  251    payload(utf8_codes, Codes),
  252    { atom_codes(A, Codes) }.
  253payload(boolean, true) -->
  254    payload(unsigned, 1).
  255payload(boolean, false) -->
  256    payload(unsigned, 0).
  257payload(string, A) -->
  258    {   nonvar(A)
  259    ->  string_codes(A, Codes)
  260    ;   true
  261    },
  262    payload(codes, Codes),
  263    { string_codes(A, Codes) }.
  264payload(embedded, protobuf(A)) -->
  265    { ground(A),
  266      phrase(protobuf(A), Codes)
  267    },
  268    payload(codes, Codes),
  269    !.
  270payload(embedded, protobuf(A)) -->
  271    payload(codes, Codes),
  272    { phrase(protobuf(A), Codes) }.
  273
  274start_group(Tag) -->            tag_type(Tag, start_group).
  275
  276end_group(Tag) -->              tag_type(Tag, end_group).
  277%
  278%
  279nothing([]) --> [], !.
  280
  281protobuf([A | B]) -->
  282    { A =.. [ Type, Tag, Payload] },
  283    message_sequence(Type, Tag, Payload),
  284    !,
  285    (   protobuf(B)
  286    ;   nothing(B)
  287    ).
  288
  289
  290repeated_message_sequence(repeated_enum, Tag, Type, [A | B]) -->
  291    { Compound =.. [Type, A] },
  292    message_sequence(enum, Tag, Compound),
  293    (   repeated_message_sequence(repeated_enum, Tag, Type, B)
  294    ;   nothing(B)
  295    ).
  296repeated_message_sequence(Type, Tag, [A | B]) -->
  297    message_sequence(Type, Tag, A),
  298    repeated_message_sequence(Type, Tag, B).
  299repeated_message_sequence(_Type, _Tag, A) -->
  300    nothing(A).
  301
  302
  303message_sequence(repeated, Tag, enum(Compound)) -->
  304    { Compound =.. [ Type, List] },
  305    repeated_message_sequence(repeated_enum, Tag, Type, List).
  306message_sequence(repeated, Tag, Compound) -->
  307    { Compound =.. [Type, A] },
  308    repeated_message_sequence(Type, Tag, A).
  309message_sequence(group, Tag, A) -->
  310    start_group(Tag),
  311    protobuf(A),
  312    end_group(Tag),
  313    !.
  314message_sequence(PrologType, Tag, Payload) -->
  315    prolog_type(Tag, PrologType),
  316    payload(PrologType, Payload).
 protobuf_message(?Template, ?Wire_stream) is semidet
 protobuf_message(?Template, ?Wire_stream, ?Rest) is nondet
Marshalls and unmarshalls byte streams encoded using Google's Protobuf grammars. protobuf_message/2 provides a bi-directional parser that marshalls a Prolog structure to Wire_stream, according to rules specified by Template. It can also unmarshall Wire_stream into a Prolog structure according to the same grammar. protobuf_message/3 provides a difference list version.
Arguments:
Template- is a protobuf grammar specification. On decode, unbound variables in the Template are unified with their respective values in the Wire_stream. On encode, Template must be ground.
Wire_stream- is a code list that was generated by a protobuf encoder using an equivalent template.
  336protobuf_message(protobuf(Template), Wirestream) :-
  337    must_be(list, Template),
  338    phrase(protobuf(Template), Wirestream),
  339    !.
  340
  341protobuf_message(protobuf(Template), Wirestream, Residue) :-
  342    must_be(list, Template),
  343    phrase(protobuf(Template), Wirestream, Residue)