1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker and Matt Lilley 4 E-mail: J.Wielemaker@cs.vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2012-2019, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(archive, 36 [ archive_open/3, % +Stream, -Archive, +Options 37 archive_open/4, % +Stream, +Mode, -Archive, +Options 38 archive_create/3, % +OutputFile, +InputFileList, +Options 39 archive_close/1, % +Archive 40 archive_property/2, % +Archive, ?Property 41 archive_next_header/2, % +Archive, -Name 42 archive_open_entry/2, % +Archive, -EntryStream 43 archive_header_property/2, % +Archive, ?Property 44 archive_set_header_property/2, % +Archive, +Property 45 archive_extract/3, % +Archive, +Dir, +Options 46 47 archive_entries/2, % +Archive, -Entries 48 archive_data_stream/3, % +Archive, -DataStream, +Options 49 archive_foldl/4 % :Goal, +Archive, +State0, -State 50 ]). 51:- use_module(library(error)). 52:- use_module(library(lists)). 53:- use_module(library(option)). 54:- use_module(library(filesex)). 55 56:- meta_predicate 57 archive_foldl( , , , ). 58 59/** <module> Access several archive formats 60 61This library uses _libarchive_ to access a variety of archive formats. 62The following example lists the entries in an archive: 63 64 ``` 65 list_archive(File) :- 66 archive_open(File, Archive, []), 67 repeat, 68 ( archive_next_header(Archive, Path) 69 -> format('~w~n', [Path]), 70 fail 71 ; !, 72 archive_close(Archive) 73 ). 74 ``` 75 76Here is another example which counts the files in the archive and prints 77file type information. It uses archive_foldl/4, a higher level 78predicate: 79 80 ``` 81 print_entry(Path, Handle, Cnt0, Cnt1) :- 82 archive_header_property(Handle, filetype(Type)), 83 format('File ~w is of type ~w~n', [Path, Type]), 84 Cnt1 is Cnt0 + 1. 85 86 list_archive(File) :- 87 archive_foldl(print_entry, File, 0, FileCount), 88 format('We have ~w files', [FileCount]). 89 ``` 90 91@see https://github.com/libarchive/libarchive/ 92*/ 93 94:- use_foreign_library(foreign(archive4pl)). 95 96%! archive_open(+Data, -Archive, +Options) is det. 97% 98% Wrapper around archive_open/4 that opens the archive in read mode. 99 100archive_open(Stream, Archive, Options) :- 101 archive_open(Stream, read, Archive, Options). 102 103:- predicate_options(archive_open/4, 4, 104 [ close_parent(boolean), 105 filter(oneof([all,bzip2,compress,gzip,grzip,lrzip, 106 lzip,lzma,lzop,none,rpm,uu,xz])), 107 format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar, 108 iso9660,lha,mtree,rar,raw,tar,xar,zip])) 109 ]). 110:- predicate_options(archive_create/3, 3, 111 [ directory(atom), 112 pass_to(archive_open/4, 4) 113 ]). 114 115%! archive_open(+Data, +Mode, -Archive, +Options) is det. 116% 117% Open the archive in Data and unify Archive with a handle to the 118% opened archive. Data is either a file or a stream that contains 119% a valid archive. Details are controlled by Options. Typically, 120% the option close_parent(true) is used to close stream if the 121% archive is closed using archive_close/1. For other options, the 122% defaults are typically fine. The option format(raw) must be used 123% to process compressed streams that do not contain explicit 124% entries (e.g., gzip'ed data) unambibuously. The =raw= format 125% creates a _pseudo archive_ holding a single member named =data=. 126% 127% * close_parent(+Boolean) 128% If this option is =true= (default =false=), Stream is closed 129% if archive_close/1 is called on Archive. 130% 131% * compression(+Compression) 132% Synomym for filter(Compression). Deprecated. 133% 134% * filter(+Filter) 135% Support the indicated filter. This option may be 136% used multiple times to support multiple filters. In read mode, 137% If no filter options are provided, =all= is assumed. In write 138% mode, none is assumed. 139% Supported values are =all=, =bzip2=, =compress=, =gzip=, 140% =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu= 141% and =xz=. The value =all= is default for read, =none= for write. 142% 143% * format(+Format) 144% Support the indicated format. This option may be used 145% multiple times to support multiple formats in read mode. 146% In write mode, you must supply a single format. If no format 147% options are provided, =all= is assumed for read mode. Note that 148% =all= does *not* include =raw= and =mtree=. To open both archive 149% and non-archive files, _both_ format(all) and 150% format(raw) and/or format(mtree) must be specified. Supported 151% values are: =all=, =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, 152% =iso9660=, =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. 153% The value =all= is default for read. 154% 155% Note that the actually supported compression types and formats 156% may vary depending on the version and installation options of 157% the underlying libarchive library. This predicate raises a 158% domain error if the (explicitly) requested format is not 159% supported. 160% 161% @error domain_error(filter, Filter) if the requested 162% filter is not supported. 163% @error domain_error(format, Format) if the requested 164% format type is not supported. 165 166archive_open(stream(Stream), Mode, Archive, Options) :- 167 !, 168 archive_open_stream(Stream, Mode, Archive, Options). 169archive_open(Stream, Mode, Archive, Options) :- 170 is_stream(Stream), 171 !, 172 archive_open_stream(Stream, Mode, Archive, Options). 173archive_open(File, Mode, Archive, Options) :- 174 open(File, Mode, Stream, [type(binary)]), 175 catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]), 176 E, (close(Stream, [force(true)]), throw(E))). 177 178 179%! archive_close(+Archive) is det. 180% 181% Close the archive. If close_parent(true) is specified, the 182% underlying stream is closed too. If there is an entry opened 183% with archive_open_entry/2, actually closing the archive is 184% delayed until the stream associated with the entry is closed. 185% This can be used to open a stream to an archive entry without 186% having to worry about closing the archive: 187% 188% == 189% archive_open_named(ArchiveFile, EntryName, Stream) :- 190% archive_open(ArchiveFile, Handle, []), 191% archive_next_header(Handle, Name), 192% archive_open_entry(Handle, Stream), 193% archive_close(Archive). 194% == 195 196 197%! archive_property(+Handle, ?Property) is nondet. 198% 199% True when Property is a property of the archive Handle. Defined 200% properties are: 201% 202% * filters(List) 203% True when the indicated filters are applied before reaching 204% the archive format. 205 206archive_property(Handle, Property) :- 207 defined_archive_property(Property), 208 Property =.. [Name,Value], 209 archive_property(Handle, Name, Value). 210 211defined_archive_property(filter(_)). 212 213 214%! archive_next_header(+Handle, -Name) is semidet. 215% 216% Forward to the next entry of the archive for which Name unifies 217% with the pathname of the entry. Fails silently if the name of 218% the archive is reached before success. Name is typically 219% specified if a single entry must be accessed and unbound 220% otherwise. The following example opens a Prolog stream to a 221% given archive entry. Note that _Stream_ must be closed using 222% close/1 and the archive must be closed using archive_close/1 223% after the data has been used. See also setup_call_cleanup/3. 224% 225% == 226% open_archive_entry(ArchiveFile, Entry, Stream) :- 227% open(ArchiveFile, read, In, [type(binary)]), 228% archive_open(In, Archive, [close_parent(true)]), 229% archive_next_header(Archive, Entry), 230% archive_open_entry(Archive, Stream). 231% == 232% 233% @error permission_error(next_header, archive, Handle) if a 234% previously opened entry is not closed. 235 236%! archive_open_entry(+Archive, -Stream) is det. 237% 238% Open the current entry as a stream. Stream must be closed. 239% If the stream is not closed before the next call to 240% archive_next_header/2, a permission error is raised. 241 242 243%! archive_set_header_property(+Archive, +Property) 244% 245% Set Property of the current header. Write-mode only. Defined 246% properties are: 247% 248% * filetype(-Type) 249% Type is one of =file=, =link=, =socket=, =character_device=, 250% =block_device=, =directory= or =fifo=. It appears that this 251% library can also return other values. These are returned as 252% an integer. 253% * mtime(-Time) 254% True when entry was last modified at time. 255% * size(-Bytes) 256% True when entry is Bytes long. 257% * link_target(-Target) 258% Target for a link. Currently only supported for symbolic 259% links. 260 261%! archive_header_property(+Archive, ?Property) 262% 263% True when Property is a property of the current header. Defined 264% properties are: 265% 266% * filetype(-Type) 267% Type is one of =file=, =link=, =socket=, =character_device=, 268% =block_device=, =directory= or =fifo=. It appears that this 269% library can also return other values. These are returned as 270% an integer. 271% * mtime(-Time) 272% True when entry was last modified at time. 273% * size(-Bytes) 274% True when entry is Bytes long. 275% * link_target(-Target) 276% Target for a link. Currently only supported for symbolic 277% links. 278% * format(-Format) 279% Provides the name of the archive format applicable to the 280% current entry. The returned value is the lowercase version 281% of the output of archive_format_name(). 282% * permissions(-Integer) 283% True when entry has the indicated permission mask. 284 285archive_header_property(Archive, Property) :- 286 ( nonvar(Property) 287 -> true 288 ; header_property(Property) 289 ), 290 archive_header_prop_(Archive, Property). 291 292header_property(filetype(_)). 293header_property(mtime(_)). 294header_property(size(_)). 295header_property(link_target(_)). 296header_property(format(_)). 297header_property(permissions(_)). 298 299 300%! archive_extract(+ArchiveFile, +Dir, +Options) 301% 302% Extract files from the given archive into Dir. Supported 303% options: 304% 305% * remove_prefix(+Prefix) 306% Strip Prefix from all entries before extracting. If Prefix 307% is a list, then each prefix is tried in order, succeding at 308% the first one that matches. If no prefixes match, an error 309% is reported. If Prefix is an atom, then that prefix is removed. 310% * exclude(+ListOfPatterns) 311% Ignore members that match one of the given patterns. 312% Patterns are handed to wildcard_match/2. 313% * include(+ListOfPatterns) 314% Include members that match one of the given patterns. 315% Patterns are handed to wildcard_match/2. The `exclude` 316% options takes preference if a member matches both the `include` 317% and the `exclude` option. 318% 319% @error existence_error(directory, Dir) if Dir does not exist 320% or is not a directory. 321% @error domain_error(path_prefix(Prefix), Path) if a path in 322% the archive does not start with Prefix 323% @tbd Add options 324 325archive_extract(Archive, Dir, Options) :- 326 ( exists_directory(Dir) 327 -> true 328 ; existence_error(directory, Dir) 329 ), 330 setup_call_cleanup( 331 archive_open(Archive, Handle, Options), 332 extract(Handle, Dir, Options), 333 archive_close(Handle)). 334 335extract(Archive, Dir, Options) :- 336 archive_next_header(Archive, Path), 337 !, 338 option(include(InclPatterns), Options, ['*']), 339 option(exclude(ExclPatterns), Options, []), 340 ( archive_header_property(Archive, filetype(file)), 341 \+ matches(ExclPatterns, Path), 342 matches(InclPatterns, Path) 343 -> archive_header_property(Archive, permissions(Perm)), 344 remove_prefix(Options, Path, ExtractPath), 345 directory_file_path(Dir, ExtractPath, Target), 346 file_directory_name(Target, FileDir), 347 make_directory_path(FileDir), 348 setup_call_cleanup( 349 archive_open_entry(Archive, In), 350 setup_call_cleanup( 351 open(Target, write, Out, [type(binary)]), 352 copy_stream_data(In, Out), 353 close(Out)), 354 close(In)), 355 set_permissions(Perm, Target) 356 ; true 357 ), 358 extract(Archive, Dir, Options). 359extract(_, _, _). 360 361%! matches(+Patterns, +Path) is semidet. 362% 363% True when Path matches a pattern in Patterns. 364 365matches([], _Path) :- 366 !, 367 fail. 368matches(Patterns, Path) :- 369 split_string(Path, "/", "/", Parts), 370 member(Segment, Parts), 371 Segment \== "", 372 member(Pattern, Patterns), 373 wildcard_match(Pattern, Segment), 374 !. 375 376remove_prefix(Options, Path, ExtractPath) :- 377 ( option(remove_prefix(Remove), Options) 378 -> ( is_list(Remove) 379 -> ( member(P, Remove), 380 atom_concat(P, ExtractPath, Path) 381 -> true 382 ; domain_error(path_prefix(Remove), Path) 383 ) 384 ; ( atom_concat(Remove, ExtractPath, Path) 385 -> true 386 ; domain_error(path_prefix(Remove), Path) 387 ) 388 ) 389 ; ExtractPath = Path 390 ). 391 392%! set_permissions(+Perm:integer, +Target:atom) 393% 394% Restore the permissions. Currently only restores the executable 395% permission. 396 397set_permissions(Perm, Target) :- 398 Perm /\ 0o100 =\= 0, 399 !, 400 '$mark_executable'(Target). 401set_permissions(_, _). 402 403 404 /******************************* 405 * HIGH LEVEL PREDICATES * 406 *******************************/ 407 408%! archive_entries(+Archive, -Paths) is det. 409% 410% True when Paths is a list of pathnames appearing in Archive. 411 412archive_entries(Archive, Paths) :- 413 setup_call_cleanup( 414 archive_open(Archive, Handle, []), 415 contents(Handle, Paths), 416 archive_close(Handle)). 417 418contents(Handle, [Path|T]) :- 419 archive_next_header(Handle, Path), 420 !, 421 contents(Handle, T). 422contents(_, []). 423 424%! archive_data_stream(+Archive, -DataStream, +Options) is nondet. 425% 426% True when DataStream is a stream to a data object inside 427% Archive. This predicate transparently unpacks data inside 428% _possibly nested_ archives, e.g., a _tar_ file inside a _zip_ 429% file. It applies the appropriate decompression filters and thus 430% ensures that Prolog reads the plain data from DataStream. 431% DataStream must be closed after the content has been processed. 432% Backtracking opens the next member of the (nested) archive. This 433% predicate processes the following options: 434% 435% - meta_data(-Data:list(dict)) 436% If provided, Data is unified with a list of filters applied to 437% the (nested) archive to open the current DataStream. The first 438% element describes the outermost archive. Each Data dict 439% contains the header properties (archive_header_property/2) as 440% well as the keys: 441% 442% - filters(Filters:list(atom)) 443% Filter list as obtained from archive_property/2 444% - name(Atom) 445% Name of the entry. 446% 447% Non-archive files are handled as pseudo-archives that hold a 448% single stream. This is implemented by using archive_open/3 with 449% the options `[format(all),format(raw)]`. 450 451archive_data_stream(Archive, DataStream, Options) :- 452 option(meta_data(MetaData), Options, _), 453 archive_content(Archive, DataStream, MetaData, []). 454 455archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :- 456 archive_property(Archive, filter(Filters)), 457 repeat, 458 ( archive_next_header(Archive, EntryName) 459 -> findall(EntryProperty, 460 archive_header_property(Archive, EntryProperty), 461 EntryProperties), 462 dict_create(EntryMetadata, archive_meta_data, 463 [ filters(Filters), 464 name(EntryName) 465 | EntryProperties 466 ]), 467 ( EntryMetadata.filetype == file 468 -> archive_open_entry(Archive, Entry0), 469 ( EntryName == data, 470 EntryMetadata.format == raw 471 -> % This is the last entry in this nested branch. 472 % We therefore close the choicepoint created by repeat/0. 473 % Not closing this choicepoint would cause 474 % archive_next_header/2 to throw an exception. 475 !, 476 PipeMetadataTail = PipeMetadata2, 477 Entry = Entry0 478 ; PipeMetadataTail = PipeMetadata1, 479 open_substream(Entry0, 480 Entry, 481 PipeMetadata1, 482 PipeMetadata2) 483 ) 484 ; fail 485 ) 486 ; !, 487 fail 488 ). 489 490open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :- 491 setup_call_cleanup( 492 archive_open(stream(In), 493 Archive, 494 [ close_parent(true), 495 format(all), 496 format(raw) 497 ]), 498 archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata), 499 archive_close(Archive)). 500 501 502%! archive_create(+OutputFile, +InputFiles, +Options) is det. 503% 504% Convenience predicate to create an archive in OutputFile with 505% data from a list of InputFiles and the given Options. 506% 507% Besides options supported by archive_open/4, the following 508% options are supported: 509% 510% * directory(+Directory) 511% Changes the directory before adding input files. If this is 512% specified, paths of input files must be relative to 513% Directory and archived files will not have Directory 514% as leading path. This is to simulate =|-C|= option of 515% the =tar= program. 516% 517% * format(+Format) 518% Write mode supports the following formats: `7zip`, `cpio`, 519% `gnutar`, `iso9660`, `xar` and `zip`. Note that a particular 520% installation may support only a subset of these, depending on 521% the configuration of `libarchive`. 522 523archive_create(OutputFile, InputFiles, Options) :- 524 must_be(list(text), InputFiles), 525 option(directory(BaseDir), Options, '.'), 526 setup_call_cleanup( 527 archive_open(OutputFile, write, Archive, Options), 528 archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top), 529 archive_close(Archive)). 530 531archive_create_1(_, _, _, [], _) :- !. 532archive_create_1(Archive, Base, Current, ['.'|Files], sub) :- 533 !, 534 archive_create_1(Archive, Base, Current, Files, sub). 535archive_create_1(Archive, Base, Current, ['..'|Files], Where) :- 536 !, 537 archive_create_1(Archive, Base, Current, Files, Where). 538archive_create_1(Archive, Base, Current, [File|Files], Where) :- 539 directory_file_path(Current, File, Filename), 540 archive_create_2(Archive, Base, Filename), 541 archive_create_1(Archive, Base, Current, Files, Where). 542 543archive_create_2(Archive, Base, Directory) :- 544 exists_directory(Directory), 545 !, 546 entry_name(Base, Directory, Directory0), 547 archive_next_header(Archive, Directory0), 548 time_file(Directory, Time), 549 archive_set_header_property(Archive, mtime(Time)), 550 archive_set_header_property(Archive, filetype(directory)), 551 archive_open_entry(Archive, EntryStream), 552 close(EntryStream), 553 directory_files(Directory, Files), 554 archive_create_1(Archive, Base, Directory, Files, sub). 555archive_create_2(Archive, Base, Filename) :- 556 entry_name(Base, Filename, Filename0), 557 archive_next_header(Archive, Filename0), 558 size_file(Filename, Size), 559 time_file(Filename, Time), 560 archive_set_header_property(Archive, size(Size)), 561 archive_set_header_property(Archive, mtime(Time)), 562 setup_call_cleanup( 563 archive_open_entry(Archive, EntryStream), 564 setup_call_cleanup( 565 open(Filename, read, DataStream, [type(binary)]), 566 copy_stream_data(DataStream, EntryStream), 567 close(DataStream)), 568 close(EntryStream)). 569 570entry_name('.', Name, Name) :- !. 571entry_name(Base, Name, EntryName) :- 572 directory_file_path(Base, EntryName, Name). 573 574%! archive_foldl(:Goal, +Archive, +State0, -State). 575% 576% Operates like foldl/4 but for the entries in the archive. For each 577% member of the archive, Goal called as `call(:Goal, +Path, +Handle, 578% +S0, -S1). Here, `S0` is current state of the _accumulator_ 579% (starting with State0) and `S1` is the next state of the 580% accumulator, producing State after the last member of the archive. 581% 582% @see archive_header_property/2, archive_open/4. 583% 584% @arg Archive File name or stream to be given to archive_open/[3,4]. 585 586archive_foldl(Goal, Archive, State0, State) :- 587 setup_call_cleanup( 588 archive_open(Archive, Handle, [close_parent(true)]), 589 archive_foldl_(Goal, Handle, State0, State), 590 archive_close(Handle) 591 ). 592 593archive_foldl_(Goal, Handle, State0, State) :- 594 ( archive_next_header(Handle, Path) 595 -> call(Goal, Path, Handle, State0, State1), 596 archive_foldl_(Goal, Handle, State1, State) 597 ; State = State0 598 )