1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2018, VU University Amsterdam 7 CWI, Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(csv, 37 [ csv//1, % +Rows 38 csv//2, % +Rows, +Options 39 40 csv_read_file/2, % +File, -Data 41 csv_read_file/3, % +File, -Data, +Options 42 csv_read_stream/3, % +Stream, -Data, +Options 43 44 csv_read_file_row/3, % +File, -Row, +Options 45 csv_read_row/3, % +Stream, -Row, +CompiledOptions 46 csv_options/2, % -Compiled, +Options 47 48 csv_write_file/2, % +File, +Data 49 csv_write_file/3, % +File, +Data, +Options 50 csv_write_stream/3 % +Stream, +Data, +Options 51 ]). 52:- use_module(library(record)). 53:- use_module(library(error)). 54:- use_module(library(pure_input)). 55:- use_module(library(debug)). 56:- use_module(library(option)). 57:- use_module(library(apply)). 58:- use_module(library(dcg/basics)).
73:- predicate_options(csv//2, 2, 74 [ separator(nonneg), % mustv be code 75 strip(boolean), 76 ignore_quotes(boolean), 77 convert(boolean), 78 case(oneof([down,preserve,up])), 79 functor(atom), 80 arity(-nonneg), % actually ?nonneg 81 match_arity(boolean) 82 ]). 83:- predicate_options(csv_read_file/3, 3, 84 [ pass_to(csv//2, 2), 85 pass_to(phrase_from_file/3, 3) 86 ]). 87:- predicate_options(csv_read_file_row/3, 3, 88 [ pass_to(csv//2, 2), 89 pass_to(open/4, 4) 90 ]). 91:- predicate_options(csv_write_file/3, 3, 92 [ pass_to(csv//2, 2), 93 pass_to(open/4, 4) 94 ]). 95:- predicate_options(csv_write_stream/3, 3, 96 [ pass_to(csv//2, 2) 97 ]). 98 99 100:- record 101 csv_options(separator:integer=0',, 102 strip:boolean=false, 103 ignore_quotes:boolean=false, 104 convert:boolean=true, 105 case:oneof([down,preserve,up])=preserve, 106 functor:atom=row, 107 arity:integer, 108 match_arity:boolean=true, 109 skip_header:atom).
\t
for
.tsv
files and ,
otherwise.
Suppose we want to create a predicate table/6 from a CSV file
that we know contains 6 fields per record. This can be done
using the code below. Without the option arity(6)
, this would
generate a predicate table/N, where N is the number of fields
per record in the data.
?- csv_read_file(File, Rows, [functor(table), arity(6)]), maplist(assert, Rows).
133csv_read_file(File, Rows) :- 134 csv_read_file(File, Rows, []). 135 136csv_read_file(File, Rows, Options) :- 137 default_separator(File, Options, Options1), 138 make_csv_options(Options1, Record, RestOptions), 139 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions). 140 141 142default_separator(File, Options0, Options) :- 143 ( option(separator(_), Options0) 144 -> Options = Options0 145 ; file_name_extension(_, Ext0, File), 146 downcase_atom(Ext0, Ext), 147 ext_separator(Ext, Sep) 148 -> Options = [separator(Sep)|Options0] 149 ; Options = Options0 150 ). 151 152ext_separator(csv, 0',). 153ext_separator(tsv, 0'\t).
160csv_read_stream(Stream, Rows, Options) :-
161 make_csv_options(Options, Record, _),
162 phrase_from_stream(csv_roptions(Rows, Record), Stream).
separator(0';)
parses
a semicolon separated file.true
(default false), threat double quotes as a normal
character.true
(default false
), strip leading and trailing
blank space. RFC4180 says that blank space is part of the
data.#
. After
skipping comment lines this option causes csv//2 to skip empty
lines. Note that an empty line may not contain white space
characters (space or tab) as these may provide valid data.true
(default), use name/2 on the field data. This
translates the field into a number if possible.down
, downcase atomic values. If up
, upcase them
and if preserve
(default), do not change the case.row
.domain_error(row_arity(Expected), Found)
if a row is
found with different arity.false
(default true
), do not reject CSV files where
lines provide a varying number of fields (columns). This
can be a work-around to use some incorrect CSV files.214csv(Rows) --> 215 csv(Rows, []). 216 217csv(Rows, Options) --> 218 { make_csv_options(Options, Record, _) }, 219 csv_roptions(Rows, Record). 220 221csv_roptions(Rows, Record) --> 222 { ground(Rows) }, 223 !, 224 emit_csv(Rows, Record). 225csv_roptions(Rows, Record) --> 226 skip_header(Record), 227 csv_data(Rows, Record). 228 229skip_header(Options) --> 230 { csv_options_skip_header(Options, CommentStart), 231 nonvar(CommentStart), 232 atom_codes(CommentStart, Codes) 233 }, 234 !, 235 skip_header_lines(Codes), 236 skip_blank_lines. 237skip_header(_) --> 238 []. 239 240skip_header_lines(CommentStart) --> 241 string(CommentStart), 242 !, 243 ( string(_Comment), 244 end_of_record 245 -> skip_header_lines(CommentStart) 246 ). 247skip_header_lines(_) --> 248 []. 249 250skip_blank_lines --> 251 eos, 252 !. 253skip_blank_lines --> 254 end_of_record, 255 !, 256 skip_blank_lines. 257skip_blank_lines --> 258 []. 259 260csv_data([], _) --> 261 eos, 262 !. 263csv_data([Row|More], Options) --> 264 row(Row, Options), 265 !, 266 { debug(csv, 'Row: ~p', [Row]) }, 267 csv_data(More, Options). 268 269 270row(Row, Options) --> 271 fields(Fields, Options), 272 { csv_options_functor(Options, Functor), 273 Row =.. [Functor|Fields], 274 functor(Row, _, Arity), 275 check_arity(Options, Arity) 276 }. 277 278check_arity(Options, Arity) :- 279 csv_options_arity(Options, Arity), 280 !. 281check_arity(Options, _) :- 282 csv_options_match_arity(Options, false), 283 !. 284check_arity(Options, Arity) :- 285 csv_options_arity(Options, Expected), 286 domain_error(row_arity(Expected), Arity). 287 288fields([F|T], Options) --> 289 field(F, Options), 290 ( separator(Options) 291 -> fields(T, Options) 292 ; end_of_record 293 -> { T = [] } 294 ). 295 296field(Value, Options) --> 297 "\"", 298 { csv_options_ignore_quotes(Options, false) }, 299 !, 300 string_codes(Codes), 301 { make_value(Codes, Value, Options) }. 302field(Value, Options) --> 303 { csv_options_strip(Options, true) }, 304 !, 305 stripped_field(Value, Options). 306field(Value, Options) --> 307 { csv_options_separator(Options, Sep) }, 308 field_codes(Codes, Sep), 309 { make_value(Codes, Value, Options) }. 310 311 312stripped_field(Value, Options) --> 313 ws, 314 ( "\"", 315 { csv_options_strip(Options, false) } 316 -> string_codes(Codes), 317 ws 318 ; { csv_options_separator(Options, Sep) }, 319 field_codes(Codes0, Sep), 320 { strip_trailing_ws(Codes0, Codes) } 321 ), 322 { make_value(Codes, Value, Options) }. 323 324ws --> " ", !, ws. 325ws --> "\t", !, ws. 326ws --> "". 327 328strip_trailing_ws(List, Stripped) :- 329 append(Stripped, WS, List), 330 all_ws(WS). 331 332all_ws([]). 333all_ws([32|T]) :- all_ws(T). 334all_ws([9|T]) :- all_ws(T).
342string_codes(List) --> 343 [H], 344 ( { H == 0'" } 345 -> ( "\"" 346 -> { List = [H|T] }, 347 string_codes(T) 348 ; { List = [] } 349 ) 350 ; { List = [H|T] }, 351 string_codes(T) 352 ). 353 354field_codes([], Sep), [Sep] --> [Sep], !. 355field_codes([], _), "\n" --> "\r\n", !. 356field_codes([], _), "\n" --> "\n", !. 357field_codes([], _), "\n" --> "\r", !. 358field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep). 359field_codes([], _) --> []. % unterminated last record
366make_value(Codes, Value, Options) :- 367 csv_options_convert(Options, Convert), 368 csv_options_case(Options, Case), 369 make_value(Convert, Case, Codes, Value). 370 371make_value(true, preserve, Codes, Value) :- 372 !, 373 name(Value, Codes). 374make_value(true, Case, Codes, Value) :- 375 !, 376 ( number_string(Value, Codes) 377 -> true 378 ; make_value(false, Case, Codes, Value) 379 ). 380make_value(false, preserve, Codes, Value) :- 381 !, 382 atom_codes(Value, Codes). 383make_value(false, down, Codes, Value) :- 384 !, 385 string_codes(String, Codes), 386 downcase_atom(String, Value). 387make_value(false, up, Codes, Value) :- 388 string_codes(String, Codes), 389 upcase_atom(String, Value). 390 391separator(Options) --> 392 { csv_options_separator(Options, Sep) }, 393 [Sep]. 394 395end_of_record --> "\n". % Unix files 396end_of_record --> "\r\n". % DOS files 397end_of_record --> "\r". % MacOS files 398end_of_record --> eos. % unterminated last record
In addition to the options of csv_read_file/3, this predicate processes the option:
422csv_read_file_row(File, Row, Options) :- 423 default_separator(File, Options, Options1), 424 make_csv_options(Options1, RecordOptions, Options2), 425 select_option(line(Line), Options2, RestOptions, _), 426 setup_call_cleanup( 427 open(File, read, Stream, RestOptions), 428 csv_read_stream_row(Stream, Row, Line, RecordOptions), 429 close(Stream)). 430 431csv_read_stream_row(Stream, Row, Line, Options) :- 432 between(1, infinite, Line), 433 ( csv_read_row(Stream, Row0, Options), 434 Row0 \== end_of_file 435 -> Row = Row0 436 ; !, 437 fail 438 ).
end_of_file
upon reaching the
end of the input.448csv_read_row(Stream, Row, _Record) :- 449 at_end_of_stream(Stream), 450 !, 451 Row = end_of_file. 452csv_read_row(Stream, Row, Record) :- 453 read_lines_to_codes(Stream, Codes, Record, even), 454 phrase(row(Row0, Record), Codes), 455 !, 456 Row = Row0. 457 458read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :- 459 read_line_to_codes(Stream, Codes0), 460 Codes0 \== end_of_file, 461 ( ( csv_options_ignore_quotes(Options, true) 462 ; check_quotes(Codes0, QuoteQuantity, even) 463 ) 464 -> Codes = Codes0 465 ; append(Codes0, [0'\n|Tail], Codes), 466 read_lines_to_codes(Stream, Tail, Options, odd) 467 ). 468 469check_quotes([], QuoteQuantity, QuoteQuantity) :- 470 !. 471check_quotes([0'"|T], odd, Result) :- 472 !, 473 check_quotes(T, even, Result). 474check_quotes([0'"|T], even, Result) :- 475 !, 476 check_quotes(T, odd, Result). 477check_quotes([_|T], QuoteQuantity, Result) :- 478 check_quotes(T, QuoteQuantity, Result).
488csv_options(Compiled, Options) :- 489 make_csv_options(Options, Compiled, _Ignored). 490 491 492 /******************************* 493 * OUTPUT * 494 *******************************/
\t
for
.tsv
files and ,
otherwise.504csv_write_file(File, Data) :- 505 csv_write_file(File, Data, []). 506 507csv_write_file(File, Data, Options) :- 508 must_be(list, Data), 509 default_separator(File, Options, Options1), 510 make_csv_options(Options1, OptionsRecord, RestOptions), 511 setup_call_cleanup( 512 open(File, write, Out, RestOptions), 513 maplist(csv_write_row(Out, OptionsRecord), Data), 514 close(Out)). 515 516csv_write_row(Out, OptionsRecord, Row) :- 517 phrase(emit_row(Row, OptionsRecord), String), 518 format(Out, '~s', [String]). 519 520emit_csv([], _) --> []. 521emit_csv([H|T], Options) --> 522 emit_row(H, Options), 523 emit_csv(T, Options). 524 525emit_row(Row, Options) --> 526 { Row =.. [_|Fields] }, 527 emit_fields(Fields, Options), 528 "\r\n". % RFC 4180 demands \r\n 529 530emit_fields([], _) --> 531 "". 532emit_fields([H|T], Options) --> 533 emit_field(H, Options), 534 ( { T == [] } 535 -> [] 536 ; { csv_options_separator(Options, Sep) }, 537 [Sep], 538 emit_fields(T, Options) 539 ). 540 541emit_field(H, Options) --> 542 { ( atom(H) 543 -> atom_codes(H, Codes) 544 ; string(H) 545 -> string_codes(H, Codes) 546 ) 547 }, 548 !, 549 ( { needs_quotes(H, Options) } 550 -> "\"", emit_string(Codes), "\"" 551 ; emit_codes(Codes) 552 ). 553emit_field([], _) --> 554 !, 555 { atom_codes('[]', Codes) }, 556 emit_codes(Codes). 557emit_field(H, _) --> 558 { number_codes(H,Codes) }, 559 emit_codes(Codes). 560 561needs_quotes(Atom, _) :- 562 sub_atom(Atom, _, _, _, '"'), 563 !. 564needs_quotes(Atom, _) :- 565 sub_atom(Atom, _, _, _, '\n'), 566 !. 567needs_quotes(Atom, _) :- 568 sub_atom(Atom, _, _, _, '\r'), 569 !. 570needs_quotes(Atom, Options) :- 571 csv_options_separator(Options, Sep), 572 char_code(Char, Sep), 573 sub_atom(Atom, _, _, _, Char), 574 !. 575 576emit_string([]) --> "". 577emit_string([0'"|T]) --> !, "\"\"", emit_string(T). 578emit_string([H|T]) --> [H], emit_string(T). 579 580emit_codes([]) --> "". 581emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T). 582emit_codes([H|T]) --> [H], emit_codes(T).
save_data(File) :- setup_call_cleanup( open(File, write, Out), forall(data(C1,C2,C3), csv_write_stream(Out, [row(C1,C2,C3)], [])), close(Out)),
601csv_write_stream(Stream, Data, Options) :-
602 must_be(list, Data),
603 make_csv_options(Options, OptionsRecord, _),
604 maplist(csv_write_row(Stream, OptionsRecord), Data)
Process CSV (Comma-Separated Values) data
This library parses and generates CSV data. CSV data is represented in Prolog as a list of rows. Each row is a compound term, where all rows have the same name and arity.