/* csv2rec.c - csv to rec converter. */

/* Copyright (C) 2010-2022 Jose E. Marchesi */

/* This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xalloc.h>
#include <gettext.h>
#define _(str) gettext (str)

#include <csv.h>
#include <rec.h>
#include <recutl.h>

/* Forward declarations. */
|
|
static void parse_args (int argc, char **argv);
|
|
static rec_db_t process_csv (void);
|
|
static int is_space (unsigned char c);
|
|
static int is_term (unsigned char c);
|
|
static void field_cb (void *s, size_t len, void *data);
|
|
static void record_cb (int c, void *data);
|
|
|
|
/*
|
|
* Types
|
|
*/
|
|
|
|
struct csv2rec_ctx
|
|
{
|
|
rec_db_t db;
|
|
rec_rset_t rset;
|
|
rec_record_t record;
|
|
|
|
size_t num_fields;
|
|
size_t lineno;
|
|
|
|
#define ALLOC_FIELDS 256
|
|
bool header_p;
|
|
size_t num_field_names;
|
|
char **field_names;
|
|
};
|
|
|
|
/*
 * Global variables
 */

/* Record type given with -t/--type, or NULL when no type was
   requested.  */
char *csv2rec_record_type = NULL;

/* Path of the csv file named on the command line, or NULL to read
   the standard input.  */
char *csv2rec_csv_file = NULL;

/* Set by -s/--strict: parse the csv data strictly.  */
bool csv2rec_strict = false;

/* Set by -e/--omit-empty: do not emit fields with an empty value.  */
bool csv2rec_omit_empty = false;

/*
 * Command line options management
 */

/* Identifiers returned by getopt_long for the long options.
   COMMON_ARGS is not defined in this file; presumably it expands to
   the identifiers of the options shared by all the utilities.  */
enum
{
  COMMON_ARGS,
  RECORD_TYPE_ARG,   /* --type */
  STRICT_ARG,        /* --strict */
  OMIT_EMPTY_ARG     /* --omit-empty */
};

static const struct option GNU_longOptions[] =
|
|
{
|
|
COMMON_LONG_ARGS,
|
|
{"type", required_argument, NULL, RECORD_TYPE_ARG},
|
|
{"strict", no_argument, NULL, STRICT_ARG},
|
|
{"omit-empty", no_argument, NULL, OMIT_EMPTY_ARG},
|
|
{NULL, 0, NULL, 0}
|
|
};
|
|
|
|
/*
 * Functions.
 */

/* Print the --help text to the standard output: synopsis, short
   description, the options specific to csv2rec, and then the common
   help text and footer.  */

void
recutl_print_help (void)
{
  /* TRANSLATORS: --help output, csv2rec synopsis.
     no-wrap */
  printf (_("Usage: csv2rec [OPTIONS]... [CSV_FILE]\n"));

  /* TRANSLATORS: --help output, csv2rec short description.
     no-wrap */
  fputs (_("Convert csv data into rec data.\n"), stdout);

  putchar ('\n');

  /* NOTE(review): the option lines below carry no leading indentation
     or column padding; confirm this is the intended --help layout.  */

  /* TRANSLATORS: --help output, csv2rec options.
     no-wrap */
  fputs (_("-t, --type=TYPE type name for the converted records; if this\n"
           "parameter is omitted then no type is used.\n"
           "-s, --strict be strict parsing the csv file.\n"
           "-e, --omit-empty omit empty fields.\n"),
         stdout);

  recutl_print_help_common ();
  putchar ('\n');
  recutl_print_help_footer ();
}

static void
|
|
parse_args (int argc,
|
|
char **argv)
|
|
{
|
|
int ret;
|
|
char c;
|
|
|
|
while ((ret = getopt_long (argc,
|
|
argv,
|
|
"t:se",
|
|
GNU_longOptions,
|
|
NULL)) != -1)
|
|
{
|
|
c = ret;
|
|
switch (c)
|
|
{
|
|
COMMON_ARGS_CASES
|
|
case RECORD_TYPE_ARG:
|
|
case 't':
|
|
csv2rec_record_type = xstrdup (optarg);
|
|
break;
|
|
case STRICT_ARG:
|
|
case 's':
|
|
csv2rec_strict = true;
|
|
break;
|
|
case OMIT_EMPTY_ARG:
|
|
case 'e':
|
|
csv2rec_omit_empty = true;
|
|
break;
|
|
default:
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
/* Read the name of the csv file, if any. */
|
|
if (optind < argc)
|
|
{
|
|
if ((argc - optind) != 1)
|
|
{
|
|
recutl_print_help ();
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
csv2rec_csv_file = argv[optind++];
|
|
}
|
|
}
|
|
|
|
static int
|
|
is_space (unsigned char c)
|
|
{
|
|
return (c == CSV_SPACE) || (c == CSV_TAB);
|
|
}
|
|
|
|
static int
|
|
is_term (unsigned char c)
|
|
{
|
|
return (c == CSV_CR) || (c == CSV_LF);
|
|
}
|
|
|
|
void
|
|
field_cb (void *s, size_t len, void *data)
|
|
{
|
|
char *str;
|
|
char *field_name;
|
|
rec_field_t field;
|
|
struct csv2rec_ctx *ctx;
|
|
size_t i;
|
|
|
|
ctx = (struct csv2rec_ctx *) data;
|
|
str = xmalloc (len + 1);
|
|
memcpy (str, s, len);
|
|
str[len] = '\0';
|
|
|
|
if (ctx->header_p)
|
|
{
|
|
/* Add a new field name to ctx.field_names. */
|
|
|
|
if ((ctx->num_field_names % ALLOC_FIELDS) == 0)
|
|
ctx->field_names =
|
|
realloc (ctx->field_names, ((ctx->num_field_names / ALLOC_FIELDS) + 1) * (sizeof(char *) * ALLOC_FIELDS));
|
|
|
|
/* Normalize the name: spaces and tabs are turned into dashes
|
|
'_'. */
|
|
for (i = 0; i < strlen (str); i++)
|
|
{
|
|
if ((str[i] == ' ') || (str[i] == '\t'))
|
|
str[i] = '_';
|
|
}
|
|
|
|
/* Verify that it is a valid field name. */
|
|
field_name = str;
|
|
if (!rec_field_name_p (field_name))
|
|
recutl_fatal (_("invalid field name '%s' in header\n"),
|
|
str);
|
|
ctx->field_names[ctx->num_field_names++] = str;
|
|
}
|
|
else
|
|
{
|
|
/* Create a new field and insert it in the current record. */
|
|
|
|
if (ctx->num_fields >= ctx->num_field_names)
|
|
recutl_fatal (_("error while parsing CSV file: too many columns in row\n"));
|
|
|
|
if (!ctx->record)
|
|
{
|
|
/* Create a new record. */
|
|
ctx->record = rec_record_new ();
|
|
if (!ctx->record)
|
|
recutl_out_of_memory ();
|
|
}
|
|
|
|
if (!csv2rec_omit_empty || (strlen(str) > 0))
|
|
{
|
|
if (ctx->num_fields > ctx->num_field_names)
|
|
{
|
|
char *source = csv2rec_csv_file;
|
|
|
|
if (!source)
|
|
source = "stdin";
|
|
|
|
fprintf (stderr,
|
|
_("%s: %lu: \
|
|
this line contains %lu fields, but %lu header fields were read\n"),
|
|
source,
|
|
ctx->lineno,
|
|
ctx->num_field_names,
|
|
ctx->num_fields);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
field = rec_field_new (ctx->field_names[ctx->num_fields], str);
|
|
rec_mset_append (rec_record_mset (ctx->record), MSET_FIELD,
|
|
(void *) field, MSET_ANY);
|
|
}
|
|
|
|
ctx->num_fields++;
|
|
}
|
|
}
|
|
|
|
void
|
|
record_cb (int c, void *data)
|
|
{
|
|
struct csv2rec_ctx *ctx;
|
|
ctx = (struct csv2rec_ctx *) data;
|
|
|
|
ctx->lineno++;
|
|
|
|
if (ctx->header_p)
|
|
ctx->header_p = false;
|
|
else
|
|
{
|
|
if (!ctx->rset)
|
|
{
|
|
/* Create a new record set. */
|
|
ctx->rset = rec_rset_new ();
|
|
if (!ctx->rset)
|
|
recutl_out_of_memory ();
|
|
|
|
/* Add a type, if needed. */
|
|
if (csv2rec_record_type)
|
|
rec_rset_set_type (ctx->rset, csv2rec_record_type);
|
|
|
|
/* Add it to the database. */
|
|
if (!ctx->db)
|
|
{
|
|
ctx->db = rec_db_new ();
|
|
if (!ctx->db)
|
|
recutl_out_of_memory ();
|
|
}
|
|
rec_db_insert_rset (ctx->db, ctx->rset, rec_db_size (ctx->db));
|
|
}
|
|
|
|
/* Add the current record to the record set. */
|
|
rec_mset_append (rec_rset_mset (ctx->rset),
|
|
MSET_RECORD,
|
|
(void *) ctx->record, MSET_ANY);
|
|
ctx->record = NULL;
|
|
|
|
/* Reset the field counter. */
|
|
ctx->num_fields = 0;
|
|
}
|
|
}
|
|
|
|
static rec_db_t
|
|
process_csv (void)
|
|
{
|
|
struct csv2rec_ctx ctx;
|
|
FILE *in;
|
|
struct csv_parser p;
|
|
unsigned char options = 0;
|
|
char buf[1024];
|
|
size_t bytes_read = 0;
|
|
|
|
/* Initialize the data in the context. */
|
|
ctx.db = NULL;
|
|
ctx.rset = NULL;
|
|
ctx.record = NULL;
|
|
ctx.header_p = true;
|
|
ctx.field_names = NULL;
|
|
ctx.num_field_names = 0;
|
|
ctx.num_fields = 0;
|
|
ctx.lineno = 0;
|
|
|
|
/* Set the files to read/write from/to.
|
|
|
|
If a filename was specified, read the csv file from there.
|
|
Otherwise use the standard input. The output is written to the
|
|
standard output in any case. */
|
|
if (csv2rec_csv_file)
|
|
{
|
|
if (!(in = fopen (csv2rec_csv_file, "r")))
|
|
recutl_fatal (_("cannot read file %s\n"), csv2rec_csv_file);
|
|
}
|
|
else
|
|
in = stdin;
|
|
|
|
/* Initialize the csv library. */
|
|
if (csv_init (&p, options) != 0)
|
|
recutl_fatal (_("failed to initialize csv parser\n"));
|
|
|
|
/* Set some properties of the parser. */
|
|
if (csv2rec_strict)
|
|
{
|
|
options |= CSV_STRICT;
|
|
csv_set_opts (&p, options);
|
|
}
|
|
|
|
csv_set_space_func (&p, is_space);
|
|
csv_set_term_func (&p, is_term);
|
|
|
|
/* Parse the input file in chunks of data. */
|
|
while ((bytes_read = fread (buf, 1, 1024, in)) > 0)
|
|
{
|
|
if (csv_parse (&p, buf, bytes_read, field_cb, record_cb, &ctx)
|
|
!= bytes_read)
|
|
recutl_fatal (_("error while parsing CSV file: %s\n"),
|
|
csv_strerror (csv_error (&p)));
|
|
|
|
}
|
|
|
|
return ctx.db;
|
|
}
|
|
|
|
int
|
|
main (int argc, char *argv[])
|
|
{
|
|
int ret;
|
|
rec_db_t db;
|
|
rec_writer_t writer;
|
|
|
|
recutl_init ("csv2rec");
|
|
|
|
parse_args (argc, argv);
|
|
db = process_csv ();
|
|
ret = EXIT_SUCCESS;
|
|
|
|
if (db)
|
|
{
|
|
writer = rec_writer_new (stdout);
|
|
rec_write_db (writer, db);
|
|
|
|
rec_writer_destroy (writer);
|
|
rec_db_destroy (db);
|
|
}
|
|
else
|
|
ret = EXIT_FAILURE;
|
|
|
|
return ret;
|
|
}
|