#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2019-12-06 12:49:48 +0200 (Fri, 06 Dec 2019) $ 
#$Revision: 7550 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.10/scripts/cif2csv $
#------------------------------------------------------------------------------
#*
#* Convert a CIF file into a CSV file.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_parser_messages report_message );
use COD::ToolsVersion;

sub csv_format($$$);

my $die_on_error_level = {
    'ERROR'   => 1,
    'WARNING' => 0,
    'NOTE'    => 0
};

my $use_parser = 'c';

#* OPTIONS:
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '--use-perl-parser' => sub { $use_parser = 'perl' },
    '--use-c-parser'    => sub { $use_parser = 'c' },
    '--options'         => sub { options; exit },
    '--help,--usage'    => sub { usage; exit },
    '--version'         => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

@ARGV = ('-') unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my $fields_separator = ','; # as per section 1.4 of RFC 4180
my $lines_separator  = "\r\n"; # as per section 4.1.1 of RFC 2046

for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@{$data} || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_error_level->{WARNING} );
        next;
    }

    for my $datablock (@{$data}) {
        print {*STDERR} $datablock->{name} . "\n";
        print 'data_'
            . $fields_separator . $datablock->{name}
            . $lines_separator;
        # print stray values
        for my $tagName (sort keys %{$datablock->{values}}) {
            if(defined $datablock->{inloop}->{$tagName}) {
                next; # print loops after stray values
            }
            my $precision = undef;
            if(defined $datablock->{precisions}{$tagName}[0]) {
                $precision = $datablock->{precisions}{$tagName}[0];
            }
            print $tagName . $fields_separator;
            print csv_format($datablock->{values}{$tagName}[0],
                             $datablock->{types}{$tagName}[0],
                             $precision);
            print $lines_separator;
        }
        # and print loop values
        for my $loop (@{$datablock->{loops}}) {
            local $, = $fields_separator;
            print @{$loop};
            print $lines_separator;
            my $loop_value_length = $#{$datablock->{values}{$loop->[0]}};
            for my $line(0..$loop_value_length) {
                for my $tagName (@{$loop}) {
                    my $precision = undef;
                    if(defined $datablock->{precisions}{$tagName}[0]) {
                        $precision = $datablock->{precisions}{$tagName}[0];
                    }
                    print csv_format($datablock->{values}{$tagName}[$line],
                                     $datablock->{types}{$tagName}[$line],
                                     $precision);
                    if($tagName ne $loop->[$#{$loop}]) { # do not print field separator
                        print $fields_separator;        # after last element
                    }
                }
                print $lines_separator;
            }
        }
    }
}

sub csv_format($$$)
{
    my ($value, $type, $precision) = @_;
    if(0 == length $value) {
        return '';
    }
    my $result = '';
    my %formats = ('SQSTRING'   => '"%s"',
                   'DQSTRING'   => '"%s"',
                   'UQSTRING'   => '"%s"',
                   'TEXTFIELD'  => '"%s"',
                   'INT'        => '%d',
                   'FLOAT'      => '%f');
    return unless defined $formats{$type} && defined $value;
    my $format = $formats{$type};
    if('INT' eq $type || 'FLOAT' eq $type) {
        if ( $value =~ m/([+-]?[0-9]*(?:[.][0-9]+)?)(?:[(]([0-9]+)[)])?/ ) {
            $precision = $2;
            $value     = $1;
        }
    }
    if($value =~ m/,/ && $format !~ m/"/) {
        $format = '"' . $format . '"';
    }
    if($format =~ m/"/) {
        $value =~ s/"/""/g; # as per sections 2.6 & 2.7 of RFC 4180
    }
    $result = sprintf $format, $value;
    if(defined $precision) {
        $result .= '(' . $precision . ')';
    }
    return $result;
}
