#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Revision: 8845 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v3.4.0/scripts/cif_diff $
#$Date: 2021-08-01 23:10:01 +0300 (Sun, 01 Aug 2021) $
#------------------------------------------------------------------------------
#*
#* Parse two CIF files and compare their content
#*
#* USAGE:
#*    $0 --options input1.cif input2.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Data::Diff qw( diff );
use COD::ErrorHandler qw( process_errors
                          process_parser_messages
                          report_message );
use COD::Precision qw( cmp_cif_numbers );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( options usage );
use COD::ToolsVersion qw( get_version_string );

# ---------------------------------------------------------------------------
# Default settings; most of them can be overridden from the command line.
# ---------------------------------------------------------------------------

# Parser backend name handed to parse_cif(): 'c' or 'perl'.
my $use_parser = 'c';

# Per-severity flags handed to the COD::ErrorHandler routines
# (presumably a true value makes messages of that level fatal -- confirm
# against COD::ErrorHandler).
my $die_on_error_level = {
    NOTE    => 0,
    WARNING => 0,
    ERROR   => 1,
};

# Output and comparison switches forwarded to diff().
my $compare_datablock_names = 1;
my $ignore_empty_values     = 0;
my $fold_long_fields        = 1;
my $folding_width           = 72;

# Which of the three result classes are reported (cf. the --no-left,
# --no-right and --no-common options).
my ( $left, $right, $common ) = ( 1, 1, 1 );

# Tag selection: when $compare_only_selected is set, only @selected_tags
# are compared; @excluded_tags are left out of the comparison.
my $compare_only_selected = 0;
my @selected_tags;
my @excluded_tags;

# Tag name => comparator code reference, filled by --compare-numeric and
# --compare-string.
my %comparators;

# Receives the argument of the ignored '-L' option.
my $L_option_param;

# Compare two values as strings.
# Returns -1, 0 or 1, exactly like the built-in 'cmp' operator.
sub string_compare
{
    my ( $first, $second ) = @_;
    return $first cmp $second;
}

#* OPTIONS:
#*   -t, --tags
#*                     Whitespace- or comma-separated list of tags to compare.
#*                     When present, only tags enumerated with this option are
#*                     compared.
#*
#*   -a, --add-tag
#*                     Add a tag to the list of compared tags.
#*
#*   -c, --clear-tags
#*                     Clear the list of compared tags.
#*
#*   -f, --file-with-tags
#*                     Specify a file with tags to be compared.
#*
#*   --compare-all-tags
#*                     Compare all tags present in the compared files (default).
#*
#*   --no-compare, --exclude
#*                     Whitespace- or comma-separated list of tags not to compare.
#*                     When present, enumerated tags will be excluded from the
#*                     comparison.
#*
#*   --compare-datablock-names
#*   --dont-compare-datablock-names
#*                     Also compare the names of CIF data blocks. Default: on.
#*
#*   --compare-numeric _cell_length_a
#*                     Force numeric comparison for given tag.
#*
#*   --compare-string _cell_length_a
#*                     Force string comparison for given tag.
#*
#*   --ignore-empty-values
#*                     Ignore tags with empty values.
#*
#*   --no-ignore-empty-values
#*   --dont-ignore-empty-values
#*                     Take tags with empty values into comparison (default).
#*
#*   --no-left
#*                     Do not print tags present in the left file only.
#*                     Similar to 'comm -1'.
#*
#*   --no-right
#*                     Do not print tags present in the right file only.
#*                     Similar to 'comm -2'.
#*
#*   --no-common
#*                     Do not print tags with the same values in both files.
#*
#*   --folding-width 72
#*                     Specify the length of the longest unfolded line.
#*
#*   --fold-long-fields
#*   --dont-fold-long-fields
#*                     Fold fields, longer than folding width. Default: on.
#*
#*   -L, -u
#*                     Ignored options. Used to make this script usable
#*                     as --diff-cmd for 'svn diff'.
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Split a whitespace- and/or comma-separated option value into a list of
# tags. A run of separators (e.g. ", " or several spaces) counts as a single
# separator and empty fields are dropped, so no empty-string "tags" are
# produced.
sub split_tag_list
{
    my ( $value ) = @_;
    return grep { $_ ne '' } split /[\s,]+/, $value;
}

# Process the command line: every recognised option updates the settings
# declared above via its handler; whatever getOptions() returns (the
# remaining arguments, i.e. the input file names) replaces @ARGV.
@ARGV = getOptions(
    "-L" => \$L_option_param, # Ignore the '-L' option and its argument
    "-u" => sub { 1; }, # Ignore the '-u' option, so that we can use
                        # the 'cif_diff' program as the --diff-cmd for
                        # 'svn diff'
    "-t,--tags" => sub { $compare_only_selected = 1;
                         @selected_tags =
                             split_tag_list( scalar get_value() ); },
    "-a,--add-tag" => sub { $compare_only_selected = 1;
                            push( @selected_tags, get_value() ); },
    "-c,--clear-tags" => sub { $compare_only_selected = 1;
                               @selected_tags = () },
    "-f,--file-with-tags" => sub {
        $compare_only_selected = 1;
        my $filename = get_value();

        # I/O failures are raised with die() inside the eval and then
        # reported through process_errors() below.
        eval {
            open( my $tags, '<', $filename ) or die 'ERROR, '
                  . 'could not open file for reading -- '
                  . lcfirst($!) . "\n";

            # One tag per line. Surrounding whitespace is stripped first,
            # so that blank lines and indented '#' comment lines are
            # skipped instead of being recorded as tags.
            while( my $line = <$tags> ) {
                $line =~ s/^\s+|\s+$//g;
                next if $line eq '';
                next if $line =~ /^\#/;
                push( @selected_tags, $line );
            }

            close( $tags ) or die 'ERROR, '
                 . 'error while closing file after reading -- '
                 . lcfirst($!) . "\n";
        };
        if ($@) {
            process_errors( {
              'message'  => $@,
              'program'  => $0,
              'filename' => $filename
            }, $die_on_error_level->{ERROR} );
        }
    },
    "--compare-all-tags" => sub{ $compare_only_selected = 0; },
    "--no-compare,--exclude" =>
        sub{ @excluded_tags = split_tag_list( scalar get_value() ); },

    "--compare-datablock-names"      => sub{ $compare_datablock_names = 1 },
    "--dont-compare-datablock-names" => sub{ $compare_datablock_names = 0 },

    # Per-tag comparator overrides, keyed by the tag name given as the
    # option value.
    "--compare-numeric" =>
        sub{ $comparators{ get_value() } = \&cmp_cif_numbers },
    "--compare-string" =>
        sub{ $comparators{ get_value() } = \&string_compare },

    "--ignore-empty-values"      => sub{ $ignore_empty_values = 1 },
    "--no-ignore-empty-values"   => sub{ $ignore_empty_values = 0 },
    "--dont-ignore-empty-values" => sub{ $ignore_empty_values = 0 },

    "--folding-width"         => \$folding_width,
    "--fold-long-fields"      => sub{ $fold_long_fields = 1 },
    "--dont-fold-long-fields" => sub{ $fold_long_fields = 0 },
    "--no-left"     => sub{ $left   = 0 },
    "--no-right"    => sub{ $right  = 0 },
    "--no-common"   => sub{ $common = 0 },
    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

# With fewer than two file arguments, append '-' as an additional input
# name (presumably interpreted as standard input by the parser -- confirm
# against COD::CIF::Parser).
if( @ARGV < 2 ) {
    push @ARGV, '-';
}

# Emit both output streams as UTF-8.
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ':encoding(UTF-8)';
}

# Exactly two inputs are required; anything else is reported at the
# ERROR level.
unless( @ARGV == 2 ) {
    my $complaint = {
        'program'   => $0,
        'err_level' => 'ERROR',
        'message'   => 'please supply two files for comparison'
    };
    report_message( $complaint, $die_on_error_level->{'ERROR'} );
}

# Parse both input files with the selected parser backend. After each
# parse the collected parser messages are passed to
# process_parser_messages() together with the error-level policy.
my ( $data1, $data2, $err_count, $messages );
my $parser_options = { 'no_print' => 1, 'parser' => $use_parser };

for my $file_index ( 0, 1 ) {
    # The first file is parsed into $data1, the second one into $data2.
    my $parsed_data = $file_index == 0 ? \$data1 : \$data2;

    ( ${$parsed_data}, $err_count, $messages ) =
        parse_cif( $ARGV[$file_index], $parser_options );
    process_parser_messages( $messages, $die_on_error_level );
}

# Data blocks are compared pairwise by position, so only as many blocks as
# the shorter of the two files contains can be compared.
my $block_count1 = scalar @{$data1};
my $block_count2 = scalar @{$data2};

my $min_datablocks = $block_count1 > $block_count2
                   ? $block_count2
                   : $block_count1;

# Tell the user when some data blocks are going to be left out.
if( $block_count1 != $block_count2 ) {
    my $notice = {
        'program'   => $0,
        'err_level' => 'WARNING',
        'message'   => 'number of data blocks in supplied files is different, '
                     . "taking first $min_datablocks data blocks for comparison"
    };
    report_message( $notice, $die_on_error_level->{'WARNING'} );
}

# Collect the settings into the option hash handed to diff(). The
# 'suppress_*' entries are the negations of the corresponding "print this
# class of tags" switches.
my $options = {
    'comparators'             => \%comparators,
    'compare_datablock_names' => $compare_datablock_names,
    'ignore_empty_values'     => $ignore_empty_values,
    'fold'                    => $fold_long_fields,
    'folding_width'           => $folding_width,
    'suppress_common'         => !$common,
    'suppress_left'           => !$left,
    'suppress_right'          => !$right,
};

# Tag lists are lowercased in place before being passed on.
if( $compare_only_selected == 1 ) {
    $_ = lc($_) for @selected_tags;
    $options->{compare_only} = \@selected_tags;
}
if( @excluded_tags ) {
    $_ = lc($_) for @excluded_tags;
    $options->{compare_not} = \@excluded_tags;
}

# Compare the data blocks pairwise, in the order they appear in the files.
for my $block_index ( 0 .. $min_datablocks - 1 ) {
    diff( $data1->[$block_index], $data2->[$block_index], $options );
}
