#!perl
use Cassandane::Tiny;

sub test_subject_isutf8
    :min_version_3_0
{
    my ($self) = @_;

    xlog $self, "Generate and index test messages.";
    # that's: "nuff réunion critères duff"
    my $subject = "=?utf-8?q?nuff_r=C3=A9union_crit=C3=A8res_duff?=";
    my $body = "empty";
    my %params = (
        mime_charset => "utf-8",
        body => $body
    );
    $self->make_message($subject, %params) || die;
    $self->{instance}->run_command({cyrus => 1}, 'squatter');

    my $talk = $self->{store}->get_client();

    # Connect to IMAP
    xlog $self, "Select INBOX";
    my $r = $talk->select("INBOX") || die;

    # Search subject without accents
    # my $term = "réunion critères";
    my %searches;

    if ($self->{skipdiacrit}) {
        # Diacritics are stripped before indexing and search. That's a sane
        # choice as long as there is no language-specific stemming applied
        # during indexing and search.
        %searches = (
            "reunion criteres" => 1,
            "réunion critères" => 1,
            "reunion critères" => 1,
            "réunion criter" => 1,
            "réunion crit" => 0,
            "union critères" => 0,
        );
        my $term = "naive";
    } else {
        # Diacritics are not stripped from search. This currently is very
        # restrictive: until Cyrus can stem by language, this is basically
        # a whole-word match.
        %searches = (
            "reunion criteres" => 0,
            "réunion critères" => 1,
            "reunion critères" => 0,
            "réunion criter" => 0,
            "réunion crit" => 0,
            "union critères" => 0,
        );
    }

    while (my($term, $expectedCnt) = each %searches) {
        xlog $self, "SEARCH for FUZZY text \"$term\"";
        $r = $talk->search(
            "charset", "utf-8", "fuzzy", ["text", { Quote => $term }],
        ) || die;
        $self->assert_num_equals($expectedCnt, scalar @$r);
    }

}
