#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Filter SAM files for a list of taxon IDs.

.. moduleauthor:: Florian Aldehoff <samsifter@biohazardous.de>
"""

import sys
if sys.version_info[0] < 3:
    # Deliberately written with sys.stderr.write() instead of
    # print(..., file=sys.stderr): the latter is a syntax error on Python 2,
    # which would prevent this guard from ever running there. The message
    # goes to STDERR because STDOUT is reserved for the processed SAM data.
    sys.stderr.write("Error, I need python 3.x or newer\n")
    # sys.exit() instead of the site-provided exit(), which is not available
    # when the interpreter is started with -S
    sys.exit(1)

import argparse
import fileinput
import csv
from os.path import basename, splitext

# custom libraries
from samsifter.models.filter import FilterItem
from samsifter.models.parameter import FilterSwitch, FilterFilepath
from samsifter.util.arg_sanitation import check_sam, check_csv
from samsifter.util.filters import pattern_filter

# TEXT labels the FilterItem created by item(); DESC is its description and
# doubles as the description of the command-line argument parser
TEXT = "Filter taxa by list of taxon IDs"
DESC = (
    "filtering references by a list of NCBI taxon IDs "
    "given in a tab-separated CSV file"
)


def item():
    """Build the list/tree view entry that represents this filter.

    The entry uses the module-level ``TEXT`` and ``DESC``, has its command
    set to this script's file name (without directory and extension) and
    carries two parameters: the taxon list file and the keep/discard switch.

    Returns
    -------
    FilterItem
        Item for use in item-based list and tree views.
    """
    entry = FilterItem(text=TEXT, desc=DESC)

    # command = file name of this script, stripped of path and extension
    script_name, _extension = splitext(basename(__file__))
    entry.set_command(script_name)

    # path to the CSV file holding the taxon IDs (mandatory)
    taxon_list = FilterFilepath(
        text="taxon list file",
        desc="tab-separated CSV file with NCBI taxon IDs in first column",
        cli_name="--list",
        default="taxa.csv",
        extensions=['csv'],
        required=True
    )
    entry.add_parameter(taxon_list)

    # two-state switch; default index 0 selects the first option
    direction = FilterSwitch(
        text="filter direction",
        desc="Keep or discard entries passing the filter criteria?",
        cli_name="--discard",
        default=0,
        options=["discard", "keep"]
    )
    entry.add_parameter(direction)

    return entry


def main():
    """Executable to filter SAM files for a list of taxon IDs.

    See ``--help`` for details on expected arguments. The SAM input is read
    from the file given with ``--input``; without that option it is read
    from any remaining (unrecognized) command line arguments interpreted as
    file names or, if there are none, from STDIN. Logs messages to STDERR
    and writes processed SAM files to STDOUT.

    The taxon IDs are taken from the first column of the tab-separated file
    given with ``--list``; each ID is turned into the pattern
    ``tax|<ID>|`` and the list of patterns is handed to ``pattern_filter``.
    Blank lines and rows with an empty first column are ignored.

    Raises
    ------
    SystemExit
        Always, after processing has finished (exit status 0), or earlier
        by ``argparse`` on invalid arguments.
    """
    # parse arguments
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument('-i', '--input',
                        type=check_sam,
                        help="specify SAM file to be analysed (default: "
                        "STDIN)",
                        required=False)
    parser.add_argument('-l', '--list',
                        type=check_csv,
                        help="tab-separated CSV file with NCBI taxon IDs "
                        "in first column",
                        required=True)
    parser.add_argument('--discard',
                        type=int,
                        help="keep or discard entries passing the filter "
                             "criteria?",
                        required=False,
                        default=0)
    # unknown arguments are kept; they serve as input file names below
    (args, remain_args) = parser.parse_known_args()

    # generate pattern list from CSV file
    patterns = []
    with open(args.list, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t'):
            # csv.reader yields [] for blank lines; those and rows without
            # an ID in the first column must not end up as a pattern
            taxon_id = row[0].strip() if row else ""
            if not taxon_id:
                continue
            patterns.append("tax|" + taxon_id + "|")

    # open SAM file from either command line argument or STDIN
    if args.input:
        handle = open(args.input, 'r')
    else:
        # an empty list of remaining arguments makes fileinput read STDIN
        handle = fileinput.input(remain_args)

    # both regular files and FileInput objects are context managers, so the
    # input is closed even if filtering raises
    with handle:
        # --discard 0 (the default) is passed on as discard=True
        pattern_filter(patterns, handle, discard=(args.discard == 0))

    sys.exit()


# run the filter only when this file is executed as a script; importing the
# module (e.g. to call item()) must not start any processing
if __name__ == "__main__":
    main()
