#!/usr/bin/env python

from __future__ import print_function, division, unicode_literals

# Module description; also passed to argparse as the --help description.
# NOTE(review): the records are read and written with uta.formats.seqinfo,
# yet this text says "txinfo" -- confirm which format name is intended.
__doc__ = """filter txinfo files by given criteria"""

import argparse
import os
import re
import sys

import uta.formats.seqinfo as ufsi


def parse_args(argv):
    """Parse the command-line arguments of the filter script.

    Args:
        argv: list of argument strings without the program name
            (e.g. ``sys.argv[1:]``).

    Returns:
        argparse.Namespace with the attributes ``FILE`` (input path, or
        '-'), ``transcripts_from`` (path, or None when -T is not given) and
        ``transcript_regexps`` (list of pattern strings; empty when -R is
        not given).
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        )
    ap.add_argument(
        'FILE',
        help="input file to filter; use '-' to read from standard input",
        )
    ap.add_argument(
        '--transcripts-from', '-T',
        metavar='PATH',
        help="file listing the accessions to keep, one per line"
             " (surrounding whitespace is ignored)",
        )
    ap.add_argument(
        '--transcript-regexps', '-R',
        metavar='REGEXP',
        default=[],
        action='append',
        help="keep records whose accession matches this regular expression"
             " (matched from the start of the accession);"
             " may be given more than once",
        )

    return ap.parse_args(argv)


if __name__ == '__main__':
    # Copy to stdout every record of FILE whose accession is either listed
    # in the -T file or matched by one of the -R regular expressions.
    opts = parse_args(sys.argv[1:])

    # Accessions to keep by exact match: one per line, whitespace-stripped.
    filter_acs = set()
    if opts.transcripts_from:
        # `with` closes the accession list as soon as it has been read.
        with open(opts.transcripts_from, 'r') as ac_file:
            filter_acs = set(line.strip() for line in ac_file)

    regexps = [re.compile(tre) for tre in opts.transcript_regexps]

    # '-' selects standard input; anything else is opened as a path.
    infile = sys.stdin if opts.FILE == '-' else open(opts.FILE, 'r')
    try:
        sir = ufsi.SeqInfoReader(infile)
        siw = ufsi.SeqInfoWriter(sys.stdout)

        for si in sir:
            # re.match anchors each pattern at the start of the accession.
            if si.ac in filter_acs or any(tre.match(si.ac) for tre in regexps):
                siw.write(si)
    finally:
        # Close only what this script opened; leave sys.stdin alone.
        if infile is not sys.stdin:
            infile.close()
