#!/usr/bin/env python

# TODO: stash aligned sequences in table too

import logging
logging.basicConfig(level=logging.INFO)

import psycopg2, psycopg2.extras
import locus.utils.genomeutils as gu
import IPython

# Back-fill aligned genomic/transcript exon sequences in gtx_alignment.
#
# For every (genomic exon, transcript exon) pair that has a gtx_alignment row
# with a seqviewer_url but no g_seq_a yet, fetch the genomic sequence, align
# it against the transcript exon sequence, and store both aligned strings.

# Candidate exon pairs.  Genomic and transcript exons are paired on
# (ac, ord).  t_seq is cut out of the transcript sequence in SQL with
# substr(seq, start_i+1, end_i-start_i); this presumably converts 0-based
# start_i to substr's 1-based offset -- TODO confirm the coordinate
# convention.
sel_sql = '''
SELECT GE.genomic_exon_id,TE.transcript_exon_id,
  G.gene,T.ac,
  G.chr,G.strand,GE.start_i as g_start_i,GE.end_i as g_end_i,
  TE.start_i as t_start_i,TE.end_i as t_end_i,
  TE.ord, TE.name,
  substr(T.seq,TE.start_i+1,TE.end_i-TE.start_i) as t_seq
FROM gene G
JOIN transcript T on G.gene=T.gene
JOIN genomic_exon GE on T.ac=GE.ac
JOIN transcript_exon TE on GE.ac=TE.ac and GE.ord=TE.ord
WHERE EXISTS (SELECT * FROM gtx_alignment GA
                  WHERE GA.genomic_exon_id=GE.genomic_exon_id
                  AND GA.transcript_exon_id=TE.transcript_exon_id
	          AND GA.seqviewer_url is not NULL
                  AND GA.g_seq_a is NULL)
ORDER BY T.ac,TE.ord
'''

# Stores the aligned sequences for one exon pair.  Parameters are passed by
# name, so the selected row (plus g_seq_a/t_seq_a) can be used directly.
upd_sql = """
UPDATE gtx_alignment
SET g_seq_a = %(g_seq_a)s, t_seq_a = %(t_seq_a)s
WHERE genomic_exon_id=%(genomic_exon_id)s AND transcript_exon_id=%(transcript_exon_id)s
"""

conn = psycopg2.connect("dbname=reece")
try:
    # DictCursor so that columns can be addressed by name below.
    sel_cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    upd_cur = conn.cursor()

    sel_cur.execute(sel_sql)
    logging.info('%d alignments to execute', sel_cur.rowcount)
    for row in sel_cur:
        t_seq = row['t_seq']
        # Explicit check rather than `assert`, which is stripped under -O
        # and would let a truncated transcript sequence be aligned silently.
        if len(t_seq) != row['t_end_i'] - row['t_start_i']:
            raise ValueError(
                '%s exon %s: len(t_seq) != end_i-start_i'
                % (row['gene'], row['name']))

        g_seq = gu.fetch_genomic_sequence_interval_ucsc_websvc(
            row['chr'], row['g_start_i'], row['g_end_i'])
        # Reverse-complement the fetched genomic sequence for strand -1
        # genes before aligning it to the transcript sequence.
        if row['strand'] == -1:
            g_seq = gu.reverse_complement(g_seq)
        g_seq_a, t_seq_a = gu.align2(g_seq.upper(), t_seq.upper())
        cigar = gu.alignment_cigar_string(g_seq_a, t_seq_a)

        # The UPDATE only reads the two exon ids and the aligned sequences;
        # the remaining row columns are ignored by the named placeholders.
        data = dict(row)
        data.update({'g_seq_a': g_seq_a, 't_seq_a': t_seq_a})
        upd_cur.execute(upd_sql, data)

        # Commit per exon so completed work survives a later failure; the
        # SELECT skips rows whose g_seq_a is already set, so a rerun resumes.
        conn.commit()
        logging.info("gene %s, transcript %s, exon %s, %s",
                     row['gene'], row['ac'], row['name'], cigar)
finally:
    # Always release the connection (and with it both cursors), including
    # when a fetch, alignment, or update raises part-way through.
    conn.close()

## <LICENSE>
## Copyright 2014 UTA Contributors (https://bitbucket.org/invitae/uta)
## 
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## 
##     http://www.apache.org/licenses/LICENSE-2.0
## 
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## </LICENSE>
