#!/usr/bin/python3

import os
import re
import sys
import time
import socket
import optparse
import tempfile
import traceback

os.environ.setdefault("CONDOR_CONFIG", "/etc/condor-ce/condor_config")

import classad2 as classad
import htcondor2 as htcondor
import htcondorce.tools as ce

# Module-wide debug flag; parse_opts() overwrites it from the -d/--debug option.
G_DEBUG = False

# Human-readable names for the numeric JobStatus attribute of a job ad.
# Lookups elsewhere in this file use .get(status, status), so a code missing
# from this table is printed as its raw number.
CONDOR_JOB_STATUS = {
    0: "Unexpanded",
    1: "Idle",
    2: "Running",
    3: "Removed",
    4: "Completed",
    5: "Held",
    6: "Transferring Output",
    7: "Suspended",
}

def verify_matching_condor_versions():
    """Check that the Python bindings and the condor_version tool agree.

    Compares the string returned by htcondor.version() with the first line
    printed by the `condor_version` command.

    Raises:
        ce.CondorRunException: running `condor_version` raised RuntimeError.
        RuntimeError: the two version strings are not equal.
    """
    bindings_version = htcondor.version()

    try:
        _, tool_output, _ = ce.run_command('condor_version')
    except RuntimeError:
        raise ce.CondorRunException(
            'Could not find version information using condor_version. '
            'Ensure that condor_version is in your PATH')

    # Only the first output line is compared against the bindings' version.
    first_line = ce.to_str(tool_output).partition('\n')[0]
    if bindings_version == first_line:
        return

    raise RuntimeError(
        'Found multiple, mismatched versions of HTCondor libraries. '
        'Please ensure that you only have one version of HTCondor installed.')


def run_ping(collector_name, schedd_name):
    """Verify that the caller has WRITE authorization at a CE schedd.

    Runs `condor_ce_ping` for the WRITE authorization level against
    ``schedd_name`` in pool ``collector_name`` and inspects its exit status
    and output. On success a confirmation line is printed.

    Args:
        collector_name: value passed to condor_ce_ping's ``-pool`` option.
        schedd_name: value passed to condor_ce_ping's ``-name`` option.

    Raises:
        ce.CondorRunException: the command could not be run, returned a
            negative status (reported as termination by a signal), or
            exited with a non-zero code.
        ce.CondorUserException: the output shows a remote mapping followed by
            ``Authorized: FALSE``.
    """
    print("Testing HTCondor-CE authorization...")

    args = ["condor_ce_ping", "-pool", collector_name, "-name", schedd_name, "-verbose", "-debug", "WRITE"]

    try:
        rc, stdout, _ = ce.run_command(args)
    except RuntimeError as exc:
        # Report the schedd being pinged, as every other failure message in
        # this function does (the original referenced an undefined name here).
        raise ce.CondorRunException('Failed to ping %s due to the following error:\n%s'
                                    % (schedd_name, exc)) from exc

    # Apply the same normalization verify_matching_condor_versions() applies
    # to run_command() output, so the text searches below operate on str.
    stdout = ce.to_str(stdout)

    if G_DEBUG:
        print("*"*5, "condor_ping output", "*"*5)
        print(stdout, end='')
        print("*"*20)

    if rc < 0:
        raise ce.CondorRunException("Failed to ping %s; condor_ping terminated with signal %d." \
                                    % (schedd_name, -rc))
    if rc > 0:
        if 'Failed to connect' in stdout:
            raise ce.CondorRunException("Failed to ping %s: Please contact the site's system adminstrator to " \
                                        "ensure that the CE you're trying to contact is functional." % schedd_name)
        message = "Failed to ping %s; authorization check exited with code %d." % (schedd_name, rc)
        if not G_DEBUG:
            message = message + " Re-run the command with '-d' for more verbose output."
        raise ce.CondorRunException(message)

    # A zero exit code is not enough: the output may still report that the
    # mapped user is not authorized.
    unauthorized_user = re.search(r'Remote Mapping:\s*(.*)\nAuthorized:\s*FALSE', stdout, re.MULTILINE)
    if unauthorized_user:
        raise ce.CondorUserException("User %s does not have permissions for %s. Please contact the CE's " \
                                     "system administrator to ensure that your user is mapped properly " \
                                     "in the site's authentication system."
                                     % (unauthorized_user.group(1), schedd_name))

    print(f"Verified WRITE access for scheduler daemon {schedd_name}")

def parse_opts():
    """Parse the command line and apply the debug setting.

    Stores the -d/--debug flag in the module-level G_DEBUG. When debugging is
    requested and the bindings expose both ``htcondor.param.setdefault`` and
    ``htcondor.enable_debug``, TOOL_DEBUG is defaulted to D_FULLDEBUG and
    debug output of the bindings is enabled.

    Returns:
        The ``(options, positional_arguments)`` pair produced by optparse.
    """
    global G_DEBUG

    cli = optparse.OptionParser(usage="usage: %prog [options] <CE hostname>")
    cli.add_option("-s", "--schedd-name", dest="schedd_name",
                   help="Name of the schedd to use.")
    cli.add_option("-d", "--debug", dest="debug", action="store_true", default=False,
                   help="Print debugging info.")
    cli.add_option("--skip-scitokens", dest="skip_scitokens", action="store_true", default=False,
                   help="Skip SCITOKENS authentication.")
    cli.add_option("-a", "--attribute", dest="attribute", action='append', default=[],
                   help="Add attribute to job ad.")
    # -n flips "clean" off: temporary files are removed unless it is given.
    cli.add_option("-n", "--no-clean", dest="clean", action="store_false", default=True,
                   help="Do not clean temporary files.")

    parsed = cli.parse_args()
    options = parsed[0]

    G_DEBUG = options.debug
    if not G_DEBUG:
        return parsed

    # Only touch the bindings' debug machinery when both hooks are present.
    if hasattr(htcondor.param, 'setdefault') and hasattr(htcondor, 'enable_debug'):
        htcondor.param.setdefault('TOOL_DEBUG', "D_FULLDEBUG")
        htcondor.enable_debug()

    return parsed


def setup_user_creds():
    """Collect the credential settings needed for remote submission to a schedd.

    Tries the bearer-token path first and the X.509 proxy path second; a
    lookup that raises OSError is skipped.

    Returns:
        A dict that may contain 'scitokens_file' and/or 'x509userproxy'.

    Raises:
        ce.CondorUserException: neither credential could be located.
    """
    lookups = (
        ('scitokens_file', ce.bearer_token_path),
        ('x509userproxy', ce.x509_user_proxy_path),
    )

    creds = {}
    for submit_key, find_path in lookups:
        try:
            creds[submit_key] = find_path()
        except OSError:
            # This credential is unavailable; the other one may still work.
            continue

    if creds:
        return creds

    raise ce.CondorUserException("Could not read a bearer token or an X.509 proxy for job submission")


# Numeric JobStatus values that check_job_submit() branches on.
_JOB_STATUS_REMOVED = 3
_JOB_STATUS_COMPLETED = 4
_JOB_STATUS_HELD = 5


def _build_submit_description(job_info, setup_creds):
    """Return the submit-description dict for the trace job."""
    sub = {
        "universe": "vanilla",
        "executable": "/usr/bin/env",
        "transfer_executable": "false",
        "output": job_info['stdout_file'],
        "error": job_info['stderr_file'],
        "log": job_info['log_file'],
        "leave_in_queue": "( StageOutFinish > 0 ) =!= true",
    }
    for attr in job_info['attribute']:
        key, separator, value = attr.partition('=')
        if not separator:
            # Report malformed -a/--attribute input as a user error instead of
            # letting an unpacking error surface as an uncaught exception.
            raise ce.CondorUserException(f"Invalid job attribute '{attr}': expected <name>=<value>")
        sub[f"MY.{key.strip()}"] = classad.quote(value.strip())

    if setup_creds:
        sub.update(setup_user_creds())

    return sub


def _wait_for_job(schedd, cluster, attempts):
    """Poll the job's JobStatus once per second; return the last value seen.

    Stops early when the job is removed or completed. Returns None when no
    poll was made (``attempts`` <= 0).
    """
    constraint = "ClusterID == %d" % cluster
    status = None
    last_status = -1
    for attempt in range(attempts):
        if G_DEBUG:
            print(f"Querying job status ({attempt+1}/{attempts})")
        try:
            ads = schedd.query(constraint, ["JobStatus", "ClusterID", "ProcID"])
        except RuntimeError as exc:
            raise ce.CondorRunException(f"- Failed to query job status due to the following error:\n{exc}") from exc
        if len(ads) != 1:
            raise ce.CondorRunException("Could not find the job in cluster %d" % cluster)
        status = ads[0]['JobStatus']
        status_name = CONDOR_JOB_STATUS.get(status, status)
        if G_DEBUG:
            # Debug mode prints the status on every poll.
            print(f"Job status: {status_name}")
        elif last_status != status:
            # Otherwise only the first status and later transitions are printed.
            if last_status == -1:
                print(f"Job status: {status_name}")
            else:
                print(f"Job transitioned from {CONDOR_JOB_STATUS.get(last_status, last_status)}"
                      f" to {status_name}")
            last_status = status
        # NOTE(review): a held job keeps being polled until the attempts run
        # out -- presumably so it can be released in the meantime; confirm.
        if status in (_JOB_STATUS_REMOVED, _JOB_STATUS_COMPLETED):
            break
        time.sleep(1)
    return status


def check_job_submit(job_info, schedd_ad, setup_creds=True):
    """Submit a test job to the CE schedd and verify that it produces output.

    Submits a single `/usr/bin/env` job, spools its files, polls the job
    status up to CONDOR_CE_TRACE_ATTEMPTS times (default 600, one second
    apart), then retrieves the output and removes the job.

    Args:
        job_info: dict providing 'stdout_file', 'stderr_file', 'log_file' and
            'attribute' (a list of "<name>=<value>" strings added to the job
            ad as MY.<name>).
        schedd_ad: ad of the target schedd; passed to htcondor.Schedd, and its
            'Machine' and 'MyAddress' values are used in messages.
        setup_creds: when true, merge the settings returned by
            setup_user_creds() into the submit description.

    Raises:
        ce.CondorUserException: an attribute is not of the form <name>=<value>
            (or setup_user_creds() found no credentials).
        ce.CondorRunException: contacting the schedd, submitting, spooling,
            querying, retrieving output or cleanup failed; the job was held or
            removed; or the job's stdout was empty.
        RuntimeError: the job did not complete within the allowed attempts.
    """
    sub = _build_submit_description(job_info, setup_creds)

    if G_DEBUG:
        print(f"Job submit description:\n{sub}")

    try:
        schedd = htcondor.Schedd(schedd_ad)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to contact schedd at {schedd_ad['Machine']} due to the following error:\n{exc}") from exc
    print(f"Submitting job to schedd {schedd_ad['MyAddress']}")
    try:
        # Positional arguments are presumably count=1 and spool=True -- confirm
        # against the htcondor2 Schedd.submit signature.
        result = schedd.submit(htcondor.Submit(sub), 1, True)
    except RuntimeError as exc:
        raise ce.CondorRunException("- Failed to submit job to %s due to the following error:\n%s" \
                                    % (schedd_ad['Machine'], exc)) from exc

    cluster = result.cluster()
    print(f"- Successful submission; cluster ID {cluster}")

    if G_DEBUG:
        print(f"Resulting job ad: {result.clusterad()}")

    print(f"Spooling cluster {cluster} files to schedd {schedd_ad['MyAddress']}")
    try:
        schedd.spool(result)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"- Failed to spool files to {schedd_ad['Machine']} due to the following error:\n{exc}") from exc

    print("- Successful spooling")

    attempts = int(htcondor.param.get("CONDOR_CE_TRACE_ATTEMPTS", 600))
    status = _wait_for_job(schedd, cluster, attempts)

    if status == _JOB_STATUS_HELD: # TODO - provide better diagnostics
        raise ce.CondorRunException("Remote job, %d.0, was held" % cluster)
    if status == _JOB_STATUS_REMOVED:
        # A removed job ends the polling loop early; report that rather than
        # a misleading timeout.
        raise ce.CondorRunException("Remote job, %d.0, was removed" % cluster)
    if status != _JOB_STATUS_COMPLETED:
        raise RuntimeError('Job did not complete within the given timeframe (%ss)' % attempts)

    constraint = "ClusterID == %d" % cluster
    try:
        schedd.retrieve(constraint)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to retrieve output from {schedd_ad['Machine']} due to the following error:\n{exc}") from exc
    try:
        schedd.act(htcondor.JobAction.Remove, constraint)
    except RuntimeError as exc:
        raise ce.CondorRunException(f"Failed to cleanup job on {schedd_ad['Machine']} due to the following error:\n{exc}") from exc

    with open(job_info['stdout_file'], "r") as stdout_file:
        output = stdout_file.read()
    if not output:
        raise ce.CondorRunException("Job produced empty stdout")
    if G_DEBUG:
        print("*"*5, "Job output", "*"*5)
        print(output, end='')
        print("*"*20)
    else:
        print("- Job was successful")

def main():
    """Run the full trace against the CE named on the command line.

    Steps: parse options, check that the HTCondor tool and bindings versions
    match, resolve the collector address (port defaults to 9619), locate the
    schedd through the collector, verify WRITE authorization with run_ping(),
    then submit a test job with check_job_submit(). Temporary job files are
    cleaned up afterwards unless -n/--no-clean was given.

    Also defaults the _condor_SEC_CLIENT_AUTHENTICATION_METHODS environment
    variable to 'SCITOKENS,SSL,FS' if it is not already set.

    Raises:
        ce.CondorUserException: no CE hostname was given or the port is not
            an integer.
        ce.CondorRunException: the collector could not be contacted or the
            schedd could not be found (plus anything raised by the steps
            above).
    """
    opts, args = parse_opts()

    if not args:
        raise ce.CondorUserException('ERROR: Insufficient number of arguments\n' + \
                                     '"Usage: condor_ce_trace [options] <hostname>[:<port>]"')
    coll_addr = args[0]

    verify_matching_condor_versions()

    # TODO: refactor all of this into parse_opts
    job_info = {}
    job_info['attribute'] = opts.attribute
    host_part, *port_parts = coll_addr.split(":")
    collector_fqdn = socket.getfqdn(host_part)
    job_info['collector_fqdn'] = collector_fqdn
    if port_parts:
        try:
            job_info['collector_port'] = int(port_parts[0])
        except ValueError:
            # Bad user input: report it as such rather than letting the
            # ValueError surface as an "uncaught exception".
            raise ce.CondorUserException("ERROR: Invalid port '%s' in CE address '%s'\n" % (port_parts[0], coll_addr) + \
                                         'Usage: condor_ce_trace [options] <hostname>[:<port>]') from None
    else:
        job_info['collector_port'] = 9619
    collector_name = "%s:%d" % (collector_fqdn, job_info['collector_port'])
    job_info['collector_name'] = collector_name
    if opts.schedd_name:
        job_info['schedd_name'] = opts.schedd_name
    else:
        # Without -s, the schedd is assumed to be named after the CE host.
        job_info['schedd_name'] = collector_name.split(":")[0]

    try:
        coll = htcondor.Collector(collector_name)
        schedd_ad = coll.locate(htcondor.DaemonTypes.Schedd, job_info['schedd_name'])
    except Exception as exc:
        raise ce.CondorRunException("ERROR: Could not contact CE collector at '%s'. " % collector_name + \
                                    "Verify that the Collector daemon is up with `condor_ce_status -any`.") from exc

    if schedd_ad is None:
        raise ce.CondorRunException('ERROR: Could not find CE schedd %s.\n' % job_info['schedd_name'] + \
                                    'Verify that the Scheduler daemon is up with `condor_ce_status -any`.')

    os.environ.setdefault('_condor_SEC_CLIENT_AUTHENTICATION_METHODS', 'SCITOKENS,SSL,FS')
    run_ping(collector_name, job_info['schedd_name'])
    try:
        job_info.update(ce.generate_job_files())
        check_job_submit(job_info, schedd_ad, setup_creds=not opts.skip_scitokens)
    finally:
        # NOTE(review): this also runs when generate_job_files() itself fails,
        # in which case job_info has no file entries -- confirm that
        # cleanup_job_files() tolerates that.
        if opts.clean:
            ce.cleanup_job_files(job_info)

# Script entry point: run main() and map failures to exit status 1.
if __name__ == '__main__':
    try:
        main()
    except (ce.CondorRunException, ce.CondorUserException, RuntimeError) as tool_exc:
        # Expected failures: print the message only, no traceback.
        ce.print_timestamped_msg(tool_exc)
        sys.exit(1)
    except Exception:
        # Unexpected failure: show the traceback in debug mode, otherwise save
        # it to a hidden file in the current directory for the user to send in.
        PID = os.getpid()
        if G_DEBUG:
            ce.print_formatted_msg('Uncaught exception, please send the following error to %s ' % ce.HELP_EMAIL + \
                                   'with a description of the issue:\n%s' % traceback.format_exc())
        else:
            try:
                FD, STACK_FILE = tempfile.mkstemp(dir=".", prefix=".stack_%d_" % PID)
                # The with-block closes the file even if writing fails.
                with os.fdopen(FD, 'w') as F:
                    F.write('%s\n' % time.strftime('%Y-%m-%d %H:%M:%S'))
                    traceback.print_exc(file=F)
                ce.print_formatted_msg('Uncaught exception, please send %s to %s with a description of the issue.' %
                                       (STACK_FILE, ce.HELP_EMAIL))
            except OSError:
                ce.print_formatted_msg("Unable to write stackfile due to the following error:\n%s"
                                       % traceback.format_exc())
        sys.exit(1)
