#!/usr/bin/python
#
# Copyright 2014-2020 w3c
#
# SPDX-License-Identifier: MIT

# Modified from https://github.com/w3c/htmldiff-ui/htmldiff

import atexit
import os
import re
import sys
import tempfile
import tidy

from subprocess import Popen, PIPE

def tidyFile(filename):
    """Run HTML Tidy over *filename* and return the result as a temp file.

    The document is first parsed with a minimal option set, unless its first
    4096 characters contain an HTML5 doctype (``<!doctype html>``), in which
    case the HTML5-aware options are applied straight away.  If the minimal
    parse reports errors, the document is parsed a second time with the
    HTML5 options.

    Any errors left after the final parse are reported on stderr; the
    tidied output is written out regardless.

    Args:
        filename: Path of the HTML document to clean up.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` (text mode, positioned at
        offset 0) holding the tidied markup.  Its ``close`` method is
        registered with ``atexit``, so the file lives until the interpreter
        exits; callers typically only need its ``.name``.
    """
    # Baseline options for tidy.
    options = dict(tidy_mark=0, show_warnings=0, quiet=1, char_encoding='utf8')
    # Extra options that teach tidy about HTML5, MathML and SVG elements it
    # would otherwise reject as unknown tags.
    html5_options = {'add_xml_space': 'no',
                     'output_xhtml': 'no',
                     'tidy_mark': 'no',
                     'new_blocklevel_tags': 'article,aside,canvas,dialog,details,figcaption,figure,footer,header,hgroup,menu,nav,section,main,summary,math,semantics,mrow,mfenced,mtable,mtr,mtd,mi,mn,msub,mo,mfrac,munderover,mtext,svg,g,image,rect,text,desc,line,path,polygon,ellipse,tspan,defs,feoffset,fecolormatrix,filter,fegaussianblur,feblend,marker,circle',
                     'new_inline_tags': 'video,audio,canvas,ruby,rt,rp,time,meter,progress,track,source,emu-val,emu-nt,emu-t,mark',
                     'break_before_br': 'no',
                     'vertical_space': 'no',
                     'enclose_text': 'no',
                     'numeric_entities': 'yes',
                     'wrap': '1000',
                     'wrap_attributes': 'no',
                     'drop_empty_paras': 'no'
                     }

    # Read the document once; the context manager guarantees the handle is
    # released even if reading fails.
    with open(filename, 'r') as ifp:
        content = ifp.read()

    # Only the head of the document is inspected for the doctype.
    html5 = re.search(r"<!doctype\s+html\s*>", content[:4096],
                      re.IGNORECASE)
    if html5:
        options.update(html5_options)
    newtidy = tidy.parseString(content, **options)

    # The document may use HTML5 constructs without declaring the HTML5
    # doctype: retry with the HTML5 options before giving up.
    if newtidy.errors and not html5:
        options.update(html5_options)
        newtidy = tidy.parseString(content, **options)

    fp = tempfile.NamedTemporaryFile(
           mode='w+', prefix='htmldiff-', suffix='.html')
    # Keep the temp file alive until interpreter exit so that external
    # tools can still open it by name after this function returns.
    atexit.register(fp.close)
    fp.write(str(newtidy))
    fp.flush()
    fp.seek(0)

    if newtidy.errors:
        sys.stderr.write('tidyFile: tidy.parseString error: %s\n' % str(newtidy.errors))
    return fp

if __name__ == '__main__':
    # Command-line entry point: tidy the two documents named on the command
    # line, then run the htmldiff.pl script found next to this file on the
    # tidied copies and relay its output.
    if len(sys.argv) < 3:
        sys.stderr.write('tidy: need args file1 file2\n')
        sys.exit(1)

    refdoc = tidyFile(sys.argv[1])
    newdoc = tidyFile(sys.argv[2])

    scriptdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    perlscript = os.path.join(scriptdir, 'htmldiff.pl')

    p = Popen([perlscript, refdoc.name, newdoc.name],
              stdin=PIPE, stdout=PIPE, stderr=PIPE)
    sys.stdout.flush()
    sys.stderr.flush()
    # communicate() closes the child's stdin, drains both pipes and waits
    # for the process to finish, so no explicit stdin.close() is needed.
    (out, err) = p.communicate()
    if err:
        # Any output on the child's stderr is treated as a failure.
        # communicate() yields bytes on Python 3; decode them so the
        # message is readable rather than a b'...' repr.
        err_text = err if isinstance(err, str) else err.decode('utf-8', 'replace')
        sys.stderr.write('htmldiff: An error occured when running htmldiff.pl on the documents: %s\n' % err_text)
        sys.exit(1)
    else:
        # Pass the diff through untouched: write the raw bytes to the
        # underlying binary stream when there is one (Python 3), otherwise
        # to sys.stdout itself.  The trailing newline matches what print()
        # used to append.
        raw_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        raw_stdout.write(out)
        raw_stdout.write(b'\n')
        raw_stdout.flush()
