/*
tablegen is a helper CLI to create Go source files from Unicode Character Data files.

tablegen recognizes the following flags:

    -p <package name>   : package name of output package
    -f <n>              : field index of character category
    -x <prefix>         : prefix to categories, used for table naming
    -o <filename>       : name of output source file
    -u <URL>            : UCD file URL, e.g. http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt

tablegen will download the UCD file, iterate over character code/range entries
and write a Go source code file. Tables defined in the Go file are
*unicode.RangeTable variables, which may be queried by functions of the Go
standard library (package unicode).

For example, after creating tables from UAX#11 East Asian Width tables (see link above),
clients may query if a Unicode character is contained in a UAX#11 range by means
of unicode.Is(…). After a call to

    tablegen -f 2 -p mypackage -o uax11tables.go -x EAW
             -u http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt

a file named uax11tables.go will contain (amongst others) a range table called
`EAW_Na` (indicating a "narrow" EA character), which can be queried by

    isnarrow := unicode.Is(EAW_Na, '梨')

Unicode Annex #44 is a starting point for UCD information:
http://www.unicode.org/reports/tr44/.
An overview of Unicode Character Data files can be found here:
https://www.unicode.org/versions/components-13.0.0.html.

___________________________________________________________________________

License

Governed by a 3-Clause BSD license. License file may be found in the root
folder of this module.

Copyright © 2021 Norbert Pillmayer <norbert@pillmayer.com>

*/
package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"sort"
	"strings"

	"github.com/npillmayer/uax/internal/ucdparse"
)

// prefix holds the value of the -x flag. When non-empty it is prepended,
// joined by an underscore, to every category name before the name is used as
// a table key (see append). It is declared at package level so that helpers
// outside of main can read it.
var prefix = flag.String("x", "", "prefix to categories, used for table naming")

// main implements the tablegen CLI: it parses the command line flags,
// downloads the UCD file, collects the character ranges of every category
// found in the selected field, and writes the resulting tables as a Go source
// file.
//
// This is a very rough implementation.
// Creating Unicode tables is a rare task, and I do not plan to actively maintain
// this little CLI.  Sorry for that.
func main() {
	buf := new(bytes.Buffer)
	ranges := make(map[string]*ucdparse.RangeTableCollector)
	//
	// command line flags
	var packageNameFlag = flag.String("p", "packagenotset", "package name of output package")
	var ucdurl = flag.String("u", "UCD-URL",
		"UCD file URL, e.g. http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt")
	var outname = flag.String("o", "tables.go", "name of output source file")
	var catfield = flag.Int("f", 2, "field position of category field, 1…n")

	flag.Parse()
	if *catfield < 1 {
		log.Fatalf("invalid field position %d: must be 1 or greater", *catfield)
	}
	// The flag counts fields from 1, the parser from 0 (field 0 is the range).
	categoryFieldNo := *catfield - 1
	//
	// retrieve unicode character database file from URL
	resp, err := http.Get(*ucdurl)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	// Refuse to parse an error page as if it were UCD data.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected HTTP status %s", *ucdurl, resp.Status)
	}
	printPreamble(buf, *packageNameFlag)

	// parse UCD file and collect ranges
	// NOTE(review): error reporting is left entirely to ucdparse.Parse; confirm
	// whether it returns an error that should be checked here.
	ucdparse.Parse(resp.Body, func(p *ucdparse.Token) {
		l, r := p.Range()             // char range in field 0
		t := p.Field(categoryFieldNo) // character category
		t = strings.TrimSpace(t)
		if t == "" {
			return
		}
		append(ranges, t, l, r)
	})

	// Output range information per category, in sorted order so that repeated
	// runs produce identical files. The keys are collected by index because
	// the builtin append is shadowed by this package's own append function.
	cats := make([]string, len(ranges))
	i := 0
	for cat := range ranges {
		cats[i] = cat
		i++
	}
	sort.Strings(cats)
	for _, cat := range cats {
		ranges[cat].Output(buf)
	}
	printVarSection(buf, ranges)

	// Write the gofmt-formatted source. If formatting fails the generated code
	// is syntactically broken; it is still written, unformatted, so that it
	// can be inspected.
	src, err := format.Source(buf.Bytes())
	if err != nil {
		log.Printf("formatting generated source: %v", err)
		src = buf.Bytes()
	}
	err = ioutil.WriteFile(*outname, src, 0666)
	if err != nil {
		log.Fatal(err)
	}
}

// append records the character range [l…r] for category cat in ranges.
// l and r may be identical, denoting a single character.
//
// If the -x prefix flag is set, the table key is "<prefix>_<cat>", otherwise
// it is cat itself. A collector for the key is created on first use, which is
// reported on stderr.
//
// Note that this function shadows the builtin append throughout the package.
func append(ranges map[string]*ucdparse.RangeTableCollector, cat string, l, r rune) {
	key := cat
	if *prefix != "" {
		key = *prefix + "_" + cat
	}
	collector, known := ranges[key]
	if !known {
		fmt.Fprintf(os.Stderr, "creating table %s\n", key)
		collector = &ucdparse.RangeTableCollector{Cat: key}
		ranges[key] = collector
	}
	collector.Append(l, r)
}

// printPreamble writes the header of the generated Go file to buf: the
// "Code generated … DO NOT EDIT." marker, the package clause for packagename,
// and the import of package unicode.
//
// packagename is passed as an argument to a constant format string, so it is
// emitted verbatim even if it contains '%' characters.
func printPreamble(buf *bytes.Buffer, packagename string) {
	fmt.Fprintf(buf, `// Code generated by UAX table generator --- DO NOT EDIT.

package %s

import "unicode"

`, packagename)
}

// printVarSection writes a single var block to buf that declares, for every
// key of ranges, a variable of type *unicode.RangeTable named after the key
// and initialized from the identifier with the same name plus a leading
// underscore.
//
// Say we have produced a table from a category 'PREFIX_A', for which Go code
// defining a variable _PREFIX_A has been generated. Then this function will
// produce the line
//
//     PREFIX_A *unicode.RangeTable = _PREFIX_A
//
// Entries are written in sorted key order, so the output does not depend on
// map iteration order.
func printVarSection(buf *bytes.Buffer, ranges map[string]*ucdparse.RangeTableCollector) {
	// Collect the keys by index: the builtin append is shadowed by this
	// package's own append function and cannot be used here.
	names := make([]string, len(ranges))
	i := 0
	for name := range ranges {
		names[i] = name
		i++
	}
	sort.Strings(names)

	fmt.Fprintf(buf, "var (\n")
	for _, name := range names {
		fmt.Fprintf(buf, "    %s *unicode.RangeTable = _%s\n", name, name)
	}
	fmt.Fprintf(buf, ")\n")
}
