#!/usr/bin/env python

import argparse
import os.path
from glob import glob

from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult

def _convert_to_utf8(fname, encoding):
    """Write a UTF-8 copy of the text file *fname*, decoded using *encoding*.

    The copy is created next to the original as ``<name>-utf_8<ext>``; the
    original file is not modified. An existing file with that name is
    overwritten.
    """
    print("converting %s to UTF-8" % fname)
    # newline="" switches off newline translation, so "\r\n" / "\r" are kept
    # as-is: only the encoding changes, never the line endings.
    with open(fname, "r", encoding=encoding, newline="") as src:
        data = src.read()

    name, ext = os.path.splitext(fname)
    new_name = "%s-utf_8%s" % (name, ext)
    print("writing to", new_name)
    with open(new_name, "w", encoding="utf-8", newline="") as dst:
        dst.write(data)


def main(argv=None):
    """Report the encoding of the given files and write UTF-8 copies.

    Every positional argument is expanded with ``glob``; matches that are
    not regular files are skipped. The encoding of each remaining file is
    guessed with ``charset_normalizer.from_fp`` and printed (unless
    ``--quiet``). Files whose guess is neither ``utf_8`` nor ``ascii`` are
    re-encoded into a ``<name>-utf_8<ext>`` sibling file (unless
    ``--print-only``).

    Args:
        argv: Command-line arguments to parse. ``None`` lets argparse fall
            back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="change file encoding to UTF-8",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument(
        "infile", metavar="input file(s)",
        type=str,
        nargs="*",
        help="files",
    )

    # --print-only and --quiet contradict each other, so argparse rejects
    # a command line that passes both.
    print_group = parser.add_mutually_exclusive_group()
    print_group.add_argument(
        "-p", "--print-only",
        action="store_true",
        default=False,
        dest="print_only",
        help="only print file encoding",
    )

    print_group.add_argument(
        "-q", "--quiet",
        action="store_true",
        default=False,
        dest="quiet",
        help="do not print encodings",
    )

    args = parser.parse_args(argv)

    for fglob in args.infile:
        for fname in glob(fglob):
            fname = os.path.abspath(fname)
            if not os.path.isfile(fname):
                continue

            # Detection works on the raw bytes, hence binary mode.
            with open(fname, "rb") as f:
                matches = from_fp(f, threshold=0.2, explain=False)

            # best guess is None if there's no good match
            best_guess = matches.best()
            if not best_guess:
                if not args.quiet:
                    print(fname, "\tno guess")
                continue

            if not args.quiet:
                print(fname, "\t", best_guess.encoding)
            if args.print_only:
                continue
            # Nothing to convert: ASCII is a subset of UTF-8.
            if best_guess.encoding in ("utf_8", "ascii"):
                continue

            _convert_to_utf8(fname, best_guess.encoding)



# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
