/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.snap.spinn3rhadoop;

import java.io.File;
import java.io.IOException;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Base class for strategies that detect and repair mis-decoded ("garbled") text.
 *
 * <p>Two implementations are nested in this class: {@link Latin1ToUtf8Degarbler},
 * which rewrites known garbled character sequences using a lookup table, and
 * {@link NullDegarbler}, which leaves the text untouched and only measures the
 * share of {@code '?'} characters.
 */
public abstract class UnicodeDegarbler {
    /**
     * Returns a repaired version of {@code text}.
     *
     * @param text the text to repair
     * @return the repaired text (possibly identical to the input)
     */
    public abstract String degarble(String text);

    /**
     * Tells whether {@code text} is considered garbled by this implementation.
     *
     * @param text the text to inspect
     * @return {@code true} if the text is considered garbled
     */
    public abstract boolean isGarbled(String text);

    /**
     * Returns the fraction of characters in {@code text} that this implementation
     * does not count as garbage.
     *
     * @param text the text to inspect
     * @return a value in {@code [0, 1]}; {@code 1.0} for the empty string
     */
    public abstract double getNonGarbageFraction(String text);

    /**
     * Manual smoke test: degarbles a hard-coded sample and prints the result,
     * followed by the {@link NullDegarbler} verdict for the same sample.
     *
     * @param args optional; {@code args[0]} overrides the path of the error table.
     *             Without arguments the original developer-local default path is used.
     * @throws Exception if the error table cannot be read
     */
    public static void main(String[] args) throws Exception {
        String table = args.length > 0
                ? args[0]
                : "/Users/Niko/Documents/workspace/spinn3rhadoop_java/export/include/unicode_error_table.tsv";
        Latin1ToUtf8Degarbler degarbler = new Latin1ToUtf8Degarbler(table, 0.8);
        // Alternative sample; it is overwritten by the next assignment and only
        // kept so it can be swapped in for manual experiments.
        String text = "ungl\u221a\u00a3\u00ac\u00backlich k\u221a\u00a3\u00ac\u00a7mpfte s\u221a\u00a3\u00ac\u00a9bastien buemi";
        text = "\u00e7<9c><9f>\u00e6<98>\u00af\u00e6<9c><8d>\u00e4\u00ba<86>\u00e8<87>\u00aa\u00e5\u00b7\u00b1(\u00e7\u00bb<88>\u00e6<96>\u00bc100%) - i.hobby\u2014\u00e6\u00a8\u00a1\u00e5<9e><8b>\u00e5<8c>\u00ba - nintendo world bbs gba|nds|psp|wii|\u00e6\u00b8\u00b8\u00e6<88><8f>\u00e7<8e><8b>|\u00e6\u00a8\u00a1\u00e5<9e><8b>1/100 exia\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00ef\u00bc<8c>bandai\u00e5<88>\u00ab\u00e6<83>\u00b3\u00e9\u00aa<97>\u00e6<88><91>\u00e9<92>\u00b1 1/100\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>mg\u00ef\u00bc<8c>bandai\u00e5<88>\u00ab\u00e6<83>\u00b3\u00e9\u00aa<97>\u00e6<88><91>\u00e9<92>\u00b1 mg exia\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>r2\u00e5<95>\u00a6 mg exiar2\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e5<87>\u00bamg\u00e9<9b>\u00aa\u00e5\u00b4\u00a9 mg\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e5<87>\u00bapg\u00ef\u00bc<8c>\u00e6<88><91>\u00e6<98>\u00af\u00e4\u00b8<8d>\u00e4\u00bc<9a>\u00e4\u00b8<8a>\u017e<93>\u00e7<9a><84> 1/100 exia\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00ef\u00bc<8c>bandai\u00e5<88>\u00ab\u00e6<83>\u00b3\u00e9\u00aa<97>\u00e6<88><91>\u00e9<92>\u00b1 1/100\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>mg\u00ef\u00bc<8c>bandai\u00e5<88>\u00ab\u00e6<83>\u00b3\u00e9\u00aa<97>\u00e6<88><91>\u00e9<92>\u00b1 mg exia\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>r2\u00e5<95>\u00a6 mg exiar2\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e5<87>\u00bamg\u00e9<9b>\u00aa\u00e5\u00b4\u00a9 mg\u00e9<9b>\u00aa\u00e5\u00b4\u00a9\u00e5<8f><91>\u00e5<94>\u00ae \u00e7\u00ad<89>\u00e5<87>\u00bapg\u00ef\u00bc<8c>\u00e6<88><91>\u00e6<98>\u00af\u00e4\u00b8<8d>\u00e4\u00bc<9a>\u00e4\u00b8<8a>\u017e<93>\u00e7<9a><84>";
        System.out.println(degarbler.degarble(text));
        System.out.println(new NullDegarbler(0.8).isGarbled(text));
    }

    /**
     * Degarbler driven by a tab-separated lookup table that maps garbled
     * character sequences to their replacements.
     *
     * <p>Each non-comment line of the table must have at least five tab-separated
     * columns. Column 5 holds the URL-encoded garbled pattern (decoded as
     * ISO-8859-1) and column 3 holds the replacement. Both are lower-cased, so
     * input passed to {@link #degarble(String)} is matched against lower-case
     * patterns only.
     */
    public static class Latin1ToUtf8Degarbler
    extends UnicodeDegarbler {
        /** Shortest garbled sequence that {@link #degarble(String)} looks up. */
        private static final int MIN_PATTERN_LENGTH = 2;
        /** Longest garbled sequence that {@link #degarble(String)} looks up. */
        private static final int MAX_PATTERN_LENGTH = 3;

        private final Map<String, String> table = new HashMap<String, String>();
        private final double minAsciiRatio;

        /**
         * Loads the replacement table.
         *
         * @param tableFile     path of the UTF-8 encoded, tab-separated error table;
         *                      lines starting with {@code #} and blank lines are skipped
         * @param minAsciiRatio texts whose ASCII fraction is below this value are
         *                      reported as garbled by {@link #isGarbled(String)}
         * @throws IOException if the file cannot be read or a data line has fewer
         *                     than five columns
         */
        public Latin1ToUtf8Degarbler(String tableFile, double minAsciiRatio) throws IOException {
            this.minAsciiRatio = minAsciiRatio;
            Scanner sc = new Scanner(new File(tableFile), "UTF-8");
            try {
                // Split on '\n' only: other Unicode line separators may legitimately
                // occur inside the table's character columns.
                sc.useDelimiter("\n");
                while (sc.hasNext()) {
                    String line = sc.next();
                    // Tolerate Windows line endings.
                    if (line.endsWith("\r")) {
                        line = line.substring(0, line.length() - 1);
                    }
                    if (line.isEmpty() || line.startsWith("#")) {
                        continue;
                    }
                    String[] tokens = line.split("\t", 5);
                    if (tokens.length < 5) {
                        throw new IOException("Malformed line in " + tableFile
                                + " (expected 5 tab-separated columns): " + line);
                    }
                    // Spaces are turned into NBSP before decoding and every NBSP is
                    // dropped afterwards, so the stored pattern contains neither.
                    String pattern = tokens[4].replace(' ', '\u00a0');
                    pattern = URLDecoder.decode(pattern, "ISO-8859-1").toLowerCase(Locale.ROOT);
                    pattern = pattern.replace("\u00a0", "");
                    this.table.put(pattern, tokens[2].toLowerCase(Locale.ROOT));
                }
                // Scanner swallows read errors; surface them instead of silently
                // working with a truncated table.
                IOException readError = sc.ioException();
                if (readError != null) {
                    throw readError;
                }
            } finally {
                sc.close();
            }
        }

        /**
         * Replaces every known garbled sequence in {@code text}.
         *
         * <p>Scans left to right; at each position the longest matching sequence
         * (three characters, then two) wins. Characters that start no known
         * sequence are copied unchanged.
         *
         * @param text the text to repair
         * @return the text with all table matches replaced
         */
        @Override
        public String degarble(String text) {
            int n = text.length();
            StringBuilder result = new StringBuilder(n);
            int i = 0;
            while (i < n) {
                String replacement = null;
                int matched = 0;
                for (int k = Math.min(MAX_PATTERN_LENGTH, n - i); k >= MIN_PATTERN_LENGTH; --k) {
                    replacement = this.table.get(text.substring(i, i + k));
                    if (replacement != null) {
                        matched = k;
                        break;
                    }
                }
                if (replacement != null) {
                    result.append(replacement);
                    i += matched;
                } else {
                    result.append(text.charAt(i));
                    ++i;
                }
            }
            return result.toString();
        }

        /**
         * Reports whether the ASCII fraction of {@code text} is below the
         * configured minimum. The empty string is never garbled.
         */
        @Override
        public boolean isGarbled(String text) {
            if (text.isEmpty()) {
                return false;
            }
            return getNonGarbageFraction(text) < this.minAsciiRatio;
        }

        /**
         * Returns the fraction of characters below U+0080 in {@code text},
         * or {@code 1.0} for the empty string.
         */
        @Override
        public double getNonGarbageFraction(String text) {
            int n = text.length();
            if (n == 0) {
                return 1.0;
            }
            int ascii = 0;
            for (int i = 0; i < n; ++i) {
                if (text.charAt(i) < '\u0080') {
                    ++ascii;
                }
            }
            return (double) ascii / (double) n;
        }
    }

    /**
     * Degarbler that never modifies the text and judges it solely by the share
     * of characters that are not {@code '?'}.
     */
    public static class NullDegarbler
    extends UnicodeDegarbler {
        private final double minNonQuestionMarkRatio;

        /**
         * @param minNonQuestionMarkRatio texts whose fraction of non-{@code '?'}
         *                                characters is below this value are
         *                                reported as garbled
         */
        public NullDegarbler(double minNonQuestionMarkRatio) {
            this.minNonQuestionMarkRatio = minNonQuestionMarkRatio;
        }

        /** Returns {@code text} unchanged. */
        @Override
        public String degarble(String text) {
            return text;
        }

        /**
         * Reports whether the fraction of non-{@code '?'} characters in
         * {@code text} is below the configured minimum. The empty string is
         * never garbled.
         */
        @Override
        public boolean isGarbled(String text) {
            if (text.isEmpty()) {
                return false;
            }
            return getNonGarbageFraction(text) < this.minNonQuestionMarkRatio;
        }

        /**
         * Returns the fraction of characters in {@code text} that are not
         * {@code '?'}, or {@code 1.0} for the empty string.
         */
        @Override
        public double getNonGarbageFraction(String text) {
            int n = text.length();
            if (n == 0) {
                return 1.0;
            }
            int nonQuestionMarks = 0;
            for (int i = 0; i < n; ++i) {
                if (text.charAt(i) != '?') {
                    ++nonQuestionMarks;
                }
            }
            return (double) nonQuestionMarks / (double) n;
        }
    }
}

