/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.snap.spinn3rhadoop;

import edu.stanford.snap.spinn3rhadoop.Spinn3rDoc;
import edu.stanford.snap.spinn3rhadoop.UnicodeDegarbler;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import org.apache.commons.lang.StringEscapeUtils;

/**
 * Base class for readers that parse {@link Spinn3rDoc} records from a character stream.
 *
 * <p>The input stream is decoded with the caller-supplied character encoding and every
 * line is passed through the configured {@link UnicodeDegarbler} before it is parsed.
 * Two concrete layouts are provided: {@link MultiLineReader} (one field per line, records
 * separated by an empty line) and {@link SingleLineReader} (one record per line).
 *
 * <p>The reader does not close the underlying stream; that is the caller's responsibility.
 */
public abstract class Spinn3rDocumentReader {
    /** Input file read by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/tmp/web-2011-07-23T23-00-00Z.txt";

    /** Upper bound on the number of documents printed by {@link #main(String[])}. */
    private static final int MAX_DOCS_TO_PRINT = 100;

    /** Line-oriented view of the input stream, decoded with the requested charset. */
    protected BufferedReader bufferedReader;
    /** Version tag copied onto every document produced by this reader. */
    protected Spinn3rDoc.Spinn3rVersion version;
    /** Applied to every raw line before it is parsed. */
    protected UnicodeDegarbler degarbler;

    /**
     * Creates a reader over {@code in}.
     *
     * @param in           stream to read records from
     * @param version      version assigned to every document that is read
     * @param charEncoding name of the charset used to decode {@code in}
     * @param degarbler    degarbler applied to each line before parsing
     * @throws UnsupportedEncodingException if {@code charEncoding} is not a supported charset
     */
    public Spinn3rDocumentReader(InputStream in, Spinn3rDoc.Spinn3rVersion version, String charEncoding, UnicodeDegarbler degarbler) throws UnsupportedEncodingException {
        this.bufferedReader = new BufferedReader(new InputStreamReader(in, charEncoding));
        this.version = version;
        this.degarbler = degarbler;
    }

    /**
     * Reads the next document from the stream.
     *
     * @return the next document, or {@code null} when the input is exhausted
     * @throws IOException if reading fails or a line cannot be parsed
     */
    public abstract Spinn3rDoc read() throws IOException;

    /**
     * Small manual smoke test: prints up to {@value #MAX_DOCS_TO_PRINT} documents read with a
     * {@link MultiLineReader} from the file named by {@code args[0]}, or from the default
     * path when no argument is given. Stops early at end of input.
     *
     * @param args optional; {@code args[0]} is the path of the file to read
     * @throws Exception if the file cannot be opened, read, or parsed
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        InputStream in = new FileInputStream(path);
        try {
            Spinn3rDocumentReader reader = new MultiLineReader(in, Spinn3rDoc.Spinn3rVersion.B, "UTF-8", new UnicodeDegarbler.NullDegarbler(0.8));
            for (int i = 0; i < MAX_DOCS_TO_PRINT; ++i) {
                Spinn3rDoc doc = reader.read();
                if (doc == null) {
                    // End of input: nothing more to print.
                    break;
                }
                System.out.println(doc);
            }
        } finally {
            // The reader never closes the stream itself, so release the file handle here.
            in.close();
        }
    }

    /**
     * Wraps a parse failure in an {@link IOException} whose message names the original
     * exception type, its message, and the offending line. The original exception is
     * attached as the cause so its stack trace is not lost.
     */
    private static IOException parseFailure(Exception cause, String line) {
        return new IOException(cause.getClass().getName() + " : " + cause.getMessage() + ": LINE:" + line, cause);
    }

    /**
     * Reads documents stored as one field per line. Each line has the form
     * {@code TYPE<TAB>value}; an empty line terminates the current document.
     *
     * <p>Recognized types: {@code U} (url), {@code D} (date), {@code T} (title, HTML-unescaped),
     * {@code C} (content, HTML-unescaped), {@code L} (link: {@code int<TAB>text}) and
     * {@code Q} (quote: {@code int<TAB>int<TAB>text}). Lines with any other type are ignored.
     */
    public static class MultiLineReader
    extends Spinn3rDocumentReader {
        /**
         * @see Spinn3rDocumentReader#Spinn3rDocumentReader(InputStream, Spinn3rDoc.Spinn3rVersion, String, UnicodeDegarbler)
         */
        public MultiLineReader(InputStream in, Spinn3rDoc.Spinn3rVersion version, String charEncoding, UnicodeDegarbler degarbler) throws UnsupportedEncodingException {
            super(in, version, charEncoding, degarbler);
        }

        /**
         * Reads lines until an empty (degarbled) line or end of input and assembles them
         * into one document.
         *
         * <p>An empty line encountered before any field line yields a document with only
         * its version set. If the input ends without a terminating empty line, the
         * fields read so far are still returned as a document rather than dropped.
         *
         * @return the next document, or {@code null} if the input was already exhausted
         * @throws IOException if reading fails, or a line lacks a tab-separated value or
         *                     contains a malformed number
         */
        @Override
        public Spinn3rDoc read() throws IOException {
            Spinn3rDoc doc = new Spinn3rDoc();
            doc.version = this.version;
            // Tracks whether this call consumed any record line, so that a final record
            // not followed by an empty line is returned instead of being lost at EOF.
            boolean sawRecordLine = false;
            String line;
            while ((line = this.bufferedReader.readLine()) != null) {
                line = this.degarbler.degarble(line);
                if (line.isEmpty()) {
                    return doc;
                }
                sawRecordLine = true;
                String[] tokens = line.split("\t", 2);
                try {
                    String lineType = tokens[0];
                    // Throws ArrayIndexOutOfBoundsException (reported below) when the line has no tab.
                    String value = tokens[1];
                    if ("U".equals(lineType)) {
                        doc.url = value;
                    } else if ("D".equals(lineType)) {
                        doc.date = value;
                    } else if ("T".equals(lineType)) {
                        doc.title = StringEscapeUtils.unescapeHtml(value);
                    } else if ("C".equals(lineType)) {
                        doc.content = StringEscapeUtils.unescapeHtml(value);
                    } else if ("L".equals(lineType)) {
                        String[] linkTokens = value.split("\t", 2);
                        doc.links.add(new Spinn3rDoc.Link(Integer.parseInt(linkTokens[0]), linkTokens[1]));
                    } else if ("Q".equals(lineType)) {
                        String[] quoteTokens = value.split("\t", 3);
                        doc.quotes.add(new Spinn3rDoc.Quote(Integer.parseInt(quoteTokens[0]), Integer.parseInt(quoteTokens[1]), quoteTokens[2]));
                    }
                    // Any other line type is silently skipped.
                }
                catch (Exception e) {
                    throw parseFailure(e, line);
                }
            }
            return sawRecordLine ? doc : null;
        }
    }

    /**
     * Reads documents stored one per line. Each line consists of tab-separated columns
     * of the form {@code TYPE:value}.
     *
     * <p>Recognized types: {@code U} (url), {@code D} (date), {@code T} (title, HTML-unescaped),
     * {@code F} (raw title), {@code C} (content, HTML-unescaped), {@code H} (raw content),
     * {@code L} (link: {@code int:int:text}) and {@code Q} (quote: {@code int:int:text}).
     * Columns with any other type are ignored.
     */
    public static class SingleLineReader
    extends Spinn3rDocumentReader {
        /**
         * @see Spinn3rDocumentReader#Spinn3rDocumentReader(InputStream, Spinn3rDoc.Spinn3rVersion, String, UnicodeDegarbler)
         */
        public SingleLineReader(InputStream in, Spinn3rDoc.Spinn3rVersion version, String charEncoding, UnicodeDegarbler degarbler) throws UnsupportedEncodingException {
            super(in, version, charEncoding, degarbler);
        }

        /**
         * Reads one line and parses its columns into a document.
         *
         * @return the next document, or {@code null} at end of input (or if the degarbler
         *         returns {@code null} for the line)
         * @throws IOException if reading fails, or a column lacks a {@code ':'}-separated
         *                     value or contains a malformed number
         */
        @Override
        public Spinn3rDoc read() throws IOException {
            String line = this.bufferedReader.readLine();
            if (line == null) {
                // End of input: do not hand null to the degarbler, which may not accept it.
                return null;
            }
            line = this.degarbler.degarble(line);
            if (line == null) {
                return null;
            }
            Spinn3rDoc doc = new Spinn3rDoc();
            doc.version = this.version;
            for (String col : line.split("\t")) {
                String[] tokens = col.split(":", 2);
                try {
                    String colType = tokens[0];
                    // Throws ArrayIndexOutOfBoundsException (reported below) when the column has no ':'.
                    String value = tokens[1];
                    if ("U".equals(colType)) {
                        doc.url = value;
                    } else if ("D".equals(colType)) {
                        // Replace the first 'T' with a space and drop everything from the first 'Z' on
                        // (presumably normalizing an ISO-8601 timestamp - confirm against the data).
                        doc.date = value.replaceFirst("T", " ").replaceFirst("Z.*", "");
                    } else if ("T".equals(colType)) {
                        doc.title = StringEscapeUtils.unescapeHtml(value);
                    } else if ("F".equals(colType)) {
                        doc.title_raw = value;
                    } else if ("C".equals(colType)) {
                        doc.content = StringEscapeUtils.unescapeHtml(value);
                    } else if ("H".equals(colType)) {
                        doc.content_raw = value;
                    } else if ("L".equals(colType)) {
                        String[] linkTokens = value.split(":", 3);
                        doc.links.add(new Spinn3rDoc.Link(Integer.parseInt(linkTokens[0]), Integer.parseInt(linkTokens[1]), linkTokens[2]));
                    } else if ("Q".equals(colType)) {
                        String[] quoteTokens = value.split(":", 3);
                        doc.quotes.add(new Spinn3rDoc.Quote(Integer.parseInt(quoteTokens[0]), Integer.parseInt(quoteTokens[1]), quoteTokens[2]));
                    }
                    // Any other column type is silently skipped.
                }
                catch (Exception e) {
                    throw parseFailure(e, line);
                }
            }
            return doc;
        }
    }
}

