/*
 * Decompiled with CFR 0.152.
 */
package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.AbstractFileLoader;
import weka.core.converters.BatchConverter;
import weka.core.converters.ConverterUtils;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Batch loader that reads delimiter-separated text (comma by default) into
 * an {@link Instances} object.
 *
 * <p>The first row of the source supplies the attribute names. Every further
 * row is tokenized into {@code Double} values (when the token parses as a
 * number) or {@code String} values. Column types are decided once all rows
 * have been read: a column with no recorded symbols becomes numeric, a column
 * listed in the string range becomes a string attribute, and every other
 * column with recorded symbols becomes nominal.
 *
 * <p>Incremental reading is not supported; see
 * {@link #getNextInstance(Instances)}.
 */
public class CSVLoader extends AbstractFileLoader implements BatchConverter, OptionHandler {
    static final long serialVersionUID = 5607529739745491340L;

    /** The file extension reported by this loader. */
    public static String FILE_EXTENSION = ".csv";

    /** Placeholder used for missing values unless configured otherwise. */
    private static final String DEFAULT_MISSING_VALUE = "?";

    /** Field separator used unless configured otherwise. */
    private static final String DEFAULT_FIELD_SEPARATOR = ",";

    /** Relation name used when the data does not come from a file. */
    private static final String STREAM_RELATION_NAME = "stream";

    /** Per column: the distinct symbols seen so far, mapped to their index. */
    protected ArrayList<Hashtable<Object, Integer>> m_cumulativeStructure;

    /** The raw rows read so far; each cell is a {@code Double} or a {@code String}. */
    protected ArrayList<ArrayList<Object>> m_cumulativeInstances;

    /** Reader over the current source. */
    protected transient BufferedReader m_sourceReader;

    /** Tokenizer wrapped around {@link #m_sourceReader}. */
    protected transient StreamTokenizer m_st;

    /** Columns that are forced to be nominal. */
    protected Range m_NominalAttributes = new Range();

    /** Columns that are forced to be string attributes. */
    protected Range m_StringAttributes = new Range();

    /** The token that denotes a missing value. */
    protected String m_MissingValue = DEFAULT_MISSING_VALUE;

    /** The single-character field separator. */
    protected String m_FieldSeparator = DEFAULT_FIELD_SEPARATOR;

    /** True until the first data row has been checked (ranges get their upper bound then). */
    protected boolean m_FirstCheck;

    /** Creates a loader with retrieval mode 0 (presumably "none" - confirm against Loader). */
    public CSVLoader() {
        this.setRetrieval(0);
    }

    /**
     * @return the default file extension, {@link #FILE_EXTENSION}
     */
    @Override
    public String getFileExtension() {
        return FILE_EXTENSION;
    }

    /**
     * @return a short description of the handled file type
     */
    @Override
    public String getFileDescription() {
        return "CSV data files";
    }

    /**
     * @return all supported extensions; only {@link #getFileExtension()}
     */
    @Override
    public String[] getFileExtensions() {
        return new String[]{this.getFileExtension()};
    }

    /**
     * @return a description of this loader suitable for display to the user
     */
    public String globalInfo() {
        return "Reads a source that is in comma separated format (the default). One can also change the column separator from comma to tab or another character. Assumes that the first row in the file determines the number of and names of the attributes.";
    }

    /**
     * Lists the command line options understood by {@link #setOptions(String[])}.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.add(new Option("\tThe range of attributes to force type to be NOMINAL.\n\t'first' and 'last' are accepted as well.\n\tExamples: \"first-last\", \"1,4,5-27,50-last\"\n\t(default: -none-)", "N", 1, "-N <range>"));
        result.add(new Option("\tThe range of attribute to force type to be STRING.\n\t'first' and 'last' are accepted as well.\n\tExamples: \"first-last\", \"1,4,5-27,50-last\"\n\t(default: -none-)", "S", 1, "-S <range>"));
        result.add(new Option("\tThe string representing a missing value.\n\t(default: ?)", "M", 1, "-M <str>"));
        result.add(new Option("\tThe field separator to be used.\n\t'\\t' can be used as well.\n\t(default: ',')", "F", 1, "-F <separator>"));
        return result.elements();
    }

    /**
     * Parses the options -N, -S, -M and -F. An option that is absent (or
     * empty) resets the corresponding setting to its default.
     *
     * @param options the option array to parse
     * @throws Exception if {@code Utils.getOption} rejects the array
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        // An absent range option yields "", which is exactly the reset value.
        this.setNominalAttributes(Utils.getOption('N', options));
        this.setStringAttributes(Utils.getOption('S', options));

        String missing = Utils.getOption('M', options);
        this.setMissingValue(missing.length() != 0 ? missing : DEFAULT_MISSING_VALUE);

        String separator = Utils.getOption('F', options);
        this.setFieldSeparator(separator.length() != 0 ? separator : DEFAULT_FIELD_SEPARATOR);
    }

    /**
     * Returns the current settings as an option array that
     * {@link #setOptions(String[])} can parse back.
     *
     * @return the current options, including the field separator
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        if (this.getNominalAttributes().length() > 0) {
            result.add("-N");
            result.add(this.getNominalAttributes());
        }
        if (this.getStringAttributes().length() > 0) {
            result.add("-S");
            result.add(this.getStringAttributes());
        }
        result.add("-M");
        result.add(this.getMissingValue());
        // Without -F a non-default separator would be lost on an options round trip.
        result.add("-F");
        result.add(this.getFieldSeparator());
        return result.toArray(new String[result.size()]);
    }

    /**
     * @param value the range of columns to force to nominal
     */
    public void setNominalAttributes(String value) {
        this.m_NominalAttributes.setRanges(value);
    }

    /**
     * @return the range of columns forced to nominal
     */
    public String getNominalAttributes() {
        return this.m_NominalAttributes.getRanges();
    }

    /**
     * @return the tip text for the nominal-attributes property
     */
    public String nominalAttributesTipText() {
        return "The range of attributes to force to be of type NOMINAL, example ranges: 'first-last', '1,4,7-14,50-last'.";
    }

    /**
     * @param value the range of columns to force to string
     */
    public void setStringAttributes(String value) {
        this.m_StringAttributes.setRanges(value);
    }

    /**
     * @return the range of columns forced to string
     */
    public String getStringAttributes() {
        return this.m_StringAttributes.getRanges();
    }

    /**
     * @return the tip text for the string-attributes property
     */
    public String stringAttributesTipText() {
        return "The range of attributes to force to be of type STRING, example ranges: 'first-last', '1,4,7-14,50-last'.";
    }

    /**
     * @param value the token that denotes a missing value
     */
    public void setMissingValue(String value) {
        this.m_MissingValue = value;
    }

    /**
     * @return the token that denotes a missing value
     */
    public String getMissingValue() {
        return this.m_MissingValue;
    }

    /**
     * @return the tip text for the missing-value property
     */
    public String missingValueTipText() {
        return "The placeholder for missing values, default is '?'.";
    }

    /**
     * Sets the field separator. The value is passed through
     * {@code Utils.unbackQuoteChars} first; if the result is not exactly one
     * character long the separator falls back to the default and a warning
     * is printed to {@code System.err}.
     *
     * @param value the separator, possibly in back-quoted form
     */
    public void setFieldSeparator(String value) {
        this.m_FieldSeparator = Utils.unbackQuoteChars(value);
        if (this.m_FieldSeparator.length() != 1) {
            this.m_FieldSeparator = DEFAULT_FIELD_SEPARATOR;
            // The message names the escape sequence \t rather than embedding a raw TAB.
            System.err.println("Field separator can only be a single character (exception being '\\t'), defaulting back to '" + this.m_FieldSeparator + "'!");
        }
    }

    /**
     * @return the separator, passed through {@code Utils.backQuoteChars}
     */
    public String getFieldSeparator() {
        return Utils.backQuoteChars(this.m_FieldSeparator);
    }

    /**
     * @return the tip text for the field-separator property
     */
    public String fieldSeparatorTipText() {
        return "The character to use as separator for the columns/fields (use '\\t' for TAB).";
    }

    /**
     * Uses the given stream as the source and discards any previously
     * determined structure and file reference.
     *
     * @param input the stream to read from
     * @throws IOException declared by the overridden method
     */
    @Override
    public void setSource(InputStream input) throws IOException {
        this.m_structure = null;
        this.m_sourceFile = null;
        this.m_File = null;
        this.m_FirstCheck = true;
        this.m_sourceReader = new BufferedReader(new InputStreamReader(input));
    }

    /**
     * Delegates to the superclass implementation.
     *
     * @param file the file to read from
     * @throws IOException if the superclass cannot use the file
     */
    @Override
    public void setSource(File file) throws IOException {
        super.setSource(file);
    }

    /**
     * Returns the header-only structure, reading the header row on first use.
     *
     * @return the structure of the data
     * @throws IOException if no source is set or the header cannot be read
     */
    @Override
    public Instances getStructure() throws IOException {
        if (this.m_sourceFile == null && this.m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
        if (this.m_structure == null) {
            // Failures propagate instead of silently leaving the structure null.
            this.m_st = new StreamTokenizer(this.m_sourceReader);
            this.initTokenizer(this.m_st);
            this.readStructure(this.m_st);
        }
        return this.m_structure;
    }

    /**
     * Reads the structure, which for this format is just the header row.
     *
     * @param st the tokenizer positioned at the start of the source
     * @throws IOException if the header cannot be read
     */
    private void readStructure(StreamTokenizer st) throws IOException {
        this.readHeader(st);
    }

    /**
     * Reads all remaining rows and returns them as a data set. On success
     * the structure is replaced by the final header, the retrieval mode is
     * set to 1 (presumably "batch" - confirm against Loader) and the reader
     * is closed. On failure the reader is closed as well before the
     * exception propagates.
     *
     * @return the loaded data
     * @throws IOException if no source is set or reading/parsing fails
     */
    @Override
    public Instances getDataSet() throws IOException {
        if (this.m_sourceFile == null && this.m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
        Instances dataSet;
        boolean loaded = false;
        try {
            if (this.m_structure == null) {
                this.getStructure();
            }
            if (this.m_st == null) {
                this.m_st = new StreamTokenizer(this.m_sourceReader);
                this.initTokenizer(this.m_st);
            }
            // For data rows the separator must surface as a token of its own,
            // otherwise empty fields could not be detected.
            this.m_st.ordinaryChar(this.m_FieldSeparator.charAt(0));

            int numAttributes = this.m_structure.numAttributes();
            this.m_cumulativeStructure = new ArrayList<Hashtable<Object, Integer>>(numAttributes);
            for (int i = 0; i < numAttributes; ++i) {
                this.m_cumulativeStructure.add(new Hashtable<Object, Integer>());
            }
            this.m_cumulativeInstances = new ArrayList<ArrayList<Object>>();
            ArrayList<Object> row;
            while ((row = this.getInstance(this.m_st)) != null) {
                this.m_cumulativeInstances.add(row);
            }

            dataSet = new Instances(this.relationName(), this.buildAttributes(), this.m_cumulativeInstances.size());
            this.fillInstances(dataSet);

            this.m_structure = new Instances(dataSet, 0);
            this.setRetrieval(1);
            this.m_cumulativeStructure = null;
            loaded = true;
        }
        finally {
            if (!loaded) {
                this.closeSourceQuietly();
            }
        }
        this.m_sourceReader.close();
        return dataSet;
    }

    /**
     * Derives the relation name: the source file name without a trailing
     * ".csv" (any case), or "stream" when there is no source file.
     */
    private String relationName() {
        if (this.m_sourceFile == null) {
            return STREAM_RELATION_NAME;
        }
        return this.m_sourceFile.getName().replaceAll("\\.[cC][sS][vV]$", "");
    }

    /**
     * Builds the final attribute list from the header names and the symbols
     * collected per column.
     */
    private ArrayList<Attribute> buildAttributes() {
        int numAttributes = this.m_structure.numAttributes();
        ArrayList<Attribute> atts = new ArrayList<Attribute>(numAttributes);
        for (int i = 0; i < numAttributes; ++i) {
            String attName = this.m_structure.attribute(i).name();
            Hashtable<Object, Integer> symbols = this.m_cumulativeStructure.get(i);
            if (symbols.size() == 0) {
                // No symbols recorded: the column is numeric.
                atts.add(new Attribute(attName));
            } else if (this.m_StringAttributes.isInRange(i)) {
                atts.add(new Attribute(attName, (List<String>)null));
            } else {
                atts.add(new Attribute(attName, this.nominalLabels(symbols)));
            }
        }
        return atts;
    }

    /**
     * Turns a symbol table into a label list ordered by symbol index.
     * A label that starts with a quote character loses its first and last
     * character, provided it is at least two characters long.
     */
    private ArrayList<String> nominalLabels(Hashtable<Object, Integer> symbols) {
        ArrayList<String> labels = new ArrayList<String>(symbols.size());
        for (int z = 0; z < symbols.size(); ++z) {
            labels.add("dummy");
        }
        Enumeration<Object> keys = symbols.keys();
        while (keys.hasMoreElements()) {
            Object key = keys.nextElement();
            int index = symbols.get(key);
            String label = key.toString();
            // The length guard keeps a lone quote character from causing a
            // StringIndexOutOfBoundsException in substring.
            if (label.length() > 1 && (label.startsWith("'") || label.startsWith("\""))) {
                label = label.substring(1, label.length() - 1);
            }
            labels.set(index, label);
        }
        return labels;
    }

    /**
     * Converts every collected raw row into a {@link DenseInstance} of
     * weight 1.0 and appends it to the given data set.
     */
    private void fillInstances(Instances dataSet) {
        for (int i = 0; i < this.m_cumulativeInstances.size(); ++i) {
            ArrayList<Object> row = this.m_cumulativeInstances.get(i);
            double[] vals = new double[dataSet.numAttributes()];
            for (int j = 0; j < row.size(); ++j) {
                Object cell = row.get(j);
                Attribute att = dataSet.attribute(j);
                if (cell instanceof String) {
                    String text = (String)cell;
                    if (text.equals(this.m_MissingValue)) {
                        vals[j] = Utils.missingValue();
                    } else if (att.isString()) {
                        vals[j] = att.addStringValue(text);
                    } else if (att.isNominal()) {
                        vals[j] = this.m_cumulativeStructure.get(j).get(cell);
                    } else {
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                } else if (att.isNominal()) {
                    // A numeric token in a column that ended up nominal.
                    vals[j] = this.m_cumulativeStructure.get(j).get(cell);
                } else if (att.isString()) {
                    vals[j] = att.addStringValue("" + cell);
                } else {
                    vals[j] = (Double)cell;
                }
            }
            dataSet.add(new DenseInstance(1.0, vals));
        }
    }

    /** Closes the reader, if any, ignoring failures so an earlier exception is not masked. */
    private void closeSourceQuietly() {
        if (this.m_sourceReader == null) {
            return;
        }
        try {
            this.m_sourceReader.close();
        }
        catch (IOException ignored) {
            // The exception that triggered this cleanup is the one worth reporting.
        }
    }

    /**
     * Not supported by this loader.
     *
     * @param structure ignored
     * @return never returns normally
     * @throws IOException always
     */
    @Override
    public Instance getNextInstance(Instances structure) throws IOException {
        throw new IOException("CSVLoader can't read data sets incrementally.");
    }

    /**
     * Reads one row from the tokenizer.
     *
     * @param tokenizer the tokenizer to read from
     * @return the row as a list of {@code Double}/{@code String} cells, or
     *         {@code null} at the end of the input
     * @throws IOException if the row has the wrong number of values or the
     *         collected structure cannot be updated
     */
    private ArrayList<Object> getInstance(StreamTokenizer tokenizer) throws IOException {
        ArrayList<Object> current = new ArrayList<Object>();
        ConverterUtils.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            return null;
        }
        char separator = this.m_FieldSeparator.charAt(0);
        boolean first = true;
        while (tokenizer.ttype != StreamTokenizer.TT_EOL && tokenizer.ttype != StreamTokenizer.TT_EOF) {
            if (!first) {
                ConverterUtils.getToken(tokenizer);
            }
            // A separator, end of line or end of input here means the field is
            // empty. Treating EOF like EOL handles a last row that ends with a
            // separator but no newline (sval is null at EOF).
            boolean emptyField = tokenizer.ttype == separator
                || tokenizer.ttype == StreamTokenizer.TT_EOL
                || tokenizer.ttype == StreamTokenizer.TT_EOF;
            if (emptyField) {
                current.add(this.m_MissingValue);
            } else {
                String text = tokenizer.sval;
                if (text.equals(this.m_MissingValue)) {
                    current.add(this.m_MissingValue);
                } else {
                    try {
                        current.add(Double.valueOf(text));
                    }
                    catch (NumberFormatException e) {
                        // Not a number: keep the raw text.
                        current.add(text);
                    }
                }
                // Consume the separator (or line end) that follows the value.
                ConverterUtils.getToken(tokenizer);
            }
            first = false;
        }
        if (current.size() != this.m_structure.numAttributes()) {
            ConverterUtils.errms(tokenizer, "wrong number of values. Read " + current.size() + ", expected " + this.m_structure.numAttributes());
        }
        try {
            this.checkStructure(current);
        }
        catch (Exception ex) {
            // Continuing with an inconsistent symbol table would only fail
            // later and less clearly, so report the problem to the caller.
            IOException failure = new IOException("Unable to update the data structure: " + ex.getMessage());
            failure.initCause(ex);
            throw failure;
        }
        return current;
    }

    /**
     * Updates the per-column symbol tables with the cells of a new row.
     *
     * <p>A cell is recorded as a symbol when it is a {@code String} or its
     * column is in the nominal or string range. When a column receives its
     * first symbol, the numeric values of all earlier rows in that column are
     * recorded first so they get indices too. A {@code Double} in any other
     * column is only recorded once the column already has symbols.
     *
     * @param current the row to check
     * @throws Exception if the row is null or holds an unexpected cell type
     */
    private void checkStructure(ArrayList<Object> current) throws Exception {
        if (current == null) {
            throw new Exception("current shouldn't be null in checkStructure");
        }
        if (this.m_FirstCheck) {
            this.m_NominalAttributes.setUpper(current.size() - 1);
            this.m_StringAttributes.setUpper(current.size() - 1);
            this.m_FirstCheck = false;
        }
        for (int i = 0; i < current.size(); ++i) {
            Object cell = current.get(i);
            if (cell instanceof String || this.m_NominalAttributes.isInRange(i) || this.m_StringAttributes.isInRange(i)) {
                if (cell.toString().compareTo(this.m_MissingValue) == 0) {
                    continue;
                }
                Hashtable<Object, Integer> symbols = this.m_cumulativeStructure.get(i);
                if (symbols.containsKey(cell)) {
                    continue;
                }
                if (symbols.size() == 0) {
                    // First symbol for this column: back-fill the numbers seen in earlier rows.
                    for (int j = 0; j < this.m_cumulativeInstances.size(); ++j) {
                        Object earlier = this.m_cumulativeInstances.get(j).get(i);
                        if (earlier instanceof String || symbols.containsKey(earlier)) {
                            continue;
                        }
                        symbols.put((Double)earlier, Integer.valueOf(symbols.size()));
                    }
                }
                symbols.put(cell, Integer.valueOf(symbols.size()));
            } else if (cell instanceof Double) {
                Hashtable<Object, Integer> symbols = this.m_cumulativeStructure.get(i);
                if (symbols.size() == 0 || symbols.containsKey(cell)) {
                    continue;
                }
                symbols.put(cell, Integer.valueOf(symbols.size()));
            } else {
                throw new Exception("Wrong object type in checkStructure!");
            }
        }
    }

    /**
     * Reads the header row and stores an empty structure whose attributes
     * carry the header names.
     *
     * @param tokenizer the tokenizer positioned at the start of the source
     * @throws IOException if the source is empty or cannot be read
     */
    private void readHeader(StreamTokenizer tokenizer) throws IOException {
        ArrayList<Attribute> attribNames = new ArrayList<Attribute>();
        ConverterUtils.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            ConverterUtils.errms(tokenizer, "premature end of file");
        }
        // Stop at EOF as well: a header line without a trailing newline would
        // otherwise never reach TT_EOL and the loop would not terminate.
        while (tokenizer.ttype != StreamTokenizer.TT_EOL && tokenizer.ttype != StreamTokenizer.TT_EOF) {
            attribNames.add(new Attribute(tokenizer.sval));
            ConverterUtils.getToken(tokenizer);
        }
        this.m_structure = new Instances(this.relationName(), attribNames, 0);
    }

    /**
     * Configures the tokenizer syntax: control characters and the field
     * separator are whitespace, characters 32-255 are word characters,
     * '%' starts a comment, single and double quotes delimit quoted strings,
     * and line ends are reported as tokens.
     *
     * @param tokenizer the tokenizer to configure
     */
    private void initTokenizer(StreamTokenizer tokenizer) {
        char separator = this.m_FieldSeparator.charAt(0);
        tokenizer.resetSyntax();
        tokenizer.whitespaceChars(0, ' ' - 1);
        tokenizer.wordChars(' ', 255);
        tokenizer.whitespaceChars(separator, separator);
        tokenizer.commentChar('%');
        tokenizer.quoteChar('"');
        tokenizer.quoteChar('\'');
        tokenizer.eolIsSignificant(true);
    }

    /**
     * Forgets the structure and tokenizer, sets the retrieval mode back to 0
     * and, if a file name is recorded, sets that file again.
     *
     * @throws IOException if the file cannot be set again
     */
    @Override
    public void reset() throws IOException {
        this.m_structure = null;
        this.m_st = null;
        this.setRetrieval(0);
        if (this.m_File != null) {
            this.setFile(new File(this.m_File));
        }
    }

    /**
     * @return the revision string extracted by {@code RevisionUtils}
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 6098 $");
    }

    /**
     * Runs the loader from the command line.
     *
     * @param args the command line arguments handed to {@code runFileLoader}
     */
    public static void main(String[] args) {
        CSVLoader.runFileLoader(new CSVLoader(), args);
    }
}

