package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.Enumeration;
import java.util.Hashtable;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

/* loaded from: input_file:weka-3-2-3/weka.jar:weka/core/converters/CSVLoader.class */
/**
 * Loads a comma- or tab-separated text file into an {@link Instances} object.
 *
 * <p>The first row of the file supplies the attribute names. Every following
 * row is read into memory; a column becomes nominal as soon as one of its
 * values fails to parse as a number, otherwise it stays numeric. An empty
 * field or a literal {@code ?} is stored as a missing value. Incremental
 * reading is not supported.
 */
public class CSVLoader extends AbstractLoader {
    /** Header-only description of the data; {@code null} until a header has been read. */
    protected Instances m_structure = null;

    /** File the data is read from; {@code null} until {@link #setSource(File)} is called. */
    protected File m_sourceFile = null;

    /**
     * One {@code Hashtable} per attribute, mapping each label seen in that
     * column to its index. An empty table means the column is still numeric.
     */
    private FastVector m_cumulativeStructure;

    /** Rows read so far; each row is a {@code FastVector} of {@code Double} and {@code String} values. */
    private FastVector m_cumulativeInstances;

    /**
     * Returns a short description of this loader.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Reads a source that is in comma separated or tab separated format. Assumes that the first row in the file determines the number of and names of the attributes.";
    }

    /** Forgets the cached structure so the header is read again on next use. */
    public void reset() {
        this.m_structure = null;
    }

    /**
     * Sets the file to load from and checks that it can be opened.
     *
     * @param file the source file
     * @throws IOException if {@code file} is null or cannot be opened
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void setSource(File file) throws IOException {
        reset();
        if (file == null) {
            throw new IOException("Source file object is null!");
        }
        this.m_sourceFile = file;
        // Open and immediately close the file purely to verify that it is readable.
        openSource().close();
    }

    /**
     * Returns the header (attribute names, no rows) of the source file,
     * reading it from disk if it has not been determined yet.
     *
     * @return the structure of the data set
     * @throws IOException if no source is set, the file cannot be opened or
     *     the header cannot be read
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getStructure() throws IOException {
        if (this.m_sourceFile == null) {
            throw new IOException("No source has been specified");
        }
        if (this.m_structure == null) {
            BufferedReader reader = openSource();
            try {
                StreamTokenizer tokenizer = new StreamTokenizer(reader);
                initTokenizer(tokenizer);
                readStructure(tokenizer);
            } finally {
                // Release the file handle whether or not the header was parsed.
                reader.close();
            }
        }
        return this.m_structure;
    }

    /**
     * Reads the structure of the data from the tokenizer.
     *
     * @param streamTokenizer tokenizer positioned at the start of the file
     * @throws IOException if the header cannot be read
     */
    private void readStructure(StreamTokenizer streamTokenizer) throws IOException {
        readHeader(streamTokenizer);
    }

    /**
     * Reads the complete file and returns it as a data set. The attribute
     * types are decided only after all rows have been seen.
     *
     * @return the loaded data set
     * @throws IOException if no source is set, the file cannot be read, or a
     *     row is inconsistent with the inferred attribute types
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getDataSet() throws IOException {
        if (this.m_sourceFile == null) {
            throw new IOException("No source has been specified");
        }
        // Re-validates the file and drops any previously cached structure.
        setSource(this.m_sourceFile);
        BufferedReader reader = openSource();
        try {
            StreamTokenizer tokenizer = new StreamTokenizer(reader);
            initTokenizer(tokenizer);
            readStructure(tokenizer);
            // After the header, separators must surface as tokens so that
            // empty fields can be recognised as missing values.
            tokenizer.ordinaryChar(',');
            tokenizer.ordinaryChar('\t');

            int numAttributes = this.m_structure.numAttributes();
            this.m_cumulativeStructure = new FastVector(numAttributes);
            for (int col = 0; col < numAttributes; col++) {
                this.m_cumulativeStructure.addElement(new Hashtable());
            }
            this.m_cumulativeInstances = new FastVector();
            FastVector row;
            while ((row = getInstance(tokenizer)) != null) {
                this.m_cumulativeInstances.addElement(row);
            }
        } finally {
            // Close the file even when parsing fails part-way through.
            reader.close();
        }

        Instances dataSet = buildDataSet(buildAttributes());
        this.m_structure = new Instances(dataSet, 0);
        return dataSet;
    }

    /**
     * Opens the current source file for reading.
     *
     * @return a buffered reader over the source file
     * @throws IOException with message "File not found" if the file cannot be opened
     */
    private BufferedReader openSource() throws IOException {
        try {
            return new BufferedReader(new FileReader(this.m_sourceFile));
        } catch (FileNotFoundException e) {
            IOException wrapped = new IOException("File not found");
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Builds the final attribute list from the header names and the labels
     * collected per column: numeric when no label was recorded, nominal otherwise.
     *
     * @return the attributes, in header order
     */
    private FastVector buildAttributes() {
        int numAttributes = this.m_structure.numAttributes();
        FastVector attributes = new FastVector(numAttributes);
        for (int col = 0; col < numAttributes; col++) {
            String name = this.m_structure.attribute(col).name();
            Hashtable labels = (Hashtable) this.m_cumulativeStructure.elementAt(col);
            if (labels.size() == 0) {
                attributes.addElement(new Attribute(name));
                continue;
            }
            // Pre-fill so each label can be stored at the index recorded for it.
            FastVector ordered = new FastVector(labels.size());
            for (int k = 0; k < labels.size(); k++) {
                ordered.addElement("dummy");
            }
            Enumeration keys = labels.keys();
            while (keys.hasMoreElements()) {
                Object label = keys.nextElement();
                ordered.setElementAt(label.toString(), ((Integer) labels.get(label)).intValue());
            }
            attributes.addElement(new Attribute(name, ordered));
        }
        return attributes;
    }

    /**
     * Converts the buffered rows into instances of a new data set.
     *
     * @param attributes the attributes produced by {@link #buildAttributes()}
     * @return the populated data set, named after the source file
     * @throws IOException if a non-numeric value is found in a numeric column
     */
    private Instances buildDataSet(FastVector attributes) throws IOException {
        int numRows = this.m_cumulativeInstances.size();
        Instances dataSet = new Instances(this.m_sourceFile.getName(), attributes, numRows);
        for (int r = 0; r < numRows; r++) {
            FastVector row = (FastVector) this.m_cumulativeInstances.elementAt(r);
            double[] values = new double[dataSet.numAttributes()];
            for (int col = 0; col < row.size(); col++) {
                Object value = row.elementAt(col);
                if ("?".equals(value)) {
                    values[col] = Instance.missingValue();
                } else if (dataSet.attribute(col).isNominal()) {
                    // Nominal values are stored as the index of their label.
                    Hashtable labels = (Hashtable) this.m_cumulativeStructure.elementAt(col);
                    values[col] = ((Integer) labels.get(value)).intValue();
                } else if (value instanceof String) {
                    // Report the inconsistency to the caller instead of killing the JVM.
                    throw new IOException("Wrong attribute type!!!");
                } else {
                    values[col] = ((Double) value).doubleValue();
                }
            }
            dataSet.add(new Instance(1.0d, values));
        }
        return dataSet;
    }

    /**
     * Incremental reading is not supported by this loader.
     *
     * @return never returns normally
     * @throws IOException always
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instance getNextInstance() throws IOException {
        throw new IOException("CSVLoader can't read data sets incrementally.");
    }

    /**
     * Reads one row of values. Numeric tokens become {@code Double}s, other
     * tokens become {@code String}s with spaces replaced by underscores, and
     * empty fields become {@code "?"}.
     *
     * @param streamTokenizer tokenizer positioned at the start of a row
     * @return the row values, or {@code null} if the end of the file was reached
     * @throws IOException if reading fails or the row has the wrong number of values
     */
    private FastVector getInstance(StreamTokenizer streamTokenizer) throws IOException {
        FastVector row = new FastVector();
        ConverterUtils.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            return null;
        }
        boolean first = true;
        while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
                && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
            // The first token is already current; afterwards the current token
            // is a separator and the next field has to be fetched.
            if (!first) {
                ConverterUtils.getToken(streamTokenizer);
            }
            first = false;
            int type = streamTokenizer.ttype;
            // A separator or line end where a value was expected is an empty
            // field. End of file is treated the same way so that a final row
            // ending in a separator without a newline does not dereference a
            // null sval.
            if (type == ',' || type == '\t'
                    || type == StreamTokenizer.TT_EOL || type == StreamTokenizer.TT_EOF) {
                row.addElement("?");
            } else {
                try {
                    row.addElement(Double.valueOf(streamTokenizer.sval));
                } catch (NumberFormatException e) {
                    row.addElement(streamTokenizer.sval.replace(' ', '_'));
                }
                // Step onto the separator (or line end) that follows the value.
                ConverterUtils.getToken(streamTokenizer);
            }
        }
        if (row.size() != this.m_structure.numAttributes()) {
            // NOTE(review): errms is presumably expected to raise an IOException - confirm in ConverterUtils.
            ConverterUtils.errms(streamTokenizer, "wrong number of values. Read " + row.size()
                    + ", expected " + this.m_structure.numAttributes());
        }
        checkStructure(row);
        return row;
    }

    /**
     * Updates the per-column label tables with the values of a new row. The
     * first string seen in a column turns it nominal, at which point the
     * numbers from earlier rows are registered as labels before the string.
     *
     * @param fastVector the row just read
     * @throws IllegalArgumentException if the row is null or holds a value
     *     that is neither a {@code String} nor a {@code Double}
     */
    private void checkStructure(FastVector fastVector) {
        if (fastVector == null) {
            throw new IllegalArgumentException("current shouldn't be null in checkStructure");
        }
        for (int col = 0; col < fastVector.size(); col++) {
            Object value = fastVector.elementAt(col);
            if (value instanceof Double) {
                Hashtable labels = (Hashtable) this.m_cumulativeStructure.elementAt(col);
                // Numbers are only recorded once the column is already nominal.
                if (labels.size() != 0 && !labels.containsKey(value)) {
                    labels.put(value, Integer.valueOf(labels.size()));
                }
            } else if (value instanceof String) {
                if ("?".equals(value)) {
                    continue;
                }
                Hashtable labels = (Hashtable) this.m_cumulativeStructure.elementAt(col);
                if (labels.containsKey(value)) {
                    continue;
                }
                if (labels.size() == 0) {
                    // Column just became nominal: register the numbers from earlier rows first.
                    for (int r = 0; r < this.m_cumulativeInstances.size(); r++) {
                        Object earlier = ((FastVector) this.m_cumulativeInstances.elementAt(r)).elementAt(col);
                        if (earlier instanceof Double && !labels.containsKey(earlier)) {
                            labels.put(earlier, Integer.valueOf(labels.size()));
                        }
                    }
                }
                labels.put(value, Integer.valueOf(labels.size()));
            } else {
                throw new IllegalArgumentException("Wrong object type in checkStructure!");
            }
        }
    }

    /**
     * Reads the first row of the file and stores one attribute per token as
     * the structure of the data.
     *
     * @param streamTokenizer tokenizer positioned at the start of the file
     * @throws IOException if reading fails
     */
    private void readHeader(StreamTokenizer streamTokenizer) throws IOException {
        FastVector attributes = new FastVector();
        ConverterUtils.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            // NOTE(review): errms is presumably expected to raise an IOException - confirm in ConverterUtils.
            ConverterUtils.errms(streamTokenizer, "premature end of file");
        }
        // Also stop at end of file: a header without a trailing newline never
        // yields TT_EOL, and the tokenizer keeps returning TT_EOF.
        while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
                && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
            attributes.addElement(new Attribute(streamTokenizer.sval));
            ConverterUtils.getToken(streamTokenizer);
        }
        this.m_structure = new Instances(this.m_sourceFile.getName(), attributes, 0);
    }

    /**
     * Configures the tokenizer for the header row: control characters, commas
     * and tabs separate tokens, {@code %} starts a comment, single and double
     * quotes delimit strings, and line ends are reported as tokens.
     *
     * @param streamTokenizer the tokenizer to configure
     */
    private void initTokenizer(StreamTokenizer streamTokenizer) {
        streamTokenizer.resetSyntax();
        streamTokenizer.whitespaceChars(0, 31);
        streamTokenizer.wordChars(' ', 255);
        streamTokenizer.whitespaceChars(',', ',');
        streamTokenizer.whitespaceChars('\t', '\t');
        streamTokenizer.commentChar('%');
        streamTokenizer.quoteChar('"');
        streamTokenizer.quoteChar('\'');
        streamTokenizer.eolIsSignificant(true);
    }

    /**
     * Loads the file named by the first argument and prints the data set.
     *
     * @param strArr command line arguments; the first one is the CSV file
     */
    public static void main(String[] strArr) {
        if (strArr.length <= 0) {
            System.err.println("Usage:\n\tCSVLoader <file.csv>\n");
            return;
        }
        File file = new File(strArr[0]);
        try {
            CSVLoader loader = new CSVLoader();
            loader.setSource(file);
            System.out.println(loader.getDataSet());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
