package weka.filters;

import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.kstar.KStarConstants;
import weka.core.Attribute;
import weka.core.ContingencyTables;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.SparseInstance;
import weka.core.SpecialFunctions;
import weka.core.UnassignedClassException;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/* loaded from: input_file:weka-3-2-3/weka.jar:weka/filters/DiscretizeFilter.class */
public class DiscretizeFilter extends Filter implements OptionHandler, WeightedInstancesHandler {
    /** Attribute indices selected for discretization; may be inverted via its invert flag. */
    protected Range m_DiscretizeCols = new Range();
    /** Number of bins for class-blind binning; upper bound on the bin count when it is optimized. */
    protected int m_NumBins = 10;
    /** Cut points per attribute index; null until computed, and a null row means "no cut points". */
    protected double[][] m_CutPoints = null;
    /** True for class-based (MDL) discretization, false for class-blind binning. */
    protected boolean m_UseMDL = true;
    /** True to emit one two-valued attribute per cut point instead of one multi-valued attribute. */
    protected boolean m_MakeBinary = false;
    /** True to charge the MDL test only for candidate cut points actually encountered. */
    protected boolean m_UseBetterEncoding = false;
    /** True to use Kononenko's MDL criterion instead of Fayyad and Irani's. */
    protected boolean m_UseKononenko = false;
    /** True to pick the number of bins with a leave-one-out entropy estimate. */
    protected boolean m_FindNumBins = false;

    /**
     * Creates a filter whose attribute selection initially spans "first-last".
     */
    public DiscretizeFilter() {
        setAttributeIndices("first-last");
    }

    /**
     * Describes every command-line switch this filter understands.
     *
     * @return an enumeration of {@link Option} descriptors, in the order B, O, R, V, D, E, K
     */
    @Override // weka.core.OptionHandler
    public Enumeration listOptions() {
        Option[] supported = {
            new Option("\tSpecify the (maximum) number of bins to divide numeric attributes into.\n\t(default class-based discretization)", "B", 1, "-B <num>"),
            new Option("\tOptimize number of bins using leave-one-out estimate\n\t of estimated entropy.", "O", 0, "-O"),
            new Option("\tSpecify list of columns to Discretize. First and last are valid indexes.\n\t(default none)", "R", 1, "-R <col1,col2-col4,...>"),
            new Option("\tInvert matching sense of column indexes.", "V", 0, "-V"),
            new Option("\tOutput binary attributes for discretized attributes.", "D", 0, "-D"),
            new Option("\tUse better encoding of split point for MDL.", "E", 0, "-E"),
            new Option("\tUse Kononenko's MDL criterion.", "K", 0, "-K"),
        };
        Vector result = new Vector(supported.length);
        for (int k = 0; k < supported.length; k++) {
            result.addElement(supported[k]);
        }
        return result.elements();
    }

    /**
     * Configures the filter from a command-line style option array.
     *
     * <p>Recognised switches: {@code -D}, {@code -E}, {@code -K}, {@code -O}, {@code -V},
     * {@code -B <num>} and {@code -R <range>}. When neither {@code -B} nor {@code -O} is given the
     * filter uses class-based (MDL) discretization, which is the documented default. When
     * {@code -R} is absent the attribute range is reset to "first-last". If an input format has
     * already been set it is re-applied so the new settings take effect.
     *
     * @param strArr the option array; recognised entries are consumed by the {@code Utils} helpers
     * @throws Exception if an option is malformed (for example a non-integer {@code -B} value) or
     *         re-applying the input format fails
     */
    @Override // weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        setMakeBinary(Utils.getFlag('D', strArr));
        setUseBetterEncoding(Utils.getFlag('E', strArr));
        setUseKononenko(Utils.getFlag('K', strArr));
        boolean optimizeBins = Utils.getFlag('O', strArr);
        setFindNumBins(optimizeBins);
        setInvertSelection(Utils.getFlag('V', strArr));

        String binsOption = Utils.getOption('B', strArr);
        boolean binsGiven = binsOption.length() != 0;
        setBins(binsGiven ? Integer.parseInt(binsOption) : 10);

        // Several setters above flip m_UseMDL as a side effect (setBins always clears it), so the
        // mode is decided once, last: class-blind only when bins or bin optimization was requested.
        setUseMDL(!binsGiven && !optimizeBins);

        String rangeOption = Utils.getOption('R', strArr);
        setAttributeIndices(rangeOption.length() != 0 ? rangeOption : "first-last");

        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Returns the current configuration as an option array.
     *
     * @return an 11-element array holding the active switches first, padded with empty strings
     */
    @Override // weka.core.OptionHandler
    public String[] getOptions() {
        String[] options = new String[11];
        int next = 0;

        if (getMakeBinary()) {
            options[next++] = "-D";
        }
        if (getUseBetterEncoding()) {
            options[next++] = "-E";
        }
        if (getUseKononenko()) {
            options[next++] = "-K";
        }
        if (getFindNumBins()) {
            options[next++] = "-O";
        }
        if (getInvertSelection()) {
            options[next++] = "-V";
        }
        // The bin count is only reported for class-blind discretization.
        if (!getUseMDL()) {
            options[next++] = "-B";
            options[next++] = "" + getBins();
        }
        if (!getAttributeIndices().equals("")) {
            options[next++] = "-R";
            options[next++] = getAttributeIndices();
        }

        // Pad the unused tail so no entry is null.
        for (; next < options.length; next++) {
            options[next] = "";
        }
        return options;
    }

    /**
     * Sets the format of the input instances and discards any previously computed cut points.
     *
     * @param instances the structure (and possibly data) of the incoming instances
     * @return always false: the output format is not available until cut points are computed
     * @throws UnassignedClassException if class-based discretization is on and no class is set
     * @throws UnsupportedClassTypeException if class-based discretization is on and the class
     *         attribute is not nominal
     * @throws Exception if the superclass rejects the format
     */
    @Override // weka.filters.Filter
    public boolean setInputFormat(Instances instances) throws Exception {
        super.setInputFormat(instances);
        m_DiscretizeCols.setUpper(instances.numAttributes() - 1);
        m_CutPoints = null;

        if (m_UseMDL) {
            if (instances.classIndex() < 0) {
                throw new UnassignedClassException("Cannot use class-based discretization: no class assigned to the dataset");
            }
            if (!instances.classAttribute().isNominal()) {
                throw new UnsupportedClassTypeException("Supervised discretization not possible: class is not nominal!");
            }
        }
        return false;
    }

    /**
     * Feeds one instance to the filter.
     *
     * <p>Until cut points exist the instance is only buffered; afterwards it is converted and
     * queued for output immediately.
     *
     * @param instance the instance to process
     * @return true if the instance was converted and queued, false if it was buffered
     * @throws IllegalStateException if no input format has been defined
     */
    @Override // weka.filters.Filter
    public boolean input(Instance instance) {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }
        if (m_CutPoints == null) {
            bufferInput(instance);
            return false;
        }
        convertInstance(instance);
        return true;
    }

    /**
     * Signals the end of the current batch.
     *
     * <p>On the first batch this computes the cut points, fixes the output format and converts
     * every buffered instance. The input buffer is then flushed and a new batch is started.
     *
     * @return true if converted instances are waiting to be collected
     * @throws IllegalStateException if no input format has been defined
     */
    @Override // weka.filters.Filter
    public boolean batchFinished() {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_CutPoints == null) {
            calculateCutPoints();
            setOutputFormat();
            Instances buffered = getInputFormat();
            int total = buffered.numInstances();
            for (int k = 0; k < total; k++) {
                convertInstance(buffered.instance(k));
            }
        }
        flushInput();
        m_NewBatch = true;
        return numPendingOutput() != 0;
    }

    /**
     * Gives a short description of this filter for display to users.
     *
     * @return the description text
     */
    public String globalInfo() {
        final String description = "An instance filter that discretizes a range of numeric attributes in the dataset into nominal attributes. Discretization can be either by simple binning, or by Fayyad & Irani's MDL method (the default).";
        return description;
    }

    /**
     * @return the tip text for the findNumBins property
     */
    public String findNumBinsTipText() {
        final String tip = "Optimize bins using leave-one-out.";
        return tip;
    }

    /**
     * @return true if the number of bins is chosen by the leave-one-out estimate
     */
    public boolean getFindNumBins() {
        return m_FindNumBins;
    }

    /**
     * Sets whether the number of bins is optimized. As a side effect this always switches
     * class-based (MDL) discretization off, whatever the argument is.
     *
     * @param z true to optimize the number of bins
     */
    public void setFindNumBins(boolean z) {
        m_FindNumBins = z;
        m_UseMDL = false;
    }

    /**
     * @return the tip text for the makeBinary property
     */
    public String makeBinaryTipText() {
        final String tip = "Make resulting attributes binary.";
        return tip;
    }

    /**
     * @return true if discretized attributes are emitted as binary attributes
     */
    public boolean getMakeBinary() {
        return m_MakeBinary;
    }

    /**
     * Sets whether discretized attributes are emitted as binary attributes.
     *
     * @param z true to produce one two-valued attribute per cut point
     */
    public void setMakeBinary(boolean z) {
        m_MakeBinary = z;
    }

    /**
     * @return the tip text for the useMDL property
     */
    public String useMDLTipText() {
        final String tip = "Use class-based discretization. If set to false, does not require a class attribute, and uses a fixed number of bins (according to bins setting).";
        return tip;
    }

    /**
     * @return true if class-based (MDL) discretization is used
     */
    public boolean getUseMDL() {
        return m_UseMDL;
    }

    /**
     * Selects between class-based (MDL) and class-blind discretization.
     *
     * @param z true for class-based discretization
     */
    public void setUseMDL(boolean z) {
        m_UseMDL = z;
    }

    /**
     * @return the tip text for the useKononenko property
     */
    public String useKononenkoTipText() {
        final String tip = "Use Kononenko's MDL criterion. If set to false uses the Fayyad & Irani criterion.";
        return tip;
    }

    /**
     * @return true if Kononenko's MDL criterion is used
     */
    public boolean getUseKononenko() {
        return m_UseKononenko;
    }

    /**
     * Sets whether Kononenko's MDL criterion is used. As a side effect this always switches
     * class-based (MDL) discretization on, whatever the argument is.
     *
     * @param z true for Kononenko's criterion, false for Fayyad and Irani's
     */
    public void setUseKononenko(boolean z) {
        m_UseKononenko = z;
        m_UseMDL = true;
    }

    /**
     * @return the tip text for the useBetterEncoding property
     */
    public String useBetterEncodingTipText() {
        final String tip = "Uses a different split point encoding. Who says it's better? (Eibe fix this).";
        return tip;
    }

    /**
     * @return true if the alternative split point encoding is used
     */
    public boolean getUseBetterEncoding() {
        return m_UseBetterEncoding;
    }

    /**
     * Sets whether the alternative split point encoding is used. As a side effect this always
     * switches class-based (MDL) discretization on, whatever the argument is.
     *
     * @param z true to use the alternative encoding
     */
    public void setUseBetterEncoding(boolean z) {
        m_UseBetterEncoding = z;
        m_UseMDL = true;
    }

    /**
     * @return the tip text for the bins property
     */
    public String binsTipText() {
        final String tip = "Number of bins for class-blind discretisation. This setting is ignored if MDL-based discretisation is used.";
        return tip;
    }

    /**
     * @return the configured number of bins
     */
    public int getBins() {
        return m_NumBins;
    }

    /**
     * Sets the number of bins. As a side effect this always switches class-based (MDL)
     * discretization off.
     *
     * @param i the number of bins
     */
    public void setBins(int i) {
        m_NumBins = i;
        m_UseMDL = false;
    }

    /**
     * @return the tip text for the invertSelection property
     */
    public String invertSelectionTipText() {
        final String tip = "Set attribute selection mode. If false, only selected (numeric) attributes in the range will be discretized; if true, only non-selected attributes will be discretized.";
        return tip;
    }

    /**
     * @return the invert flag of the attribute range
     */
    public boolean getInvertSelection() {
        boolean inverted = m_DiscretizeCols.getInvert();
        return inverted;
    }

    /**
     * Sets the invert flag of the attribute range.
     *
     * @param z true to act on the attributes outside the given range
     */
    public void setInvertSelection(boolean z) {
        m_DiscretizeCols.setInvert(z);
    }

    /**
     * @return the tip text for the attributeIndices property
     */
    public String attributeIndicesTipText() {
        final String tip = "Specify range of attributes to act on. This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
        return tip;
    }

    /**
     * @return the attribute range in its string form
     */
    public String getAttributeIndices() {
        String ranges = m_DiscretizeCols.getRanges();
        return ranges;
    }

    /**
     * Sets the attributes to act on from a range string.
     *
     * @param str the range list, in the format described by {@link #attributeIndicesTipText()}
     */
    public void setAttributeIndices(String str) {
        m_DiscretizeCols.setRanges(str);
    }

    /**
     * Sets the attributes to act on from an array of indices, which is converted to a range
     * string by {@code Range.indicesToRangeList}.
     *
     * @param iArr the attribute indices
     */
    public void setAttributeIndicesArray(int[] iArr) {
        String asRangeList = Range.indicesToRangeList(iArr);
        setAttributeIndices(asRangeList);
    }

    /**
     * Returns the cut points computed for one attribute.
     *
     * @param i the attribute index
     * @return the stored cut point array (not a copy), or null if cut points have not been
     *         computed yet or the attribute has none
     */
    public double[] getCutPoints(int i) {
        return m_CutPoints == null ? null : m_CutPoints[i];
    }

    /**
     * Computes cut points for every selected numeric attribute of the buffered input.
     *
     * <p>The method used per attribute follows the current settings: class-based MDL, optimized
     * number of equal-width bins, or a fixed number of equal-width bins. Attributes that are not
     * selected or not numeric keep a null row.
     */
    protected void calculateCutPoints() {
        int numAttributes = getInputFormat().numAttributes();
        // One (initially null) row per attribute; rows are filled in by the helpers below.
        m_CutPoints = new double[numAttributes][];

        // MDL sorts the data, so it works on a copy that is created only when first needed.
        Instances sortable = null;
        for (int att = numAttributes - 1; att >= 0; att--) {
            boolean selected = m_DiscretizeCols.isInRange(att)
                && getInputFormat().attribute(att).isNumeric();
            if (!selected) {
                continue;
            }
            if (m_UseMDL) {
                if (sortable == null) {
                    sortable = new Instances(getInputFormat());
                }
                calculateCutPointsByMDL(att, sortable);
            } else if (m_FindNumBins) {
                findNumBins(att);
            } else {
                calculateCutPointsByBinning(att);
            }
        }
    }

    /**
     * Computes class-based cut points for one attribute and stores them.
     *
     * <p>The instances are sorted on the attribute and only the leading run up to the first
     * missing value is handed to the recursive splitter.
     *
     * @param i the attribute index
     * @param instances the data to work on; it is sorted in place
     */
    protected void calculateCutPointsByMDL(int i, Instances instances) {
        instances.sort(instances.attribute(i));

        // Index of the first instance with a missing value, or the full size if there is none.
        int firstMissing = instances.numInstances();
        for (int k = 0; k < instances.numInstances(); k++) {
            if (instances.instance(k).isMissing(i)) {
                firstMissing = k;
                break;
            }
        }
        m_CutPoints[i] = cutPointsForSubset(instances, i, 0, firstMissing);
    }

    /**
     * Applies Kononenko's MDL criterion to a candidate split.
     *
     * @param dArr class weight counts before the split
     * @param dArr2 class weight counts per subset after the split
     * @param d total weight of the instances being split
     * @param i number of candidate cut points
     * @return the result of {@code Utils.gr(before, after)}, i.e. whether the cost without the
     *         split exceeds the cost with it
     */
    private boolean KononenkosMDL(double[] dArr, double[][] dArr2, double d, int i) {
        // Count the classes that actually occur.
        int classesPresent = 0;
        for (int c = 0; c < dArr.length; c++) {
            if (dArr[c] > KStarConstants.FLOOR) {
                classesPresent++;
            }
        }

        double before = SpecialFunctions.log2Multinomial(d, dArr) + SpecialFunctions.log2Binomial((d + classesPresent) - 1.0d, classesPresent - 1);

        double distributionCost = 0.0d;
        double instanceCost = 0.0d;
        for (double[] subsetCounts : dArr2) {
            double subsetTotal = Utils.sum(subsetCounts);
            distributionCost += SpecialFunctions.log2Binomial((subsetTotal + classesPresent) - 1.0d, classesPresent - 1);
            instanceCost += SpecialFunctions.log2Multinomial(subsetTotal, subsetCounts);
        }

        double after = Utils.log2(i) + distributionCost + instanceCost;
        return Utils.gr(before, after);
    }

    /**
     * Applies Fayyad and Irani's MDL criterion to a candidate binary split.
     *
     * @param dArr class weight counts before the split
     * @param dArr2 class weight counts of the two subsets (row 0 and row 1) after the split
     * @param d total weight of the instances being split
     * @param i number of candidate cut points
     * @return the result of {@code Utils.gr(gain, threshold)}, i.e. whether the entropy gain
     *         exceeds the MDL threshold
     */
    private boolean FayyadAndIranisMDL(double[] dArr, double[][] dArr2, double d, int i) {
        double priorEntropy = ContingencyTables.entropy(dArr);
        double gain = priorEntropy - ContingencyTables.entropyConditionedOnRows(dArr2);

        // Number of classes occurring overall and in each of the two subsets.
        int classesOverall = 0;
        for (int c = 0; c < dArr.length; c++) {
            if (dArr[c] > KStarConstants.FLOOR) {
                classesOverall++;
            }
        }
        int classesInFirst = 0;
        for (double count : dArr2[0]) {
            if (count > KStarConstants.FLOOR) {
                classesInFirst++;
            }
        }
        int classesInSecond = 0;
        for (double count : dArr2[1]) {
            if (count > KStarConstants.FLOOR) {
                classesInSecond++;
            }
        }

        double entropySecond = ContingencyTables.entropy(dArr2[1]);
        double entropyFirst = ContingencyTables.entropy(dArr2[0]);
        double delta = Utils.log2(Math.pow(3.0d, classesOverall) - 2.0d) - (((classesOverall * priorEntropy) - (classesInSecond * entropySecond)) - (classesInFirst * entropyFirst));
        double threshold = (Utils.log2(i) + delta) / d;
        return Utils.gr(gain, threshold);
    }

    /**
     * Recursively selects MDL-accepted cut points for a run of instances sorted on one attribute.
     *
     * <p>The boundary with the lowest class entropy is found; if the configured MDL criterion
     * accepts it, both sides are split recursively and the results are merged in ascending order.
     * A cut point is the attribute value of the last instance on the lower side of the boundary.
     *
     * @param instances the data, sorted on attribute {@code i}
     * @param i the attribute index
     * @param i2 index of the first instance of the run
     * @param i3 index one past the last instance of the run
     * @return the cut points for the run, or null if it is not split
     */
    private double[] cutPointsForSubset(Instances instances, int i, int i2, int i3) {
        // Fewer than two instances cannot be split.
        if (i3 - i2 < 2) {
            return null;
        }

        int numClasses = instances.numClasses();

        // counts[0] holds the class weights below the current boundary, counts[1] those above it.
        double[][] counts = new double[2][numClasses];
        // Kept as a double: an int accumulator would truncate fractional instance weights and
        // hand the MDL tests a total smaller than the class counts it is compared against.
        double totalWeight = 0.0d;
        for (int k = i2; k < i3; k++) {
            Instance inst = instances.instance(k);
            totalWeight += inst.weight();
            counts[1][(int) inst.classValue()] += inst.weight();
        }

        double[] priorCounts = new double[numClasses];
        System.arraycopy(counts[1], 0, priorCounts, 0, numClasses);
        double priorEntropy = ContingencyTables.entropy(priorCounts);

        double bestEntropy = priorEntropy;
        double bestCutPoint = -1.0d;
        int bestIndex = -1;
        int numCutPoints = 0;
        double[][] bestCounts = new double[2][numClasses];

        for (int k = i2; k < i3 - 1; k++) {
            Instance current = instances.instance(k);
            int classValue = (int) current.classValue();
            counts[0][classValue] += current.weight();
            counts[1][classValue] -= current.weight();

            // Only a change in attribute value is a candidate boundary.
            if (Utils.sm(current.value(i), instances.instance(k + 1).value(i))) {
                double currentEntropy = ContingencyTables.entropyConditionedOnRows(counts);
                if (Utils.sm(currentEntropy, bestEntropy)) {
                    bestCutPoint = current.value(i);
                    bestEntropy = currentEntropy;
                    bestIndex = k;
                    System.arraycopy(counts[0], 0, bestCounts[0], 0, numClasses);
                    System.arraycopy(counts[1], 0, bestCounts[1], 0, numClasses);
                }
                numCutPoints++;
            }
        }

        // Default encoding charges for every position between instances, not just candidates.
        if (!m_UseBetterEncoding) {
            numCutPoints = (i3 - i2) - 1;
        }

        // No entropy reduction means there is nothing to gain from splitting.
        // NOTE(review): KStarConstants.FLOOR is presumably 0.0 - confirm.
        if (Utils.eq(priorEntropy - bestEntropy, KStarConstants.FLOOR)) {
            return null;
        }

        boolean accepted = m_UseKononenko
            ? KononenkosMDL(priorCounts, bestCounts, totalWeight, numCutPoints)
            : FayyadAndIranisMDL(priorCounts, bestCounts, totalWeight, numCutPoints);
        if (!accepted) {
            return null;
        }

        double[] lower = cutPointsForSubset(instances, i, i2, bestIndex + 1);
        double[] upper = cutPointsForSubset(instances, i, bestIndex + 1, i3);
        int lowerLength = lower == null ? 0 : lower.length;
        int upperLength = upper == null ? 0 : upper.length;

        // Merge as: lower cut points, this cut point, upper cut points.
        double[] merged = new double[lowerLength + 1 + upperLength];
        if (lower != null) {
            System.arraycopy(lower, 0, merged, 0, lowerLength);
        }
        merged[lowerLength] = bestCutPoint;
        if (upper != null) {
            System.arraycopy(upper, 0, merged, lowerLength + 1, upperLength);
        }
        return merged;
    }

    /**
     * Computes equal-width cut points for one attribute and stores them.
     *
     * <p>The observed value range is divided into {@code m_NumBins} bins of equal width. A null
     * row is stored when fewer than two bins are requested or the bin width is not above
     * {@code KStarConstants.FLOOR} (for example when every value is missing or identical).
     *
     * @param i the attribute index
     */
    protected void calculateCutPointsByBinning(int i) {
        // Start with highest < lowest: that state marks "no value seen yet".
        double highest = 0.0d;
        double lowest = 1.0d;
        for (int k = 0; k < getInputFormat().numInstances(); k++) {
            Instance inst = getInputFormat().instance(k);
            if (inst.isMissing(i)) {
                continue;
            }
            double v = inst.value(i);
            if (highest < lowest) {
                highest = v;
                lowest = v;
            } else if (v > highest) {
                highest = v;
            } else if (v < lowest) {
                lowest = v;
            }
        }

        double binWidth = (highest - lowest) / m_NumBins;
        double[] cutPoints = null;
        if (m_NumBins > 1 && binWidth > KStarConstants.FLOOR) {
            cutPoints = new double[m_NumBins - 1];
            for (int bin = 1; bin < m_NumBins; bin++) {
                cutPoints[bin - 1] = lowest + (binWidth * bin);
            }
        }
        m_CutPoints[i] = cutPoints;
    }

    /**
     * Chooses the number of equal-width bins for one attribute and stores the matching cut points.
     *
     * <p>Every bin count from 1 to {@code m_NumBins} is scored with a leave-one-out entropy
     * estimate; the lowest score wins, and earlier (smaller) counts win ties. A bin count is
     * rejected outright if any of its bins holds a total weight below 2. A null row is stored
     * when a single bin wins or the value range is empty.
     *
     * @param i the attribute index
     */
    protected void findNumBins(int i) {
        Instances data = getInputFormat();
        int numInstances = data.numInstances();

        // -Double.MAX_VALUE is the lowest finite double; -Double.MIN_VALUE is only just below
        // zero and would give a wrong maximum when all values are negative.
        double min = Double.MAX_VALUE;
        double max = -Double.MAX_VALUE;
        for (int k = 0; k < numInstances; k++) {
            Instance inst = data.instance(k);
            if (!inst.isMissing(i)) {
                double value = inst.value(i);
                if (value > max) {
                    max = value;
                }
                if (value < min) {
                    min = value;
                }
            }
        }
        double range = max - min;

        double bestEntropy = Double.MAX_VALUE;
        int bestNumBins = 1;
        for (int numBins = 1; numBins <= m_NumBins; numBins++) {
            double binWidth = range / numBins;

            // Weight falling into each bin.
            double[] distribution = new double[numBins];
            for (int k = 0; k < numInstances; k++) {
                Instance inst = data.instance(k);
                if (inst.isMissing(i)) {
                    continue;
                }
                double value = inst.value(i);
                // The search stops at the last bin, so it always terminates, and a maximum that
                // rounding puts just above the final boundary is still counted.
                int bin = 0;
                while (bin < numBins - 1 && value > min + ((bin + 1.0d) * binWidth)) {
                    bin++;
                }
                distribution[bin] += inst.weight();
            }

            // Leave-one-out entropy estimate.
            double entropy = 0.0d;
            for (int bin = 0; bin < numBins; bin++) {
                if (distribution[bin] < 2.0d) {
                    entropy = Double.MAX_VALUE;
                    break;
                }
                entropy -= distribution[bin] * Math.log((distribution[bin] - 1.0d) / binWidth);
            }

            if (entropy < bestEntropy) {
                bestEntropy = entropy;
                bestNumBins = numBins;
            }
        }

        // Use the width of the winning bin count, not of the last one tried.
        double bestWidth = range / bestNumBins;
        double[] cutPoints = null;
        if (bestNumBins > 1 && bestWidth > KStarConstants.FLOOR) {
            cutPoints = new double[bestNumBins - 1];
            for (int bin = 1; bin < bestNumBins; bin++) {
                cutPoints[bin - 1] = min + (bestWidth * bin);
            }
        }
        m_CutPoints[i] = cutPoints;
    }

    /**
     * Builds the output format from the input format and the computed cut points.
     *
     * <p>Attributes that are not discretized are copied. A discretized attribute without cut
     * points becomes a nominal attribute with the single value 'All'. Otherwise it becomes either
     * one nominal attribute with one value per interval, or, in binary mode, one two-valued
     * attribute per cut point (the class index is shifted to account for the extra attributes).
     * If no cut points exist at all the output format is set to null.
     */
    protected void setOutputFormat() {
        if (m_CutPoints == null) {
            setOutputFormat(null);
            return;
        }

        Instances input = getInputFormat();
        int numAttributes = input.numAttributes();
        FastVector attributes = new FastVector(numAttributes);
        int classIndex = input.classIndex();

        for (int att = 0; att < numAttributes; att++) {
            Attribute source = input.attribute(att);
            if (!m_DiscretizeCols.isInRange(att) || !source.isNumeric()) {
                attributes.addElement(source.copy());
                continue;
            }

            double[] cuts = m_CutPoints[att];
            if (cuts == null) {
                // Same single-valued attribute in both binary and non-binary mode.
                FastVector single = new FastVector(1);
                single.addElement("'All'");
                attributes.addElement(new Attribute(source.name(), single));
                continue;
            }

            if (m_MakeBinary) {
                // Each cut point adds an attribute; keep the class index pointing at the class.
                if (att < input.classIndex()) {
                    classIndex += cuts.length - 1;
                }
                for (int c = 0; c < cuts.length; c++) {
                    FastVector labels = new FastVector(2);
                    labels.addElement("'(-inf-" + Utils.doubleToString(cuts[c], 6) + "]'");
                    labels.addElement("'(" + Utils.doubleToString(cuts[c], 6) + "-inf)'");
                    attributes.addElement(new Attribute(source.name(), labels));
                }
            } else {
                FastVector labels = new FastVector(1);
                // Lowest interval, then the inner intervals, then the open-ended top interval.
                labels.addElement("'(-inf-" + Utils.doubleToString(cuts[0], 6) + "]'");
                for (int c = 1; c < cuts.length; c++) {
                    labels.addElement("'(" + Utils.doubleToString(cuts[c - 1], 6) + "-" + Utils.doubleToString(cuts[c], 6) + "]'");
                }
                labels.addElement("'(" + Utils.doubleToString(cuts[cuts.length - 1], 6) + "-inf)'");
                attributes.addElement(new Attribute(source.name(), labels));
            }
        }

        Instances output = new Instances(input.relationName(), attributes, 0);
        output.setClassIndex(classIndex);
        setOutputFormat(output);
    }

    /**
     * Converts one instance to the output format and pushes it onto the output queue.
     *
     * <p>Values of attributes that are not discretized are copied. A discretized value is
     * replaced by the index of its interval, or in binary mode by one 0/1 value per cut point
     * (0 when the value is at or below the cut point). Missing values stay missing.
     *
     * @param instance the instance to convert
     */
    protected void convertInstance(Instance instance) {
        int out = 0;
        double[] values = new double[outputFormatPeek().numAttributes()];
        Instances input = getInputFormat();

        for (int att = 0; att < input.numAttributes(); att++) {
            boolean discretized = m_DiscretizeCols.isInRange(att) && input.attribute(att).isNumeric();
            double value = instance.value(att);
            if (!discretized) {
                values[out++] = value;
                continue;
            }

            boolean missing = instance.isMissing(att);
            double[] cuts = m_CutPoints[att];
            if (cuts == null) {
                // Single 'All' value.
                values[out++] = missing ? Instance.missingValue() : 0.0d;
            } else if (m_MakeBinary) {
                for (int c = 0; c < cuts.length; c++) {
                    if (missing) {
                        values[out] = Instance.missingValue();
                    } else {
                        values[out] = value <= cuts[c] ? 0.0d : 1.0d;
                    }
                    out++;
                }
            } else {
                if (missing) {
                    values[out] = Instance.missingValue();
                } else {
                    // Interval index = number of cut points strictly below the value.
                    int interval = 0;
                    while (interval < cuts.length && value > cuts[interval]) {
                        interval++;
                    }
                    values[out] = interval;
                }
                out++;
            }
        }

        // Keep the sparse representation if the input used it.
        Instance converted;
        if (instance instanceof SparseInstance) {
            converted = new SparseInstance(instance.weight(), values);
        } else {
            converted = new Instance(instance.weight(), values);
        }
        copyStringValues(converted, false, instance.dataset(), getInputStringIndex(), getOutputFormat(), getOutputStringIndex());
        converted.setDataset(getOutputFormat());
        push(converted);
    }

    /**
     * Command-line entry point: runs the filter in batch mode when {@code -b} is given and in
     * single-file mode otherwise. Any failure is reported by printing its message.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            boolean batchMode = Utils.getFlag('b', strArr);
            DiscretizeFilter filter = new DiscretizeFilter();
            if (batchMode) {
                Filter.batchFilterFile(filter, strArr);
            } else {
                Filter.filterFile(filter, strArr);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }
}
