/*
 * Decompiled with CFR 0.152.
 */
package com.thierrymasson.qlearner;

public class QLearning
implements Runnable {
    boolean DEBUG = false;
    int iter_trace = 500000;
    static final int A_N = 0;
    static final int A_E = 1;
    static final int A_S = 2;
    static final int A_O = 3;
    long currentIter = -1L;
    long iterMax = 100000L;
    double exploration = 0.7;
    double amortissement = 0.9;
    double recompense = 500.0;
    double penalite = 100.0;
    double apprentissage = 0.1;
    double[][][] QValues;
    int[][] map;
    int[][] resMap;
    double Q_S = 0.0;
    int Actions = 4;
    int hits = 0;
    boolean d_apprent = false;
    double apprent_step = 0.0;

    public QLearning(int Qsize) {
        this.initQValues(Qsize);
    }

    public QLearning(long itMax, double explore, double amort, double recomp, double penal, double apprent, int Qsize, int[][] mapDefine, boolean d_apprent) {
        this.iterMax = itMax;
        this.exploration = explore;
        this.amortissement = amort;
        this.recompense = recomp;
        this.penalite = penal;
        this.apprentissage = apprent;
        this.Q_S = Qsize;
        this.map = mapDefine;
        this.initQValues(Qsize);
        this.d_apprent = d_apprent;
        if (this.d_apprent) {
            this.apprent_step = this.apprentissage / (double)this.iterMax;
        }
    }

    private void initQValues(int Qsize) {
        this.QValues = new double[Qsize][Qsize][this.Actions];
        for (int i = 0; i < Qsize; ++i) {
            for (int j = 0; j < Qsize; ++j) {
                for (int a = 0; a < this.Actions; ++a) {
                    this.QValues[i][j][a] = 0.0;
                }
            }
        }
    }

    public int[][] giveResMap() {
        int i = 0;
        while ((double)i < this.Q_S) {
            int j = 0;
            while ((double)j < this.Q_S) {
                int[] nArray = this.map[i];
                int n = j;
                nArray[n] = nArray[n] + (100 + 100 * this.giveBestAction(i, j));
                ++j;
            }
            ++i;
        }
        return this.map;
    }

    private int giveBestAction(int StateI, int StateJ) {
        int theBest = -1;
        double theBestVal = -100000.0;
        for (int i = 0; i < this.Actions; ++i) {
            if (!(this.QValues[StateI][StateJ][i] > theBestVal)) continue;
            theBestVal = this.QValues[StateI][StateJ][i];
            theBest = i;
        }
        return theBest;
    }

    private double giveBestQValue(int StateI, int StateJ) {
        double theBest = -1000000.0;
        for (int i = 0; i < this.Actions; ++i) {
            if (!(this.QValues[StateI][StateJ][i] > theBest)) continue;
            theBest = this.QValues[StateI][StateJ][i];
        }
        return theBest;
    }

    private int[] giveNextState(int StateI, int StateJ, int action) {
        int[] result = new int[2];
        switch (action) {
            case 0: {
                if (StateI == 0) {
                    result[0] = StateI;
                    result[1] = StateJ;
                    break;
                }
                result[0] = StateI - 1;
                result[1] = StateJ;
                break;
            }
            case 1: {
                if ((double)StateJ == this.Q_S - 1.0) {
                    result[0] = StateI;
                    result[1] = StateJ;
                    break;
                }
                result[0] = StateI;
                result[1] = StateJ + 1;
                break;
            }
            case 2: {
                if ((double)StateI == this.Q_S - 1.0) {
                    result[0] = StateI;
                    result[1] = StateJ;
                    break;
                }
                result[0] = StateI + 1;
                result[1] = StateJ;
                break;
            }
            case 3: {
                if (StateJ == 0) {
                    result[0] = StateI;
                    result[1] = StateJ;
                    break;
                }
                result[0] = StateI;
                result[1] = StateJ - 1;
            }
        }
        return result;
    }

    private double recompenser(int StateI, int StateJ, int action) {
        double result = 0.0;
        int[] nextState = this.giveNextState(StateI, StateJ, action);
        int location = this.map[nextState[0]][nextState[1]];
        switch (location) {
            case 0: {
                break;
            }
            case 1: {
                result = this.penalite;
                break;
            }
            case 2: {
                result = this.recompense;
            }
        }
        return result;
    }

    private int choisirAction(int StateI, int StateJ) {
        double rval = Math.random();
        int actionChoisie = rval < this.exploration ? Math.round((float)Math.random() * (float)(this.Actions - 1)) : this.giveBestAction(StateI, StateJ);
        return actionChoisie;
    }

    private int[] departAleatoire() {
        int[] result = new int[]{Math.round((float)(Math.random() * (this.Q_S - 1.0))), Math.round((float)(Math.random() * (this.Q_S - 1.0)))};
        return result;
    }

    private void majQValue(int StateI, int StateJ, int act) {
        double oldValue = this.QValues[StateI][StateJ][act];
        double rec = this.recompenser(StateI, StateJ, act);
        int[] nextState = this.giveNextState(StateI, StateJ, act);
        double Qstar = this.giveBestQValue(nextState[0], nextState[1]);
        this.QValues[StateI][StateJ][act] = (1.0 - this.apprentissage) * oldValue + this.apprentissage * (rec + this.amortissement * Qstar);
    }

    public void run() {
        int[] CurrState = this.departAleatoire();
        for (long iter = 0L; iter < this.iterMax; ++iter) {
            this.currentIter = iter;
            if (this.DEBUG && iter % (long)this.iter_trace == (long)0) {
                System.err.println("Iteration =".concat(String.valueOf(String.valueOf(iter))));
            }
            int currAct = this.choisirAction(CurrState[0], CurrState[1]);
            this.majQValue(CurrState[0], CurrState[1], currAct);
            int[] nextState = this.giveNextState(CurrState[0], CurrState[1], currAct);
            switch (this.map[nextState[0]][nextState[1]]) {
                case 0: {
                    CurrState = nextState;
                    break;
                }
                case 1: {
                    CurrState = nextState;
                    break;
                }
                case 2: {
                    CurrState = this.departAleatoire();
                    ++this.hits;
                }
            }
            if (!this.d_apprent) continue;
            this.apprentissage -= this.apprent_step;
        }
        if (this.DEBUG) {
            System.err.println("Total hits =".concat(String.valueOf(String.valueOf(this.hits))));
        }
    }

    public int getTargetHits() {
        return this.hits;
    }

    public long getCurrentIter() {
        return this.currentIter;
    }

    public long getIterMax() {
        return this.iterMax;
    }

    static {
        A_N = 0;
        A_E = 1;
        A_S = 2;
        A_O = 3;
    }
}

