/* This is a header file for the RMDP class, which represents
   Random Markov Decision Processes (both discrete and continuous
   state spaces).

   File rmdp.h
   Last modified: May, 2002
   Copyright: Bohdana Ratitch, SOCS, McGill University
*/

#include <stdlib.h>
#include <string.h>
#include<stdio.h>
#include<math.h>
#include<sys/types.h>		
#include<iostream.h>
#include<fstream.h>

#ifndef INTERFACE_CLASSES	
#define INTERFACE_CLASSES
#include "interface_classes.h"
#endif

// Constants that specify the task of the generator.
// They are passed as the "task" argument of the RMDP constructor.
// NOTE(review): these are lowercase, unprefixed object-like macros, so the
// preprocessor will also replace any identifier spelled "load", "generate",
// "discrete", etc. in code that includes this header.
#define load 1
#define generate 0
#define generate_with_constraints 2

// Type of the RMDP (discrete or continuous state space).
#define discrete 1
#define continuous 0


class RMDPparameters; // forward declaration; the complete definition is at the end of this file

/////////////////////////////////////////////////////////////////////////////

/* RMDP: an Environment that represents a Random Markov Decision Process
   with either a discrete or a continuous state space. An object is either
   generated from parameters, generated from a constraints file, or loaded
   from a previously saved description (see the constructor).
*/
class RMDP : public Environment{
 
 public:

  RMDP(int task, RMDPparameters& p, ActionSet** as, bool save);
  /* Constructor.
     Argument "task" specifies whether an RMDP should be generated or loaded.
     Use the global constants "load", "generate" or
     "generate_with_constraints" to pass the value.

         Response to values:

         "generate" - an object is generated with the parameters passed
                  in "p".
         "load" - load a previously generated RMDP with the name specified
                  in p.rmdpName (could be passed through the command line
                  argument "rmdp" to main() and then processed by the
                  p.process(...) method).
         "generate_with_constraints" - generate an object with constraints
                  specified in the text file whose name is provided in
                  p.UDC (could be passed through the command line
                  argument "UDC" to main() and then processed by the
                  p.process(...) method).

     Argument "p" passes all parameters with which an object is generated.
     (See the comments for the RMDPparameters class for detailed information.)

     Argument "as" returns the created action set to the caller.
     NOTE(review): ownership of the returned action set (who deletes it) is
     not stated in this header - confirm against the implementation.

     Argument "save" indicates whether the parameters of the generated RMDP
     should be saved to text files on the disk. Pass "true" or "false".
     If "true", the files are saved in the directory whose name is provided
     in p.rmdpName (could be passed through the command line argument "rmdp"
     to main() and then processed by the p.process(...) method).
  */

  bool applicable(const State& s, const Action& a);
  /* Implements a virtual function from the base class.
     Checks whether action "a" is applicable in state "s".
  */

  void bound(int i, bool& bounded, double& left, double& right);
  /* Gives the bounds on the values of a state variable.
     "i" - index of the state variable
     "bounded" - return value - indicates whether the i^th variable is bounded
     "left" - return value - left bound
     "right" - return value - right bound
  */

  void startState(State& start, bool& terminal);
  /* Implements a virtual function from the base class.
     Here, samples a start state according to a uniform distribution.
     The sampled state is returned in the argument "start" and the
     "CurrentState" data member of the object is set to the sampled state.
     
     The second argument, "terminal", returns an indication of whether the
     sampled state is terminal ("true" or "false").
  */

  void uniformStateSample(State& s);
  /* Implements a virtual function from the base class.
     Returns a uniformly sampled state in the argument "s".
   */
	
  void setState(const State& s, bool& terminal);
  /* Sets the current state ("CurrentState" data member) to "s" and returns
     an indication of whether that state is terminal in the "terminal"
     argument.
  */

	
  void transition(const Action& action, State& s_new, double& r_new, bool& terminal);
  /* Implements a transition from the current state in response to the
     action performed by the agent.

     "action" - input value - action performed by the agent
     "s_new" - return value - new state
     "r_new" - return value - new reward
     "terminal" - return value - indication of whether "s_new" is a
                  terminal state

     The new current state is then remembered by the object.
  */

	 
  void saveRMDP(char* rmdpName, ActionSet* as);
  /* Saves the parameters of the generated RMDP into text files in the
     directory whose name is provided by the argument "rmdpName".
     The general description is saved in a file with the extension .rgp.
     The file names get a prefix that is the same as the name of the
     subdirectory in which they are stored (the last subdirectory in
     the path specified in "rmdpName").
   */
 
  

  ~RMDP();
  /* Destructor.
     NOTE(review): the class holds raw pointer members and declares a
     destructor, but no copy constructor or assignment operator is declared
     here - confirm that objects of this class are never copied.
  */

 

  static void generateRMDPconstraints(RMDPparameters& p);
  /* Generates random constraints with the parameters provided in the
     argument "p". The constraints are saved in a text file. The name of
     the file has to be specified in p.UDC (could be passed through the
     command line argument "UDC" to main() and then processed by the
     p.process(...) method).
  */

 protected:

  void loadRMDP(char* fileName, ActionSet** as);
  /* Loads a previously generated (or user created) MDP from a description
     in the text file "fileName".
     Returns a new action set to the calling function through "as".
  */

  void generateRMDP(RMDPparameters& p, ActionSet** as);
  /* Generates an RMDP with the parameters in "p".
     NOTE(review): "as" presumably returns the new action set, as in the
     other overload - confirm.
   */

  void generateRMDP(char* fileName, ActionSet** as);
  /* Generates an RMDP using the constraints specified in the text file
     "fileName".
     Returns a new action set to the calling function through "as".
  */

  void purturb(int size, double* w, int S, int Values=0, double low=0.0, double high=1.0, int* dare=NULL);
  /* Perturbs the elements of an array.
     "size" - size of the array to perturb.
     "w" - array to be perturbed.
     "S" - S/100 is the probability with which each element remains the same.
     "Values" - select new values from a discrete set of size Values.
                NOTE(review): the meaning of the default Values=0 is not
                documented here - confirm against the implementation.
     "low" - lower bound on the range from which to select.
     "high" - upper bound on the range from which to select.
     "dare" - array of size "size" that indicates which elements of the
              array "w" should be considered for perturbing. If dare==NULL,
              all elements of "w" are subject to being perturbed with
              probability 1-S/100.
  */

  

  double discreteSample(double low, double high, int V);
  /* Samples from a discrete set of V values that are spread uniformly in
     the interval [low, high] (the interval is divided into V subintervals
     and the discrete values lie in the middle of the subintervals).
  */

  //data members
  // NOTE(review): the scalar members below are not documented in the
  // original header; the descriptions are inferred from their names and
  // from the like-named RMDPparameters members - confirm.
  int Type;              // presumably one of the "discrete"/"continuous" constants
  int Variables;			// presumably the number of state variables
  int Actions;           // presumably the number of actions
  int* Discretize;       // presumably the number of discrete units per variable
  int* BranchingFactor;  // presumably the branching factor for each action
  int maxBF;             // presumably the maximum branching factor
  int DependencyFactor;  // marked "not currently used" in RMDPparameters
  
  StateActionFA** BranchingProbability;
  /* Pointer to a 1-dimensional array of functions representing branching
     probabilities.
     Index limit - maximum branching factor.
  */ 
  StateActionFA*** TransitionMean;	
  /* Pointer to a 2-dimensional array of functions representing transition
     means.
     Number of rows: Variables
     Number of columns: maximum branching factor
  */

  StateActionFA*** TransitionVariance;
  /* Pointer to a 2-dimensional array of functions representing transition
     variances.
     Number of rows: Variables
     Number of columns: maximum branching factor
  */

  StateActionFA* RewardMean;
  /* Pointer to a 1-dimensional array of functions that represent reward
     means.
     Number of rows: Actions
  */

  StateActionFA* RewardVariance;		
  /* Pointer to a 1-dimensional array of functions that represent reward
     variances.
     Number of rows: Actions
  */

  Approximator* TerminationProbability;
  /* A pointer to a function representing termination probabilities.
   */

};

////////////////////////////////////////////////////////
/* RMDPparameters: holds the parameters used to generate or load an RMDP,
   or to generate random constraints. The values can be filled in from the
   command line with process(...) and validated with check(...).
*/
class RMDPparameters{

 public:

  RMDPparameters();
  int process(int argc, char* argv[]);
  /* Processes command line parameters and assigns values 
     to the corresponding data members.
     NOTE(review): the meaning of the int return value is not documented
     in this header - confirm against the implementation.
     
     In the command line, specify parameters in the following format:
     
          executable_name program_name parameter=value(s) parameter=value(s) ...

      In the cases where a list of values has to be specified, 
      the individual values are separated by ',' and/or '/'.

    Parameters:
    
      ?     :  prints out the information on usage as provided here.
      rmdp  :  name of the RMDP if to be saved or loaded (with path if not in the current directory)
      T     :  type of the RMDP (d for discrete and c for continuous). E.g.: T=d
      V     :  number of variables.
      A     :  number of actions.
      D     :  - for continuous RMDPs, list the number of discrete units for piecewise-constant 
                 functions describing the RMDP dynamics (must have one number for each variable);
               - for discrete RMDPs, list the number of discrete values that each variable can 
                 take (must have one number for each variable).
               E.g.: D=10,10
      BF    :  branching factor(s) (for each action).
               E.g.:   BF=5 (the same for all actions) 
                    or BF=3,5 (3 for the first action and 5 for the second action)
      BFM   :  maximum branching factor(s) (for each action), if the branching factor should vary
               across the state space. Format as for BF.
      minTV :  lower bounds on transition variances
               (one value for all variables and actions or values for each variable and action).
                E.g.:   minTV=0 (the same for all variables and actions) 
                     or minTV=0,0/0.01,0.01 (a comma separates values for variables 
                                             and '/' separates values for different actions)
      maxTV :  upper bounds on transition variances. Format as for minTV.
      minRV :  lower bound on reward variances (one value for all actions or values for each action).
               E.g.:    minRV=0 (for all actions) 
                     or minRV=0.001,0.01 (values provided for each action)
      maxRV :  upper bound on reward variances (for each action). Format as for minRV.
      minTP :  lower bound on termination probabilities.
      maxTP :  upper bound on termination probabilities.
      UDC   :  name of the file with user defined constraints, if any 
               (with path if not in the current directory).

      
    Additional parameters to specify if generating random constraints:

      UDC   :  in this case, name of the file in which to save the constraints 
               (with path if not in the current directory).
      TR    :  number of regions with distinct branching probabilities 
               (one value for all actions or values for each action).
               E.g.:   TR=10 (the same for all actions) 
                    or TR=10,50 (values for each action)
      RR    :  number of regions with distinct reward constraints. Format as for TR.
      TMR   :  number of regions with distinct termination probabilities. 
               E.g.: TMR=10
      TIL   :  max interval length (between 0 and 1) for hypercubes in the state space
               to specify transition constraints (for all variables and actions).
               Format as for minTV.
      RIL   :  max interval length (between 0 and 1) for reward constraints 
               (for all variables and actions). Format as for minTV.
      TMIL  :  max interval length (between 0 and 1) for termination constraints (for each variable).
               E.g.:   TMIL=0.1 (the same for all variables) 
                    or TMIL=0.1,0.2 (values for each variable)
    
    The parameters below help to generate tasks with similarities in the transition and reward 
    structure across actions. They are also helpful in generating tasks with certain values of
    MDP attributes. Used only when generating without constraints.
      
      TS    :  percentage (between 0 and 100) of similar parameters in all functions 
               representing transition rules (branching probabilities, transition means 
               and transition variances) for different actions.
      BPS   :  percentage (between 0 and 100) of similar parameters in functions representing 
               branching probabilities for different actions.
      TMS   :  percentage (between 0 and 100) of similar parameters in functions representing 
               transition means for different actions.
      TVS   :  percentage (between 0 and 100) of similar parameters in functions representing
               transition variances for different actions.
      RMS   :  percentage (between 0 and 100) of similar parameters in functions representing 
               reward means for different actions.
      RVS   :  percentage (between 0 and 100) of similar parameters in functions representing 
               reward variances for different actions.
   */

  void check(int task);
  /* Checks whether all necessary parameters were specified for generating an RMDP
     (when "task"=="generate") or constraints (when "task"=="generate_constraints").
     NOTE(review): no constant named "generate_constraints" is defined in this
     header (only "load", "generate" and "generate_with_constraints") - confirm
     which value the implementation expects for the constraints case.
  */
     

  //data members
  char* rmdpName; //name of the RMDP (with path if it is to be stored outside the current directory)
  char* UDC;//name of the file with user-defined constraints
  char Type;//type of the RMDP: d - discrete states, c - continuous
  int Variables;//number of variables
  int Actions;//number of actions
  int* Discretize; //number of discrete units for piecewise-constant functions describing the system's dynamics, for each dimension
  int* BranchingFactor; //branching factors (for each action)
  int* MaxBranchingFactor; //max branching factors (if variation across the state space is desired)
  double** TIL;//maximum length of intervals in the transition constraints (for each variable and action)
  int* TransitionRules; //number of non-default transition constraints; also corresponds to the number of regions with different branching probabilities (for each action)
  
  double** minTV; //lower bound on transition variance (for each variable and for each action)
  double** maxTV;//upper bound on transition variance (for each variable and for each action)
  
  int* RewardRules; //number of non-default reward constraints (for each action)
  double** RIL;//maximum length of intervals in the reward constraints (for each variable and each action)
  
  double* minRV;//lower bound on reward variance (for each action)
  double* maxRV;//upper bound on reward variance (for each action)
  
  int TerminationRules; //number of non-default termination constraints 
  double* TMIL; //max length of the intervals in non-default termination constraints
  double minTP;//lower bound on termination probabilities
  double maxTP;//upper bound on termination probabilities
  
  int BPsimilarity;//percentage of identical parameters in the functions specifying branching probabilities across different actions
  int TMsimilarity;//percentage of identical parameters in the functions specifying transition means across different actions
  int TVsimilarity;//percentage of identical parameters in the functions specifying transition variances across different actions
  bool TransitionSimilarity;//indicates whether any similarity in branching probabilities or transition means was required by the user
  int RMsimilarity;//percentage of identical parameters in the functions specifying reward means across different actions
  int RVsimilarity;//percentage of identical parameters in the functions specifying reward variances across different actions
  bool RewardSimilarity;//indicates whether any similarity in reward means was required by the user
  
  int DependencyFactor; //not currently used

  ~RMDPparameters();

 private:
  // NOTE(review): the tokenize overloads are undocumented in the original
  // header. Presumably they split a value list into at most "n" tokens of the
  // given element type and return a count - confirm against the implementation.
  int tokenize(char* s, int n, int* tokens);
  int tokenize(char* string, int n, double* tokens);
  int tokenize(char* string, int n, char* tokens);
};

	
