/* This program demonstrates how to
     - generate an RMDP from the parameters specified on the command line;
     - compute MDP attributes for the generated RMDP, if desired;
     - load a previously generated RMDP;
     - print a trajectory of the generated RMDP under a random policy;
     - automatically generate RMDP constraints and then generate an RMDP 
     with those constraints. 


   Compile this program while linking it with 
   rmdp.cpp, pwc.cpp, environment.cpp, random_numbers.cpp, 
   sarepr.cpp and safa.cpp. Then run the executable with the command 
   line argument ? (or \? in your shell) to see the list of arguments 
   you should specify in the command line.

   For detailed documentation, see the project web page:
   http://www.cs.mcgill.ca/~sonce/

   File: rmdp-order.cpp
   Last modified: May, 2002
   Copyright: Bohdana Ratitch, SOCS, McGill University
*/

#include<iostream.h>
#include<time.h>
#include<math.h>
#include<sys/types.h>
#include<stdlib.h>
#include<fstream.h>
#include<string.h>

#ifndef INTERFACE_CLASSES	
#define INTERFACE_CLASSES 
#include "interface_classes.h"
#endif

#ifndef RMDP_CLASS	
#define RMDP_CLASS
#include "rmdp.h"
#endif

void main(int argc, char* argv[]){
 
  //seed random number generator
  time_t stime;
  time(&stime);
  struct tm* currentTime;
  currentTime=gmtime(&stime);
  unsigned seed=(currentTime->tm_hour+2)*currentTime->tm_min*currentTime->tm_sec;
  srand(seed);

  RMDPparameters p;
	
  ActionSet* action_set;
  int i, j;
  int Steps=10;//for the trajectory

  int Samples=0;//to compute attributes
  int Transitions=150;//to compute attributes
  
  if (argc==1){
    cout << "Please supply command line arguments." << endl;
    cout << "Would you like to see help on the arguments usage? (y/n)" << endl;
    char in;
    cin >> in;
    if (in=='y') strcpy(argv[0],"?");
    else exit(EXIT_SUCCESS);
  }

  if (p.process(argc,argv)>0){
    cout << endl;
    cout << "Parameters of the main() program:" << endl;
    cout << "S \t number of state samples to use for computing the attributes' values (attributes are not computed if not specified)" << endl;
    cout << "steps= \t max number of state transition on the trajectory" << endl;
    exit(EXIT_SUCCESS);
  }

  for (i=0; i<argc; i++){
    
    if (strncmp(argv[i],"S=",2)==0)
      Samples=atoi(&(argv[i][2]));

    if (strncmp(argv[i],"steps=",6)==0)
      Steps=atoi(&(argv[i][6]));
  }

  //generate RMDP with parameters specified in the command line
  
  Environment* env = new RMDP(generate, p, &action_set, true);

  cout << endl;
  cout << "\t RMDP generated successfully and parameters saved in " << p.rmdpName << " directory" << endl << endl << endl;

  delete env;
  delete action_set;
    

  //later you may want to load the same RMDP. This is how you do it:
  env = new RMDP(load, p, &action_set, false);

  //print out trajectory of maximum Steps
  cout << "\t A trajectory with the generated RMDP" << endl << endl;;
  State CurrentState, NextState;
  Action CurrentAction;
  double reward;
  bool terminal;
  int ChosenActionID;

  env->startState(CurrentState, terminal);
  cout << "State: " << CurrentState;
  for (i=0;i<Steps; i++){
    if (terminal==true){
      cout << " (terminal state)" << endl;
      break;
    }

    // select an action at random
    ChosenActionID=rand()%(action_set->size);
    cout << " Action: " << action_set->action[ChosenActionID] << endl;
    // make a transition
    env->transition(action_set->action[ChosenActionID], NextState, reward, terminal);
    cout << "State: " << NextState;
  }
  
  cout << endl << endl;;


  //Compute MDP attributes

  if (Samples!=0){
    if (p.Type=='d'){
      Samples=1;
      for(i=0;i<p.Variables;i++)
	Samples=Samples*p.Discretize[i];
    }

    Attributes att;
    att.setParameters(1,0.5);//compute up to 1-step STE and risk factor with k=0.5 

    //compute global values of attributes for the uniform state distribution 

    env->computeAttributes(att, Samples, Transitions, p.Discretize, (*action_set));

    cout << "Attributes' values based on " << Samples << " uniformly distributed state samples:" << endl << endl; 
    cout << "STE = " << att.Entropy[0] << endl;
    cout << "Controllability = " << att.Controllability << endl;
    cout << "RiskFactor (k=0.5) = " << att.RiskFactor << endl;
    cout << "RewardVariance = " << att.RewardVariance << endl;
    cout << "TransitionDistance = " << att.TransitionDistance << endl;
    cout << "TransitionVariability = " << att.TransitionVariability << endl;
    
    //compute global attributes for the state distribution on the trajectory with a random walk
    
    State start;
    bool terminal;
    env->startState(start, terminal);
    cout << endl;
    cout << "Computing attributes' values based on one trajectory with the random walk sarting in state  " << start << endl << endl;
    
    env->computeAttributes(att, start, Samples, Transitions, p.Discretize, (*action_set));
    cout << "STE = " << att.Entropy[0] << endl;
    cout << "Controllability = " << att.Controllability << endl;
    cout << "RiskFactor (k=0.5) = " << att.RiskFactor << endl;
    cout << "RewardVariance = " << att.RewardVariance << endl;
    cout << "TransitionDistance = " << att.TransitionDistance << endl;
    cout << "TransitionVariability = " << att.TransitionVariability << endl;
  }
  
  cout << endl;
  
    
  delete env;
  delete action_set;


  // Generate constraints for the RMDP 
  

  RMDP::generateRMDPconstraints(p);
  cout << "\t RMDP constraints are saved in file " << p.UDC << endl;

  // You can look at the generated constraints and edit them the way you like  
  // Later you may want to  generate RMDP with those constraints

  
  // Generate but don't save
  env = new RMDP(generate_with_constraints, p, &action_set, false);
  cout << "RMDP generated successfully with constraints specified in " << p.UDC << endl << endl;
  if (Samples!=0){

    Attributes att;
    att.setParameters(1,0.5);//compute up to 1-step STE and risk factor with k=0.5 
    env->computeAttributes(att, Samples, Transitions, p.Discretize, (*action_set));
    
    cout << "Attributes' values based on " << Samples << " uniformly distributed state samples:" << endl; 
    cout << "STE = " << att.Entropy[0] << endl;
    cout << "Controllability = " << att.Controllability << endl;
    cout << "RiskFactor (k=0.5) = " << att.RiskFactor << endl;
    cout << "RewardVariance = " << att.RewardVariance << endl;
    cout << "TransitionDistance = " << att.TransitionDistance << endl;
    cout << "TransitionVariability = " << att.TransitionVariability << endl;
  }

  delete env;
  delete action_set;

  cout << endl;
    
}

