C **********************************************************************
C
C 	init_agents
C
C **********************************************************************

	SUBROUTINE init_agents

	INCLUDE 'agent.h'

	CALL get_parms

	alpha2 = alpha * alpha
	alpha3 = alpha * alpha2
	alpha4 = alpha * alpha3

	num_epsilons = 0
	average_epsilon = 0.0D0

C	rho             = 0.0D0
C	total_reward    = 0.0D0
C	total_time      = 0.0D0
C	rand_action     = 0

	DO i=1, tot_cars
	   rho               (i) = 0.0D0
	   total_reward      (i) = 0.0D0
	   total_time        (i) = 0.0D0
	   rand_action       (i) = 0
	END DO
	RETURN 
	END

C **********************************************************************
C
C 	get_parms
C
C **********************************************************************

	SUBROUTINE get_parms

	INCLUDE 'agent.h'

	OPEN (UNIT=14, FILE='parms.dat', status='OLD')

	READ (14,*) temperature
	READ (14,*) temp_factor
	READ (14,*) learning_rate
	READ (14,*) momentum
	READ (14,*) alpha
	READ (14,*) slope
	READ (14,*) save_interval
	READ (14,*) rand_type
	READ (14,*) traffic_type
	READ (14,*) inter_traffic
	READ (14,*) reward_type
	READ (14,*) do_floor
	READ (14,*) do_parking
        READ (14,*) do_sarsa
        READ (14,*) lr_factor

	CLOSE (14)

	RETURN 
	END

C **********************************************************************
C
C       reset_agents
C
C **********************************************************************

	SUBROUTINE reset_agents

	INCLUDE 'agent.h'
	INCLUDE 'my.h' ! tot_cars (INTEGER)

	INTEGER i

C	current_state(19) = 0.1
C	DO i = 3,18
C	   current_state (i-1) = 0.0
C	   current_state (i) = 1.0
C	   DO icar = 1,4
C	      CALL forward_prop (current_state,icar,1,2)
C	      WRITE (20,*) icar, output_units(1), output_units(2)
C	   END DO
C	END DO
C	STOP

	last_event_time = 0.0D0
C	rho             = 0.0D0
C	total_reward    = 0.0D0
C	total_time      = 0.0D0
C	rand_action     = 0

	DO i=1, tot_cars
	   last_action       (i) = 0
	   last_action_time  (i) = 0.0D0
	   last_evaluation   (i) = 0.0D0
	   immediate_rewards (i) = 0.0D0
	   rho               (i) = 0.0D0
	   total_reward      (i) = 0.0D0
	   total_time        (i) = 0.0D0
	   rand_action       (i) = 0
	END DO
	
	RETURN 
	END

C **********************************************************************
C
C     handle_event
C
C **********************************************************************

      SUBROUTINE handle_event ( time )

      INCLUDE 'agent.h'
      INCLUDE 'my.h' ! max_sys (INTEGER), tot_cars (INTEGER)

      COMMON /intcom/ lastint(2,10), meanint(2,10), off(2,10)
      REAL*8 lastint, meanint, off

      INTEGER i,j,icar,flag
      REAL time
      REAL*8 a              ! time elapsed since last event
      REAL*8 g              ! discount factor
      REAL*8 k,k2,k3        ! arrival time until last event time
      REAL*8 sum,sum2,sum3  ! total waiting time
      REAL*8 r              ! reward accumulator
      REAL*8 lambda         ! arrival rate to use for this button

      a = time - last_event_time
C      g = dexp(-alpha * a)

      r = 0.0D0

C      IF (reward_type .EQ. 0) THEN
C	 DO i = 1, max_sys
C	    IF (waiting_list(i) .GT. -0.5) THEN
C	       k = last_event_time - waiting_list(i)
C	       sum = k + a
C	       r = r + 2.0D0/alpha3 + 2.0D0* k /alpha2 +  k * k /alpha - 
C     X            g * (2.0D0/alpha3 + 2.0D0*sum/alpha2 + sum*sum/alpha)
C	    END IF
C	 END DO
C      ELSE
C	 DO i = 2, tot_flrs
C	    DO j = 1, 1
C	       flag = 0
C	       DO icar = 1, tot_cars
C		  IF ((cur_flr(icar) .EQ. i) .AND.
C     X		      (cur_dir(icar) .EQ. 0) .AND.
C     X		      (tnow - last_action_time(icar) .GT. 3.596) .AND.
C     X		      (state(icar)   .EQ. loading)) THEN
C		      flag= 1
C		  END IF
C	       END DO
C	       IF ((buttons(j,i) .GT. 0.0) .AND. (flag .EQ. 0)) THEN
CC		  lambda = 0.04D0
CC	       lambda = 1.0 / arr_int(i,min(24,INT(buttons(j,i)/300.0)+1))
CC	       IF (arr_int(i,min(24,INT(time/300.0)+1)) .LT. 1.0/lambda) THEN
CC		  lambda = 1.0 / arr_int(i,min(24,INT(time/300.0)+1))
CC   	       END IF
C		  lambda = 1.0D0 / lastint(j,i)
C		  IF (lambda .GT. 0.04D0) THEN
C		     lambda = 0.04D0
C		  END IF
C		  k = last_event_time - buttons(j,i)
C		  sum = k + a
C		  k2 = k * k
C		  sum2 = sum * sum
C		  k3 = k2 * k
C		  sum3 = sum2 * sum
C		  r = r + (1.0D0 - g)*2.0D0*lambda/alpha4 +
C     X           2.0D0/alpha3 + 2.0D0*k/alpha2 + k2/alpha -
C     X      g * (2.0D0/alpha3 + 2.0D0*sum/alpha2 + sum2/alpha) +
C     X lambda * (2.0D0*k/alpha3 + k2/alpha2 + k3/(3.0D0*alpha) -
C     X      g * (2.0D0*sum/alpha3 + sum2/alpha2 + sum3/(3.0D0*alpha)))
C	       END IF
C	    END DO
C	 END DO
C      END IF

	IF (reward_type .EQ. 0) THEN
	   DO i = 1, max_sys
	      IF (waiting_list(i) .GT. -0.5) THEN
		 k = last_event_time - waiting_list(i)
		 sum = k + a
		 r = r + (sum*sum*sum)/3 - (k*k*k)/3
	      END IF
	   END DO
	ENDIF

      DO i = 1, tot_cars
	 immediate_rewards (i) = immediate_rewards (i) + r
      END DO

      last_event_time = time

      RETURN
      END

C **********************************************************************
C
C       compute_action_probs ( v1, v2, prob, temperature, car )
C
C **********************************************************************

        SUBROUTINE compute_action_probs ( v1, v2, prob, temp, car )
	
	INCLUDE 'agent.h'
	INCLUDE 'my.h'		! max_sys (INTEGER), tot_cars (INTEGER)

        REAL*8 v1, v2, prob, temp
	INTEGER car
        REAL*8 expv1, expv2, sum

C  Don't forget we want the MIN since these are penalties...

        IF (v1/temp .GT. 709.0D0  .OR.
     +      v2/temp .GT. 709.0D0  .OR.
     +      v1/temp .LT. -709.0D0 .OR.
     +      v2/temp .LT. -709.0D0) THEN
           IF (v1 .LT. v2) THEN  ! MIN
              prob = 1.0
	      rand_action(car) = 0
           ELSE
              prob = 0.0
	      rand_action(car) = 0
           END IF

        ELSE
           expv1 = dexp(v1/temp)
           expv2 = dexp(v2/temp)
           sum = expv1 + expv2
           prob = expv2 / sum  ! MIN
	   rand_action(car) = 1
        END IF

        RETURN
        END

C **********************************************************************
C
C       handle_car_arrival
C
C **********************************************************************

	SUBROUTINE handle_car_arrival ( car, time, action )

	INCLUDE 'agent.h'
	INCLUDE 'my.h' ! debug (LOGICAL)

	INTEGER car
	REAL time
	INTEGER action

	LOGICAL car_in_motion
	REAL*8 g   ! discount factor
	REAL*8 vx, vy
	REAL*8 epsilon
	REAL get_rand
	REAL*8 prob

C	------------------------------------------------------------
C       Update rewards for all cars.
C       ------------------------------------------------------------

	CALL handle_event (time)

C	------------------------------------------------------------
C       Update the state
C       ------------------------------------------------------------

	CALL set_state_for ( car, time )

C	numstop = 0
C	numcont = 0
C	DO junk = 1,4
C	   CALL forward_prop (current_state,junk,1,2)
C	   IF (output_units(1) .LT. output_units(2)) THEN
C	      numstop = numstop + 1
C	   ELSE
C	      numcont = numcont + 1
C	   END IF
C	END DO
C	CALL forward_prop (current_state,5,1,2)
C	IF (output_units(1) .LT. output_units(2)) THEN
C	   num5 = 0
C	ELSE
C	   num5 = 1
C	END IF
C	write (11,*) time, numstop, numcont, num5

C	------------------------------------------------------------
C       Calculate vy
C       ------------------------------------------------------------
        
C       This code must be modified if separate networks are used for
C       separate cars, actions, motion states, etc.
        
	IF (action .EQ. 0) THEN
	   IF ( car_in_motion(car) ) THEN
C	      CALL forward_prop (current_state,car,1,2)
	      CALL forward_prop (current_state,1,1,1)
	      CALL forward_prop (current_state,1,2,2)
	   ELSE
	      CALL forward_prop (current_state,2,1,2)
	   END IF
	   IF (output_units(1) .LT. output_units(2)) THEN ! MIN
	      vy = output_units(1)
	   ELSE
	      vy = output_units(2)
	   END IF
	   
C	------------------------------------------------------------
C       Decide upon an action if not already constrained
C       ------------------------------------------------------------
        
	   CALL compute_action_probs
     +           (output_units(1),output_units(2),prob,temperature, car)

	   IF ( car_in_motion(car) ) THEN
	      IF (get_rand ( 0.0, 1.0 ) .LT. prob) THEN
		 action = ISTOP_ACTION
	      ELSE
		 action = ICONT_ACTION
	      END IF
	   ELSE IF (get_rand ( 0.0, 1.0 ) .LT. prob) THEN
	      action = IUP_ACTION
	   ELSE
	      action = IDOWN_ACTION
	   END IF
	ELSE IF (action .EQ. ISTOP_ACTION) THEN ! constrained
	   CALL forward_prop (current_state,1,1,1)
	   vy = output_units(1)
	ELSE IF (action .EQ. ICONT_ACTION) THEN ! constrained
	   CALL forward_prop (current_state,1,2,2)
	   vy = output_units(2)
	ELSE IF (action .EQ. IUP_ACTION) THEN ! constrained
	   CALL forward_prop (current_state,2,1,1)
	   vy = output_units(1)
	ELSE IF (action .EQ. IDOWN_ACTION) THEN ! constrained
	   CALL forward_prop (current_state,2,2,2)
	   vy = output_units(2)
	END IF

C	------------------------------------------------------------
C       Calculate vx
C       ------------------------------------------------------------
        
        IF (last_action(car) .EQ. ISTOP_ACTION) THEN
C	   CALL forward_prop (previous_state,car,1,1)
	   CALL forward_prop (previous_state,1,1,1)
           vx = output_units(1)
        ELSE IF (last_action(car) .EQ. ICONT_ACTION) THEN
C	   CALL forward_prop (previous_state,car,2,2)
	   CALL forward_prop (previous_state,1,2,2)
           vx = output_units(2)
        ELSE IF (last_action(car) .EQ. IUP_ACTION) THEN
	   CALL forward_prop (previous_state,2,1,1)
           vx = output_units(1)
        ELSE IF (last_action(car) .EQ. IDOWN_ACTION) THEN
	   CALL forward_prop (previous_state,2,2,2)
           vx = output_units(2)
        END IF

C	------------------------------------------------------------
C       Scale down the immediate rewards (actually costs)
C       ------------------------------------------------------------
        
	immediate_rewards(car) = immediate_rewards(car) / 1000000.0D0

C	------------------------------------------------------------
C       Calculate the TD error
C       ------------------------------------------------------------
        
C	g = dexp(-alpha * (time - last_action_time ( car )))
C	epsilon = immediate_rewards(car) + g*vy - vx

C       ------------------------------------------------------------
C       Modification for REIL SMART by Nicholas Marchalleck 8/13/96
C       ------------------------------------------------------------
	epsilon = (immediate_rewards(car) - rho(car) * (time - last_action_time(car)) + vy - vx)*0.01

	IF (rand_action(car) .EQ. 0) THEN
	   total_reward(car) = total_reward(car) + immediate_rewards(car)
	   total_time(car) = total_time(car) + (time - last_action_time(car))
	   rho(car) = total_reward(car)/total_time(car)
	ENDIF

C	------------------------------------------------------------
C       Train the networks using the TD error
C       ------------------------------------------------------------
        
        IF (last_action(car) .EQ. ISTOP_ACTION) THEN
C	   CALL back_prop (previous_state,epsilon,car,1)
	   CALL back_prop (previous_state,epsilon,1,1)
        ELSE IF (last_action(car) .EQ. ICONT_ACTION) THEN
C	   CALL back_prop (previous_state,epsilon,car,2)
	   CALL back_prop (previous_state,epsilon,1,2)
        ELSE IF (last_action(car) .EQ. IUP_ACTION) THEN
	   CALL back_prop (previous_state,epsilon,2,1)
        ELSE IF (last_action(car) .EQ. IDOWN_ACTION) THEN
	   CALL back_prop (previous_state,epsilon,2,2)
        END IF

C	------------------------------------------------------------
C       Print information for debugging purposes
C       ------------------------------------------------------------
        
	IF (debug) THEN
	   WRITE (11,*) "last action ", last_action(car)
	   WRITE (11,*) "vx ", vx
	   WRITE (11,*) "immed r ", immediate_rewards(car)
	   WRITE (11,*) "g ", g
	   WRITE (11,*) "vy ", vy
	   WRITE (11,*) "epsilon ", epsilon
           WRITE (11,*) "prob ", prob
	END IF

C       ------------------------------------------------------------
C       Save information for critic.stats file
C       ------------------------------------------------------------

	IF (last_action(car) .NE. 0) THEN
	   average_epsilon = average_epsilon + epsilon * epsilon
	   num_epsilons = num_epsilons + 1
	END IF

C       ------------------------------------------------------------
C       Reset immediate rewards to zero and save some stuff
C       ------------------------------------------------------------

	immediate_rewards(car) = 0.0D0
	
	last_action ( car ) = action
        last_action_time ( car ) = time
	last_evaluation ( car ) = vy

        CALL save_state_for ( car )
	
	RETURN
	END

C **********************************************************************
C
C     compute_stats
C
C **********************************************************************

      SUBROUTINE compute_stats ( time, run )

      INCLUDE 'agent.h'

      REAL time
      INTEGER run

      REAL*8 get_total_squared_wait
      REAL*8 get_average_floor_wait
      INTEGER i,unit

      temperature = temperature * temp_factor
      learning_rate = learning_rate * lr_factor

C     Save weights every save_interval'th run.

      IF (run .EQ. run / save_interval * save_interval) THEN
         CALL write_net
      END IF

C     Write to wait.stats

      WRITE (12, *) run, " ", get_total_squared_wait ( time )

C     Write to critic.stats

      IF (num_epsilons .GT. 0) THEN
         WRITE ( 13, * ) run, " ", average_epsilon/num_epsilons
      ELSE
         WRITE ( 13, * ) run, " ", -10000000.0
      END IF

      num_epsilons = 0
      average_epsilon = 0.0D0

C     Write floor wait times

      unit = 60
      DO i=2, tot_flrs
         WRITE (unit + i - 1, *) run, " ", get_average_floor_wait ( i )
      END DO

      CALL save_rand

      RETURN
      END

C **********************************************************************
C
C	set_state_for ( car )
C
C	This routine loads the state vector for the specified car.
C
C **********************************************************************

	SUBROUTINE set_state_for ( car, time )

	INCLUDE 'my.h'
	INCLUDE 'agent.h'
	INCLUDE 'siman.h'

      COMMON /intcom/ lastint(2,10), meanint(2,10), off(2,10)
      REAL*8 lastint, meanint, off

	INTEGER num_in_car
	LOGICAL waiting_either_dir
	INTEGER car, i, j, index
	REAL time
	REAL butt,maxbutt,incr

C	------------------------------------------------------------
C	Zero out the state.
C	------------------------------------------------------------

	DO i = 1, ISTATE_SIZE
	   current_state (i) = 0.0D0
	END DO

C	------------------------------------------------------------
C	Fill in the hall buttons   (up and down)
C	------------------------------------------------------------

	maxbutt = 1000000.0
	index = 1
	DO i = tot_flrs, 2, -1
	   butt = buttons(1,i)  ! down
	   IF (butt .GT. 0.0) THEN
C new section
C	      IF (i .GT. cur_flr(car)) THEN
C		 current_state(1) = current_state(1) + 0.5D0
C	      ELSE IF (i .LT. cur_flr(car)) THEN
C		 current_state(2) = current_state(2) + 0.5D0
C	      END IF
C end new section
	      IF (butt .LT. maxbutt) THEN
		 maxbutt = butt
	      END IF
	      current_state(index+1) = (tnow - butt) / 80.0D0
C	      current_state(index+1) = 1.0D0
	   ELSE
	      current_state(index)   = 1.0D0
	   END IF
	   index = index + 2
C	   butt = buttons(2,i-1)  ! up
C	   IF (butt .GT. 0.0) THEN
C	      IF (butt .LT. maxbutt) THEN
C		 maxbutt = butt
C	      END IF
C	      current_state(index) = (tnow - butt) / 80.0
C	   END IF
C	   index = index + 1
	END DO
	  
C	------------------------------------------------------------
C	Fill in the current car location/direction
C	------------------------------------------------------------

	IF (cur_dir(car) .EQ. 1) THEN !up
C	   current_state (20) = 1.0
C	   current_state (8 + cur_flr(car)) = 1.0
	   current_state (17 + cur_flr(car)) = 1.0
	ELSE
C	   current_state (21) = 1.0
C	   current_state (27 - cur_flr(car)) = 1.0
	   current_state (36 - cur_flr(car)) = 1.0
	END IF
	      
C	------------------------------------------------------------
C	Fill in some features
C	------------------------------------------------------------

C  This bit is on if the car has passengers.

C	IF (num_in_car(car) .GT. 0) THEN
C	   current_state(44) = 0.1
C	END IF

C  This bit is on if there are no passengers waiting.

C	IF (.NOT. waiting_either_dir(car)) THEN
C	   current_state(45) = 0.1
C	END IF

C  This bit is always on.

C        current_state(26) = 0.1
        current_state(35) = 0.1

C  Is this the longest waiting queue?

	IF (buttons(1, cur_flr(car)) .LE. maxbutt) THEN
	   current_state(36) = 1.0
	END IF

C  Is this the highest queue?

	IF (buttons(1, cur_flr(car)) .GT. 0.0) THEN
	   current_state(37) = 1.0
	END IF

	IF (cur_flr(car) .LT. tot_flrs) THEN
	   DO i = cur_flr(car) + 1, tot_flrs
	      IF (buttons(1, i) .GT. 0.0) THEN
		 current_state(37) = 0.0
	      END IF
	   END DO
	END IF

C	------------------------------------------------------------
C	Fill in the other cars (superimposed) with partial info
C	------------------------------------------------------------

C	DO i = 1, tot_cars
C	   IF (i .NE. car) THEN
C	      IF (cur_flr(i) .GE. cur_flr(car)) THEN
C		 IF (cur_dir(i) .EQ. 1) THEN
C		    current_state(36) = current_state(36) + 0.5
C		 ELSE
C		    current_state(37) = current_state(37) + 0.5
C		 END IF
C	      ELSE
C		 IF (cur_dir(i) .EQ. 1) THEN
C		    current_state(38) = current_state(38) + 0.5
C		 ELSE
C		    current_state(39) = current_state(39) + 0.5
C		 END IF
C	      END IF
C	   END IF
C	END DO

C	------------------------------------------------------------
C	Fill in the other cars (superimposed) with footprints
C	------------------------------------------------------------

	DO i = 1, tot_cars
	   IF (i .NE. car) THEN
	      IF (state(i) .EQ. stopped
     +   .OR.     state(i) .EQ. loading
     +   .OR.     state(i) .EQ. turning) THEN
		 current_state(37 + cur_flr(i)) =
     +               current_state(37 + cur_flr(i)) + 1.0
	      ELSE IF (state(i) .EQ. decel) THEN
		 current_state(37 + cur_flr(i)) =
     +               current_state(37 + cur_flr(i)) + 0.75
	      ELSE IF (state(i) .EQ. moving) THEN
		 incr = 0.5
		 IF (cur_dir(i) .EQ. 1) THEN
		    DO j = cur_flr(i), 10
		       current_state(37 + j) = current_state(37 + j) + incr
		       incr = incr * 0.5
		    END DO
		 ELSE
		    DO j = cur_flr(i), 1, -1
		       current_state(37 + j) = current_state(37 + j) + incr
		       incr = incr * 0.5
		    END DO
		 END IF
	      ELSE IF (state(i) .EQ. accel) THEN
		 incr = 0.25
		 IF (cur_dir(i) .EQ. 1) THEN
		    DO j = cur_flr(i), 10
		       current_state(37 + j) = current_state(37 + j) + incr
		       incr = incr * 0.5
		    END DO
		 ELSE
		    DO j = cur_flr(i), 1, -1
		       current_state(37 + j) = current_state(37 + j) + incr
		       incr = incr * 0.5
		    END DO
		 END IF
	      END IF
	   END IF
	END DO

C	DO i = 1, 9
C	   IF (buttons(2,i) .GT. 0.0) THEN
C	      current_state(47+i) = (tnow - buttons(2,i)) / 80.0D0
C	   END IF
C	END DO


C   time of day info
C
C	j = int(time / 300.0)
C	IF (j .GE. 0 .AND. J .LE. 11) THEN
C	   current_state(48 + j) = 1.0
C	END IF

	DO i = 1, ISTATE_SIZE
	   previous_state(i) = last_state(car, i)
	END DO

	RETURN 
	END

C **********************************************************************
C
C 	save_state_for
C
C **********************************************************************

	SUBROUTINE save_state_for ( car )

	INCLUDE 'agent.h'

	INTEGER car
	INTEGER i

	DO i = 1, ISTATE_SIZE
	  last_state(car, i) = current_state(i)
	END DO
	
	RETURN
	END

C **********************************************************************
C
C       init_rand
C
C **********************************************************************

	SUBROUTINE init_rand
	
	COMMON /rand_stuff/ seed
	INTEGER*4 seed

	OPEN (UNIT=49, FILE='rand.out', status='OLD', ERR=100)

C       ------------------------------------------------------------
C       File successfully opened.
C       ------------------------------------------------------------

	READ (49, 50) seed
c       KR wants a negative seed
        if(seed.gt.0.0) seed=-1.0*seed
 50	FORMAT (I12)
c       CALL srand48(seed)

	RETURN

C       ------------------------------------------------------------
C       File open failed.
C       ------------------------------------------------------------

 100	seed = -23111445
C       CALL srand48(seed)

	OPEN (UNIT=49, FILE='rand.out', status='unknown')
	WRITE (49, 50) seed

	RETURN
	END

C **********************************************************************
C
C       save_rand
C
C **********************************************************************

	SUBROUTINE save_rand
	
	COMMON /rand_stuff/ seed
	INTEGER*4 seed

	REWIND (49)

	WRITE (49, 50) -1*int(ran(seed)*10000000)
 50	FORMAT (I12)

	RETURN
	END

C **********************************************************************
C
C 	get_rand ( min, max )
C
C **********************************************************************

	REAL FUNCTION get_rand ( min, max )

	COMMON /rand_stuff/ seed
	INTEGER*4 seed
        REAL*8 ran

	REAL min, max

	get_rand = ran(seed)*(max-min) + min

	RETURN
	END

C **********************************************************************
C
C 	get_poisson_interval ( arrival_rate )
C
C **********************************************************************

	REAL FUNCTION get_poisson_interval ( arrival_rate )

	COMMON /rand_stuff/ seed
	INTEGER*4 seed
        REAL*8 ran

	REAL arrival_rate
	REAL rnum
	
 100	rnum =  ran(seed)
	IF (rnum .LE. 0.00000000001) GO TO 100

	get_poisson_interval = - LOG(rnum) / arrival_rate

	RETURN
	END


