Adjust the SDP module (dtsdp solver, machine-replacement example, and tests)

Adjust the initialization for the last epoch: the optimal decision is now selected with np.argmax over that epoch's immediate returns.
copa-uniandes · Dec 23, 2024 · 9afdca7 · 9afdca7
1 parent c7bc520
commit 9afdca7
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 20 deletions.
diff --git a/examples/dtsdp_problem_examples/sdp_problem_solutions/sdp_maquinas.py b/examples/dtsdp_problem_examples/sdp_problem_solutions/sdp_maquinas.py
@@ -13,36 +13,37 @@
 # Decisiones
 A = np.array(["Reemplazar","No Reemplazar"])
 # Retornos Inmediatos
-R = np.zeros((len(E), len(estados), len(A)))
-for t in range(len(E)): 
-    for s_index,i in enumerate(estados):
-        for posA,a in enumerate(A):
-            if(i=="Excelente" and a=="Reemplazar"):
-                R[t,s_index,posA]=-1000000
-            elif(i=="Excelente" and a=="No Reemplazar"):
-                R[t,s_index,posA]=100
+R = np.zeros((len(E),len(S),len(A)))
+# Recorremos sobre las épocas
+for t in range(len(E)):
+    # Recorremos sobre los estados:
+    for s_index, i in enumerate(S):
+        # Recorremos sobre las decisiones:
+        for a_index, a in enumerate(A):
+            if i=='Excelente' and a=='Reemplazar':
+                R[t,s_index,a_index] = -1000
+            elif i=='Excelente' and a=='No Reemplazar':
+                R[t,s_index,a_index] = 100
             elif(i=="Bueno" and a=="Reemplazar"):
-                R[t,s_index,posA]=-100
+                R[t,s_index,a_index]=-100
             elif(i=="Bueno" and a=="No Reemplazar"):
-                R[t,s_index,posA]=80
+                R[t,s_index,a_index]=80
             elif(i=="Promedio" and a=="Reemplazar"):
-                R[t,s_index,posA]=-100
+                R[t,s_index,a_index]=-100
             elif(i=="Promedio" and a=="No Reemplazar"):
-                R[t,s_index,posA]=50
+                R[t,s_index,a_index]=50
             elif(i=="Malo" and a=="Reemplazar"):
-                R[t,s_index,posA]=-100
+                R[t,s_index,a_index]=-100
             elif(i=="Malo" and a=="No Reemplazar"):
-                R[t,s_index,posA]=10
+                R[t,s_index,a_index]=10
 
 # Matrices de transición
-probs = {t:np.zeros((len(A), len(estados), len(estados))) for t in E}
-
 matNoReemplazar = np.array([[0.7,0.3,0,0],
                           [0,0.7,0.3,0],
-                          [0,0,0.7,0.3],
+                          [0,0,0.6,0.4],
                           [0,0,0,1]])
 
-matReemplazar = np.array([[1,0,0,0],
+matReemplazar = np.array([[0,0,0,0],
                           [0.7,0.3,0,0],
                           [0.7,0.3,0,0],
                           [0.7,0.3,0,0]])

diff --git a/jmarkov/sdp/__pycache__/dtsdp.cpython-311.pyc b/jmarkov/sdp/__pycache__/dtsdp.cpython-311.pyc
diff --git a/jmarkov/sdp/dtsdp.py b/jmarkov/sdp/dtsdp.py
@@ -132,7 +132,7 @@ def solve(self, minimize = False):
         f = R
         for s_index,i in enumerate(S):
             Ft_optimo[s_index,-1] = max(f[-1,s_index])
-            dec = int(np.where(Ft_optimo[s_index,-1] == max(f[-1,s_index]))[0][0])
+            dec = int(np.argmax(f[-1, s_index]))
             Mat_Dec_optimo[s_index,-1] = A[dec]
         # start backward iteration
         # iterate through time steps (from second to last, to first)

diff --git a/tests/tests_dtsdp.py b/tests/tests_dtsdp.py
@@ -114,6 +114,6 @@ def test_policy_solver(self):
                 transition_matrices[t] = decisiones_dict
             sdp = dtsdp(epochs, states, actions, transition_matrices, immediate_returns, discount_factor)
             result = sdp.solve(minimize = False)[1]
-            self.assertTrue(np.array_equal(result, ([['N', 'N', 'R'],['N', 'N', 'R'],['N', 'N', 'R'],['R', 'N', 'R']])))
+            self.assertTrue(np.array_equal(result, ([['N', 'N', 'N'],['N', 'N', 'N'],['N', 'N', 'N'],['R', 'N', 'N']])))
 if __name__ == '__main__':
     unittest.main()