Skip to content

Commit

Permalink
mcts tune DrawUtilityPenalty
Browse files Browse the repository at this point in the history
  • Loading branch information
dhbloo committed Oct 12, 2024
1 parent 34dc002 commit ea397bc
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 19 deletions.
59 changes: 41 additions & 18 deletions Rapfi/search/mcts/parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,56 @@

#pragma once

#include "../../config.h"
#include "../../tuning/tunemap.h"

#include <cstdint>

// Parameters of the MCTS search.
//
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// lost. Every parameter appears twice: once as `constexpr` and once as
// `inline`. The 18 `constexpr` lines match the "18 deletions" reported for
// this file, so they are presumably the removed side and the `inline`
// declarations plus the TUNE(...) lines are the added side. Confirm against
// the repository before treating this text as a compilable header.
//
// TUNE(...) is not defined in this view; presumably it is a macro provided by
// "../../tuning/tunemap.h" that registers a variable with the tuning
// framework -- TODO confirm. The three-argument form seems to pass a lower and
// an upper bound for the tuned value -- verify against the macro definition.
namespace Search::MCTS {

constexpr float MaxNewVisitsProp = 0.36f;
inline float MaxNewVisitsProp = 0.36f;
TUNE(MaxNewVisitsProp);

// Cpuct exploration parameters.
inline float CpuctExploration = 0.39f;
inline float CpuctExplorationLog = 0.98f;
inline float CpuctExplorationBase = 340;
TUNE(CpuctExploration);
TUNE(CpuctExplorationLog);
TUNE(CpuctExplorationBase);

constexpr float CpuctExploration = 0.39f;
constexpr float CpuctExplorationLog = 0.98f;
constexpr float CpuctExplorationBase = 340;
// Cpuct utility stdev / variance-prior parameters.
inline float CpuctUtilityStdevScale = 0.043f;
inline float CpuctUtilityVarPrior = 0.16f;
inline float CpuctUtilityVarPriorWeight = 1.87f;
TUNE(CpuctUtilityStdevScale);
TUNE(CpuctUtilityVarPrior);
TUNE(CpuctUtilityVarPriorWeight);

constexpr float CpuctUtilityStdevScale = 0.043f;
constexpr float CpuctUtilityVarPrior = 0.16f;
constexpr float CpuctUtilityVarPriorWeight = 1.87f;
// Fpu parameters, with separate Root* variants.
inline float FpuReductionMax = 0.06f;
inline float FpuLossProp = 0.0008f;
inline float RootFpuReductionMax = 0.073f;
inline float RootFpuLossProp = 0.0036f;
inline float FpuUtilityBlendPow = 0.84f;
TUNE(FpuReductionMax);
TUNE(FpuLossProp);
TUNE(RootFpuReductionMax);
TUNE(RootFpuLossProp);
TUNE(FpuUtilityBlendPow);

constexpr float FpuReductionMax = 0.06f;
constexpr float FpuLossProp = 0.0008f;
constexpr float RootFpuReductionMax = 0.073f;
constexpr float RootFpuLossProp = 0.0036f;
constexpr float FpuUtilityBlendPow = 0.84f;
// No TUNE(...) line is shown for this parameter.
inline uint32_t MinTranspositionSkipVisits = 11;

constexpr uint32_t MinTranspositionSkipVisits = 11;
// LCB parameters; no TUNE(...) lines are shown for them.
inline bool UseLCBForBestmoveSelection = true;
inline float LCBStdevs = 6.28f;
inline float LCBMinVisitProp = 0.1f;

constexpr bool UseLCBForBestmoveSelection = true;
constexpr float LCBStdevs = 6.28f;
constexpr float LCBMinVisitProp = 0.1f;
// Policy temperatures, registered with explicit extra arguments
// (presumably tuning bounds -- TODO confirm).
inline float PolicyTemperature = 0.91f;
inline float RootPolicyTemperature = 1.05f;
TUNE(RootPolicyTemperature, 0.7f, 1.1f);
TUNE(PolicyTemperature, 0.7f, 1.3f);

constexpr float PolicyTemperature = 0.91f;
constexpr float RootPolicyTemperature = 1.05f;
// Exponents used by the draw-utility penalty in search.cpp, which subtracts
//   Config::DrawUtilityPenalty * pow(childDraw, ChildDrawPow)
//                              * pow(1 - parentDraw, ParentDrawPow)
// from Q. With both exponents at 1.0 this is mathematically the earlier
// childDraw * (1 - parentDraw) form.
inline float ChildDrawPow = 1.0f;
inline float ParentDrawPow = 1.0f;
TUNE(ChildDrawPow);
TUNE(ParentDrawPow);
// Config::DrawUtilityPenalty is not declared in this view (presumably in
// "../../config.h" -- TODO confirm); it is registered here with the extra
// arguments 0.0f and 1.0f.
TUNE(Config::DrawUtilityPenalty, 0.0f, 1.0f);

} // namespace Search::MCTS
3 changes: 2 additions & 1 deletion Rapfi/search/mcts/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ inline float puctSelectionValue(float childUtility,
float Q = childUtility;

if (Config::DrawUtilityPenalty != 0)
Q -= Config::DrawUtilityPenalty * childDraw * (1 - parentDraw);
Q -= Config::DrawUtilityPenalty * ::pow(childDraw, ChildDrawPow)
* ::pow(1 - parentDraw, ParentDrawPow);

// Account for virtual losses
if (childVirtualVisits > 0)
Expand Down

0 comments on commit ea397bc

Please sign in to comment.