43 lines
713 B
Go
43 lines
713 B
Go
package algo
|
|
|
|
import (
|
|
"lukechampine.com/frand"
|
|
"tuxpa.in/a/gambit"
|
|
"tuxpa.in/a/gambit/helper"
|
|
)
|
|
|
|
// Compile-time assertion that *EpsilonGreedy satisfies gambit.Bandit.
var _ gambit.Bandit = (*EpsilonGreedy)(nil)
|
|
|
|
type EpsilonGreedy struct {
|
|
Epsilon float64
|
|
cr helper.CountReward
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Select(r float64) int {
|
|
if r > u.Epsilon {
|
|
return int(u.cr.RewardMax())
|
|
}
|
|
return frand.Intn(u.cr.Size())
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Update(a int, r float64) error {
|
|
return u.cr.Update(a, r)
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Reset(n int) error {
|
|
u.cr.ResetTo(n)
|
|
return nil
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Size() int {
|
|
return u.cr.Size()
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Count(res []int) {
|
|
u.cr.Count(res)
|
|
}
|
|
|
|
func (u *EpsilonGreedy) Reward(res []float64) {
|
|
u.cr.Reward(res)
|
|
}
|