Monday, March 14, 2016

Bandit algorithms 5 - The Softmax Algorithm

-- Introducing the Softmax Algorithm

The Softmax algorithm tries to cope with arms differing in estimated value by explicitly incorportatin g information about the reward rates of the available arms into its method for choosing which arm to selet when it explores.

def categorical_draw(probs):
  z = random.random()
  cum_prob = 0.0
  for i in range(len(probs)):
    prob = probs[i]
    cum_prob += prob
    if cum_prob > z:
      return i

  return len(probs) - 1

def select_arm(self):
  z = sum(self.values)
  probs = [v / z for v in self.values]
  return categorical_draw(probs)


Temperature parameter based on an analogy with physics in which systems at high temperatures tend to behave randomly, while they take on more structure at low temperatures. In fact, the full Softmax algorithm is closely related to a concept called the Boltzmann distribution in physics, which is used to describe how groups of particles behave.

We’ll call this new temperature parameter tau. We introduce tau to produce the following new algorithm:

At time T, select one of the two arms with probabilities computed as follows:

exp(rA / tau) / (exp(rA / tau) + exp(rB / tau))
exp(rB / tau) / (exp(rA / tau) + exp(rB / tau))
For whichever arm you picked, update your estimate of the mean using the same update rule we used for the epsilon-Greedy algorithm.

-- Implementing the Softmax Algorithm

import math
import random

def categorical_draw(probs):
  z = random.random()
  cum_prob = 0.0
  for i in range(len(probs)):
    prob = probs[i]
    cum_prob += prob
    if cum_prob > z:
      return i

  return len(probs) - 1

class Softmax:
  def __init__(self, temperature, counts, values):
    self.temperature = temperature
    self.counts = counts
    self.values = values
    return

  def initialize(self, n_arms):
    self.counts = [0 for col in range(n_arms)]
    self.values = [0.0 for col in range(n_arms)]
    return

  def select_arm(self):
    z = sum([math.exp(v / self.temperature) for v in self.values])
    probs = [math.exp(v / self.temperature) / z for v in self.values]
    return categorical_draw(probs)

  def update(self, chosen_arm, reward):
    self.counts[chosen_arm] = self.counts[chosen_arm] + 1
    n = self.counts[chosen_arm]

    value = self.values[chosen_arm]
    new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
    self.values[chosen_arm] = new_value
    return

execfile("core.py")

import random

random.seed(1)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
n_arms = len(means)
random.shuffle(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
print("Best arm is " + str(ind_max(means)))

f = open("algorithms/softmax/standard_softmax_results.tsv", "w")

for temperature in [0.1, 0.2, 0.3, 0.4, 0.5]:
  algo = Softmax(temperature, [], [])
  algo.initialize(n_arms)
  results = test_algorithm(algo, arms, 5000, 250)
  for i in range(len(results[0])):
      f.write(str(temperature) + "\t")
      f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")

f.close()

Measuring the Performance of the Softmax Algorithm

execfile("core.py")

import random

random.seed(1)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
n_arms = len(means)
random.shuffle(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
print("Best arm is " + str(ind_max(means)))

f = open("algorithms/softmax/standard_softmax_results.tsv", "w")

for temperature in [0.1, 0.2, 0.3, 0.4, 0.5]:
  algo = Softmax(temperature, [], [])
  algo.initialize(n_arms)
  results = test_algorithm(algo, arms, 5000, 250)
  for i in range(len(results[0])):
      f.write(str(temperature) + "\t")
      f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")

f.close()

-- The Annealing Softmax Algorithm

Annealing is the process of decreasing the temperature in a Softmax algorithm over time.
Annealing is a process of modifying a bandit algorithm’s behavior so that it will explore less over time.

t = sum(self.counts) + 1
temperature = 1 / math.log(t + 0.0000001)

import math
import random

def categorical_draw(probs):
  z = random.random()
  cum_prob = 0.0
  for i in range(len(probs)):
    prob = probs[i]
    cum_prob += prob
    if cum_prob > z:
      return i

  return len(probs) - 1

class AnnealingSoftmax:
  def __init__(self, counts, values):
    self.counts = counts
    self.values = values
    return

def initialize(self, n_arms):
    self.counts = [0 for col in range(n_arms)]
    self.values = [0.0 for col in range(n_arms)]
    return

def select_arm(self):
    t = sum(self.counts) + 1
    temperature = 1 / math.log(t + 0.0000001)

    z = sum([math.exp(v / temperature) for v in self.values])
    probs = [math.exp(v / temperature) / z for v in self.values]
    return categorical_draw(probs)

def update(self, chosen_arm, reward):
    self.counts[chosen_arm] = self.counts[chosen_arm] + 1
    n = self.counts[chosen_arm]

    value = self.values[chosen_arm]
    new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
    self.values[chosen_arm] = new_value

    return

No comments:

Post a Comment

Blog Archive