-- Introducing the Softmax Algorithm
The Softmax algorithm tries to cope with arms differing in estimated value by explicitly incorporating information about the reward rates of the available arms into its method for choosing which arm to select when it explores.
def categorical_draw(probs):
    # Draw an index i with probability probs[i].
    z = random.random()
    cum_prob = 0.0
    for i in range(len(probs)):
        prob = probs[i]
        cum_prob += prob
        if cum_prob > z:
            return i
    return len(probs) - 1

def select_arm(self):
    # Naive proportional version: choose each arm with probability
    # proportional to its current estimated value.
    z = sum(self.values)
    probs = [v / z for v in self.values]
    return categorical_draw(probs)
The temperature parameter is named by analogy with physics, in which systems at high temperatures tend to behave randomly, while they take on more structure at low temperatures. In fact, the full Softmax algorithm is closely related to a concept called the Boltzmann distribution in physics, which is used to describe how groups of particles behave.
We’ll call this new temperature parameter tau. We introduce tau to produce the following new algorithm:
At time T, select one of the two arms with probabilities computed as follows:
exp(rA / tau) / (exp(rA / tau) + exp(rB / tau))
exp(rB / tau) / (exp(rA / tau) + exp(rB / tau))
For whichever arm you picked, update your estimate of the mean using the same update rule we used for the epsilon-Greedy algorithm.
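To get a feel for what tau does before looking at the full implementation, here is a small sketch that computes these two probabilities at several temperatures. The reward estimates rA = 0.1 and rB = 0.9 are made up purely for illustration:

import math

# Hypothetical reward estimates for two arms (illustrative values only).
r_a, r_b = 0.1, 0.9

for tau in [0.01, 0.1, 1.0, 10.0]:
    z = math.exp(r_a / tau) + math.exp(r_b / tau)
    prob_a = math.exp(r_a / tau) / z
    prob_b = math.exp(r_b / tau) / z
    print(tau, prob_a, prob_b)

# As tau -> 0, the algorithm almost always picks the better arm (B);
# as tau grows, the two probabilities approach 0.5 each.

Small values of tau make the algorithm behave nearly greedily, while large values make it choose nearly at random.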
-- Implementing the Softmax Algorithm
import math
import random

def categorical_draw(probs):
    # Draw an index i with probability probs[i].
    z = random.random()
    cum_prob = 0.0
    for i in range(len(probs)):
        prob = probs[i]
        cum_prob += prob
        if cum_prob > z:
            return i
    return len(probs) - 1

class Softmax:
    def __init__(self, temperature, counts, values):
        self.temperature = temperature
        self.counts = counts
        self.values = values
        return

    def initialize(self, n_arms):
        self.counts = [0 for col in range(n_arms)]
        self.values = [0.0 for col in range(n_arms)]
        return

    def select_arm(self):
        # Exponentiate each estimated value, scaled by the temperature,
        # then sample an arm from the resulting distribution.
        z = sum([math.exp(v / self.temperature) for v in self.values])
        probs = [math.exp(v / self.temperature) / z for v in self.values]
        return categorical_draw(probs)

    def update(self, chosen_arm, reward):
        # Incrementally update the running average reward for the chosen arm.
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
        return
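Before turning to the full testing framework, here is a minimal standalone sketch of how the class is used. It is not from the book's harness: it assumes the Softmax class and imports above and simulates two Bernoulli arms inline with made-up success rates:

# Standalone usage sketch (assumes the Softmax class and imports above).
arm_means = [0.1, 0.9]  # hypothetical success rates for two arms

algo = Softmax(0.1, [], [])
algo.initialize(len(arm_means))

for t in range(500):
    chosen_arm = algo.select_arm()
    # Simulate a Bernoulli reward for the chosen arm.
    reward = 1.0 if random.random() < arm_means[chosen_arm] else 0.0
    algo.update(chosen_arm, reward)

print(algo.counts)  # most pulls typically go to the arm with mean 0.9
print(algo.values)  # the frequently chosen arm's estimate should be near 0.9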
-- Measuring the Performance of the Softmax Algorithm
execfile("core.py")
import random

random.seed(1)

# Five Bernoulli arms: four with a 10% success rate and one with 90%.
means = [0.1, 0.1, 0.1, 0.1, 0.9]
n_arms = len(means)
random.shuffle(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
print("Best arm is " + str(ind_max(means)))

f = open("algorithms/softmax/standard_softmax_results.tsv", "w")

# Run the simulation framework from core.py for each temperature setting
# and write the results out as tab-separated values.
for temperature in [0.1, 0.2, 0.3, 0.4, 0.5]:
    algo = Softmax(temperature, [], [])
    algo.initialize(n_arms)
    results = test_algorithm(algo, arms, 5000, 250)
    for i in range(len(results[0])):
        f.write(str(temperature) + "\t")
        f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")

f.close()
-- The Annealing Softmax Algorithm
In the context of the Softmax algorithm, annealing is the process of decreasing the temperature over time. More generally, annealing is the process of modifying a bandit algorithm's behavior so that it explores less over time.
In code, we can set the temperature on every trial based on how many times any arm has been pulled so far:

t = sum(self.counts) + 1
temperature = 1 / math.log(t + 0.0000001)
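To see how this schedule behaves, the quick sketch below (purely illustrative) prints the temperature at a few trial counts. The tiny constant 0.0000001 just keeps math.log(1) from returning exactly zero on the first trial:

import math

# Illustrative only: show how the annealing schedule cools off as trials accumulate.
for t in [1, 2, 10, 100, 1000, 10000]:
    temperature = 1 / math.log(t + 0.0000001)
    print(t, temperature)

# t = 1 gives an enormous temperature (essentially random choice);
# by t = 10000 the temperature has fallen to roughly 0.11, so the
# algorithm behaves almost greedily.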
Putting this schedule into the full Softmax implementation gives us the AnnealingSoftmax class:

import math
import random

def categorical_draw(probs):
    # Draw an index i with probability probs[i].
    z = random.random()
    cum_prob = 0.0
    for i in range(len(probs)):
        prob = probs[i]
        cum_prob += prob
        if cum_prob > z:
            return i
    return len(probs) - 1

class AnnealingSoftmax:
    def __init__(self, counts, values):
        self.counts = counts
        self.values = values
        return

    def initialize(self, n_arms):
        self.counts = [0 for col in range(n_arms)]
        self.values = [0.0 for col in range(n_arms)]
        return

    def select_arm(self):
        # Recompute the temperature on every trial so that it shrinks
        # as the total number of pulls grows.
        t = sum(self.counts) + 1
        temperature = 1 / math.log(t + 0.0000001)

        z = sum([math.exp(v / temperature) for v in self.values])
        probs = [math.exp(v / temperature) / z for v in self.values]
        return categorical_draw(probs)

    def update(self, chosen_arm, reward):
        # Incrementally update the running average reward for the chosen arm.
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
        return
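Because the annealing variant no longer takes a temperature parameter, the measuring script above needs only minor changes. Here is a sketch, assuming the same core.py helpers (BernoulliArm, ind_max, test_algorithm) used earlier and a hypothetical output path:

execfile("core.py")
import random

random.seed(1)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
n_arms = len(means)
random.shuffle(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
print("Best arm is " + str(ind_max(means)))

# Hypothetical output path for this sketch.
f = open("algorithms/softmax/annealing_softmax_results.tsv", "w")

# No temperature loop: the schedule inside select_arm handles exploration decay.
algo = AnnealingSoftmax([], [])
algo.initialize(n_arms)
results = test_algorithm(algo, arms, 5000, 250)
for i in range(len(results[0])):
    f.write("annealing\t")
    f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")

f.close()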