import tensorflow as tf


def average_gradients(tower_grads):
    """Averages the (gradient, variable) pairs computed by each GPU tower."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
        grads = []
        for g, _ in grad_and_vars:
            # Add a leading "tower" dimension so the gradients can be concatenated.
            expanded_g = tf.expand_dims(g, 0)
            grads.append(expanded_g)
        # Average over the tower dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        # Variables are shared across towers, so the first tower's copy suffices.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads
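
# Worked example (hypothetical values): with two towers whose gradients for a
# shared variable v are [1.0, 3.0] and [3.0, 5.0], average_gradients returns a
# single (grad, v) pair with grad == [2.0, 4.0], the element-wise mean across
# towers. The variable reference itself is taken from the first tower.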


def average_mean_and_variance(means_and_variances):
    """Averages the batch-norm moving statistics collected from each tower."""
    averaged = []
    for mean_and_variance in zip(*means_and_variances):
        # Stack each tower's copy of the statistic along a new leading axis.
        expanded = [tf.expand_dims(g, 0) for g in mean_and_variance]
        stacked = tf.concat(axis=0, values=expanded)
        averaged.append(tf.reduce_mean(stacked, 0))
    return averaged


def assign_mean_and_variance(means_and_variances, global_mean_and_variance):
    """Builds ops that overwrite each tower's moving statistics with the global averages."""
    ops = []
    for mean_and_variance in means_and_variances:
        for i, g in enumerate(mean_and_variance):
            ops.append(tf.assign(g, global_mean_and_variance[i]))
    return ops
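
# Intended usage (per the commented-out lines in main below): average the
# per-tower moving_mean/moving_variance values with average_mean_and_variance,
# then use assign_mean_and_variance to write the averages back into every
# tower, keeping the batch-norm statistics synchronized across GPUs.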


def main():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Poly learning-rate policy: lr = base_lr * (1 - step / num_steps) ** power.
        base_lr = tf.constant(base_learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / num_steps), power))
        tf.summary.scalar('lr', learning_rate)
        opt = tf.train.AdamOptimizer(learning_rate)

        multi_grads = []
        multi_mean_and_variance = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(gpu_nums):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('Model', i)) as scope:
                        with tf.name_scope("create_inputs"):
                            reader = ImageReader()
                            image_batch, label_batch = reader.dequeue(batch_size)
                            tf.summary.image('images_{}'.format(i), image_batch, max_outputs=4)
                            tf.summary.image('labels_{}'.format(i), label_batch, max_outputs=4)

                        logits = Model(image_batch).logits
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits, labels=label_batch)
                        l2_losses = [weight_decay * tf.nn.l2_loss(v)
                                     for v in tf.trainable_variables() if 'weights' in v.name]
                        reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
                        tf.summary.scalar('loss_{}'.format(i), reduced_loss)

                        all_trainable = tf.trainable_variables()
                        mean_and_variance = [v for v in tf.global_variables()
                                             if 'moving_mean' in v.name or 'moving_variance' in v.name]
                        # Share variables across the remaining towers.
                        tf.get_variable_scope().reuse_variables()

                        # Limit the batch-norm update ops to this tower's scope.
                        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)
                        with tf.control_dependencies(update_ops):
                            grads = opt.compute_gradients(reduced_loss, all_trainable)
                        multi_grads.append(grads)
                        multi_mean_and_variance.append(mean_and_variance)

        grads = average_gradients(multi_grads)
        # global_mean_and_variance = average_mean_and_variance(multi_mean_and_variance)
        # bn_op = assign_mean_and_variance(multi_mean_and_variance, global_mean_and_variance)
        # ops = [opt.apply_gradients(grads)] + bn_op
        ops = [opt.apply_gradients(grads)]
        train_op = tf.group(*ops)
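
        # A minimal driver sketch (an assumption, not part of the original):
        # start the queue threads behind ImageReader, then feed the current
        # step into step_ph so the poly learning-rate schedule advances.
        init = tf.global_variables_initializer()
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            sess.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            for step in range(num_steps):
                sess.run(train_op, feed_dict={step_ph: step})
            coord.request_stop()
            coord.join(threads)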