
1 TensorFlow Tutorial
Presented by: Ankur Mali, IST 597

2 Numerical Computation
Overflow and Underflow
>>> import numpy as np
>>> a = np.array([2**63 - 1, 2**63 - 1], dtype=int)
>>> a
array([9223372036854775807, 9223372036854775807])
>>> a.dtype
dtype('int64')
>>> a + 1
array([-9223372036854775808, -9223372036854775808])
>>> a.sum()
-2

>>> b = np.array([2**61 - 1, 2**61 - 1], dtype=int)
>>> b + 1
array([2305843009213693952, 2305843009213693952])
>>> b.sum()
4611686018427387902
With int64, 2**63 - 1 is the largest representable value, so both a + 1 and a.sum() silently wrap around; the smaller 2**61 - 1 values still fit, so b behaves as expected.
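
The slide title also mentions underflow; a minimal float32 sketch of that case (not from the original slides, values chosen only for illustration):

# Underflow: the true product 1e-60 is below float32's smallest positive number,
# so it silently becomes zero, and downstream ops then produce -inf/nan.
import numpy as np
x = np.float32(1e-30)
print(x * x)           # prints 0.0
print(np.log(x * x))   # prints -inf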

3 Floating point error
x = 1e9
eps = 1e-6
for _ in range(int(1e6)):
    x += eps
print(x)   # not the mathematically expected 1000000001.0: each addition is rounded
           # to the nearest representable float, and the error accumulates

# With a matching subtraction in the same loop:
x = 1e9
eps = 1e-6
for _ in range(int(1e6)):
    x += eps
    x -= eps
print(x)   # compare with the result above
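
One standard mitigation (not from the original slides) is compensated (Kahan) summation, which carries a separate correction term for the low-order bits lost in each addition; a minimal sketch, with an illustrative function name:

# Kahan (compensated) summation sketch
def kahan_sum(start, increment, n):
    total = start
    c = 0.0                      # running compensation for lost low-order bits
    for _ in range(n):
        y = increment - c        # re-inject what was lost on the previous step
        t = total + y
        c = (t - total) - y      # the part of y that did not make it into total
        total = t
    return total

print(kahan_sum(1e9, 1e-6, int(1e6)))  # far closer to 1000000001.0 than the naive loop above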

4 Stochastic Gradient Descent
[Figure] SGD optimization on loss surface contours (source: ruder.io)
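
For reference (not on the original slide): for parameters θ, learning rate η, and a loss J evaluated on a single example or minibatch (x_i, y_i), SGD applies the update θ ← θ - η ∇_θ J(θ; x_i, y_i). The hand-rolled TensorFlow optimizers on the following slides implement this assign step and its momentum, RMSProp, and Adam variants.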

5 Stochastic Gradient Descent in TensorFlow
# Imports used by this and the following optimizer snippets
import numpy as np
import tensorflow as tf

def sgd(cost, params, lr=0.002):
    g_params = tf.gradients(cost, params)
    updates = []
    for param, g_param in zip(params, g_params):
        updates.append(param.assign(param - lr * g_param))
    return updates
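
A minimal usage sketch (not from the slides), assuming the cost tensor, the variables W and b, and the placeholders and data X, Y, train_X, train_Y defined in the linear-regression slides later in this deck:

update_ops = sgd(cost, [W, b], lr=0.01)      # list of assign ops returned above
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(100):
        sess.run(update_ops, feed_dict={X: train_X, Y: train_Y})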

6 SGD with clip
def sgd_clip(cost, params, lr=0.002, thld=1.0):
    # thld: clipping threshold; 1.0 is an assumed default (the original value is not shown)
    g_params = tf.gradients(cost, params)
    updates = []
    for param, g_param in zip(params, g_params):
        # rescale the gradient wherever its magnitude exceeds the threshold
        g_param = tf.where(tf.greater(tf.abs(g_param), thld),
                           thld / tf.norm(g_param, ord=1) * g_param,
                           g_param)
        updates.append(param.assign(param - lr * g_param))
    return updates
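
For comparison (not on the original slide), TensorFlow 1.x also ships built-in clipping ops; a sketch of the same optimizer using tf.clip_by_global_norm, which rescales all gradients jointly:

def sgd_clip_builtin(cost, params, lr=0.002, clip_norm=1.0):
    g_params = tf.gradients(cost, params)
    clipped, _ = tf.clip_by_global_norm(g_params, clip_norm)   # joint rescaling of all gradients
    return [param.assign(param - lr * g) for param, g in zip(params, clipped)]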

7 Momentum
def momentum(cost, params, lr=0.003, gamma=0.9):
    # gamma: momentum coefficient; 0.9 is an assumed default (the original value is not shown)
    g_params = tf.gradients(cost, params)
    updates = []
    for param, g_param in zip(params, g_params):
        v = tf.Variable(np.zeros(param.get_shape().as_list(), dtype='float32'), name='v')
        updates.append(v.assign(gamma * v - lr * g_param))
        with tf.control_dependencies(updates):
            updates.append(param.assign(param + v))
    return updates
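
For reference (not on the slide): the function implements the classical momentum update v = gamma*v - lr*grad followed by param = param + v, matching the two assigns in the code; the control dependency is there to make sure the velocity update is applied before the parameter update.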

8 RMSProp
def rmsprop(cost, params, lr=0.002, gamma=0.9, eps=1e-8):
    # gamma: decay rate of the squared-gradient average; 0.9 is an assumed default (the original value is not shown)
    g_params = tf.gradients(cost, params)
    updates = []
    for param, g_param in zip(params, g_params):
        ms_g = tf.Variable(np.zeros(param.get_shape().as_list(), dtype='float32'), name='ms_g')
        updates.append(ms_g.assign(gamma * ms_g + (1. - gamma) * g_param**2))
        with tf.control_dependencies(updates):
            updates.append(param.assign(param - lr / tf.sqrt(ms_g + eps) * g_param))
    return updates
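
For reference (not on the slide): RMSProp keeps a running mean of squared gradients, ms_g = gamma*ms_g + (1 - gamma)*grad^2, and divides each step by its square root, param = param - lr*grad/sqrt(ms_g + eps), which is exactly the pair of assigns in the code above.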

9 Adam
def adam(cost, params, alpha=0.002, beta_1=0.9, beta_2=0.999, eps=1e-8):
    # beta_1, beta_2: decay rates for the moment estimates; 0.9 and 0.999 are the
    # standard Adam defaults, assumed here because the original values are not shown
    g_params = tf.gradients(cost, params)
    t = tf.Variable(0.0, dtype=tf.float32, name='t')
    updates = []
    updates.append(t.assign(t + 1))
    with tf.control_dependencies(updates):
        for param, g_param in zip(params, g_params):
            m = tf.Variable(np.zeros(param.get_shape().as_list(), dtype='float32'), name='m')
            v = tf.Variable(np.zeros(param.get_shape().as_list(), dtype='float32'), name='v')
            alpha_t = alpha * tf.sqrt(1. - beta_2**t) / (1. - beta_1**t)
            updates.append(m.assign(beta_1 * m + (1. - beta_1) * g_param))
            updates.append(v.assign(beta_2 * v + (1. - beta_2) * g_param**2))
            updates.append(param.assign(param - alpha_t * m / (tf.sqrt(v) + eps)))
    return updates

See also: "Improving Generalization Performance by Switching from Adam to SGD" (Keskar & Socher, 2017).
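
In practice (not on the original slide), TensorFlow 1.x provides the same algorithm as a built-in optimizer, so the hand-rolled version above can be replaced by a one-liner; cost is assumed to be a scalar loss tensor:

train_op = tf.train.AdamOptimizer(learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8).minimize(cost)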

10 Linear Regression in TensorFlow
Part 1: Import libraries and define training parameters

from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
rng = numpy.random

# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50

11 Part 2: Training data, graph inputs, and model weights

# Training Data
train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
                         7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
                         2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
n_samples = train_X.shape[0]

# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")

12 Part 3: Model, cost, and optimizer

# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)

# Mean squared error
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)

# Gradient descent
# Note: minimize() knows to modify W and b because Variable objects are trainable=True by default
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

13 Part 4: Training loop

# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})

        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))

    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
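
A side note (not on the original slide): because X and Y are shapeless placeholders and cost already averages over n_samples, the inner per-example loop can also be replaced by one full-batch step per epoch, sess.run(optimizer, feed_dict={X: train_X, Y: train_Y}), which turns the per-example SGD above into batch gradient descent.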

14 Part 5: Display and test the model
(continues inside the tf.Session() block from Part 4)

plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()

# Testing example, as requested (Issue #2)
test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])
test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])

print("Testing... (Mean square loss Comparison)")
testing_cost = sess.run(
    tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]),
    feed_dict={X: test_X, Y: test_Y})  # same function as cost above
print("Testing cost=", testing_cost)
print("Absolute mean square loss difference:", abs(training_cost - testing_cost))

plt.plot(test_X, test_Y, 'bo', label='Testing data')

15 Output

16 Second Order Optimization
data_x = [0., 1., 2.]
data_y = [-1., 1., 3.]
batch_size = len(data_x)

x = tf.placeholder(shape=[batch_size], dtype=tf.float32, name="x")
y = tf.placeholder(shape=[batch_size], dtype=tf.float32, name="y")

W = tf.Variable(tf.ones(shape=[1]), dtype=tf.float32, name="W")
b = tf.Variable(tf.zeros(shape=[1]), dtype=tf.float32, name="b")

17 Part 2: Model, loss, gradients, and Hessians

pred = x * W + b
loss = tf.reduce_mean(0.5 * (y - pred)**2)

# Preprocessing for the weight update
wrt_variables = [W, b]
grads = tf.gradients(loss, wrt_variables)
hess = tf.hessians(loss, wrt_variables)
inv_hess = [tf.matrix_inverse(h) for h in hess]

18 Part 3: Second-order weight update rule

# 2nd order weight update rule
update_directions = [
    -tf.reduce_sum(h) * g
    for h, g in zip(inv_hess, grads)
]
op_apply_updates = [
    v.assign_add(up)
    for v, up in zip(wrt_variables, update_directions)
]
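
For reference (not on the original slide): this is Newton's update theta <- theta - H^{-1} * gradient. Because W and b each hold a single value here, every Hessian returned by tf.hessians is a 1x1 matrix, so tf.reduce_sum of its inverse is just one over the second derivative and the update reduces to dividing each gradient by it; for higher-dimensional variables one would need a real matrix-vector product, and usually an approximation, since forming and inverting the full Hessian is expensive.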

19 Part 4: Session and initial loss

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# First loss
initial_loss = sess.run(
    loss,
    feed_dict={
        x: data_x,
        y: data_y
    }
)
print("Initial loss:", initial_loss)

20 Part 5: Optimization loop and results

for iteration in range(100):
    new_loss, _ = sess.run(
        [loss, op_apply_updates],
        feed_dict={
            x: data_x,
            y: data_y
        }
    )
    print("Loss after iteration {}: {}".format(iteration, new_loss))

# Results:
print("Prediction:", sess.run(pred, feed_dict={x: data_x}))
print("Expected:", data_y)

# Output:
# ('Prediction:', array([-0.99999994,  1.        ,  3.        ], dtype=float32))
# ('Expected:', [-1.0, 1.0, 3.0])

21 Questions?

