In the previous exercise, we had to use the rnn_layer function during serving because we created a loop over sequences (while serving). This is a bad idea, because it makes the graph bigger unnecessarily. Now we will demonstrate a better code organization.
Not only that, but rnn_layer is a function and cannot be easily saved to a metagraph. Creating a new rnn_layer does not reuse the weights, even though we specify the right variable scope with reuse=True. Hence we look for a way to use the network in another file without having to invoke rnn_layer.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Length of each window sliced from the generated signal (read by `generator`
# and used for the static shapes of the training dataset).
SEQ = 600
# Number of units in each SimpleRNNCell of the stacked RNN.
HIDDEN = 300
Feedable iterator
# Element structure shared by every dataset fed through the handle: a
# (batch, time) float tensor and a (batch, time, 1) float tensor.
output_types = (tf.float32,tf.float32)
output_shapes = (tf.TensorShape((None,None)),tf.TensorShape((None,None,1)))
# Scalar string placeholder: feeding a concrete iterator's string handle here
# selects which dataset `iterator.get_next()` reads from.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle,output_types,output_shapes)
# idx: x-coordinates of the window; y: signal values of the window.
idx,y = iterator.get_next()
def generator(seq_len=None, num_samples=2000):
    """Yield random fixed-length windows of a synthetic periodic signal.

    The signal is ``0.5`` minus a sum of nine sine harmonics (amplitude
    ``0.318 / k``), each perturbed by a cosine term whose small random
    coefficient is drawn once per call. Windows are cut at random start
    positions from 1000 samples of that signal over ``[0, 200]``.

    Args:
        seq_len: Length of every yielded window. ``None`` (the default) uses
            the module-level ``SEQ`` as it is when iteration starts, which is
            what a no-argument call did before this parameter existed.
        num_samples: Number of windows to yield.

    Yields:
        ``(x_window, y_window)``: two 1-D arrays of length ``seq_len`` holding
        the x-coordinates and the matching signal values.

    Raises:
        ValueError: If ``seq_len`` is not positive, or is so long that a
            window could run past the end of the signal (which would silently
            yield a short window).
    """
    if seq_len is None:
        seq_len = SEQ
    n_points = 1000
    max_start = 400  # exclusive upper bound of the random start index
    if seq_len < 1 or (max_start - 1) + seq_len > n_points:
        raise ValueError(
            "seq_len must be between 1 and %d, got %r"
            % (n_points - max_start + 1, seq_len)
        )

    x = np.linspace(0, 200, n_points)
    b = np.random.uniform(-.05, .05, 10)  # b[0] is drawn but never used
    y = 0.5
    for k in range(1, 10):
        omega = k * np.pi / 20.
        y = y - ((0.318 / k) * np.sin(omega * x) + b[k] * np.cos(omega * x))

    for _ in range(num_samples):
        start = np.random.randint(0, max_start)  # scalar
        yield (x[start:start + seq_len], y[start:start + seq_len])
Training dataset
# Training pipeline: windows of length SEQ produced by `generator`.
dataset = tf.data.Dataset.from_generator(generator,(tf.float32,tf.float32),((SEQ,),(SEQ,)))
# Add a trailing feature axis to the signal so each time step is a 1-vector.
dataset = dataset.map(lambda x,y:(x,tf.expand_dims(y,axis=1)))
# A single pass over the generator's samples, one window per batch.
dataset = dataset.repeat(1)
dataset = dataset.batch(1)
train_iterator = dataset.make_one_shot_iterator()
# Notebook display of the resulting element shapes.
dataset.output_shapes
# Sanity check: pull one batch through the feedable iterator and plot it.
with tf.Session() as sess:
    # The string handle selects the training iterator for `iterator.get_next()`.
    hdl = sess.run(train_iterator.string_handle())
    x_, y_ = sess.run([idx, y], feed_dict={handle: hdl})
# Drop the trailing feature axis of the signal before plotting.
plt.plot(x_, y_.reshape(-1, SEQ), '*')
Create the rnn
# Next-step prediction: the target is the signal shifted one step ahead of the
# network input (the input is y[:, :-1]).
target = y[:,1:]
# Two stacked SimpleRNN cells with ReLU activation, HIDDEN units each.
cells = [tf.keras.layers.SimpleRNNCell(HIDDEN,activation=tf.nn.relu),tf.keras.layers.SimpleRNNCell(HIDDEN,activation=tf.nn.relu)]
#cells = tf.keras.layers.LSTMCell(HIDDEN,activation=tf.nn.relu)
# With return_state=True the layer returns the per-step outputs followed by the
# final state of each cell (unpacked as three values where it is called).
rnn_layer = tf.keras.layers.RNN(cells,return_state=True,return_sequences=True,unroll=True)
Unrolled version. We have to know the sequence size beforehand (Do not unroll when you use LSTM for this sequence size)
# Unrolled graph: the input is every sample but the last, reshaped to a static
# (batch, SEQ-1, 1) tensor because unrolling needs a known sequence length.
X = tf.reshape(y[:, :-1], (-1, SEQ - 1, 1))
with tf.variable_scope("RNN", reuse=tf.AUTO_REUSE):
    # H,h1_last = rnn_layer(X)
    # H holds the per-step outputs; h1_last / h2_last are the final cell states.
    H, h1_last, h2_last = rnn_layer(X)
with tf.variable_scope("Out", reuse=tf.AUTO_REUSE):
    # Project every hidden vector to a single predicted value.
    outputs = tf.layers.dense(H, 1)
Give a hook for rolled version. This is used when we get random sequence size from training data
#Rolled : used when we have Random sequence
# Switch the shared layer to the non-unrolled (loop) form before calling it again.
rnn_layer.unroll = False
# No static reshape here, so the time dimension stays dynamic.
X = y[:, :-1]
with tf.variable_scope("RNN", reuse=True):
    # H_r,h1_last_r = rnn_layer(X)
    H_r, h1_last_r, h2_last_r = rnn_layer(X)
with tf.variable_scope("Out", reuse=True):
    # Same output projection scope as the unrolled graph.
    outputs_r = tf.layers.dense(H_r, 1)
Generate version. Do not unroll INSIDE the graph for production run! you will make the graph bigger
#Dynamic
# Generation graph: the caller feeds both the input sequence and the initial
# state of each cell, and can read back the final states to continue stepping.
rnn_layer.unroll = False
# State placeholders are sized with HIDDEN so they always match the width the
# cells were built with.
h1_initial_eval = tf.placeholder(tf.float32, (None, HIDDEN))
h2_initial_eval = tf.placeholder(tf.float32, (None, HIDDEN))
# (batch, time, 1) input with dynamic batch and time dimensions.
x_in = tf.placeholder(tf.float32, (None, None, 1))
with tf.variable_scope("RNN", reuse=True):
    H_eval, h1_last_eval, h2_last_eval = rnn_layer(
        x_in, initial_state=[h1_initial_eval, h2_initial_eval])
with tf.variable_scope("Out", reuse=True):
    outputs_eval = tf.layers.dense(H_eval, 1)
# Notebook display of the variables created so far.
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
# Mean squared error between the unrolled graph's predictions and the target.
loss = tf.reduce_mean(tf.square(outputs-target))
#loss = tf.reduce_mean(tf.square(outputs_r-target)) #for lstm, unrolling takes ton of time to load. and outputs nan
optimizer = tf.train.AdamOptimizer(0.0001)
train = optimizer.minimize(loss)
# Created after the optimizer, so the saver is built with the optimizer's
# variables already in the graph.
saver = tf.train.Saver()
!rm models/RNN3/*
def train_rnn():
    """Train for one pass over the training iterator, then save a checkpoint.

    Runs the `train` op until the one-shot training iterator is exhausted,
    printing the mean loss of every 500 batches, then prints the elapsed time
    and writes the variables to ``models/RNN3/my_first_model.ckpt``.

    The batch counter starts at zero and is advanced only after a step
    succeeds, so each report covers exactly 500 batches and is labelled with
    the true number of batches processed.
    """
    import os
    import time

    checkpoint_path = 'models/RNN3/my_first_model.ckpt'
    report_every = 500

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        hdl = sess.run(train_iterator.string_handle())
        start = time.time()
        batches_done = 0
        window_losses = []
        try:
            while True:
                batch_loss, _ = sess.run([loss, train], {handle: hdl})
                batches_done += 1
                window_losses.append(batch_loss)
                if batches_done % report_every == 0:
                    avg_loss = np.array(window_losses).mean()
                    print("Batch: ", batches_done, avg_loss)
                    window_losses = []
        except tf.errors.OutOfRangeError:
            # Expected end of training: the one-shot iterator ran out of data.
            pass
        elapsed = time.time() - start
        print("Elapsed time : ", elapsed, " s")
        # Saver.save does not create missing parent directories.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        # Must run while the session is still open.
        saver.save(sess, checkpoint_path)
# Run the training pass now; it writes the checkpoint that the evaluation
# cells below restore.
train_rnn()
Eval dataset
# Evaluation uses shorter windows. `generator` reads SEQ when it is iterated,
# so SEQ must still be 300 whenever this dataset is consumed.
SEQ = 300
# Declare the static shapes from SEQ so they cannot drift from the window
# length the generator actually produces.
dataset_t = tf.data.Dataset.from_generator(
    generator, (tf.float32, tf.float32), ((SEQ,), (SEQ,)))
# Add a trailing feature axis to the signal so each time step is a 1-vector.
dataset_t = dataset_t.map(lambda x, y: (x, tf.expand_dims(y, axis=1)))
dataset_t = dataset_t.repeat(1)
dataset_t = dataset_t.batch(1)
test_iterator = dataset_t.make_one_shot_iterator()
Use rolled version
# Evaluate the rolled graph on one test window using the saved weights.
with tf.Session() as sess:
    saver.restore(sess, 'models/RNN3/my_first_model.ckpt')
    hdl = sess.run(test_iterator.string_handle())
    # Predictions and targets come from the same batch in a single run call.
    oe, t = sess.run([outputs_r, target], feed_dict={handle: hdl})
# Target as a line, predictions as markers.
plt.plot(t[0].reshape(299))
plt.plot(oe[0].reshape(299), '*')
Use generate version
# Evaluate the generation graph: fetch one test window, then feed it through
# the placeholders with zero initial states.
with tf.Session() as sess:
    saver.restore(sess, 'models/RNN3/my_first_model.ckpt')
    hdl = sess.run(test_iterator.string_handle())
    # Input and target are fetched together so they come from the same batch.
    x_init, t = sess.run([y[:, :-1], target], {handle: hdl})
    # Zero states sized with HIDDEN to match the width of the RNN cells.
    h1_init = np.zeros((1, HIDDEN))
    h2_init = np.zeros((1, HIDDEN))
    oe = sess.run(outputs_eval,
                  {h1_initial_eval: h1_init,
                   h2_initial_eval: h2_init,
                   x_in: x_init})
plt.plot(t[0].reshape(299))
plt.plot(oe[0].reshape(299), '*')
Test 1 : (NOT COMMON) Give just a single initial seed value and let the hidden state evolve
# Free-running generation from a single seed value: each prediction and the
# returned cell states are fed back in as the next step's input and state.
with tf.Session() as sess:
    saver.restore(sess, 'models/RNN3/my_first_model.ckpt')
    hdl = sess.run(test_iterator.string_handle())
    t = sess.run(target, {handle: hdl})
    # Zero states sized with HIDDEN to match the width of the RNN cells.
    h1_init = np.zeros((1, HIDDEN))
    h2_init = np.zeros((1, HIDDEN))
    # np.float was removed in NumPy 1.24; float32 matches the x_in placeholder.
    x_init = np.array(t[0][0]).reshape(1, 1, 1).astype(np.float32)
    out = []
    for i in range(300):
        out.append(x_init)
        x_init, h1_init, h2_init = sess.run(
            [outputs_eval, h1_last_eval, h2_last_eval],
            {h1_initial_eval: h1_init,
             h2_initial_eval: h2_init,
             x_in: x_init})
plt.plot(t[0].reshape(299))
plt.plot(np.array(out).reshape(300), '*')
Test 2: (COMMON) Provide an initial seed whose length equals the training sequence length, i.e. append the 600th prediction and use the last 599 values as the seed for predicting the 601st value.
Note: Using init length < 599 also provided similar results
# Sliding-window generation: predict the next value from the last INIT_LENGTH
# values (starting from zero states each time), append it, and repeat.
# The training iterator is used here, so SEQ goes back to the training window
# length that `generator` slices by.
SEQ = 600
with tf.Session() as sess:
    saver.restore(sess, 'models/RNN3/my_first_model.ckpt')
    hdl = sess.run(train_iterator.string_handle())
    t = sess.run(target, {handle: hdl})
    # Zero states sized with HIDDEN to match the width of the RNN cells.
    h1_init = np.zeros((1, HIDDEN))
    h2_init = np.zeros((1, HIDDEN))
    INIT_LENGTH = 599
    # Keep plain scalars in the list: mixing shape-(1,) rows with the scalar
    # predictions appended below makes a ragged list, which newer NumPy
    # refuses to convert to an array.
    x_init = list(t[0, :INIT_LENGTH, 0])
    for i in range(500):
        # np.float was removed in NumPy 1.24; float32 matches the placeholder.
        window = np.array(x_init[-INIT_LENGTH:], dtype=np.float32)
        out = sess.run(outputs_eval,
                       {h1_initial_eval: h1_init,
                        h2_initial_eval: h2_init,
                        x_in: window.reshape(1, INIT_LENGTH, 1)})
        # Only the prediction for the final time step is new.
        x_init.append(out[0, -1, 0])
plt.plot(t[0].reshape(599))
plt.plot(x_init, '*')
1) The generated sequence is not always close enough to the target (wrong evaluation). We get reasonable results with the last generation method.
2) Do not unroll INSIDE the graph for production run! you will make the graph bigger
3) Unrolled LSTM for this sequence size makes things slower. It also does not show the right result. (shows nan)