Yesterday I crammed the first 20 lessons of this tutorial, so here are some notes to organize what I picked up. Most of what I learned is reflected in the code below and the comments in it. I also add a few extra notes on using tensorboard further down.

Code:

# This program fits the curve y = x^2 - 0.5

# Our leading actor
import tensorflow as tf
# In this program, array structure in numpy is used
import numpy as np
# This module is used to plot the training process
import matplotlib.pyplot as plt

# This function adds a layer to the current network
# inputs is the output tensor of the previous layer
# in_size is the number of input features the layer cares about
# same for out_size
# n_layer is an integer, helping to generate the layer name for tensorboard output
# The activation function "activates" certain outputs. Use it according to the situation
def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
	# The usage of % is just like printf() in C language
	layer_name = "layer%s" % n_layer
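	# e.g. n_layer=1 gives layer_name "layer1"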

	# Use name_scope to group nodes when generating the graph in tensorboard
	with tf.name_scope(layer_name):
		# Ditto below; this pattern is not commented again in the following code
		with tf.name_scope("weights"):
			# random_normal is in tensorflow module, generating random number according to
			# normal distribution
			# Use [a, b] to represent an a*b matrix
			# Assign string to name argument for visualization
			weights = tf.Variable(tf.random_normal([in_size, out_size]), name="W")
		
		# Use summary.histogram(name_string, variable) to record this variable for tensorboard's HISTOGRAMS tab
		tf.summary.histogram(layer_name + '/weights', weights)

		with tf.name_scope("bias"):
			# Use the zeros function to fill a vector or matrix with zeros
			# It is suggested to initialize biases with a small non-zero value
			biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name="b")
		# Likewise, record the biases
		tf.summary.histogram(layer_name + '/biases', biases)

		# Recall the rules of matrix multiplication and apply them here:
		# each 1*in_size row multiplies the in_size*out_size matrix, then a 1*out_size bias vector is added,
		# giving a 1*out_size row as the result. Think row by row.
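		# For the whole batch, inputs has shape (N, in_size), so matmul gives (N, out_size),
		# and the (1, out_size) biases row is broadcast across all N rows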
		with tf.name_scope("Wx_plus_b"):
			wx_plus_b = tf.matmul(inputs, weights) + biases
		if activation_function is None:
			outputs = wx_plus_b
		else:
			outputs = activation_function(wx_plus_b)
		tf.summary.histogram(layer_name + '/outputs', outputs)
		return outputs

# Get 300 points in [-1, 1] on the x axis
# Use np.newaxis to add a dimension
# View it as a convention: if you use one-dimensional data for subsequent
# matrix calculations, you need to use np.newaxis to reshape the array,
# i.e., add a dimension after the ":"
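# e.g. np.linspace(-1, 1, 300) has shape (300,); after [:, np.newaxis] it becomes (300, 1)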
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
# Generate small Gaussian noise with the same shape as x_data
noise = np.random.normal(0, 0.05, x_data.shape)
# The observed values of y (the true function plus noise)
y_data = np.square(x_data) - 0.5 + noise

with tf.name_scope("inputs"):
	# [None, 1]: the input matrix's row count is variable (the batch size), and its column count is 1
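	# e.g. when feeding x_data of shape (300, 1), None resolves to 300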
	xs = tf.placeholder(tf.float32, [None, 1], name="x_input")
	ys = tf.placeholder(tf.float32, [None, 1], name="y_input")

# The input size equals the number of attributes in the data
# l1 is the hidden layer, so its out_size is 10 (10 hidden neurons)
l1 = add_layer(xs, 1, 10, n_layer=1, activation_function=tf.nn.relu)
prediction = add_layer(l1, 10, 1, n_layer=2)

with tf.name_scope("loss"):
	# The loss is the mean squared error (MSE)
	# reduction_indices=[1] sums along axis 1 (one sum per row); [0] sums along axis 0 (one sum per column)
	# The standard way to understand it: it reduces the dimensionality of the input tensor
	# 0 is the outermost dimension, then 1, 2, and so on. By default all dimensions
	# are reduced and the result is a scalar
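	# e.g. for t = [[1, 2], [3, 4]]:
	#   tf.reduce_sum(t, reduction_indices=[1]) -> [3, 7]   (row sums)
	#   tf.reduce_sum(t, reduction_indices=[0]) -> [4, 6]   (column sums)
	#   tf.reduce_sum(t)                        -> 10       (a scalar)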
	loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
	tf.summary.scalar("loss/", loss)
with tf.name_scope("train"):
	# 0.1 is the learning rate. If it is too large, the network cannot reach the optimal solution;
	# if it is too small, the network may converge too slowly.
	train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# As soon as you define any Variable in tensorflow,
# you need to use this function to initialize all the variables
init = tf.global_variables_initializer()

# Use with syntax to omit sess.close()
with tf.Session() as sess:
	# These two lines set up the summaries and log writer for visualization
	merged = tf.summary.merge_all()
	writer = tf.summary.FileWriter("logs/", sess.graph)
	# View sess as a pointer: point it at init and "dereference" it by running it
	sess.run(init)
	fig = plt.figure()
	# Divide the canvas into a 1*1 grid and plot in the 1st cell
	ax = fig.add_subplot(1, 1, 1)
	# scatter plots the input data as points in two dimensions
	ax.scatter(x_data, y_data)
	# ion() turns on interactive mode so the figure is drawn without waiting for interaction
	plt.ion()
	plt.show()
	# Train the network for 1000 iterations
	for i in range(1000):
		# For now, just remember these visualization statements...
		sess.run(train_step, feed_dict={xs:x_data, ys:y_data})
		if i % 50 == 0:
			result = sess.run(merged, feed_dict={xs:x_data, ys:y_data})
			writer.add_summary(result, i)
			# Points sess to loss and run it
			print(sess.run(loss, feed_dict={xs:x_data, ys:y_data}))

			# Use try-except to catch the error raised on the first draw (at that point there is no line to remove yet)
			try:
				# Remove the previous line before each redraw so the plot looks cleaner
				ax.lines.remove(lines[0])
			except Exception:
				pass
			
			# Points sess to prediction and run it
			prediction_val = sess.run(prediction, feed_dict={xs:x_data})

			# "-": solid line "r": red color lw: line width
			lines = ax.plot(x_data, prediction_val, "r-", lw=5)		
			# Use pause to see the drawing process
			plt.pause(0.1)

Using tensorboard:

  • Sometimes, because of the firewall or some other odd reason, you cannot connect via the default 0.0.0.0 address. In that case, append this flag to the original command:
--host=127.0.0.1

Then connect in the browser using the address 127.0.0.1 (a full example command is given after this list).

  • Under Windows CMD, the path following --logdir= does not need double quotes.
  • When trying this myself, I did not follow the tutorial by cd-ing the shell into the logs folder; instead I ran the tensorboard command from its parent directory, which I think makes more sense.
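
For example, a minimal sketch of the full command, assuming the log directory is logs/ as in the code above and that the shell is in its parent directory (both --logdir and --host are standard tensorboard flags):

tensorboard --logdir=logs --host=127.0.0.1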

Finally, a few screenshots related to this network:

The network structure shown by tensorboard
Understanding the reduction_indices (or axis) parameter with the help of the diagram
The fitted curve
The loss curve