TRANSCRIPT
import tensorflow as tf

a = tf.placeholder(tf.int64)
b = tf.placeholder(tf.int64)
add = tf.add(a, b)
mul = tf.mul(a, b)

with tf.Session() as session:
    print "add %i" % session.run(add, feed_dict={a: 1, b: 2})
    print "mul %i" % session.run(mul, feed_dict={a: 20, b: 3})
import tensorflow as tf
hello = tf.constant("Hi TensorFlow")
print hello
session = tf.Session()
print session.run(hello)
a = tf.constant(1)
b = tf.constant(2)
c= a+b
print session.run(c)

tf.placeholder(dtype, shape=None, name=None)
• dtype: The type of elements in the tensor to be fed.
• shape: The shape of the tensor to be fed (optional). If the shape is not specified, you can feed a tensor of any shape.
• name: A name for the operation (optional).
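As a small illustrative sketch of the shape argument (the names here are mine, not from the lecture): a placeholder declared with shape=[None, 3] accepts any number of rows but exactly three columns, while shape=None accepts any shape.

x = tf.placeholder(tf.float32, shape=[None, 3], name="x")   # any batch size, 3 features
doubled = x * 2
with tf.Session() as session:
    print session.run(doubled, feed_dict={x: [[1.0, 2.0, 3.0]]})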
import numpy as np

xy = np.loadtxt('data.txt', unpack=True, dtype='float32')
x_data = xy[0:-1]
# the y data is the last row
y_data = xy[-1]
x1 x2 x3 y
1  1  0  1
1  0  2  2
1  3  0  3
1  0  4  4
1  5  0  5
import tensorflow as tf
import numpy as np

xy = np.loadtxt('data.txt', unpack=True, dtype='float32')
x_data = xy[0:-1]
y_data = xy[-1]

W = tf.Variable(tf.random_uniform([1, len(x_data)], -5.0, 5.0))
hyp = tf.matmul(W, x_data)
cost = tf.reduce_mean(tf.square(hyp - y_data))

optimizer = tf.train.GradientDescentOptimizer(1e-2)
train = optimizer.minimize(cost)

session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(5000):
    session.run(train)
    if step % 100 == 0:
        print step, session.run(cost), session.run(W)
The first column of the data file corresponds to the bias term; index -1 selects the last column (the y values).
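A tiny NumPy sketch of this slicing, using the first three rows of the table above and mirroring what loadtxt(..., unpack=True) returns:

import numpy as np

# three rows of the data file, transposed as loadtxt(..., unpack=True) would return them
xy = np.array([[1, 1, 0, 1],
               [1, 0, 2, 2],
               [1, 3, 0, 3]], dtype='float32').T
print xy[0:-1]   # every column of the file except the last (x1, x2, x3; the first is the bias column)
print xy[-1]     # the last column of the file (the y values)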
Minimize the cost with gradient descent.
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
hyp = W * X
cost = tf.reduce_mean(tf.square(hyp - Y))
x1 x2 x3 y
1  3  1  0
1  3  2  0
1  3  4  0
1  5  5  1
1  7  5  1
1  2  5  1
import tensorflow as tf
import numpy as np
xy = np.loadtxt('softmax.txt',unpack=True,dtype='float32')
x_data = np.transpose(xy[0:3])
y_data = np.transpose(xy[3:])
X = tf.placeholder("float",[None,3])
Y = tf.placeholder("float",[None,3])
W = tf.Variable(tf.zeros([3,3]))
hyp = tf.nn.softmax(tf.matmul(X,W))
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hyp),reduction_indices=1))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
session.run(optimizer,feed_dict={X:x_data,Y:y_data})
if(step%1000==0):
print step, session.run(cost, feed_dict={X:x_data,Y:y_data}), session.run(W)
# 0: A, 1:B, 2:C
a = session.run(hyp,feed_dict={X:[[1,11,7]]})
print a, session.run(tf.arg_max(a,1))
b = session.run(hyp,feed_dict={X:[[1,3,4]]})
print b,session.run(tf.arg_max(b,1))
c = session.run(hyp,feed_dict={X:[[1,1,0]]})
print c, session.run(tf.arg_max(c,1))
all = session.run(hyp,feed_dict={X:[[1,11,7],[1,3,4],[1,1,0]]})
print all, session.run(tf.arg_max(all,1))
Softmax: softmax(z)_i = exp(z_i) / Σ_j exp(z_j)
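A minimal numeric sketch of the formula above (the logit values are illustrative, not taken from the trained model):

import numpy as np

z = np.array([2.0, 1.0, 0.1])          # example logits
p = np.exp(z) / np.sum(np.exp(z))       # softmax probabilities
print p, p.sum()                         # the probabilities sum to 1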
xy = np.loadtxt('xor_data.txt', unpack=True, dtype='float32')
x_data = np.transpose(xy[0:-1])
y_data = np.reshape(xy[-1], (len(xy[-1]), 1))

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W1 = tf.Variable(tf.random_uniform([2,1000], -1.0, 1.0))
W2 = tf.Variable(tf.random_uniform([1000,1], -1.0, 1.0))
b1 = tf.Variable(tf.zeros([1000]), name="Bias1")
b2 = tf.Variable(tf.zeros([1]), name="Bias2")

L2 = tf.nn.relu(tf.matmul(X, W1) + b1)
hyp = tf.sigmoid(tf.matmul(L2, W2) + b2)
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))

a = tf.Variable(0.1)
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)
ReLU (rectified linear unit): relu(x) = max(0, x), used here for the hidden layer.
session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
    session.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 200 == 0:
        print step, session.run(cost, feed_dict={X:x_data, Y:y_data})

correct_prediction = tf.equal(tf.floor(hyp+0.5), Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print session.run([hyp, tf.floor(hyp+0.5), correct_prediction, accuracy], feed_dict={X:x_data, Y:y_data})
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(tf.random_uniform([1, len(x_data)], -1.0, 1.0))

h = tf.matmul(W, X)
hyp = tf.div(1., 1 + tf.exp(-h))
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))

optimizer = tf.train.GradientDescentOptimizer(1e-2)
train = optimizer.minimize(cost)

session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
    session.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 200 == 0:
        print step, session.run(cost, feed_dict={X:x_data, Y:y_data}), session.run(W)

correct_prediction = tf.equal(tf.floor(hyp+0.5), Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print session.run([hyp, tf.floor(hyp+0.5), correct_prediction, accuracy], feed_dict={X:x_data, Y:y_data})
XOR ???
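XOR is not linearly separable, which is why the single-layer logistic model above cannot fit it while the two-layer network can. The contents of xor_data.txt are not shown in the transcript; presumably it holds the four truth-table rows (my assumption):

# xor_data.txt (assumed contents; not shown in the transcript)
# x1 x2 y
0 0 0
0 1 1
1 0 1
1 1 0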
#4. Merge the histograms and summaries created above
merged = tf.merge_all_summaries()
#5. Create the (log) writer
session = tf.Session()
#1. Save to the log file
writer = tf.train.SummaryWriter("../log/tensorboard-log1",session.graph_def)
for step in range(2000):
if(step%100 ==0):
#2. Run the merged summary op
summary = session.run(merged)
#3. Add the summary (log data) to the writer
writer.add_summary(summary)
#6. Launch TensorBoard
# command line : tensorboard --logdir=../log/tensorboard-log1
import tensorflow as tf
import numpy as np
#1. How to declare variables
X = tf.placeholder(tf.float32,name="X-input")
Y = tf.placeholder(tf.float32,name="Y-input")
W1 = tf.Variable(tf.random_uniform([2,1000],-1.0,1.0),name="Weight1")
W2 = tf.Variable(tf.random_uniform([1000,1],-1.0,1.0), name="Weight2")
b1 = tf.Variable(tf.zeros([1000]),name="Bias1")
b2 = tf.Variable(tf.zeros([1]),name="Bias2")
#2. How to write scopes for the Graph view
with tf.name_scope("Layer2") as scope:
L2 = tf.sigmoid(tf.matmul(X,W1)+b1)
with tf.name_scope("Layer3") as scope:
hyp = tf.sigmoid(tf.matmul(L2,W2)+b2)
with tf.name_scope("cost") as scope:
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))
cost_summary = tf.scalar_summary("cost",cost)
with tf.name_scope("opetimizer") as scope:
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(cost)
#3. Add histograms
h1 = tf.histogram_summary("Weight1",W1)
h2 = tf.histogram_summary("Weight2",W2)
b1_histogram = tf.histogram_summary("Bias1",b1)
b2_histogram = tf.histogram_summary("Bias2",b2)
TensorBoard
MNIST

import tensorflow as tf
import numpy as np
def xavier_init(input_size, output_size, uniform=True):
    # Xavier (Glorot) initialization
    if uniform:
        init_range = tf.sqrt(6.0 / (input_size + output_size))
        return tf.random_uniform_initializer(-init_range, init_range)
    else:
        stddev = tf.sqrt(3.0 / (input_size + output_size))
        return tf.truncated_normal_initializer(stddev=stddev)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
learning_rate = 0.001
training_epochs = 25
batch_size = 100
default_node_size = 250
display_step = 1
X = tf.placeholder("float",[None,784])Y = tf.placeholder("float",[None,10])
W1 = tf.get_variable("W1",shape=[784,default_node_size],initializer=xavier_init[784,default_node_size])W2 = tf.get_variable("W2",shape=[784,default_node_size],initializer=xavier_init[default_node_size,default_node_size])W3 = tf.get_variable("W3",shape=[784,default_node_size],initializer=xavier_init[default_node_size,10])
B1 = tf.Variable(tf.random_normal([default_node_size]))
B2 = tf.Variable(tf.random_normal([default_node_size]))
B3 = tf.Variable(tf.random_normal([10]))
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
hyp = tf.add(tf.matmul(L2, W3), B3)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(hyp, Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as session:
    session.run(tf.initialize_all_variables())
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            session.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += session.run(cost, feed_dict={X: batch_xs, Y: batch_ys}) / total_batch
        if epoch % display_step == 0:
            print "Epoch", "%04d" % (epoch + 1), "cost:", "{:.9f}".format(avg_cost)
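The transcript stops after the training loop; a typical evaluation step (a sketch I am adding, using the same legacy-style API, still inside the session block) might look like this:

    correct_prediction = tf.equal(tf.argmax(hyp, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # evaluate on the held-out MNIST test set
    print "Accuracy:", session.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})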
TensorFlow CNN walkthrough
What the machine sees / What a human sees
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
Split the problem of recognizing handwritten digits into two parts:
• [Segmentation] Break an image containing many digits into a sequence of separate images, each containing a single digit.
• [Classification] Classify each individual digit.
The images and equations in these lecture notes match the actual implementation exactly.
• CNN to classify each individual digit
[Figure: CNN architecture diagram. 28×28 input; 5×5 filters to 24×24×32; pool to 12×12×32; 5×5×32 filters to 8×8×64; pool to 4×4×64; fully-connected layer of 256; output of 10; each layer with a bias.]
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
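The feature-map sizes above follow from simple shape arithmetic. A quick sketch (my own check, assuming 'VALID'-style 5×5 convolutions and 2×2 pooling as drawn in the diagram; the code later in this note uses padding='SAME', which instead preserves the spatial size):

# a 5x5 convolution without padding shrinks each side by 4; 2x2 pooling halves it
c1 = 28 - 5 + 1        # 24 -> C1[32@24x24]
p2 = c1 / 2            # 12 -> P2[32@12x12]
c3 = p2 - 5 + 1        # 8  -> C3[64@8x8]
p4 = c3 / 2            # 4  -> P4[64@4x4]
print c1, p2, c3, p4   # 24 12 8 4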
• Throughout this note, we use rectified linear unit (ReLU) as the activation function
Feed Forward Pass
A feedforward neural network has no loops: information is always fed forward, never fed back.
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Training data {(x^(k), y^(k)) : k = 1, …, N}
session.run(predict_optimizer,feed_dict={X:test_x[test_index],Y:test_y[test_index],droprate_last:1.0,droprate_hidden:1.0}))
x_data, y_data, test_x, test_y = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, 10])

test_index = np.arange(len(test_x))
np.random.shuffle(test_index)
test_index = test_index[0:256]
Input layer: X is a 28×28×1 image, X = (x_{i,j}), i, j = 1, …, 28.
X = tf.placeholder("float", [None, 28, 28, 1])
Training data {(x^(k), y^(k)) : k = 1, …, N} and test data.
X shape=[1,28,28,1]
Layer 1 (convolutional)
Each of the 32 filters W1_i (5×5×1) is convolved with X and passed through ReLU:
h1_i(r, c) = relu( (X ∗ W1_i)(r, c) + b1_i ),  i = 1, …, 32.
W1 = init_weight([5,5,1,32])
b1 = init_bias([32])
h1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1,1,1,1], padding='SAME') + b1)
32 filters (5X5X1)
def init_weight(size_array, stddev=0.01):
    return tf.Variable(tf.random_normal(size_array, stddev=stddev))
def init_bias(size_array):
    return tf.Variable(tf.constant(0.0, shape=size_array))
Layer 2 (Max pooling)
Each feature map is downsampled by taking the maximum over non-overlapping 2×2 windows:
h2_i(r, c) = max{ h1_i(2r−1, 2c−1), h1_i(2r−1, 2c), h1_i(2r, 2c−1), h1_i(2r, 2c) },  i = 1, …, 32.
h2 = tf.nn.max_pool(h1, ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None)
• value: A 4-D Tensor with shape [batch, height, width, channels] and type tf.float32.
• ksize: A list of ints that has length >= 4. The size of the window for each dimension of the input tensor.
• strides: A list of ints that has length >= 4. The stride of the sliding window for each dimension of the input tensor.
• padding: A string, either 'VALID' or 'SAME'. The padding algorithm.
• data_format: A string. 'NHWC' and 'NCHW' are supported.
• name: Optional name for the operation.
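A tiny numeric sketch of 2×2, stride-2 max pooling (pure NumPy, illustrative values of my own):

import numpy as np

a = np.array([[ 1,  2,  5,  6],
              [ 3,  4,  7,  8],
              [ 9, 10, 13, 14],
              [11, 12, 15, 16]], dtype=np.float32)
# take the max over non-overlapping 2x2 windows
pooled = a.reshape(2, 2, 2, 2).max(axis=(1, 3))
print pooled   # [[ 4.  8.] [12. 16.]]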
h1 shape=[1,24,24,32]
[Figure: the 2×2 max-pooling operation applied entry by entry to each 24×24 feature map h1_i, producing the 12×12 maps h2_i.]
Layer 3 (convolution)
Each of the 64 filters W3_i (5×5×32) is convolved with all 32 channels of h2 and passed through ReLU:
h3_i(r, c) = relu( (h2 ∗ W3_i)(r, c) + b3_i ),  i = 1, …, 64.
W3= init_weight ([5,5,32,64])
b3= init_bias ([64])
h3 = tf.nn.relu(tf.nn.conv2d(h2,W3,strides=[1,1,1,1],padding='SAME')+b3)
h2 shape=[1,12,12,32]
Layer 4 (pooling)
Each h3 feature map is downsampled by 2×2 max pooling:
h4_i(r, c) = max{ h3_i(2r−1, 2c−1), h3_i(2r−1, 2c), h3_i(2r, 2c−1), h3_i(2r, 2c) },  i = 1, …, 64.
h4 = tf.nn.max_pool(h3, ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
h3 shape=[1,8,8,64]
Layer 5 (fully-connected)
The 4×4×64 output of Layer 4 is flattened into a 1024-vector and fed to a fully-connected layer:
h5_i = relu( Σ_j W5_{j,i} · h4_j + b5_i ),  i = 1, …, 256.
W5 = init_weight([4*4*64, 256])
b5 = init_weight([256])
h4 = tf.reshape(h4, [-1, 4*4*64])
h5 = tf.nn.relu(tf.matmul(h4, W5) + b5)
h4 shape=[1,4,4,64]
Output layer
W_h = init_weight([256, 10])
b_h = init_weight([10])
hyp = tf.matmul(h5, W_h) + b_h

The weights and biases W1, W3, W5, W_h and b1, b3, b5, b_h are obtained by training the network with backpropagation (see the next page).
Backpropagation

cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(hyp, Y))
optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(cost)   # 1e-3 is the learning rate

The goal is to find weights and biases so that the output of the network approximates the target y(x) for all training inputs x. To quantify how well we're achieving this goal, we define a cost function E(W).
Outline of backpropagation
Suppose we are given a training set (x^(1), y^(1)), …, (x^(2000), y^(2000)). When an input pattern x^(k) from the training set is presented to the network described above, it produces an output hyp(x^(k)) that generally differs from the target y^(k).
The goal is to determine
W1_i, b1_i, i = 1, …, 32   ⇒ (5·5·1 + 1)·32 unknowns
W3_i, b3_i, i = 1, …, 64   ⇒ (5·5·32 + 1)·64 unknowns
W5_i, b5_i, i = 1, …, 256  ⇒ (4·4·64 + 1)·256 unknowns
W_h_i, b_h_i, i = 1, …, 10 ⇒ (256 + 1)·10 unknowns
such that W1, b1, W3, b3, W5, b5, W_h, b_h (26·32 + 801·64 + 1025·256 + 257·10 variables in total) minimize the following error:
E(W) = (1/2) Σ_k || hyp(x^(k)) − y^(k) ||².
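As a quick arithmetic check of that variable count (plain Python, my own verification):

conv1 = (5*5*1 + 1) * 32         # 26 * 32    = 832
conv3 = (5*5*32 + 1) * 64        # 801 * 64   = 51264
fc5   = (4*4*64 + 1) * 256       # 1025 * 256 = 262400
out   = (256 + 1) * 10           # 257 * 10   = 2570
print conv1 + conv3 + fc5 + out  # 317066 trainable variables in total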
We update W by W ← W − α · ∂E(W)/∂W, where α represents a learning rate.
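A one-variable sketch of this update rule (illustrative only; E(w) = (w − 3)² is just a toy cost):

w = 0.0
alpha = 0.1
for step in range(50):
    grad = 2 * (w - 3)       # dE/dw for E(w) = (w - 3)^2
    w = w - alpha * grad     # w <- w - alpha * dE/dw
print w                       # converges toward the minimizer w = 3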
Express E(W) in terms of x^(k)
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Gradient of E(W) (F-layer)
The gradient of E(W) with respect to the fully-connected parameters (W_h, b_h and W5, b5) follows from the chain rule, propagating the output error back through hyp and h5.
relu′(x) = 1 if x > 0 and 0 otherwise.
What does this mean? The error is backpropagated only through the nodes that were activated in the forward pass.
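A small NumPy sketch of how this ReLU derivative gates the backpropagated error (illustrative values of my own):

import numpy as np

pre_activation = np.array([0.7, -1.2, 0.0, 2.5])     # values before ReLU in the forward pass
incoming_grad  = np.array([0.3,  0.8, 0.5, -0.1])    # error arriving from the layer above
relu_grad = (pre_activation > 0).astype(np.float32)  # 1 where the node was active, else 0
print incoming_grad * relu_grad                      # error flows only through activated nodes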
Before we go further… (P-layer)
We need the derivative of the max function for the pooling (P) layers.

Derivative of the max function
h4_i(r, c) = max{ h3_i(2r−1, 2c−1), h3_i(2r−1, 2c), h3_i(2r, 2c−1), h3_i(2r, 2c) }.
For example, if h3_i(2r−1, 2c−1) is the largest of the four values, then
∂h4_i(r, c) / ∂h3_i(2r−1, 2c−1) = 1, and the partial derivatives with respect to the other three entries are 0.
Store the positions of the maxima during the feedforward pass!
• The error is backpropagated only through the activated (selected) nodes.
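The same idea for max pooling, as a tiny NumPy sketch (values are illustrative): the incoming gradient for a pooled output goes entirely to the position that held the maximum in the forward pass.

import numpy as np

block = np.array([[1.0, 4.0],
                  [2.0, 3.0]])                      # one 2x2 window of h3_i
incoming_grad = 0.7                                  # dE / dh4_i(r, c)
mask = (block == block.max()).astype(np.float32)    # 1 at the max position, 0 elsewhere
print incoming_grad * mask                           # [[0.  0.7] [0.  0. ]]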
Gradient of E(W) (P-layer)
Input [1@28x28] – L1[32@24x24] – L2[32@12x12] – L3[64@8x8] – L4[64@4x4] – L5[256@1x1] – output[10]
At a pooling layer the error is simply routed backwards: the gradient with respect to an entry of h3_i equals the incoming gradient if that entry was the maximum of its 2×2 window, and 0 otherwise.
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Gradient of E(W) (C-layer)
The gradient of E(W) with respect to a convolution filter (W1_i or W3_i) is itself expressed as a convolution: each filter weight accumulates, over all positions where the filter was applied, the backpropagated error at that position multiplied by the corresponding input activation; the bias gradient (b1_i or b3_i) is the sum of the backpropagated error over the feature map.
[The detailed index-by-index gradient expressions from the slides are not reproduced here.]