TRANSCRIPT
import tensorflow as tf

a = tf.placeholder(tf.int64)
b = tf.placeholder(tf.int64)
add = tf.add(a, b)
mul = tf.mul(a, b)

with tf.Session() as session:
    print "add %i" % session.run(add, feed_dict={a: 1, b: 2})
    print "mul %i" % session.run(mul, feed_dict={a: 20, b: 3})
import tensorflow as tf
hello = tf.constant("Hi TensorFlow")
print hello
session = tf.Session()
print session.run(hello)
a = tf.constant(1)
b = tf.constant(2)
c= a+b
print session.run(c)

tf.placeholder(dtype, shape=None, name=None)
• dtype: The type of elements in the tensor to be fed.
• shape: The shape of the tensor to be fed (optional). If the shape is not specified, you can feed a tensor of any shape.
• name: A name for the operation (optional).
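As a small illustrative sketch of the shape argument (the names here are mine, not from the lecture): a placeholder declared with shape=[None, 3] accepts any number of rows but exactly three columns, while shape=None accepts any shape.

x = tf.placeholder(tf.float32, shape=[None, 3], name="x")   # any batch size, 3 features
doubled = x * 2
with tf.Session() as session:
    print session.run(doubled, feed_dict={x: [[1.0, 2.0, 3.0]]})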
import numpy as np

xy = np.loadtxt('data.txt', unpack=True, dtype='float32')
x_data = xy[0:-1]
# the y data is the last row
y_data = xy[-1]
x1 x2 x3 y
1  1  0  1
1  0  2  2
1  3  0  3
1  0  4  4
1  5  0  5
import tensorflow as tf
import numpy as np

xy = np.loadtxt('data.txt', unpack=True, dtype='float32')
x_data = xy[0:-1]
y_data = xy[-1]

W = tf.Variable(tf.random_uniform([1, len(x_data)], -5.0, 5.0))
hyp = tf.matmul(W, x_data)
cost = tf.reduce_mean(tf.square(hyp - y_data))

optimizer = tf.train.GradientDescentOptimizer(1e-2)
train = optimizer.minimize(cost)

session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(5000):
    session.run(train)
    if step % 100 == 0:
        print step, session.run(cost), session.run(W)
The first column of the data file corresponds to the bias term; index -1 selects the last column (the y values).
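A tiny NumPy sketch of this slicing, using the first three rows of the table above and mirroring what loadtxt(..., unpack=True) returns:

import numpy as np

# three rows of the data file, transposed as loadtxt(..., unpack=True) would return them
xy = np.array([[1, 1, 0, 1],
               [1, 0, 2, 2],
               [1, 3, 0, 3]], dtype='float32').T
print xy[0:-1]   # every column of the file except the last (x1, x2, x3; the first is the bias column)
print xy[-1]     # the last column of the file (the y values)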
Minimize the cost with gradient descent.
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
hyp = W * X
cost = tf.reduce_mean(tf.square(hyp - Y))
x1 x2 x3 y
1  3  1  0
1  3  2  0
1  3  4  0
1  5  5  1
1  7  5  1
1  2  5  1
import tensorflow as tf
import numpy as np
xy = np.loadtxt('softmax.txt',unpack=True,dtype='float32')
x_data = np.transpose(xy[0:3])
y_data = np.transpose(xy[3:])
X = tf.placeholder("float",[None,3])
Y = tf.placeholder("float",[None,3])
W = tf.Variable(tf.zeros([3,3]))
hyp = tf.nn.softmax(tf.matmul(X,W))
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hyp),reduction_indices=1))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
session.run(optimizer,feed_dict={X:x_data,Y:y_data})
if(step%1000==0):
print step, session.run(cost, feed_dict={X:x_data,Y:y_data}), session.run(W)
# 0: A, 1:B, 2:C
a = session.run(hyp,feed_dict={X:[[1,11,7]]})
print a, session.run(tf.arg_max(a,1))
b = session.run(hyp,feed_dict={X:[[1,3,4]]})
print b,session.run(tf.arg_max(b,1))
c = session.run(hyp,feed_dict={X:[[1,1,0]]})
print c, session.run(tf.arg_max(c,1))
all = session.run(hyp,feed_dict={X:[[1,11,7],[1,3,4],[1,1,0]]})
print all, session.run(tf.arg_max(all,1))
Softmax: softmax(z)_i = exp(z_i) / Σ_j exp(z_j)
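A minimal numeric sketch of the formula above (the logit values are illustrative, not taken from the trained model):

import numpy as np

z = np.array([2.0, 1.0, 0.1])          # example logits
p = np.exp(z) / np.sum(np.exp(z))       # softmax probabilities
print p, p.sum()                         # the probabilities sum to 1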
xy = np.loadtxt('xor_data.txt', unpack=True, dtype='float32')
x_data = np.transpose(xy[0:-1])
y_data = np.reshape(xy[-1], (len(xy[-1]), 1))

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W1 = tf.Variable(tf.random_uniform([2,1000], -1.0, 1.0))
W2 = tf.Variable(tf.random_uniform([1000,1], -1.0, 1.0))
b1 = tf.Variable(tf.zeros([1000]), name="Bias1")
b2 = tf.Variable(tf.zeros([1]), name="Bias2")

L2 = tf.nn.relu(tf.matmul(X, W1) + b1)
hyp = tf.sigmoid(tf.matmul(L2, W2) + b2)
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))

a = tf.Variable(0.1)
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)
ReLU (rectified linear unit): relu(x) = max(0, x), used here for the hidden layer.
session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
    session.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 200 == 0:
        print step, session.run(cost, feed_dict={X:x_data, Y:y_data})

correct_prediction = tf.equal(tf.floor(hyp+0.5), Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print session.run([hyp, tf.floor(hyp+0.5), correct_prediction, accuracy], feed_dict={X:x_data, Y:y_data})
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(tf.random_uniform([1, len(x_data)], -1.0, 1.0))

h = tf.matmul(W, X)
hyp = tf.div(1., 1 + tf.exp(-h))
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))

optimizer = tf.train.GradientDescentOptimizer(1e-2)
train = optimizer.minimize(cost)

session = tf.Session()
session.run(tf.initialize_all_variables())
for step in range(10000):
    session.run(train, feed_dict={X:x_data, Y:y_data})
    if step % 200 == 0:
        print step, session.run(cost, feed_dict={X:x_data, Y:y_data}), session.run(W)

correct_prediction = tf.equal(tf.floor(hyp+0.5), Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print session.run([hyp, tf.floor(hyp+0.5), correct_prediction, accuracy], feed_dict={X:x_data, Y:y_data})
XOR ???
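XOR is not linearly separable, which is why the single-layer logistic model above cannot fit it while the two-layer network can. The contents of xor_data.txt are not shown in the transcript; presumably it holds the four truth-table rows (my assumption):

# xor_data.txt (assumed contents; not shown in the transcript)
# x1 x2 y
0 0 0
0 1 1
1 0 1
1 1 0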
#4. Merge the histograms and summaries created above
merged = tf.merge_all_summaries()
#5. Create the (log) writer
session = tf.Session()
#1. Save to the log file
writer = tf.train.SummaryWriter("../log/tensorboard-log1",session.graph_def)
for step in range(2000):
if(step%100 ==0):
#2. Run the merged summary op
summary = session.run(merged)
#3. Add the summary (log data) to the writer
writer.add_summary(summary)
#6. Launch TensorBoard
# command line : tensorboard --logdir=../log/tensorboard-log1
import tensorflow as tf
import numpy as np
#1. How to declare variables
X = tf.placeholder(tf.float32,name="X-input")
Y = tf.placeholder(tf.float32,name="Y-input")
W1 = tf.Variable(tf.random_uniform([2,1000],-1.0,1.0),name="Weight1")
W2 = tf.Variable(tf.random_uniform([1000,1],-1.0,1.0), name="Weight2")
b1 = tf.Variable(tf.zeros([1000]),name="Bias1")
b2 = tf.Variable(tf.zeros([1]),name="Bias2")
#2. How to write scopes for the Graph view
with tf.name_scope("Layer2") as scope:
L2 = tf.sigmoid(tf.matmul(X,W1)+b1)
with tf.name_scope("Layer3") as scope:
hyp = tf.sigmoid(tf.matmul(L2,W2)+b2)
with tf.name_scope("cost") as scope:
cost = -tf.reduce_mean(Y*tf.log(hyp) + (1-Y)*tf.log(1-hyp))
cost_summary = tf.scalar_summary("cost",cost)
with tf.name_scope("opetimizer") as scope:
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(cost)
#3. Add histograms
h1 = tf.histogram_summary("Weight1",W1)
h2 = tf.histogram_summary("Weight2",W2)
b1_histogram = tf.histogram_summary("Bias1",b1)
b2_histogram = tf.histogram_summary("Bias2",b2)
TensorBoard
MNIST

import tensorflow as tf
import numpy as np
def xavier_init(input_size, output_size, uniform=True):
    # Xavier (Glorot) initialization
    if uniform:
        init_range = tf.sqrt(6.0 / (input_size + output_size))
        return tf.random_uniform_initializer(-init_range, init_range)
    else:
        stddev = tf.sqrt(3.0 / (input_size + output_size))
        return tf.truncated_normal_initializer(stddev=stddev)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
learning_rate = 0.001
training_epochs = 25
batch_size = 100
default_node_size = 250
display_step = 1
X = tf.placeholder("float",[None,784])Y = tf.placeholder("float",[None,10])
W1 = tf.get_variable("W1",shape=[784,default_node_size],initializer=xavier_init[784,default_node_size])W2 = tf.get_variable("W2",shape=[784,default_node_size],initializer=xavier_init[default_node_size,default_node_size])W3 = tf.get_variable("W3",shape=[784,default_node_size],initializer=xavier_init[default_node_size,10])
B1 = tf.Variable(tf.random_normal([default_node_size]))
B2 = tf.Variable(tf.random_normal([default_node_size]))
B3 = tf.Variable(tf.random_normal([10]))
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
hyp = tf.add(tf.matmul(L2, W3), B3)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(hyp, Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as session:
    session.run(tf.initialize_all_variables())
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            session.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += session.run(cost, feed_dict={X: batch_xs, Y: batch_ys}) / total_batch
        if epoch % display_step == 0:
            print "Epoch", "%04d" % (epoch + 1), "cost:", "{:.9f}".format(avg_cost)
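The transcript stops after the training loop; a typical evaluation step (a sketch I am adding, using the same legacy-style API, still inside the session block) might look like this:

    correct_prediction = tf.equal(tf.argmax(hyp, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # evaluate on the held-out MNIST test set
    print "Accuracy:", session.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})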
TensorFlow CNN walkthrough
What the machine sees / What a human sees
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
Split the problem of recognizing handwritten digits into two parts:
• [Segmentation] Break an image containing many digits into a sequence of separate images, each containing a single digit.
• [Classification] Classify each individual digit.
The images and equations in these lecture notes match the actual implementation exactly.
• CNN to classify each individual digit
[Figure: CNN architecture diagram. 28×28 input; 5×5 filters to 24×24×32; pool to 12×12×32; 5×5×32 filters to 8×8×64; pool to 4×4×64; fully-connected layer of 256; output of 10; each layer with a bias.]
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
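The feature-map sizes above follow from simple shape arithmetic. A quick sketch (my own check, assuming 'VALID'-style 5×5 convolutions and 2×2 pooling as drawn in the diagram; the code later in this note uses padding='SAME', which instead preserves the spatial size):

# a 5x5 convolution without padding shrinks each side by 4; 2x2 pooling halves it
c1 = 28 - 5 + 1        # 24 -> C1[32@24x24]
p2 = c1 / 2            # 12 -> P2[32@12x12]
c3 = p2 - 5 + 1        # 8  -> C3[64@8x8]
p4 = c3 / 2            # 4  -> P4[64@4x4]
print c1, p2, c3, p4   # 24 12 8 4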
• Throughout this note, we use rectified linear unit (ReLU) as the activation function
Feed Forward Pass
A feedforward neural network has no loops: information is always fed forward, never fed back.
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Training data {(x^(k), y^(k)) : k = 1, …, N}
session.run(predict_optimizer,feed_dict={X:test_x[test_index],Y:test_y[test_index],droprate_last:1.0,droprate_hidden:1.0}))
x_data, y_data, test_x, test_y = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, 10])

test_index = np.arange(len(test_x))
np.random.shuffle(test_index)
test_index = test_index[0:256]
Input layer: X is a 28×28×1 image, X = (x_{i,j}), i, j = 1, …, 28.
X = tf.placeholder("float", [None, 28, 28, 1])
Training data {(x^(k), y^(k)) : k = 1, …, N} and test data.
X shape=[1,28,28,1]
Layer 1 (convolutional)
Each of the 32 filters W1_i (5×5×1) is convolved with X and passed through ReLU:
h1_i(r, c) = relu( (X ∗ W1_i)(r, c) + b1_i ),  i = 1, …, 32.
W1 = init_weight([5,5,1,32])
b1 = init_bias([32])
h1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1,1,1,1], padding='SAME') + b1)
32 filters (5X5X1)
def init_weight(size_array, stddev=0.01):
    return tf.Variable(tf.random_normal(size_array, stddev=stddev))
def init_bias(size_array):
    return tf.Variable(tf.constant(0.0, shape=size_array))
Layer 2 (Max pooling)
Each feature map is downsampled by taking the maximum over non-overlapping 2×2 windows:
h2_i(r, c) = max{ h1_i(2r−1, 2c−1), h1_i(2r−1, 2c), h1_i(2r, 2c−1), h1_i(2r, 2c) },  i = 1, …, 32.
h2 = tf.nn.max_pool(h1, ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None)
• value: A 4-D Tensor with shape [batch, height, width, channels] and type tf.float32.
• ksize: A list of ints that has length >= 4. The size of the window for each dimension of the input tensor.
• strides: A list of ints that has length >= 4. The stride of the sliding window for each dimension of the input tensor.
• padding: A string, either 'VALID' or 'SAME'. The padding algorithm.
• data_format: A string. 'NHWC' and 'NCHW' are supported.
• name: Optional name for the operation.
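A tiny numeric sketch of 2×2, stride-2 max pooling (pure NumPy, illustrative values of my own):

import numpy as np

a = np.array([[ 1,  2,  5,  6],
              [ 3,  4,  7,  8],
              [ 9, 10, 13, 14],
              [11, 12, 15, 16]], dtype=np.float32)
# take the max over non-overlapping 2x2 windows
pooled = a.reshape(2, 2, 2, 2).max(axis=(1, 3))
print pooled   # [[ 4.  8.] [12. 16.]]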
h1 shape=[1,24,24,32]
[Figure: the 2×2 max-pooling operation applied entry by entry to each 24×24 feature map h1_i, producing the 12×12 maps h2_i.]
Layer 3 (convolution)
Each of the 64 filters W3_i (5×5×32) is convolved with all 32 channels of h2 and passed through ReLU:
h3_i(r, c) = relu( (h2 ∗ W3_i)(r, c) + b3_i ),  i = 1, …, 64.
W3= init_weight ([5,5,32,64])
b3= init_bias ([64])
h3 = tf.nn.relu(tf.nn.conv2d(h2,W3,strides=[1,1,1,1],padding='SAME')+b3)
h2 shape=[1,12,12,32]
Layer 4 (pooling)
Each h3 feature map is downsampled by 2×2 max pooling:
h4_i(r, c) = max{ h3_i(2r−1, 2c−1), h3_i(2r−1, 2c), h3_i(2r, 2c−1), h3_i(2r, 2c) },  i = 1, …, 64.
h4 = tf.nn.max_pool(h3, ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
h3 shape=[1,8,8,64]
Layer 5 (fully-connected)
The 4×4×64 output of Layer 4 is flattened into a 1024-vector and fed to a fully-connected layer:
h5_i = relu( Σ_j W5_{j,i} · h4_j + b5_i ),  i = 1, …, 256.
W5 = init_weight([4*4*64, 256])
b5 = init_weight([256])
h4 = tf.reshape(h4, [-1, 4*4*64])
h5 = tf.nn.relu(tf.matmul(h4, W5) + b5)
h4 shape=[1,4,4,64]
Output layer
W_h = init_weight([256, 10])
b_h = init_weight([10])
hyp = tf.matmul(h5, W_h) + b_h

The weights and biases W1, W3, W5, W_h and b1, b3, b5, b_h are obtained by training the network with backpropagation (see the next page).
Backpropagation

cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(hyp, Y))
optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(cost)   # 1e-3 is the learning rate

The goal is to find weights and biases so that the output of the network approximates the target y(x) for all training inputs x. To quantify how well we're achieving this goal, we define a cost function E(W).
Outline of backpropagation
Suppose we are given a training set (x^(1), y^(1)), …, (x^(2000), y^(2000)). When an input pattern x^(k) from the training set is presented to the network described above, it produces an output hyp(x^(k)) that generally differs from the target y^(k).
The goal is to determine
W1_i, b1_i, i = 1, …, 32   ⇒ (5·5·1 + 1)·32 unknowns
W3_i, b3_i, i = 1, …, 64   ⇒ (5·5·32 + 1)·64 unknowns
W5_i, b5_i, i = 1, …, 256  ⇒ (4·4·64 + 1)·256 unknowns
W_h_i, b_h_i, i = 1, …, 10 ⇒ (256 + 1)·10 unknowns
such that W1, b1, W3, b3, W5, b5, W_h, b_h (26·32 + 801·64 + 1025·256 + 257·10 variables in total) minimize the following error:
E(W) = (1/2) Σ_k || hyp(x^(k)) − y^(k) ||².
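As a quick arithmetic check of that variable count (plain Python, my own verification):

conv1 = (5*5*1 + 1) * 32         # 26 * 32    = 832
conv3 = (5*5*32 + 1) * 64        # 801 * 64   = 51264
fc5   = (4*4*64 + 1) * 256       # 1025 * 256 = 262400
out   = (256 + 1) * 10           # 257 * 10   = 2570
print conv1 + conv3 + fc5 + out  # 317066 trainable variables in total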
We update W by W ← W − α · ∂E(W)/∂W, where α represents a learning rate.
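A one-variable sketch of this update rule (illustrative only; E(w) = (w − 3)² is just a toy cost):

w = 0.0
alpha = 0.1
for step in range(50):
    grad = 2 * (w - 3)       # dE/dw for E(w) = (w - 3)^2
    w = w - alpha * grad     # w <- w - alpha * dE/dw
print w                       # converges toward the minimizer w = 3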
Express E(W) in terms of x^(k)
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Gradient of E(W) (F-layer)
The gradient of E(W) with respect to the fully-connected parameters (W_h, b_h and W5, b5) follows from the chain rule, propagating the output error back through hyp and h5.
relu′(x) = 1 if x > 0 and 0 otherwise.
What does this mean? The error is backpropagated only through the nodes that were activated in the forward pass.
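A small NumPy sketch of how this ReLU derivative gates the backpropagated error (illustrative values of my own):

import numpy as np

pre_activation = np.array([0.7, -1.2, 0.0, 2.5])     # values before ReLU in the forward pass
incoming_grad  = np.array([0.3,  0.8, 0.5, -0.1])    # error arriving from the layer above
relu_grad = (pre_activation > 0).astype(np.float32)  # 1 where the node was active, else 0
print incoming_grad * relu_grad                      # error flows only through activated nodes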
Before we go further… (P-layer)
We need the derivative of the max function for the pooling (P) layers.

Derivative of the max function
h4_i(r, c) = max{ h3_i(2r−1, 2c−1), h3_i(2r−1, 2c), h3_i(2r, 2c−1), h3_i(2r, 2c) }.
For example, if h3_i(2r−1, 2c−1) is the largest of the four values, then
∂h4_i(r, c) / ∂h3_i(2r−1, 2c−1) = 1, and the partial derivatives with respect to the other three entries are 0.
Store the positions of the maxima during the feedforward pass!
• The error is backpropagated only through the activated (selected) nodes.
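The same idea for max pooling, as a tiny NumPy sketch (values are illustrative): the incoming gradient for a pooled output goes entirely to the position that held the maximum in the forward pass.

import numpy as np

block = np.array([[1.0, 4.0],
                  [2.0, 3.0]])                      # one 2x2 window of h3_i
incoming_grad = 0.7                                  # dE / dh4_i(r, c)
mask = (block == block.max()).astype(np.float32)    # 1 at the max position, 0 elsewhere
print incoming_grad * mask                           # [[0.  0.7] [0.  0. ]]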
Gradient of E(W) (P-layer)
Input [1@28x28] – L1[32@24x24] – L2[32@12x12] – L3[64@8x8] – L4[64@4x4] – L5[256@1x1] – output[10]
At a pooling layer the error is simply routed backwards: the gradient with respect to an entry of h3_i equals the incoming gradient if that entry was the maximum of its 2×2 window, and 0 otherwise.
Input [1@28x28] - C1[32@24x24] - P2[32@12x12] - C3[64@8x8] - P4[64@4x4] - F5[256@1x1] - output[10]
Gradient of E(W) (C-layer)
The gradient of E(W) with respect to a convolution filter (W1_i or W3_i) is itself expressed as a convolution: each filter weight accumulates, over all positions where the filter was applied, the backpropagated error at that position multiplied by the corresponding input activation; the bias gradient (b1_i or b3_i) is the sum of the backpropagated error over the feature map.
[The detailed index-by-index gradient expressions from the slides are not reproduced here.]