Softmax Regression (2)

 

2020/11/10 - [Machine Learning/Deep Learning] - Softmax Regression (1)

 

Implementation

We implement a softmax regression model.

 

Load the Fashion MNIST dataset.

import numpy as np
from tensorflow.keras import datasets

(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

print('data shape:', x_train.shape)

print('target shape:', y_train.shape)
print('target label:', np.unique(y_train, return_counts=True))
data shape: (60000, 28, 28)
target shape: (60000,)
target label: (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000],
      dtype=int64))
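
For reference, the ten integer labels in Fashion MNIST correspond to clothing categories. The names below follow the standard Fashion MNIST documentation; the lookup list is added here only for illustration.

class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

print('first training label:', y_train[0], '->', class_names[y_train[0]])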

 

Split off 20% of the training data as a validation set.

from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)
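
As a quick sanity check (a minimal sketch; the counts assume the 60,000-sample training set loaded above), the split leaves 48,000 training and 12,000 validation samples. train_test_split shuffles before splitting; passing random_state would make the split reproducible.

print('train:', x_train.shape, '/ val:', x_val.shape)
# with 60,000 samples and test_size=0.2: (48000, 28, 28) and (12000, 28, 28)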

 

Each sample is a 28x28 grayscale image.

x_train[0].shape
(28, 28)

 

The values that make up an image are called pixels, and they lie in the range $ [0, 255] $.

import matplotlib.pyplot as plt

plt.imshow(x_train[0])
plt.title('Sample')
plt.colorbar()
plt.show()
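
A quick check of that range on the raw arrays (a minimal sketch; Fashion MNIST images are stored as unsigned 8-bit integers):

print('dtype:', x_train.dtype, '/ min:', x_train.min(), '/ max:', x_train.max())
# uint8 pixels, so the values fall in [0, 255]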

 

Define the neural network.

import numpy as np
import tensorflow as tf

class Model:
    def __init__(self, lr=1e-3):
        tf.reset_default_graph()
        
        with tf.name_scope('input'):
            self.x = tf.placeholder(tf.float32, [None, 28, 28])
            self.y = tf.placeholder(tf.int64)

        with tf.name_scope('preprocessing'):
            x_norm = self.x / 255.0
            y_onehot = tf.one_hot(self.y, 10)
            
        with tf.name_scope('layer'):
            flat = tf.layers.flatten(x_norm)
            fc = tf.layers.dense(flat, 128, tf.nn.relu)
            logits = tf.layers.dense(fc, 10)
            
        with tf.name_scope('output'):
            self.predict = tf.argmax(tf.nn.softmax(logits), 1)

        with tf.name_scope('accuracy'):
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.to_int64(self.predict), self.y), dtype=tf.float32))    
        
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_onehot, logits=logits)
            self.loss = tf.reduce_mean(cross_entropy)
        
        with tf.name_scope('optimizer'):
            self.train_op = tf.train.GradientDescentOptimizer(lr).minimize(self.loss)

        with tf.name_scope('summary'):
            # placeholders for already-computed metrics: write_summary() feeds
            # the epoch's averaged training loss/accuracy and the validation
            # loss/accuracy here so per-epoch curves can be logged to TensorBoard
            self.summary_loss = tf.placeholder(tf.float32)
            self.summary_accuracy = tf.placeholder(tf.float32)
            
            tf.summary.scalar('loss', self.summary_loss)
            tf.summary.scalar('accuracy', self.summary_accuracy)
            
            self.merge = tf.summary.merge_all()

        self.train_writer = tf.summary.FileWriter('./tmp/softmax-regression_fashion_mnist/train', tf.get_default_graph())
        self.val_writer = tf.summary.FileWriter('./tmp/softmax-regression_fashion_mnist/val', tf.get_default_graph())
        
        self.sess = tf.Session()
        
        self.sess.run(tf.global_variables_initializer())
    
    def write_summary(self, tl, ta, vl, va, epoch):
        train_summary = self.sess.run(self.merge, {self.summary_loss: tl, self.summary_accuracy: ta})
        val_summary = self.sess.run(self.merge, {self.summary_loss: vl, self.summary_accuracy: va})
        
        self.train_writer.add_summary(train_summary, epoch)
        self.val_writer.add_summary(val_summary, epoch)
    
    def train(self, x_train, y_train, x_val, y_val, epochs=100, batch_size=32):
        data_size = len(x_train)
        num_iter = data_size // batch_size
        
        for e in range(epochs):
            t_l, t_a = [], []
    
            idx = np.random.permutation(np.arange(data_size))
            _x_train, _y_train = x_train[idx], y_train[idx]
            
            for i in range(0, data_size, batch_size):
                si, ei = i, i + batch_size
                if ei > data_size:
                    ei = data_size
                
                x_batch, y_batch = _x_train[si:ei, :, :], _y_train[si:ei]
                
                tl, ta, _ = self.sess.run([self.loss, self.accuracy, self.train_op], {self.x: x_batch, self.y: y_batch})
                t_l.append(tl)
                t_a.append(ta)
                
            vl, va = self.sess.run([self.loss, self.accuracy], {self.x: x_val, self.y: y_val})
            
            self.write_summary(np.mean(t_l), np.mean(t_a), vl, va, e)
            
            print('epoch:', e + 1, ' / train_loss:', np.mean(t_l), '/ train_acc:', np.mean(t_a), ' / val_loss:', vl, '/ val_acc:', va)
    
    def score(self, x, y):
        return self.sess.run(self.accuracy, {self.x: x, self.y: y})

 

The input data is a 3-dimensional array; each sample is a 2-dimensional array, a 28x28 image.

self.x = tf.placeholder(tf.float32, [None, 28, 28])

 

Because the input pixel values lie in the range $ [0, 255] $, we normalize them by dividing by 255.

x_norm = self.x / 255.0

 

The target data is integer-encoded labels.

self.y = tf.placeholder(tf.int64)

 

We one-hot encode them.

y_onehot = tf.one_hot(self.y, 10)
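
For example, with 10 classes the integer label 3 becomes a vector with a 1 at index 3 and 0 elsewhere. A minimal NumPy sketch of the same idea (tf.one_hot performs this inside the graph):

import numpy as np

label = 3
print(np.eye(10)[label])  # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]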

 

Because each input sample is a 2-dimensional array (28x28), a flatten layer (tf.layers.flatten) converts it into a 1-dimensional array of 28*28 = 784 values so that it can be fed into the fully connected layers.

with tf.name_scope('layer'):
    flat = tf.layers.flatten(x_norm)
    fc = tf.layers.dense(flat, 128, tf.nn.relu)
    logits = tf.layers.dense(fc, 10)
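
The tensor shapes through these layers are as follows (a sketch of the shapes produced by the code above, with the batch dimension written as None):

# x_norm: (None, 28, 28)  normalized grayscale images
# flat:   (None, 784)     tf.layers.flatten
# fc:     (None, 128)     fully connected layer with ReLU
# logits: (None, 10)      one raw score (logit) per class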

 

Training is performed with mini-batch gradient descent.

def train(self, x_train, y_train, x_val, y_val, epochs=100, batch_size=32):
    data_size = len(x_train)
    num_iter = data_size // batch_size

    for e in range(epochs):
        t_l, t_a = [], []

        idx = np.random.permutation(np.arange(data_size))
        _x_train, _y_train = x_train[idx], y_train[idx]

        for i in range(0, data_size, batch_size):
            si, ei = i, i + batch_size
            if ei > data_size:
                ei = data_size

            x_batch, y_batch = _x_train[si:ei, :, :], _y_train[si:ei]

            tl, ta, _ = self.sess.run([self.loss, self.accuracy, self.train_op], {self.x: x_batch, self.y: y_batch})
            t_l.append(tl)
            t_a.append(ta)

        vl, va = self.sess.run([self.loss, self.accuracy], {self.x: x_val, self.y: y_val})

        self.write_summary(np.mean(t_l), np.mean(t_a), vl, va, e)

        print('epoch:', e + 1, ' / train_loss:', np.mean(t_l), '/ train_acc:', np.mean(t_a), ' / val_loss:', vl, '/ val_acc:', va)
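
Each epoch reshuffles the training data and then walks through it in slices of batch_size, with the final slice possibly smaller than batch_size. A minimal, model-independent sketch of that batching (it shuffles indices instead of the arrays, but the effect is the same; the sizes are chosen only for illustration):

import numpy as np

data_size, batch_size = 10, 4
idx = np.random.permutation(data_size)   # reshuffled once per epoch
for i in range(0, data_size, batch_size):
    batch_idx = idx[i:i + batch_size]    # the last batch holds only 2 samples
    print(batch_idx)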

 

Train the model and evaluate it on the test set.

model = Model()
model.train(x_train, y_train, x_val, y_val, epochs=1000)
model.score(x_test, y_test)
epoch: 10  / train_loss: 0.5628707 / train_acc: 0.8145625  / val_loss: 0.5578371 / val_acc: 0.8143333
epoch: 20  / train_loss: 0.48990545 / train_acc: 0.83385414  / val_loss: 0.49084783 / val_acc: 0.834
epoch: 30  / train_loss: 0.45731145 / train_acc: 0.84404165  / val_loss: 0.46129102 / val_acc: 0.83958334
epoch: 40  / train_loss: 0.43643084 / train_acc: 0.84952086  / val_loss: 0.44098717 / val_acc: 0.8476667
epoch: 50  / train_loss: 0.4209792 / train_acc: 0.85539585  / val_loss: 0.42816326 / val_acc: 0.851
epoch: 60  / train_loss: 0.4085188 / train_acc: 0.8597917  / val_loss: 0.4188299 / val_acc: 0.85466665
epoch: 70  / train_loss: 0.39790967 / train_acc: 0.8628333  / val_loss: 0.41054684 / val_acc: 0.857
epoch: 80  / train_loss: 0.3887072 / train_acc: 0.8653333  / val_loss: 0.40308595 / val_acc: 0.8591667
epoch: 90  / train_loss: 0.380518 / train_acc: 0.86833334  / val_loss: 0.39647287 / val_acc: 0.85966665
epoch: 100  / train_loss: 0.3732168 / train_acc: 0.8713125  / val_loss: 0.3912227 / val_acc: 0.86216664

...

epoch: 900  / train_loss: 0.1754578 / train_acc: 0.94079167  / val_loss: 0.32061088 / val_acc: 0.88925
epoch: 910  / train_loss: 0.17418073 / train_acc: 0.94147915  / val_loss: 0.32091013 / val_acc: 0.89016664
epoch: 920  / train_loss: 0.17276596 / train_acc: 0.9421458  / val_loss: 0.32165438 / val_acc: 0.88983333
epoch: 930  / train_loss: 0.17167586 / train_acc: 0.94277084  / val_loss: 0.32007438 / val_acc: 0.89016664
epoch: 940  / train_loss: 0.170372 / train_acc: 0.9423958  / val_loss: 0.322622 / val_acc: 0.88975
epoch: 950  / train_loss: 0.1690425 / train_acc: 0.943875  / val_loss: 0.3234346 / val_acc: 0.888
epoch: 960  / train_loss: 0.16783537 / train_acc: 0.9438125  / val_loss: 0.32234168 / val_acc: 0.8890833
epoch: 970  / train_loss: 0.1663011 / train_acc: 0.944125  / val_loss: 0.32178867 / val_acc: 0.89091665
epoch: 980  / train_loss: 0.16476133 / train_acc: 0.94545835  / val_loss: 0.32161164 / val_acc: 0.889
epoch: 990  / train_loss: 0.16388097 / train_acc: 0.94547915  / val_loss: 0.32247958 / val_acc: 0.8889167
epoch: 1000  / train_loss: 0.16279806 / train_acc: 0.9459375  / val_loss: 0.32239875 / val_acc: 0.891

0.8796

 

The accuracy and loss curves over the epochs, taken from the TensorBoard summaries written during training, are shown below (orange: training, blue: validation).

 

 

 
