진박사의 일상

Binary Classification (IMDB 영화 리뷰 분류) 본문

프로그래밍/딥러닝(Keras)

Binary Classification (IMDB 영화 리뷰 분류)

진박사. 2021. 4. 25. 00:19
from keras.datasets import imdb
# IMDB movie-review dataset, unpacked into train and test (data, labels) pairs.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
# num_words=10000: use only the 10,000 most frequently used words.
# The bare string literal below is a disabled snippet; it is never executed.
'''
word_index = imdb.get_word_index()
reverse_word_index = dict(
    [(value, key) for (key, value) in word_index.items()])
decoded_review = ' '.join(
    [reverse_word_index.get(i -3, '?') for i in train_data[0]])
'''# Review-decoding test: maps the word indices of train_data[0] back to words.

import numpy as np

def vectorize_sequence(sequence, dimension=10000):
    """One-hot encode a batch of integer index sequences.

    Args:
        sequence: Sized collection of index sequences. Every index must be
            a valid column index for a row of width ``dimension``.
        dimension: Width of each encoded row.

    Returns:
        A NumPy array of shape ``(len(sequence), dimension)`` filled with
        zeros, where ``results[i, j]`` is 1 for every index ``j`` that
        appears in the i-th sequence.
    """
    results = np.zeros((len(sequence), dimension))
    # Use distinct loop names so the ``sequence`` parameter is not rebound
    # while it is being iterated.
    for row, indices in enumerate(sequence):
        # Integer-array indexing sets every listed column of this row at once.
        results[row, indices] = 1
    return results

# Preprocessing: vectorize the review sequences and cast the labels to float32.
x_train, x_test = (
    vectorize_sequence(reviews) for reviews in (train_data, test_data)
)
y_train, y_test = (
    np.asarray(labels).astype('float32')
    for labels in (train_labels, test_labels)
)

from keras import models
from keras import layers
# Stack the model: two 16-unit ReLU layers followed by one sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
# Validation hold-out: the first 10,000 training samples become the
# validation set and the remaining samples stay in the training set.
# Each right-hand side is fully evaluated before the names are rebound.
x_val, x_train = x_train[:10000], x_train[10000:]
y_val, y_train = y_train[:10000], y_train[10000:]
# Compile the model.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train the model. The original note marks epochs=20 as overfitting,
# so training runs for 4 epochs instead.
history = model.fit(
    x_train,
    y_train,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

# Evaluate on the test data and report the second returned value as a percentage.
results = model.evaluate(x_test, y_test)
print('Accuracy : ', results[1]*100, '%')

#결과 시각화
import matplotlib.pyplot as plt

# Per-epoch metrics recorded during training.
history_dict = history.history
loss = history_dict['loss']
val_loss = history_dict['val_loss']
acc = history_dict['acc']
val_acc = history_dict['val_acc']

# Epoch numbers for the x-axis, starting at 1.
epochs = range(1, len(loss) + 1)

# Training curves are drawn as dots ('bo'/'ro'), validation curves as solid
# lines ('b'/'r'); blue is loss, red is accuracy.
plt.plot(epochs, loss, 'bo', label='Training loss')  # blue dots
plt.plot(epochs, val_loss, 'b', label='Validation loss')  # solid blue line
plt.plot(epochs, acc, 'ro', label='Training Accuracy')  # red dots
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')  # solid red line
# Loss and accuracy share the same axes, so the title and y-label name both.
plt.title('Training & Validation Loss and Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss / Accuracy')
plt.legend()

plt.show()

「케라스 창시자에게 배우는 딥러닝」 책을 참고하여 작성하였습니다.