인공지능/공부

AI 챌린지 본선

이게될까 2023. 11. 24. 06:51
728x90
728x90

처음 보고는 무슨 말인지 싶었다. 게다가 정답(레이블)이 있는지도 모르고 인코딩·디코딩 방식으로 풀려고 했었다.

그러나 ytrain이 있는 것을 한참 뒤에 알았고, 그 이후부터 학습을 시작했다.
데이터 읽기부터가 문제다.

import tensorflow as tf
import numpy as np
import pandas as pd
from keras.datasets.mnist import load_data
from keras.models import Sequential, Model
from keras.layers import Dense, Input ,Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D,Conv1D, MaxPooling1D, GlobalAveragePooling1D
from keras.utils import plot_model, to_categorical
from keras.regularizers import l2
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Turn off TensorFlow's per-op device placement logging.
tf.debugging.set_log_device_placement(False)

# Directory that holds every competition CSV read below.
_DATA_DIR = "/kaggle/input/2023-sejong-ai-challenge"


def _read_competition_csv(stem):
    """Load ``<_DATA_DIR>/<stem>.csv`` into a pandas DataFrame."""
    return pd.read_csv(f"{_DATA_DIR}/{stem}.csv")


# Training split: one file per axis for the Acc_* and Gyro_* signals.
acc_x_train, acc_y_train, acc_z_train = (
    _read_competition_csv(f"Acc_{axis}_train") for axis in "xyz"
)
gyro_x_train, gyro_y_train, gyro_z_train = (
    _read_competition_csv(f"Gyro_{axis}_train") for axis in "xyz"
)

# Test split: same file layout as the training split.
acc_x_test, acc_y_test, acc_z_test = (
    _read_competition_csv(f"Acc_{axis}_test") for axis in "xyz"
)
gyro_x_test, gyro_y_test, gyro_z_test = (
    _read_competition_csv(f"Gyro_{axis}_test") for axis in "xyz"
)

# Training labels.
ytrain = _read_competition_csv("yTrain")

# Replace each training DataFrame with a plain NumPy array of its values.
acc_x_train, acc_y_train, acc_z_train = (
    np.array(frame) for frame in (acc_x_train, acc_y_train, acc_z_train)
)
gyro_x_train, gyro_y_train, gyro_z_train = (
    np.array(frame) for frame in (gyro_x_train, gyro_y_train, gyro_z_train)
)

# The label file is read again here because `ytrain` is overwritten with its
# one-hot form below; re-reading keeps this step repeatable on its own.
label_table = np.array(
    pd.read_csv("/kaggle/input/2023-sejong-ai-challenge/yTrain.csv")
)
# Column 1 is used as the class id and shifted down by one before one-hot
# encoding (presumably the labels in the file start at 1 -- TODO confirm).
ytrain = to_categorical(label_table[:, 1] - 1)


# Same DataFrame -> ndarray conversion for the test split.
acc_x_test, acc_y_test, acc_z_test = (
    np.array(frame) for frame in (acc_x_test, acc_y_test, acc_z_test)
)
gyro_x_test, gyro_y_test, gyro_z_test = (
    np.array(frame) for frame in (gyro_x_test, gyro_y_test, gyro_z_test)
)

def _with_trailing_axis(samples):
    """Return *samples* reshaped to ``(rows, columns, 1)``.

    Both sizes are read from the array itself, so the reshape no longer
    depends on a hard-coded sample count or column count.  An array that
    already has this shape comes back with the same shape.
    """
    return samples.reshape(samples.shape[0], samples.shape[1], 1)


def _stack_channels(*channels):
    """Combine per-axis ``(rows, columns, 1)`` arrays into one float64 array.

    Column 0 of every channel is dropped, and the remaining columns are
    joined along the last axis in the order the channels are given, which
    yields an array of shape ``(rows, columns - 1, len(channels))``.
    NumPy raises ``ValueError`` if the channels disagree on rows or columns.
    """
    trimmed = [channel[:, 1:, :] for channel in channels]
    return np.concatenate(trimmed, axis=2).astype(np.float64)


# Give every per-axis array an explicit trailing axis.
acc_x_train = _with_trailing_axis(acc_x_train)
acc_y_train = _with_trailing_axis(acc_y_train)
acc_z_train = _with_trailing_axis(acc_z_train)
gyro_x_train = _with_trailing_axis(gyro_x_train)
gyro_y_train = _with_trailing_axis(gyro_y_train)
gyro_z_train = _with_trailing_axis(gyro_z_train)

acc_x_test = _with_trailing_axis(acc_x_test)
acc_y_test = _with_trailing_axis(acc_y_test)
acc_z_test = _with_trailing_axis(acc_z_test)
gyro_x_test = _with_trailing_axis(gyro_x_test)
gyro_y_test = _with_trailing_axis(gyro_y_test)
gyro_z_test = _with_trailing_axis(gyro_z_test)

# Channel order on the last axis: acc x, y, z followed by gyro x, y, z.
train_Data = _stack_channels(
    acc_x_train, acc_y_train, acc_z_train,
    gyro_x_train, gyro_y_train, gyro_z_train,
)
test_Data = _stack_channels(
    acc_x_test, acc_y_test, acc_z_test,
    gyro_x_test, gyro_y_test, gyro_z_test,
)

# Seed every random number generator used here with the same value.
import random

import tensorflow as tf
import torch

seed = 42

# Python's `random`, NumPy, TensorFlow and PyTorch, seeded in that order.
# NOTE(review): no other visible code uses torch; its seeding looks
# unnecessary for the Keras model -- confirm before removing.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed, torch.manual_seed):
    seed_fn(seed)

# CUDA generators are seeded only when PyTorch reports a usable GPU.
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# cuDNN: turn benchmark mode off and request deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

여기까지는 데이터를 읽어서 하나로 합치고 시드를 고정하는 영역이다. 정리가 안 되어 있어서 그런지 이 부분만 해도 한참 걸린다.

# 1D CNN classifier built with the Keras functional API.
# The input tensor is named `inputs` so the builtin `input()` is not shadowed
# at module level.
inputs = Input(shape=(128, 6))

# Feature extractor: four Conv1D + MaxPooling1D stages with the filter count
# doubling each time; every pooling stage halves the time axis.
f = inputs
for n_filters in (64, 128, 256, 512):
    f = Conv1D(filters=n_filters, kernel_size=3, activation='relu', padding='same')(f)
    f = MaxPooling1D(pool_size=2)(f)

# Last convolution is followed by a global average over the time axis
# instead of another pooling stage.
f = Conv1D(filters=1024, kernel_size=3, activation='relu', padding='same')(f)
f = GlobalAveragePooling1D()(f)

# Classifier head: three L2-regularised Dense layers, with light dropout
# after the first two.
# NOTE(review): 'LeakyReLU' is passed as an activation string -- confirm the
# installed Keras version accepts this identifier.
f = Dense(512, activation='LeakyReLU', kernel_regularizer=l2(0.01))(f)
f = Dropout(0.1)(f)
f = Dense(123, activation='LeakyReLU', kernel_regularizer=l2(0.01))(f)
f = Dropout(0.1)(f)
f = Dense(40, activation='LeakyReLU', kernel_regularizer=l2(0.01))(f)

# Six-way softmax output, matching the one-hot labels fed to fit() below.
output = Dense(6, activation='softmax')(f)

model = Model(inputs=inputs, outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# 30% of the training data is held out for validation.
# NOTE(review): epochs=1 is what the post shows; the reported validation
# score presumably came from a longer run -- confirm the intended value.
hist = model.fit(
    train_Data,
    ytrain,
    epochs=1,
    batch_size=1200,
    validation_split=0.3,
)

이 네트워크가 제일 성능이 좋았다. 1D conv를 사용함으로써 시간 축 방향의 패턴을 읽는 데 충분히 힘을 실었고, 뒤쪽 Dense 층은 크기를 확 줄여 오버피팅을 방지했다. 이 구조로 검증(validation) 정확도가 95.9까지 올랐다.

# Training curves side by side: loss in the left panel, accuracy in the right.
# Each panel shows the training series and its validation counterpart.
for panel, metric in enumerate(('loss', 'acc'), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(hist.history[metric], label=f'train {metric}')
    plt.plot(hist.history[f'val_{metric}'], label=f'val {metric}')
    plt.legend()

이전 코드에도 있던 시각화

# Predict class probabilities for the test set and keep the most likely class.
out = model.predict(test_Data)
predicted_classes = np.argmax(out, axis=1)

# NOTE(review): `submit` is not defined anywhere in the visible code; it
# presumably holds a submission template loaded elsewhere -- it must exist
# before this step runs.
measure_ids = submit.iloc[:, 0]  # first column of the template is reused as the ID

# Training labels were shifted down by one before one-hot encoding, so the
# predictions are shifted back up here.
labels = predicted_classes + 1

# Build the two-column submission table and write it without the row index.
new_submit = pd.DataFrame({'measureID': measure_ids, 'label': labels})
new_submit.to_csv('submission.csv', index=False)

데이터 집어넣기

캐글은 데이터 집어넣는 것도 문제다..... 그래도 2등으로 잘 끝냈다!

빨리 발표하고 상받고 자야징..

728x90

'인공지능 > 공부' 카테고리의 다른 글

센서신호, FFT, STFT data를 통해 하중 예측하기  (49) 2023.11.30
드디어 첫 수상  (33) 2023.11.27
AI 챌린지 예선  (1) 2023.11.24
FCN quiz  (0) 2023.11.16
FCN - tensorflow  (0) 2023.11.15