AI/공부

[인공지능]뉴런을 이용해 딥러닝 맛보기(유방암, 아이리스, 타이타닉)

살랑춤춰요 2024. 8. 2. 14:54

<맛보기 모델>

뉴런을 이용한 Logistic 모델

class LogisticNeuren:
    """Single-neuron logistic-regression classifier trained with
    per-sample (stochastic) gradient descent on the log loss.

    Parameters
    ----------
    lr : float, default 1.0
        Step size for the weight/bias updates. The default of 1.0
        reproduces the original behavior, which applied the raw
        gradient (the commented-out ``self.l_r = 0.001`` showed the
        step size was always meant to be tunable).
    """

    def __init__(self, lr=1.0):
        self.w = None   # weight vector, initialized in fit()
        self.b = None   # bias scalar, initialized in fit()
        self.lr = lr

    def forpass(self, x):
        """Return the linear response z = w·x + b for one sample."""
        return np.sum(x * self.w) + self.b

    def activation(self, z):
        """Sigmoid of z.

        z is clipped below at -100 so that exp(-z) stays finite
        (exp(100) is well inside float64 range); large positive z
        merely underflows exp(-z) toward 0, which is harmless.
        """
        z = np.clip(z, -100, None)
        return 1 / (1 + np.exp(-z))

    def backpass(self, x, err):
        """Return (w_grad, b_grad) of the log loss for one sample."""
        return x * err, 1 * err

    def fit(self, x, y, epochs=10):
        """Train on arrays x (n_samples, n_features) and y (n_samples,)
        with `epochs` full passes of per-sample updates."""
        self.w = np.ones(x.shape[1])
        self.b = 0
        for _ in range(epochs):
            for xi, yi in zip(x, y):
                a = self.activation(self.forpass(xi))
                err = -(yi - a)  # dLoss/dz for sigmoid + log loss
                w_grad, b_grad = self.backpass(xi, err)
                self.w -= self.lr * w_grad
                self.b -= self.lr * b_grad

    def predict(self, x):
        """Return boolean predictions (P(class 1) > 0.5) per row of x."""
        z = np.array([self.forpass(xi) for xi in x])
        return self.activation(z) > 0.5

 

 

<실습환경>

Google Colab

breast cancer(유방암) 머신러닝/딥러닝

 

<dataset>

# Load the Wisconsin breast-cancer dataset and split it 70/30,
# stratified so both splits keep the original class balance.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

cancer = load_breast_cancer()
features, labels = cancer.data, cancer.target

train_x, test_x, train_y, test_y = train_test_split(
    features,
    labels,
    train_size=0.7,
    random_state=42,
    stratify=labels,
)

# Train the single-neuron logistic model, then score it on the test set.
model = LogisticNeuren()
model.fit(train_x, train_y)

np.mean(model.predict(test_x) == test_y)  # 0.8947368421052632
 

 

<실습환경>

Google Colab

iris(아이리스) 머신러닝/딥러닝

<dataset>

# Binary classification on iris: keep only the setosa/versicolor rows
# so the single-neuron logistic model has a two-class problem.
import seaborn as sns

df = sns.load_dataset('iris')

np.unique(df['species'])  # array(['setosa', 'versicolor', 'virginica'], dtype=object)

df1 = df[df['species'].isin(['setosa', 'versicolor'])]
X = df1.drop('species', axis=1)
y = df1['species']

# Standardize the features and turn the string labels into 0/1 codes.
from sklearn.preprocessing import StandardScaler, LabelEncoder

s_x = StandardScaler().fit_transform(X)
s_y = LabelEncoder().fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(
    s_x,
    s_y,
    train_size=0.7,
    random_state=42,
)

m = LogisticNeuren()
m.fit(x_train, y_train)
np.mean(m.predict(x_test) == y_test)  # 1.0
 

 

<실습환경>

Google Colab

titanic(타이타닉) 머신러닝/딥러닝

 

<dataset>

# Load the Titanic passenger dataset bundled with seaborn.
import seaborn as sns
titanic = sns.load_dataset('titanic')

 

# Inspect the columns and missing values: age (714/891), embarked (889/891)
# and deck (203/891) contain NaNs that must be handled before modeling.
print(titanic.info())
 
'''
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   survived     891 non-null    int64  
 1   pclass       891 non-null    int64  
 2   sex          891 non-null    object  
 3   age          714 non-null    float64
 4   sibsp        891 non-null    int64  
 5   parch        891 non-null    int64  
 6   fare         891 non-null    float64
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB
'''

 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Missing values: impute age/fare with the column mean, then drop rows
# still missing embarked or deck (deck is mostly NaN, so this discards
# the majority of the 891 rows).
titanic['age'].fillna(titanic['age'].mean(), inplace=True)
titanic['fare'].fillna(titanic['fare'].mean(), inplace=True)
titanic.dropna(subset=['embarked', 'deck'], inplace=True)

# Integer-encode every non-numeric column; keep the fitted encoders so
# the mapping can be inverted or reused later.
# NOTE(review): encoding 'alive' leaks the target — it is 'survived' as
# a string — which is why the reported test accuracy is a perfect 1.0.
# Drop it (and consider 'adult_male'/'who') for an honest evaluation.
label_encoders = {}
categorical_columns = ['sex', 'embarked', 'deck', 'class', 'who', 'adult_male', 'embark_town', 'alive', 'alone']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    titanic[column] = label_encoders[column].fit_transform(titanic[column])

X = titanic.drop(columns=['survived'])
y = titanic['survived']

# Alternative without LabelEncoder (manual replace), kept for reference:
# titanic.drop(['class', 'who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone'], axis=1, inplace=True)
# titanic.dropna(inplace=True)
# titanic = titanic.reset_index(drop=True)
# titanic.sex.replace(['male', 'female'], [0,1], inplace=True)
# titanic.embarked.replace(['S', 'C', 'Q'], [0, 1, 2], inplace=True)
# X = titanic.drop('survived', axis=1)
# y = titanic['survived']

x_train, x_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.7,
                                                    random_state=42)

# Scale AFTER splitting: fit on the training set only, so no test-set
# statistics leak into training.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = LogisticNeuren()
model.fit(x_train, y_train, epochs=100)

# Bug fix: the original called model.predict(X_test), but no variable of
# that name exists (NameError) — the split/scaled test set is x_test.
y_pred = model.predict(x_test)
np.mean(y_pred == y_test)