<Preview Model>
A logistic model built from a single neuron
import numpy as np

class LogisticNeuren:
    def __init__(self):
        self.w = None
        self.b = None
        # self.l_r = 0.001  # learning rate (left unused here)

    def forpass(self, x):
        # weighted sum: z = w·x + b
        z = np.sum(x * self.w) + self.b
        return z

    def activation(self, z):
        # clip z from below to avoid overflow in exp(-z)
        z = np.clip(z, -100, None)
        a = 1 / (1 + np.exp(-z))  # sigmoid
        return a

    def backpass(self, x, err):
        # gradients of the logistic loss with respect to w and b
        w_grad = x * err
        b_grad = 1 * err
        return w_grad, b_grad

    def fit(self, x, y, epochs=10):
        self.w = np.ones(x.shape[1])
        self.b = 0
        for i in range(epochs):
            for in_x, in_y in zip(x, y):
                z = self.forpass(in_x)
                py = self.activation(z)
                err = -(in_y - py)
                w_grad, b_grad = self.backpass(in_x, err)
                self.w -= w_grad  # * self.l_r
                self.b -= b_grad  # * self.l_r

    def predict(self, x):
        z = np.array([self.forpass(in_x) for in_x in x])
        py = self.activation(z)
        return py > 0.5
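For reference, here is a short derivation (an addition for this write-up, not part of the original code) of the gradient that backpass uses. With the sigmoid output a = activation(z) and the binary cross-entropy loss:

\[
L = -\bigl(y \log a + (1 - y)\log(1 - a)\bigr), \qquad a = \sigma(z) = \frac{1}{1 + e^{-z}}
\]
\[
\frac{\partial L}{\partial z} = a - y, \qquad
\frac{\partial L}{\partial w} = (a - y)\,x, \qquad
\frac{\partial L}{\partial b} = a - y
\]

This is exactly err = -(in_y - py), w_grad = x * err, and b_grad = err in the code above; fit then applies plain stochastic gradient descent, updating one sample at a time.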
<Practice Environment>
Google Colab
breast cancer: machine learning / deep learning
<dataset>
from sklearn.datasets import load_breast_cancer

d = load_breast_cancer()
x = d.data
y = d.target

from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(x, y,
                                                    train_size=0.7,
                                                    random_state=42,
                                                    stratify=y)

m = LogisticNeuren()
m.fit(train_x, train_y)

# Evaluate on the test set
np.mean(m.predict(test_x) == test_y)  # 0.8947368421052632
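As a rough sanity check (an illustrative addition, not part of the original post), the same split can be fed to scikit-learn's LogisticRegression; the exact score will differ depending on the solver and whether the features are scaled:

from sklearn.linear_model import LogisticRegression

# Baseline for comparison; max_iter raised because the unscaled
# features make the default solver slow to converge.
ref = LogisticRegression(max_iter=5000)
ref.fit(train_x, train_y)
ref.score(test_x, test_y)  # test-set accuracy for comparison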
<Practice Environment>
Google Colab
iris: machine learning / deep learning
<dataset>
import seaborn as sns

df = sns.load_dataset('iris')
np.unique(df['species'])  # array(['setosa', 'versicolor', 'virginica'], dtype=object)

# Keep only two classes (setosa, versicolor) for binary classification
df1 = df[(df['species'] == 'setosa') | (df['species'] == 'versicolor')]
X = df1.drop('species', axis=1)
y = df1['species']

from sklearn.preprocessing import StandardScaler, LabelEncoder

scaler = StandardScaler()
scaler.fit(X)
s_x = scaler.transform(X)

encoder = LabelEncoder()
encoder.fit(y)
s_y = encoder.transform(y)

x_train, x_test, y_train, y_test = train_test_split(s_x, s_y,
                                                    train_size=0.7,
                                                    random_state=42)

m = LogisticNeuren()
m.fit(x_train, y_train)
np.mean(m.predict(x_test) == y_test)  # 1.0
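If the 0/1 predictions need to be read back as species names, the fitted LabelEncoder can decode them (a small illustrative addition, not in the original post):

pred = m.predict(x_test)                      # boolean array (True = class 1)
encoder.inverse_transform(pred.astype(int))   # maps 0/1 back to 'setosa'/'versicolor'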
<Practice Environment>
Google Colab
titanic: machine learning / deep learning
<dataset>
import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.info()  # info() prints its summary directly
'''
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   survived     891 non-null    int64
 1   pclass       891 non-null    int64
 2   sex          891 non-null    object
 3   age          714 non-null    float64
 4   sibsp        891 non-null    int64
 5   parch        891 non-null    int64
 6   fare         891 non-null    float64
 7   embarked     889 non-null    object
 8   class        891 non-null    category
 9   who          891 non-null    object
 10  adult_male   891 non-null    bool
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object
 13  alive        891 non-null    object
 14  alone        891 non-null    bool
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB
'''
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Handle missing values (fill age and fare with the column mean; drop rows missing embarked/deck)
titanic['age'].fillna(titanic['age'].mean(), inplace=True)
titanic['fare'].fillna(titanic['fare'].mean(), inplace=True)
titanic.dropna(subset=['embarked', 'deck'], inplace=True)

# Label-encode every categorical column
label_encoders = {}
categorical_columns = ['sex', 'embarked', 'deck', 'class', 'who', 'adult_male', 'embark_town', 'alive', 'alone']
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    titanic[column] = label_encoders[column].fit_transform(titanic[column])

X = titanic.drop(columns=['survived'])
y = titanic['survived']

# Alternative without LabelEncoder
# titanic.drop(['class', 'who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone'], axis=1, inplace=True)
# titanic.dropna(inplace=True)
# titanic = titanic.reset_index(drop=True)
# titanic.sex.replace(['male', 'female'], [0, 1], inplace=True)
# titanic.embarked.replace(['S', 'C', 'Q'], [0, 1, 2], inplace=True)
# X = titanic.drop('survived', axis=1)
# y = titanic['survived']

x_train, x_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.7,
                                                    random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

model = LogisticNeuren()
model.fit(x_train, y_train, epochs=100)
y_pred = model.predict(x_test)
np.mean(y_pred == y_test)  # 1.0
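For a more detailed look at the predictions than a single accuracy number, scikit-learn's metrics can be applied to the same arrays (a small illustrative addition, not part of the original post):

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# y_pred comes from model.predict(x_test) above
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))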