학습 데이터와 테스트 데이터를 분리할 때는 사이킷런의 train_test_split 함수를 이용한다.
import numpy as np
from sklearn.model_selection import train_test_split
col = np.arange(1, 11).reshape(10, 1)
data = [[1], [2], [1], [2], [1], [2], [1], [2], [1], [2]]
data = np.append(data, col, 1)
data
array([[ 1,  1],
       [ 2,  2],
       [ 1,  3],
       [ 2,  4],
       [ 1,  5],
       [ 2,  6],
       [ 1,  7],
       [ 2,  8],
       [ 1,  9],
       [ 2, 10]])
col에 1부터 10까지의 값을 1차원 배열로 넣는다.
reshape로 행의 크기가 10, 열의 크기가 1인 배열로 변환한다.
행을 Row, 열을 Column이라 부른다.
data의 0번 칼럼에 속성이 1이나 2인 값을 넣는다.
data에 col을 열로 추가한다.
append의 3번째 매개변수가 1이면 열(Column) 추가이다.
newcol = col + 10
newdata = np.append(data, newcol, 1)
newdata
array([[ 1,  1, 11],
       [ 2,  2, 12],
       [ 1,  3, 13],
       [ 2,  4, 14],
       [ 1,  5, 15],
       [ 2,  6, 16],
       [ 1,  7, 17],
       [ 2,  8, 18],
       [ 1,  9, 19],
       [ 2, 10, 20]])
data와 newcol로 newdata를 만든다.
x = newdata[:, 0:2]
y = newdata[:, 2]
x
array([[ 1,  1],
       [ 2,  2],
       [ 1,  3],
       [ 2,  4],
       [ 1,  5],
       [ 2,  6],
       [ 1,  7],
       [ 2,  8],
       [ 1,  9],
       [ 2, 10]])
데이터를 x, y 배열에 담는다.
seed = 5
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed, stratify=newdata[:, 0:1], shuffle=True)
print('x_train 전체 데이터 %d: ' % len(x_train))
print('x_test 전체 데이터 %d:' % len(x_test))
x_test
x_train 전체 데이터 8: 
x_test 전체 데이터 2:
array([[2, 2],
       [1, 7]])
사이킷런의 train_test_split 함수로 학습 데이터와 테스트 데이터를 분리할 수 있다.
test_size=0.2
테스트 데이터의 크기 비율로, 전체가 1일 때 0.2의 비율로 분리한다. 여기서는 전체가 10개, 테스트 데이터는 2개이다.
random_state=seed
랜덤 시드의 값을 설정한다.
stratify=newdata[:, 0:1]
0번째 칼럼의 1, 2 값이 골고루 포함되도록 한다. stratify 값을 설정하지 않으면 다음과 같이 x_test 값이 분리될 수 있다. 여러 값이 포함되었을 때 과적합을 막을 수 있다.
array([[2, 2],
       [2, 8]])
shuffle=True
shuffle을 하는 이유는, 섞지 않으면 미니 배치로 기울기의 평균을 구해 학습할 때 잘못된 방향으로 학습할 수 있기 때문이다.
전체 코드는 다음과 같다.
import numpy as np
from sklearn.model_selection import train_test_split

# Build a 10x3 demo array: column 0 is a label alternating 1/2,
# column 1 holds 1..10, and column 2 is column 1 plus 10.
col = np.arange(1, 11).reshape(10, 1)
data = [[1], [2], [1], [2], [1], [2], [1], [2], [1], [2]]
data = np.append(data, col, 1)  # third argument 1 = axis 1: append as a column
newcol = col + 10
newdata = np.append(data, newcol, 1)

# The first two columns are the inputs; the last column is the target.
x = newdata[:, 0:2]
y = newdata[:, 2]

seed = 5
# Unstratified variant, kept for comparison with the stratified call below:
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)
# Stratify on the label column so both label values appear in the test split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=seed,
    stratify=newdata[:, 0:1], shuffle=True)
print('x_train 전체 데이터 %d: ' % len(x_train))
print('x_test 전체 데이터 %d:' % len(x_test))