Pandas Basics

판다스 Series와 DataFrame에 대해서 간단하게 알아 본다.
Series는 1차원 배열이고 DataFrame은 2차원 배열이다.

Series 생성

dictionary, list, tuple을 판다스 시리즈로 변환하고 있다.
import pandas as pd

# Candidate inputs for a Series. Only the list is used below; the dict and a
# tuple are the alternatives the tutorial mentions.
data1 = dict(a=1, b=2, c=3)   # dict input; alternative: sr = pd.Series(data1)
data2 = list(range(1, 4))     # list input; the tuple (1, 2, 3) is the other variant

# Build the Series with explicit labels (pd.Series(data2) alone is the
# unlabeled variant noted in the tutorial).
labels = ['a', 'b', 'c']
sr = pd.Series(data=data2, index=labels)

# type(sr) can be printed here to inspect the class.
print("\n")

# One element looked up by label, then the whole Series.
# (The tutorial also notes positional forms: sr[0] and sr[[0, 1, 2]].)
print(sr['a'])
print(sr)

결과)
1
a    1
b    2
c    3
dtype: int64

DataFrame 생성

dictionary로 생성

import pandas as pd

# Build a DataFrame from a dict: each key names a column and its list holds
# that column's values.
data1 = {
    'a': [10, 20, 30],
    'b': [11, 21, 31],
    'c': [12, 22, 32],
}
df = pd.DataFrame(data=data1)

# Show the object's class, then the table itself.
for shown in (type(df), df):
    print(shown)

DataFrame은 행(row)과 열(Column)로 구성된 2차원 배열이다.
dictionary는 열 단위로 채워진다.

결과)
<class 'pandas.core.frame.DataFrame'>
    a   b   c
0  10  11  12
1  20  21  22
2  30  31  32

list로 생성

import pandas as pd

# A flat list becomes a single column; the row labels and the column name
# are supplied explicitly.
data1 = [10, 20, 12]
row_labels = ['Á¦·Î', 'Çϳª', 'µÑ']
df = pd.DataFrame(data=data1, index=row_labels, columns=['a'])

# Class first, then the table (one item per line).
print(type(df), df, sep="\n")

결과)
<class 'pandas.core.frame.DataFrame'>
        a
Á¦·Î  10
Çϳª  20
µÑ     12

import pandas as pd

# Each inner list of the 2-D list supplies one row of the frame.
data1 = [
    [10, 20, 30],
    [11, 21, 31],
    [12, 22, 32],
]
df = pd.DataFrame(
    data=data1,
    index=['Á¦·Î', 'Çϳª', 'µÑ'],
    columns=list('abc'),
)

# Class first, then the table (one item per line).
print(type(df), df, sep="\n")

dictionary와 list가 비슷하지만 채워지는 방향이 다르다. dictionary는 열우선(각 값 리스트가 하나의 열), 2차원 list는 행우선(각 안쪽 리스트가 하나의 행)이다.

결과)
<class 'pandas.core.frame.DataFrame'>
          a   b   c
Á¦·Î  10  20  30
Çϳª  11  21  31
µÑ     12  22  32

행 인덱스, 열 이름 설정

import pandas as pd

# Row labels go in through `index`, column names through `columns`.
data1 = [[10, 20, 30], [11, 21, 31], [12, 22, 32]]
row_labels = ['Á¦·Î', 'Çϳª', 'µÑ']
col_names = ['a', 'b', 'c']
df = pd.DataFrame(data=data1, index=row_labels, columns=col_names)

# Class first, then the labelled table.
for shown in (type(df), df):
    print(shown)

결과)
<class 'pandas.core.frame.DataFrame'>
         a   b   c
Á¦·Î  10  20  30
Çϳª  11  21  31
µÑ     12  22  32

인덱스 이름으로 행 선택 loc

import pandas as pd

# Sample frame with named rows and columns.
data1 = [[10, 20, 30], [11, 21, 31], [12, 22, 32]]
df = pd.DataFrame(
    data=data1,
    index=['Á¦·Î', 'Çϳª', 'µÑ'],
    columns=['a', 'b', 'c'],
)

# Label-based row selection with .loc.
first_label, last_label = 'Á¦·Î', 'µÑ'
df0 = df.loc[first_label]                 # single label
df1 = df.loc[[first_label, last_label]]   # list of labels
df2 = df.loc[first_label:last_label]      # label slice; the end label is included

# For every selection: its class, then its contents.
for picked in (df0, df1, df2):
    print(type(picked))
    print(picked)

loc :  - 인덱스 이름 지정 
                     ex) df.loc['Á¦·Î']
                          df.loc[['Á¦·Î', 'µÑ']]
         - 인덱스 이름으로 범위 지정
                          df.loc['Á¦·Î' : 'µÑ'] 

결과)
<class 'pandas.core.series.Series'>
a    10
b    20
c    30
Name: Á¦·Î, dtype: int64
<class 'pandas.core.frame.DataFrame'>
         a   b   c
Á¦·Î  10  20  30
µÑ     12  22  32
<class 'pandas.core.frame.DataFrame'>
         a   b   c
Á¦·Î  10  20  30
Çϳª  11  21  31
µÑ     12  22  32

인덱스로 행 선택 iloc

import pandas as pd

# Sample frame with named rows and columns.
data1 = [[10, 20, 30], [11, 21, 31], [12, 22, 32]]
df = pd.DataFrame(
    data=data1,
    index=['Á¦·Î', 'Çϳª', 'µÑ'],
    columns=['a', 'b', 'c'],
)

# Position-based row selection with .iloc.
df0 = df.iloc[0]         # single position
df1 = df.iloc[[0, 2]]    # list of positions
df2 = df.iloc[0:2]       # position slice; the end position is excluded

# For every selection: its class, then its contents.
for picked in (df0, df1, df2):
    print(type(picked))
    print(picked)

인덱스 이름으로 할 때와 인덱스로 할 때 슬라이싱에서 차이점이 있다.
인덱스 이름 슬라이싱: 끝 이름까지 포함한다.
인덱스 슬라이싱: 끝 인덱스는 포함하지 않는다.

결과)
<class 'pandas.core.series.Series'>
a    10
b    20
c    30
Name: Á¦·Î, dtype: int64
<class 'pandas.core.frame.DataFrame'>
          a   b   c
Á¦·Î  10  20  30
µÑ     12  22  32
<class 'pandas.core.frame.DataFrame'>
          a   b   c
Á¦·Î  10  20  30
Çϳª  11  21  31

열 선택

import pandas as pd

# Sample frame with named rows and columns.
data1 = [[10, 20, 30], [11, 21, 31], [12, 22, 32]]
df = pd.DataFrame(
    data=data1,
    index=['Á¦·Î', 'Çϳª', 'µÑ'],
    columns=['a', 'b', 'c'],
)

# One column, three ways: by name, as an attribute, and via its position in
# df.columns.
df0 = df['a']
df1 = df.a
dcol = df.columns
df2 = df[dcol[0]]

# Several columns: an explicit list of names, or a slice of df.columns.
df3 = df[['a', 'c']]
df4 = df[df.columns[:3]]

# For every selection: its class, then its contents.
for picked in (df0, df1, df2, df3, df4):
    print(type(picked))
    print(picked)

열 선택은 DataFrame 객체[열이름]이다.
ex)   df0 = df['a']

열 인덱스로 선택하려면 다음과 같이 한다.
dcol = df.columns
df2 = df[dcol[0]]
인덱스를 직접 넣으면(예: df[0]) 에러가 발생한다.

여러 개의 열 선택
ex)   df3 = df[['a', 'c']]

열 슬라이싱도 columns를 이용한다.
df4 = df[df.columns[:3]]

결과)
<class 'pandas.core.series.Series'>
Á¦·Î    10
Çϳª    11
µÑ       12
Name: a, dtype: int64
<class 'pandas.core.series.Series'>
Á¦·Î    10
Çϳª    11
µÑ       12
Name: a, dtype: int64
<class 'pandas.core.series.Series'>
Á¦·Î    10
Çϳª    11
µÑ       12
Name: a, dtype: int64
<class 'pandas.core.frame.DataFrame'>
          a   c
Á¦·Î  10  30
Çϳª  11  31
µÑ     12  32
<class 'pandas.core.frame.DataFrame'>
          a   b   c
Á¦·Î  10  20  30
Çϳª  11  21  31
µÑ     12  22  32

원소 선택

import pandas as pd

# Sample frame with named rows and columns.
data1 = [[10, 20, 30], [11, 21, 31], [12, 22, 32]]
df = pd.DataFrame(
    data=data1,
    index=['Á¦·Î', 'Çϳª', 'µÑ'],
    columns=['a', 'b', 'c'],
)

# Single cells: (row label, column name) with .loc, (row, column) positions
# with .iloc.
row_label = 'Á¦·Î'
df0 = df.loc[row_label, 'a']
df1 = df.iloc[0, 1]

# Several cells of one row: a list of column names, a label slice, and a
# position slice.
df2 = df.loc[row_label, ['a', 'c']]
df3 = df.loc[row_label, 'a':'c']
df4 = df.iloc[2, 0:3]

print(df0, df1, df2, sep="\n")

print("slicing")
print(df3, df4, sep="\n")

결과)
10
20
a    10
c    30
Name: Á¦·Î, dtype: int64
slicing
a    10
b    20
c    30
Name: Á¦·Î, dtype: int64
a    12
b    22
c    32
Name: µÑ, dtype: int64

특정 칼럼을 dataframe으로 보는 방법은 다음의 세 가지가 있다.

# Three expressions that each select columns 'a' and 'c'; they rely on the
# `df` built in the preceding snippet.
df[['a', 'c']]  # list of column names
df.loc[:, ['a', 'c']]  # .loc: every row, columns chosen by name
df.iloc[:, [0, 2]]  # .iloc: every row, columns chosen by position

같은 결과로 쥬피터 노트북에서 보면 다음과 같다.