SQL ํ์ด#
Attention
ํ
์ด๋ธ ์์ฑ ๋ฐ ๋ฐ์ดํฐ ๋ก๋๋ ํ์ด์ฌ ๊ธฐ๋ฐ์ผ๋ก ๋์ํฉ๋๋ค.
์ ์ ํ ์ฟผ๋ฆฌ๋ฅผ ์
๋ ฅํ์ฌ ๋ฐ์ดํฐ ์ ์ฒ๋ฆฌ๋ฅผ ์งํํฉ๋๋ค
์ฝ๋ ๊ฐ์ด๋#
Attention
connectDB ํจ์๋ฅผ ํตํด url csv๋ฅผ ๋ก์ปฌ์ sqlite3 ํ์์ ๋ฐ์ดํฐ ๋ฒ ์ด์ค๋ก ๋ฐ๊ฟ๋๋ค.
์ฟผ๋ฆฌ๋ฅผ strํ์์ผ๋ก ์์ฑํฉ๋๋ค(DB table ์ด๋ฆ์ ์ฃผ์ด์ง๋๋ค)
pd.read_sql({์ฟผ๋ฆฌ},con={ํ ์ด๋ธ}) ํ์์ผ๋ก ์ฟผ๋ฆฌ๋ฅผ ๋ ๋ ค ๊ฒฐ๊ณผ๋ฅผ ์ถ๋ ฅํฉ๋๋ค.
ex) table = connectDB(โhttps://raw.githubusercontent.com/Datamanim/pandas/main/lol.csvโ,โlolโ,โ\tโ)
url, table_name, sep
sql =โselect * from lolโ # query Ans = pd.read_sql(sql,con=table) # result
import pandas as pd
def connectDB(url,table_name,sep=',',encoding='utf-8'):
import sqlite3
cnx = sqlite3.connect(':memory:')
df = pd.read_csv(url,sep=sep,encoding=encoding)
df.to_sql(name=table_name, con=cnx)
return cnx
Question 1
์ ์ฒด ๋ฐ์ดํฐ๋ฅผ ๋ก๋ํ๋ผ.
๊ธฐ๋ณธ์ฝ๋
table = connectDB(โhttps://raw.githubusercontent.com/Datamanim/pandas/main/lol.csvโ,โlolโ,โ\tโ) sql =โโ # table_name = lol Ans = pd.read_sql(sql,con=table)
table = connectDB('https://raw.githubusercontent.com/Datamanim/pandas/main/lol.csv','lol','\t')
sql ='select * from lol'
Ans = pd.read_sql(sql,con=table)
display(Ans)
index | gameId | creationTime | gameDuration | seasonId | winner | firstBlood | firstTower | firstInhibitor | firstBaron | ... | t2_towerKills | t2_inhibitorKills | t2_baronKills | t2_dragonKills | t2_riftHeraldKills | t2_ban1 | t2_ban2 | t2_ban3 | t2_ban4 | t2_ban5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3326086514 | 1504279457970 | 1949 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 5 | 0 | 0 | 1 | 1 | 114 | 67 | 43 | 16 | 51 |
1 | 1 | 3229566029 | 1497848803862 | 1851 | 9 | 1 | 1 | 1 | 1 | 0 | ... | 2 | 0 | 0 | 0 | 0 | 11 | 67 | 238 | 51 | 420 |
2 | 2 | 3327363504 | 1504360103310 | 1493 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 2 | 0 | 0 | 1 | 0 | 157 | 238 | 121 | 57 | 28 |
3 | 3 | 3326856598 | 1504348503996 | 1758 | 9 | 1 | 1 | 1 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 164 | 18 | 141 | 40 | 51 |
4 | 4 | 3330080762 | 1504554410899 | 2094 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 3 | 0 | 0 | 1 | 0 | 86 | 11 | 201 | 122 | 18 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
51485 | 51485 | 3308904636 | 1503076540231 | 1944 | 9 | 2 | 1 | 2 | 2 | 0 | ... | 10 | 2 | 0 | 4 | 0 | 55 | -1 | 90 | 238 | 157 |
51486 | 51486 | 3215685759 | 1496957179355 | 3304 | 9 | 2 | 1 | 1 | 2 | 2 | ... | 11 | 7 | 4 | 4 | 1 | 157 | 55 | 119 | 154 | 105 |
51487 | 51487 | 3322765040 | 1504029863961 | 2156 | 9 | 2 | 2 | 2 | 2 | 0 | ... | 10 | 2 | 0 | 2 | 0 | 113 | 122 | 53 | 11 | 157 |
51488 | 51488 | 3256675373 | 1499562036246 | 1475 | 9 | 2 | 2 | 2 | 2 | 0 | ... | 11 | 3 | 0 | 1 | 0 | 154 | 39 | 51 | 90 | 114 |
51489 | 51489 | 3317333020 | 1503612754059 | 1445 | 9 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 0 | 0 | 1 | 0 | 11 | 157 | 141 | 31 | 18 |
51490 rows ร 62 columns
Question 2
๋ฐ์ดํฐ์ ์์ 5๊ฐ ํ์ ์ถ๋ ฅํ๋ผ
๊ธฐ๋ณธ์ฝ๋
table = connectDB(โhttps://raw.githubusercontent.com/Datamanim/pandas/main/lol.csvโ,โlolโ,โ\tโ)
1๋ฒ์์ ์คํํ๋ค๋ฉด ์ถ๊ฐ ์คํ ํ ํ์์์
sql =โโ # table_name = lol Ans = pd.read_sql(sql,con=table)
sql ='select * from lol LIMIT 5'
Ans = pd.read_sql(sql,con=table)
Ans
index | gameId | creationTime | gameDuration | seasonId | winner | firstBlood | firstTower | firstInhibitor | firstBaron | ... | t2_towerKills | t2_inhibitorKills | t2_baronKills | t2_dragonKills | t2_riftHeraldKills | t2_ban1 | t2_ban2 | t2_ban3 | t2_ban4 | t2_ban5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3326086514 | 1504279457970 | 1949 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 5 | 0 | 0 | 1 | 1 | 114 | 67 | 43 | 16 | 51 |
1 | 1 | 3229566029 | 1497848803862 | 1851 | 9 | 1 | 1 | 1 | 1 | 0 | ... | 2 | 0 | 0 | 0 | 0 | 11 | 67 | 238 | 51 | 420 |
2 | 2 | 3327363504 | 1504360103310 | 1493 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 2 | 0 | 0 | 1 | 0 | 157 | 238 | 121 | 57 | 28 |
3 | 3 | 3326856598 | 1504348503996 | 1758 | 9 | 1 | 1 | 1 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 164 | 18 | 141 | 40 | 51 |
4 | 4 | 3330080762 | 1504554410899 | 2094 | 9 | 1 | 2 | 1 | 1 | 1 | ... | 3 | 0 | 0 | 1 | 0 | 86 | 11 | 201 | 122 | 18 |
5 rows ร 62 columns
Question 3
๋ฐ์ดํฐ์ ํ์ ๊ฐฏ์๋ฅผ ํ์ ํ๋ผ
sql ="select count(*) as rowcount from lol;"
Ans = pd.read_sql(sql,con=table)
Ans
rowcount | |
---|---|
0 | 51490 |
Question 4
์ ์ฒด ์ปฌ๋ผ์ ์ถ๋ ฅํ๋ผ(sqlite๋ง์ ๋ฌธ๋ฒ)
sql ="select name from pragma_table_info('lol');"
Ans = pd.read_sql(sql,con=table)
Ans
name | |
---|---|
0 | index |
1 | gameId |
2 | creationTime |
3 | gameDuration |
4 | seasonId |
... | ... |
57 | t2_ban1 |
58 | t2_ban2 |
59 | t2_ban3 |
60 | t2_ban4 |
61 | t2_ban5 |
62 rows ร 1 columns
Question 5
6๋ฒ์งธ ์ปฌ๋ผ๋ช ์ ์ถ๋ ฅํ๋ผ
sql ="select name from pragma_table_info('lol') LIMIT 1 OFFSET 5;"
Ans = pd.read_sql(sql,con=table)
Ans
name | |
---|---|
0 | winner |
Question 6
6๋ฒ์งธ ์ปฌ๋ผ์ ๋ฐ์ดํฐ ํ์ ์ ํ์ธํ๋ผ
sql ="select type from pragma_table_info('lol') LIMIT 1 OFFSET 5;"
Ans = pd.read_sql(sql,con=table)
Ans
type | |
---|---|
0 | INTEGER |
Question 7
gameId ์ปฌ๋ผ์ 3๋ฒ์งธ ๊ฐ์ ๋ฌด์์ธ๊ฐ?
sql ="SELECT gameId FROM lol LIMIT 1 OFFSET 2;"
Ans = pd.read_sql(sql,con=table)
Ans
gameId | |
---|---|
0 | 3327363504 |
Question 8
3๋ฒ์งธ ํ์ creationTime, gameDuration ์ปฌ๋ผ์ ํด๋นํ๋ ๊ฐ์ ๋ฌด์์ธ๊ฐ?
sql ="SELECT creationTime,gameDuration FROM lol LIMIT 1 OFFSET 2;"
Ans = pd.read_sql(sql,con=table)
Ans
creationTime | gameDuration | |
---|---|---|
0 | 1504360103310 | 1493 |
Attention
์ ์ฃผ ๋ ์จ,์ธ๊ตฌ์ ๋ฐ๋ฅธ ๊ตํต๋๋ฐ์ดํฐ : ์ถ์ฒ ์ ์ฃผ ๋ฐ์ดํฐ ํ๋ธ DataUrl = โhttps://raw.githubusercontent.com/Datamanim/pandas/main/Jeju.csvโ
๊ธฐ๋ณธ์ฝ๋
table = connectDB(โhttps://raw.githubusercontent.com/Datamanim/pandas/main/Jeju.csvโ,โjejuโ)
์ต์ด 1ํ๋ง ์คํ
sql =โโ # table_name = jeju Ans = pd.read_sql(sql,con=table)
Question 9
๋ฐ์ดํฐ๋ฅผ ๋ก๋ํ๋ผ. ์ปฌ๋ผ์ด ํ๊ธ์ด๊ธฐ์ ์ ์ ํ ์ฒ๋ฆฌํด์ค์ผํจ
table = connectDB('https://raw.githubusercontent.com/Datamanim/pandas/main/Jeju.csv','jeju',encoding='euc-kr')
# ์ต์ด 1ํ๋ง ์คํ
sql ='SELECT * FROM jeju LIMIT 5' # table_name = jeju
Ans = pd.read_sql(sql,con=table)
Ans
index | id | ์ผ์ | ์๋๋ช | ์๋ฉด๋๋ช | ๊ฑฐ์ฃผ์ธ๊ตฌ | ๊ทผ๋ฌด์ธ๊ตฌ | ๋ฐฉ๋ฌธ์ธ๊ตฌ | ์ด ์ ๋์ธ๊ตฌ | ํ๊ท ์๋ | ํ๊ท ์์ ์๊ฐ | ํ๊ท ๊ธฐ์จ | ์ผ๊ฐ์๋ | ํ๊ท ํ์ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 22448 | 2018-01-01 | ์ ์ฃผ์ | ๋๋๋ | 32249.987 | 3418.266 | 102709.092 | 138377.345 | 39.556 | 29.167 | 5.0 | 0.0 | 2.5 |
1 | 1 | 22449 | 2018-01-01 | ์ ์ฃผ์ | ์ธ๋๋ | 213500.997 | 10341.172 | 112692.789 | 336534.958 | 32.900 | 30.900 | 5.0 | 0.0 | 2.5 |
2 | 2 | 22450 | 2018-01-01 | ์ ์ฃผ์ | ์ด๋2๋ | 1212382.218 | 96920.834 | 541194.481 | 1850497.533 | 29.538 | 35.692 | 2.9 | 0.0 | 2.4 |
3 | 3 | 22451 | 2018-01-01 | ์ ์ฃผ์ | ์ผ๋1๋ | 33991.653 | 6034.253 | 72155.919 | 112181.825 | 30.000 | 23.500 | 2.9 | 0.0 | 2.4 |
4 | 4 | 22452 | 2018-01-01 | ์๊ทํฌ์ | ๋์ฒ๋ | 155036.925 | 9403.969 | 150882.409 | 315323.303 | 41.583 | 14.375 | 5.1 | 0.0 | 2.3 |
Question 10
๋ฐ์ดํฐ ๋ง์ง๋ง 3๊ฐํ์ ์ถ๋ ฅํ๋ผ
sql ='SELECT * FROM jeju LIMIT 10 OFFSET (SELECT count(*) FROM jeju)-3' # table_name = jeju
Ans = pd.read_sql(sql,con=table)
Ans
index | id | ์ผ์ | ์๋๋ช | ์๋ฉด๋๋ช | ๊ฑฐ์ฃผ์ธ๊ตฌ | ๊ทผ๋ฌด์ธ๊ตฌ | ๋ฐฉ๋ฌธ์ธ๊ตฌ | ์ด ์ ๋์ธ๊ตฌ | ํ๊ท ์๋ | ํ๊ท ์์ ์๊ฐ | ํ๊ท ๊ธฐ์จ | ์ผ๊ฐ์๋ | ํ๊ท ํ์ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 9618 | 32066 | 2020-04-30 | ์ ์ฃผ์ | ๋๋๋ | 28397.481 | 3144.895 | 84052.697 | 115595.073 | 41.053 | 29.421 | 20.3 | 0.0 | 3.0 |
1 | 9619 | 32067 | 2020-04-30 | ์๊ทํฌ์ | ์๋๋ฉด | 348037.846 | 29106.286 | 251129.660 | 628273.792 | 46.595 | 49.189 | 17.6 | 0.0 | 3.5 |
2 | 9620 | 32068 | 2020-04-30 | ์ ์ฃผ์ | ์ฐ๋ | 1010643.372 | 65673.477 | 447622.068 | 1523938.917 | 40.863 | 27.765 | 14.1 | 0.0 | 4.8 |
Question 11
์์นํ ๋ณ์๋ฅผ ๊ฐ์ง ์ปฌ๋ผ๋ง ํํฐํ์ฌ ๋ฐ์ดํฐํ๋ ์์ ๋ง๋ค๊ณ ์์ 5ํ์ ์ถ๋ ฅํ๋ผ
sql ="SELECT name FROM pragma_table_info('jeju') WHERE type ='INTEGER' or type='REAL';"
Ans = pd.read_sql(sql,con=table)
Ans
name | |
---|---|
0 | index |
1 | id |
2 | ๊ฑฐ์ฃผ์ธ๊ตฌ |
3 | ๊ทผ๋ฌด์ธ๊ตฌ |
4 | ๋ฐฉ๋ฌธ์ธ๊ตฌ |
5 | ์ด ์ ๋์ธ๊ตฌ |
6 | ํ๊ท ์๋ |
7 | ํ๊ท ์์ ์๊ฐ |
8 | ํ๊ท ๊ธฐ์จ |
9 | ์ผ๊ฐ์๋ |
10 | ํ๊ท ํ์ |