Dataset used
https://www.data.go.kr/dataset/3035522/fileData.do
The site currently reports this dataset as discontinued.
Using the public data above, this visualization summary was built on data preprocessed with the method from the Inflearn lecture "Python Data Analysis with Public Data" (https://bit.ly/3sISk6Z).
cf1) Create the figure and axes
# Build a 10x3-inch figure at 100 dpi together with its single Axes.
fig, ax1 = plt.subplots(figsize=(10, 3), dpi=100)
cf2) Show every x tick
# One tick per row of df, each labelled with that row's index value.
tick_positions = np.arange(len(df))
_ = plt.xticks(ticks=tick_positions, labels=df.index)
cf3) Remove decimal values from the x axis
# MaxNLocator(integer=True) restricts the major ticks on ax1's x axis to integer positions.
from matplotlib.ticker import MaxNLocator
ax1.xaxis.set_major_locator(MaxNLocator(integer=True))
(cf4) Place the legend outside the plot area
# Pin the legend's upper-left corner just beyond the right edge of the axes.
plt.legend(
    loc="upper left",
    bbox_to_anchor=(1.05, 1),
    borderaxespad=0.0,
)
lineplot
1. pandas plot
(1) The default pandas plot is a line plot
- Drawn from the df's index or column values
# No `kind` is given, so pandas draws its default line plot.
df.plot(figsize=(10,3))
cf) Show every x tick
# One tick per row of g, each labelled with that row's index value.
tick_positions = np.arange(len(g))
_ = plt.xticks(ticks=tick_positions, labels=g.index)
- When df has several columns (the df columns play the role of seaborn's hue)
2. seaborn plot
# One line per region: year on x, price per pyeong on y.
# Column names are the dataset's Korean headers: 연도 (year),
# 평당분양가격 (sale price per pyeong), 지역명 (region name).
# ci=None turns off the confidence band.
# NOTE(review): seaborn 0.12+ deprecates `ci` in favor of `errorbar=None` —
# confirm against the installed version before switching.
sns.lineplot(data=df, x="연도", y="평당분양가격", hue="지역명", ci=None, ax=ax1)
# Anchor the legend's upper-left corner just outside the right edge of the axes.
ax1.legend(bbox_to_anchor=(1.02, 1), loc=2)
pointplot
# One point series per region: year on x, price per pyeong on y.
# Column names are the dataset's Korean headers: 연도 (year),
# 평당분양가격 (sale price per pyeong), 지역명 (region name).
# ci=None turns off the error bars.
sns.pointplot(data=df, x="연도", y="평당분양가격", hue="지역명", ci=None, ax=ax2)
# Anchor the legend's upper-left corner just outside the right edge of the axes.
ax2.legend(bbox_to_anchor=(1.02, 1), loc=2)
barplot
1. pandas plot
(1) df.plot(kind='bar')
- Drawn from the df's index or column values
# Accessor form: creates its own 10x3-inch figure; rot=0 keeps the x tick labels horizontal.
df.plot.bar(rot=0, figsize=(10, 3))
# or
# Equivalent kind='bar' form, drawn onto the existing axes ax1.
df.plot(kind='bar',rot=0, ax=ax1)
(2) df.plot.bar()
# color='g' applies a single color (green) to the bars.
df.plot.bar(color='g',rot=0, figsize=(10,3)) # cmap='Pastel1' also works
- With several columns (the df columns play the same role as seaborn's hue)
# One bar group per index value, one bar per column of df2.
ax = df2.plot.bar(figsize=(10, 3), rot=0)
# Label the y axis with the plotted quantity: 평당분양가격 (sale price per pyeong).
ax.set_ylabel('평당분양가격')
2. seaborn plot
# One bar per region (지역명 = region name); the bar height aggregates
# 평당분양가격 (sale price per pyeong) over that region's rows.
sns.barplot(data=df, x="지역명", y="평당분양가격")
# estimator default: mean
# color changeable
# palette (https://seaborn.pydata.org/tutorial/color_palettes.html)
# ci: bootstrap resampling (with replacement), sorted means
- Specifying hue
# Split each region's bar by year (연도); ci=None drops the error bars.
# 지역명 = region name, 평당분양가격 = sale price per pyeong.
sns.barplot(data=df, x="지역명", y="평당분양가격", hue='연도', ci=None)
histplot
1. pandas plot
(1) df.plot(kind='hist') or df.plot.hist()
# Title passed directly to plot(); 평당분양가격 = sale price per pyeong.
df.plot(kind='hist', figsize=(10, 3), title='평당분양가격')
# or
# Set the title afterwards on the returned Axes.
ax = df.plot(kind='hist', figsize=(10, 3))
ax.set_title('평당분양가격')
# Histogram of a single column with 50 bins.
df["평당분양가격"].plot.hist(bins=50)
(2) df.hist(bins=)
df["ํ๋น๋ถ์๊ฐ๊ฒฉ"].hist(bins=50)
axs=df.hist(bins=50, figsize=(10,10))
ax1,ax2,ax3,ax4=axs.flatten()
ax2.set_title('ax๋ณ ์ ๋ชฉ ์ง์ ๊ฐ๋ฅ')
2. seaborn plot
# Histogram of 평당분양가격 (sale price per pyeong) with a KDE curve overlaid.
sns.histplot(df["평당분양가격"], kde=True)
kdeplot
1. seaborn plot
# Density of a single column: 평당분양가격 (sale price per pyeong).
sns.kdeplot(data=df['평당분양가격'])
# Passing two columns draws one density curve per column; 분양가격 = sale price.
sns.kdeplot(data=df[['평당분양가격', '분양가격']])
boxplot
1. pandas plot
(1) df.plot(kind='box')
# kind='box' selects the box plot; figsize requests a 5x5-inch figure.
df.plot(kind='box', figsize=(5, 5))
(2) df.plot.box()
- The df columns become the x axis
# Accessor form of the box plot; fontsize sets the tick label size.
df.plot.box(fontsize=15)
- When the columns have two levels
# Wide 15x3-inch figure; rot=30 tilts the x tick labels by 30 degrees.
df.plot.box(figsize=(15, 3), rot=30)
(3) df.boxplot(column='', by='')
- by: the x axis
# Box plot of 평당분양가격 (sale price per pyeong), one box per 연도 (year).
df.boxplot(column='평당분양가격', by='연도', figsize=(5, 3), rot=30)
- When by is a list
# One box per (연도, 전용면적) combination: year and exclusive-use area.
# 평당분양가격 = sale price per pyeong.
df.boxplot(column='평당분양가격', by=['연도', '전용면적'], figsize=(20, 3), rot=30)
2. seaborn plot
# One box per 연도 (year) showing the spread of 평당분양가격 (sale price per pyeong).
sns.boxplot(data=df, x="연도", y="평당분양가격")
- Specifying hue
# Open a wide 12x3-inch figure for the grouped boxes.
plt.figure(figsize=(12, 3))
# Within each 연도 (year), draw one box per 전용면적 (exclusive-use area) value;
# y is 평당분양가격 (sale price per pyeong).
sns.boxplot(data=df_last, x="연도", y="평당분양가격", hue="전용면적")
violinplot
1. seaborn plot
# One violin per 연도 (year) showing the distribution of 평당분양가격 (sale price per pyeong).
sns.violinplot(data=df, x="연도", y="평당분양가격")
- Specifying hue
# Open a wide 12x3-inch figure for the grouped violins.
plt.figure(figsize=(12, 3))
# Within each 연도 (year), draw one violin per 전용면적 (exclusive-use area) value;
# y is 평당분양가격 (sale price per pyeong).
sns.violinplot(data=df, x="연도", y="평당분양가격", hue="전용면적")
heatmap
1. seaborn plot
# Open a 15x7-inch figure at 100 dpi, then draw df as an annotated heatmap.
plt.figure(dpi=100, figsize=(15, 7))
# annot=True writes each cell's value; fmt=".0f" prints it without decimals.
ax = sns.heatmap(
    data=df,
    annot=True,
    fmt=".0f",
    cmap="Blues",
)
2. matplotlib pcolor
# Heatmap drawn by hand with pcolor on a 15x5-inch figure at 100 dpi.
fig = plt.figure(figsize=(15, 5), dpi=100)
ax = fig.subplots()

# pcolor puts row 0 at the bottom, so flip the rows to keep t's first row on top.
t2 = t.iloc[::-1]
t2  # notebook-style display of the reversed table; no effect when run as a script

hm1 = ax.pcolor(t2, cmap="Blues")
_ = fig.colorbar(hm1, ax=ax)

col_len = len(t2.columns)
row_len = len(t2.index)

# Write each cell's value (converted to int) at the cell center; each cell is a
# unit square, hence the +0.5 offsets.
for r in range(row_len):
    for c in range(col_len):
        _ = ax.text(
            c + 0.5,
            r + 0.5,
            int(t2.iloc[r, c]),
            ha="center",
            va="center",
            color="k",
            fontsize=11,
        )

# Center the ticks on the cells and label them with t2's columns and index.
_ = ax.set_xticks(np.arange(col_len) + 0.5)
_ = ax.set_xticklabels(t2.columns)
_ = ax.set_yticks(np.arange(row_len) + 0.5)
_ = ax.set_yticklabels(t2.index)