1. 공공기관 Data를 사용하여 분석
- DB : http://www.index.go.kr/potal/main/EachDtlPageDetail.do?idx_cd=1614
2. 교통 사고 사망 / 사고 / 부상 분석 DB 추출
# Yearly road-traffic figures for 2009-2016, keyed by column name.
# Position i of every list refers to the same year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
3. import 모듈
from print_df import print_df
from pandas import DataFrame
import matplotlib.pyplot as plt
import numpy as np
4. Pandas 분석
- DB 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}

# Each dict key becomes a column; rows get the default 0..7 index.
df = DataFrame(data=traffic)

# print_df comes from the local print_df module imported at the top of the file.
print_df(df)
+---+--------+------+--------+------+
| | 사고 | 사망 | 부상 | 년도 |
+---+--------+------+--------+------+
| 0 | 231990 | 5838 | 361875 | 2009 |
| 1 | 226878 | 5505 | 352458 | 2010 |
| 2 | 221711 | 5229 | 341391 | 2011 |
| 3 | 223656 | 5392 | 344565 | 2012 |
| 4 | 215354 | 5092 | 328711 | 2013 |
| 5 | 223552 | 4762 | 337497 | 2014 |
| 6 | 232035 | 4621 | 350400 | 2015 |
| 7 | 220917 | 4292 | 331720 | 2016 |
+---+--------+------+--------+------+
Process finished with exit code 0
- 년도에 대한 컬럼만 리스트로 추출
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(data=traffic)

# Pull the year column out of the frame as a plain Python list.
year = [*df['년도']]
- 년도 리스트에 대해 인덱스 번호 부여
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# Map each positional row label (0, 1, ...) to the year stored in that row.
# dict(enumerate(...)) builds the same {position: year} mapping as an
# explicit loop, without an indented body that can be lost when copying.
new_name = dict(enumerate(year))
- 데이터프레임의 인덱스 변경
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}, e.g. {0: 2009, 1: 2010, ...}.
new_name = dict(enumerate(year))

# Relabel the rows in place so the index shows the year instead of 0..7.
# The '년도' column itself is still present after this step.
df.rename(index=new_name, inplace=True)
- 기존의 년도 컬럼 삭제 후 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the now-redundant year column.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# print_df comes from the local print_df module imported at the top of the file.
print_df(df)
+------+--------+------+--------+
| | 사고 | 사망 | 부상 |
+------+--------+------+--------+
| 2009 | 231990 | 5838 | 361875 |
| 2010 | 226878 | 5505 | 352458 |
| 2011 | 221711 | 5229 | 341391 |
| 2012 | 223656 | 5392 | 344565 |
| 2013 | 215354 | 5092 | 328711 |
| 2014 | 223552 | 4762 | 337497 |
| 2015 | 232035 | 4621 | 350400 |
| 2016 | 220917 | 4292 | 331720 |
+------+--------+------+--------+
Process finished with exit code 0
- 박스 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the now-redundant year column.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# One box per remaining column (사고, 사망, 부상).
plt.figure()
plt.grid()
df.boxplot()
plt.title('교통 사고 사망 / 사고 / 부상')
plt.ylabel('교통사고 수')

# Write the chart to disk and release the figure.
plt.savefig('box2019.png', dpi=200)
plt.close()
- 단일 선 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)

# Keep the years for the tick labels, then drop the column so it is not
# drawn as a fourth line. The row index stays 0..len(year)-1 here.
year = list(df['년도'])
df.drop('년도', axis=1, inplace=True)

# Tick positions: one per row, matching the default integer index.
x = np.arange(len(year))

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# One line per remaining column, plotted against the integer index.
df.plot()
plt.grid()
plt.legend()
plt.title("교통 사고 사망 / 사고 / 부상")
plt.ylabel("교통사고 수")

# Show the year at each integer position and clip the x-axis to the data.
# The upper limit is derived from the data instead of a hard-coded 7, so the
# chart stays correct if years are added or removed.
plt.xticks(x, year)
plt.xlim(0, len(year) - 1)

# Write the chart to disk and release the figure.
plt.savefig('plot2019.png')
plt.close()
- 세로 막대 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the now-redundant year column.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# Tick positions: one bar group per year.
x = np.arange(len(year))

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# Create the axes explicitly and pass them to pandas. Without ax=, pandas
# draws DataFrame plots on a figure of its own, so a preceding bare
# plt.figure() would leave an empty figure that is never closed.
fig, ax = plt.subplots()
df.plot.bar(ax=ax)
plt.grid()
plt.title('교통 사고 사망 / 사고 / 부상')
plt.legend()
plt.ylabel('교통사고 수')
plt.xticks(x, year)

# Write the chart to disk and release the figure.
plt.savefig('bar2019.png', dpi=200)
plt.close(fig)
- 가로 막대 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the now-redundant year column.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# Tick positions: one bar group per year.
x = np.arange(len(year))

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# Create the axes explicitly and pass them to pandas. Without ax=, pandas
# draws DataFrame plots on a figure of its own, so a preceding bare
# plt.figure() would leave an empty figure that is never closed.
fig, ax = plt.subplots()
df.plot.barh(ax=ax)
plt.grid()
plt.title('교통 사고 사망 / 사고 / 부상')
plt.legend()
# Horizontal bars: the counts run along x, the years along y.
plt.xlabel('교통사고 수')
plt.yticks(x, year)

# Write the chart to disk and release the figure.
plt.savefig('barh2019.png', dpi=200)
plt.close(fig)
- 사망, 부상량 관계 산점도 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the now-redundant year column.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# Create the axes explicitly and pass them to pandas. Without ax=, pandas
# draws DataFrame plots on a figure of its own, so a preceding bare
# plt.figure() would leave an empty figure that is never closed.
fig, ax = plt.subplots()
# One point per year: deaths on x, injuries on y.
df.plot.scatter(x='사망', y='부상', ax=ax)
plt.grid()
plt.title('교통 사고 사망 부상량 관계')

# Write the chart to disk and release the figure.
plt.savefig('scatter2019.png', dpi=200)
plt.close(fig)
- 교통 사고 사망 부상 비율 파이 그래프 출력
# Column-oriented source data: one list per column, one entry per year.
traffic = {
    '사고': [231990, 226878, 221711, 223656, 215354, 223552, 232035, 220917],  # accidents
    '사망': [5838, 5505, 5229, 5392, 5092, 4762, 4621, 4292],  # deaths
    '부상': [361875, 352458, 341391, 344565, 328711, 337497, 350400, 331720],  # injuries
    '년도': [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],  # year
}
df = DataFrame(traffic)
year = list(df['년도'])

# {row position: year}; built without a loop so no indented body is needed.
new_name = dict(enumerate(year))

# Use the years as row labels, then remove the year column so it is not
# included in the totals below.
df.rename(index=new_name, inplace=True)
df.drop('년도', axis=1, inplace=True)

# Column totals over all years, wrapped in a one-column frame named '교통사고'.
total = df.sum()
total_df = DataFrame(total, columns=['교통사고'])

# Global matplotlib settings. NanumGothic is presumably chosen so the Korean
# labels render; the font must be installed on the machine.
plt.rcParams["font.family"] = 'NanumGothic'
plt.rcParams["font.size"] = 14
plt.rcParams["figure.figsize"] = (15, 10)

# Draw on explicitly created axes so the figure that is saved and closed
# is the one the pie was drawn on.
fig, ax = plt.subplots()
color = ['#FF0000', '#800000', '#FFFF00']  # one colour per slice
explode = [0.0, 0.0, 0.0]  # no slice is offset from the centre
total_df['교통사고'].plot.pie(
    ax=ax, colors=color, explode=explode, autopct='%0.1f%%', startangle=180
)
plt.title('교통사고 사고 사망 부상 비율')
plt.legend()

# Write the chart to disk and release the figure.
plt.savefig('pie2019.png', dpi=200)
plt.close(fig)
'Python_Intermediate > Pandas' 카테고리의 다른 글
Python Pandas 박스오피스 180516 순위 분석 (0) | 2019.05.17 |
---|---|
Pandas - 어린이집 시설 현황 분석(Excel +그래프) (0) | 2019.05.15 |
Pandas - 서울시 자치구 년도별 CCTV 설치 현황 (0) | 2019.05.14 |
Python Pandas 박스오피스 180507 순위 분석 (0) | 2019.05.08 |
Data Refining NA Data(데이터 정제 결측치) (0) | 2019.05.07 |