-
Notifications
You must be signed in to change notification settings - Fork 8
/
pisa_xls.py
52 lines (41 loc) · 1.26 KB
/
pisa_xls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
PISA education data
International surveys of student performance
Sources
* https://en.wikipedia.org/wiki/Programme_for_International_Student_Assessment
* http://www.oecd.org/pisa/
* https://nces.ed.gov/surveys/pisa/
* http://www.oecd.org/pisa/keyfindings/pisa-2012-results-volume-I.pdf
* https://github.com/NYUDataBootcamp/Materials/blob/master/Code/Projects/PISA_SusanChen_Aug_15.ipynb
Repository of materials (including this file):
* https://github.com/NYUDataBootcamp/Materials/
Written by Dave Backus, December 2015, based on earlier work by Susan Chen
Created with Python 3.5
"""
"""
Check Python version
"""
# Standard library first, then third-party.
import sys           # interpreter details, including the version string
import pandas as pd  # the data package used throughout this script

# Report the versions in use so a run can be reproduced later.
print('\nPython version:', sys.version)
print('Pandas version: ', pd.__version__)
#%%
"""
Read xls file
Note: skip notes at top/bottom, set double column labels
"""
# Re-imported here so this cell can be run on its own.
import pandas as pd

# Load the spreadsheet and discard incomplete rows in one expression:
#   header=[0, 1]  -> two header rows become two-level column labels
#   index_col=0    -> first column is used as the row index
#   skiprows / skipfooter -> ignore the notes above and below the table
#   dropna()       -> keep only rows with no missing values
pisa = pd.read_excel(
    'http://dx.doi.org/10.1787/888932937035',
    header=[0, 1],
    index_col=0,
    skiprows=18,
    skipfooter=7,
).dropna()

# Show the row labels that survived the cleanup.
print(pisa.index)
# Uncomment to inspect the whole table:
#print('\nPISA data \n', pisa)
#%%
"""
Plot
"""
# extract mean math score, sort top to bottom