Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
matinnuhamunada committed Aug 1, 2021
1 parent 9bd115b commit a4d50d2
Show file tree
Hide file tree
Showing 13 changed files with 462 additions and 425 deletions.
8 changes: 4 additions & 4 deletions data/IPB/ipb_affil_information.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"affil_name" : "ugm",
"affil_name_long" : "Universitas Gadjah Mada",
"affil_id" : 384,
"dept_id" : [46201, 46101, 46001, 46202, 46102, 46002]
"affil_name" : "ipb",
"affil_name_long" : "Institut Pertanian Bogor",
"affil_id" : 428,
"dept_id" : [46002, 54107, 54113, 54007, 46102, 46004, 47101, 54153, 47202, 46104, 54208, 46103, 46101, 54132, 54053, 46003, 46201, 54032]
}
22 changes: 21 additions & 1 deletion data/ITB/itb_data_NIDN.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
No,name,id_manual,NIDN
No,name,id,NIDN
1,Robert Manurung,6036454,3125401
2,Asep Hidayat,6035214,401066606
3,Aos,6032302,13116704
Expand Down Expand Up @@ -79,3 +79,23 @@ No,name,id_manual,NIDN
78,Tati Karliati,6033797,11016905
79,Susana Paulina Dewi,6032402,422097101
80,Sutrisno,6036370,4106210
81,Rizkita Rachmi Esyanti,6041496,0007126101
82,Husna Nugrahapraja,6652007,0021018703
83,Shanty Rahayu Kusumawardani,6720778,0005078911
84,Andira Rahmawati,5991104,0405089001
85,Anriansyah Renggaman,6695165,0008088803
86,Sartika Indah Amalia Sudiarto,6695253,0019128802
87,Ima Mulyama Zainuddin,6657904,0330058201
88,Popi Septiani,6720779,0024098406
89,Aditya Dimas Pramudya,6676526,0006108904
90,Karlia Meitha,6652785,0022058404
91,Tati Suryati Syamsudin,6002375,0026035701
92,Sri Nanan B Widiyanto,6036289,0014035701
93,Pingkan Aditiawati,6197842,0010095802
94,Kamarisima,6035443,0001039002
95,Jayen Aris Kriswantoro,6746687,0031079401
96,Muhammad Yusuf Abduh,6033897,0025078308
97,Neil Priharto,6035402,0005018606
98,Taufikurahman,6659462,0013096101
99,Mochammad Firmansyah,6717657,0015069501
100,Khairul Hadi Burhan,6667723,0002079005
2 changes: 1 addition & 1 deletion data/UB/ub_data_NIDN.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
No,name,id_manual,NIDN
No,name,id,NIDN
0,Sutiman Bambang Sumitro,5998175,11035404
1,Estri Laras Arumingtyas,5978827,18086306
2,Fatchiyah,6137819,27116307
Expand Down
2 changes: 1 addition & 1 deletion data/UGM/ugm_affil_information.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"affil_name" : "ugm",
"affil_name_long" : "Universitas Gadjah Mada",
"affil_id" : 384,
"dept_id" : [46201, 46101, 46001, 46202, 46102, 46002]
"dept_id" : [46201, 46101, 46001, 46202, 46102, 46002, 54111]
}
21 changes: 18 additions & 3 deletions data/UGM/ugm_data_NIDN.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ No,name,id,NIDN
3,Akbar Reza,6200486,0010049103
4,Andhika Puspito Nugroho,5976983,0008087602
5,Annas Rabbani,6690307,0029088904
6,Aprilia Sufi Subiastuti,,0020049303
6,Aprilia Sufi Subiastuti,6725040,0020049303
7,Ardaning Nuriliani,6037774,0020128001
8,Ari Indrianto,,0025115805
9,Arief Muammar,6017987,1120170055
Expand All @@ -19,7 +19,7 @@ No,name,id,NIDN
18,Dwi Umi Siswanti,6000546,0510097701
19,Eko Agus Suyono,42920,0018127101
20,Endah Retnaningrum,42921,0019037201
21,Endang Semiarti,5973460,00231162o4
21,Endang Semiarti,5973460,0023116204
22,Fajar Sofyantoro,6073146,0003058904
23,Ganies Riza Aristya,5973150,0016028403
24,Hari Purwanto,6015559,0012076605
Expand Down Expand Up @@ -74,4 +74,19 @@ No,name,id,NIDN
73,Woro Anindito Sri Tunjung,6021239,0520077903
74,Yekti Asih Purwestri,21472,0023057104
75,Zuliyati Rohmah,5988050,0001077901
76,Widodo,21471,0018097005
76,Widodo,21471,0018097005
77,Irfan Dwidya Prijambada,6006002,0030116104
78,Jaka Widada,6041544,0028126606
79,Desi Utami,6037491,0029128803
80,Sebastian Margino,6064225,0015035310
81,Ngadiman,6064364,0018036204
82,Chusnul Hanim,21470,0016036506
83,Widya Asmara,5975201,0005055408
84,Siti Muslimah Widyastuti,41561,0031075704
85,Donny Widianto,6029545,0031106104
86,Trini Susmiati,6022809,0021105803
87,Siti Subandiyah,5986240,0001065808
88,Tri Joko,21491,0027027513
89,Sedyo Hartono,6043717,0005046803
90,Arif Wibowo,6040719,0014056704
91,Achmadi Priyatmojo,6022778,0021066402
10 changes: 5 additions & 5 deletions figures/bio_sinta.html

Large diffs are not rendered by default.

32 changes: 13 additions & 19 deletions notebooks/00_data_scraping-template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"source": [
"# get life sciences authors for the selected university\n",
"## Set variables\n",
"with open(f\"../data/{univ[0].upper()}/ugm_affil_information.txt\", \"r\", encoding = 'utf-8') as f:\n",
"with open(f\"../data/{univ[0].upper()}/{univ[0]}_affil_information.txt\", \"r\", encoding = 'utf-8') as f:\n",
" d = ast.literal_eval(f.read())\n",
" \n",
"affil_name = d['affil_name']\n",
Expand Down Expand Up @@ -77,7 +77,7 @@
"outputs": [],
"source": [
"# Load data in manual\n",
"MANUAL = pd.read_csv(f'../data/{affil_name.upper()}/data_NIDN-versi-sinta.txt', index_col=0, dtype=str)\n",
"MANUAL = pd.read_csv(f'../data/{affil_name.upper()}/{affil_name}_data_NIDN-versi-sinta.txt', index_col=0, dtype=str)\n",
"#MANUAL"
]
},
Expand Down Expand Up @@ -108,19 +108,6 @@
"#ALL"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "624b26db-b8fc-42ca-b92e-43b07bdfd259",
"metadata": {},
"outputs": [],
"source": [
"# first clean?\n",
"#ALL = ALL.sort_values(by=['name']).reset_index(drop=True)\n",
"#ALL.to_csv(f'../data/{affil_name.upper()}/all.csv')\n",
"#ALL"
]
},
{
"cell_type": "code",
"execution_count": 9,
Expand Down Expand Up @@ -194,7 +181,14 @@
"Widya Asmara\n",
"Siti Muslimah Widyastuti\n",
"Donny Widianto\n",
"Trini Susmiati\n"
"Trini Susmiati\n",
"Siti Subandiyah\n",
"Tri Joko\n",
"Tri+Joko: retrieving page 1 of 2\n",
"Tri+Joko: retrieving page 2 of 2\n",
"Sedyo Hartono\n",
"Arif Wibowo\n",
"Achmadi Priyatmojo\n"
]
}
],
Expand Down Expand Up @@ -246,9 +240,9 @@
"source": [
"# get metadata from SINTA\n",
"df = pd.read_csv(f'../tables/{affil_name}_bio_clean.csv', index_col=0, dtype=str)\n",
"#sinta_get = sinta.authors(df.id)\n",
"#df2 = pd.DataFrame(sinta_get)\n",
"#df2.to_csv(f'../tables/{affil_name}_bio_clean_sinta.csv')\n",
"sinta_get = sinta.authors(df.id)\n",
"df2 = pd.DataFrame(sinta_get)\n",
"df2.to_csv(f'../tables/{affil_name}_bio_clean_sinta.csv')\n",
"df2 = pd.read_csv(f'../tables/{affil_name}_bio_clean_sinta.csv', index_col=0, dtype=str)\n",
"df = df.merge(df2, left_on='id', right_on='id')#2.set_index('id', drop=False)\n",
"df['Instansi'] = affil_name_long\n",
Expand Down
Loading

0 comments on commit a4d50d2

Please sign in to comment.