-
Notifications
You must be signed in to change notification settings - Fork 1
/
BEIC_records2publishers.py
44 lines (40 loc) · 1.06 KB
/
BEIC_records2publishers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Script to extract publisher data from BEIC's APE records in UNIMARC format.
"""
#
# (C) Federico Leva e Fondazione BEIC, 2018
#
# Distributed under the terms of the MIT license.
#
__version__ = '0.1.0'
#
from pymarc import MARCReader
import unicodecsv as csv
import re
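# The input file must be UTF-8 encoded. Convert MARC-8 records first, e.g.:
# yaz-marcdump -i marc -f marc8 -t utf8 -o marc input.mrc > input.utf8.mrc
# Note: this script reads 'input.mrc', so rename the converted file accordingly.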
# Noise stripped from the concatenated 210 field: square brackets and
# backslashes, a "c" immediately followed by a digit (presumably a copyright
# marker such as "c1990" -- confirm against the data), trailing "?", "!" or "-"
# right after a digit, and the literal word "stampa ".
_NOISE_RE = re.compile(r'([\[\]\\]|c(?=[0-9])|(?<=[0-9])[?!-]+$|stampa )')

# Context managers guarantee that both files are closed (and the CSV flushed)
# even if reading or writing fails midway. The output is opened in binary mode
# because unicodecsv encodes each row itself and writes bytes; text mode would
# also turn the explicit '\n' line terminator into '\r\n' on Windows.
with open('input.mrc', 'rb') as f, open('publishers.csv', 'wb') as out:
    reader = MARCReader(f)
    writer = csv.writer(out,
                        delimiter=b',',
                        lineterminator='\n',
                        quoting=csv.QUOTE_MINIMAL,
                        encoding='utf-8'
                        )
    # Header row: city, publisher, year, cleaned-up concatenation of the field.
    writer.writerow([u'Città', u'Editore', u'Anno', 'Concatenato'])
    for record in reader:
        try:
            # Only the first 210 field, and the first occurrence of each of
            # its subfields $a, $c and $d, is used.
            pub = record.get_fields('210')[0]
            city = pub.get_subfields('a')[0]
            pubname = pub.get_subfields('c')[0]
            year = pub.get_subfields('d')[0]
        except IndexError:
            # Skip records lacking a 210 field or any of the three subfields.
            continue
        # Remove noisy specifications from the full field value.
        strip = _NOISE_RE.sub('', pub.value())
        writer.writerow([city, pubname, year, strip])