Skip to content

Commit

Permalink
Add limited XML create-template support
Browse files Browse the repository at this point in the history
OpenDataServices/cove#775

Based on this unmerged commit to CoVE
OpenDataServices/cove@e274142
  • Loading branch information
Bjwebb committed Aug 14, 2018
1 parent 0843693 commit 56880b8
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 13 deletions.
11 changes: 10 additions & 1 deletion examples/help/create-template/expected.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
usage: flatten-tool create-template [-h] -s SCHEMA [-f {csv,xlsx,all}]
usage: flatten-tool create-template [-h] [-s SCHEMA] [-f {csv,xlsx,all}]
[-m MAIN_SHEET_NAME] [-o OUTPUT_NAME]
[--rollup] [-r ROOT_ID] [--use-titles]
[--xml]
[--xml-schema [XML_SCHEMA [XML_SCHEMA ...]]]
[--root-list-path ROOT_LIST_PATH]

optional arguments:
-h, --help show this help message and exit
Expand All @@ -22,3 +25,9 @@ optional arguments:
-r ROOT_ID, --root-id ROOT_ID
Root ID of the data format, e.g. ocid for OCDS
--use-titles Convert titles. Requires a schema to be specified.
--xml Use XML as the input format
--xml-schema [XML_SCHEMA [XML_SCHEMA ...]]
Path to one or more XML schemas
--root-list-path ROOT_LIST_PATH
Path of the root list, defaults to main. Needed for
XML template creation only.
11 changes: 8 additions & 3 deletions flattentool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,28 @@
from flattentool.input import FORMATS as INPUT_FORMATS
from flattentool.xml_output import toxml
from flattentool.lib import parse_sheet_configuration
from flattentool.xml_create_template import XMLSchemaParser
import sys
import json
import codecs
from decimal import Decimal
from collections import OrderedDict


def create_template(schema, output_name='template', output_format='all', main_sheet_name='main',
rollup=False, root_id=None, use_titles=False, **_):
def create_template(schema=None, output_name='template', output_format='all', main_sheet_name='main',
rollup=False, root_id=None, use_titles=False,
xml=False, xml_schemas=None, root_list_path=None, **_):
"""
Creates template file(s) from given inputs
This function is built to deal with commandline input and arguments
but to also be called from elsewhere in future
"""

parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
if xml:
parser = XMLSchemaParser(xml_schemas=xml_schemas, root_list_path=root_list_path)
else:
parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
parser.parse()

def spreadsheet_output(spreadsheet_output_class, name):
Expand Down
19 changes: 16 additions & 3 deletions flattentool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def create_parser():
parser_create_template = subparsers.add_parser(
'create-template',
help='Create a template from the given schema')
parser_create_template.add_argument(
schema_group = parser_create_template.add_mutually_exclusive_group(required=True)
schema_group.add_argument(
"-s", "--schema",
help="Path to the schema file you want to use to create the template",
required=True)
help="Path to the schema file you want to use to create the template")
parser_create_template.add_argument(
"-f", "--output-format",
help="Type of template you want to create. Defaults to all available options",
Expand All @@ -61,6 +61,19 @@ def create_parser():
"--use-titles",
action='store_true',
help="Convert titles. Requires a schema to be specified.")
parser_create_template.add_argument(
"--xml",
action='store_true',
help="Use XML as the input format")
schema_group.add_argument(
"--xml-schema",
dest='xml_schemas',
metavar='XML_SCHEMA',
nargs='*',
help="Path to one or more XML schemas")
parser_create_template.add_argument(
"--root-list-path",
help="Path of the root list, defaults to main. Needed for XML template creation only.")

parser_flatten = subparsers.add_parser(
'flatten',
Expand Down
10 changes: 4 additions & 6 deletions flattentool/sort_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def get_schema_element(self, tag_name, name_attribute):
return schema_element
return schema_element

def element_loop(self, element, path):
def element_loop(self, element):
"""
Return information about the children of the supplied element.
"""
Expand All @@ -95,14 +95,12 @@ def element_loop(self, element, path):
'xsd:complexType/xsd:all/xsd:element',
namespaces=namespaces)
+ type_elements)
child_tuples = []
for child in children:
a = child.attrib
if 'name' in a:
child_tuples.append((a['name'], child, None, a.get('minOccurs'), a.get('maxOccurs')))
yield a['name'], child, None, a.get('minOccurs'), a.get('maxOccurs')
else:
child_tuples.append((a['ref'], None, child, a.get('minOccurs'), a.get('maxOccurs')))
return child_tuples
yield a['ref'], None, child, a.get('minOccurs'), a.get('maxOccurs')

def create_schema_dict(self, parent_name, parent_element=None):
"""
Expand All @@ -114,7 +112,7 @@ def create_schema_dict(self, parent_name, parent_element=None):

return OrderedDict([
(name, self.create_schema_dict(name, element))
for name, element, _, _, _ in self.element_loop(parent_element, '')])
for name, element, _, _, _ in self.element_loop(parent_element)])


def sort_element(element, schema_subdict):
Expand Down
103 changes: 103 additions & 0 deletions flattentool/xml_create_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import sys

from .sort_xml import XMLSchemaWalker, namespaces
from .sheet import Sheet


class XMLSchemaWalkerForTemplate(XMLSchemaWalker):
    """
    Schema walker that lists the column paths of a flattened template.

    Adds attribute discovery on top of XMLSchemaWalker so that
    generate_paths can emit one path per attribute and per simple-content
    element reachable from a starting element.
    """

    @staticmethod
    def _find_in_complex_type(node, prefix, tag):
        """
        Find `tag` children directly under a complexType and under its
        simpleContent extension.

        `prefix` is the path from `node` to the complexType ('' when `node`
        is the complexType itself). Direct children come first in the result.
        """
        return (
            node.findall(prefix + tag, namespaces=namespaces) +
            node.findall(prefix + 'xsd:simpleContent/xsd:extension/' + tag, namespaces=namespaces)
        )

    def attribute_loop(self, element):
        """
        Yield a tuple for each attribute the given schema element can have.

        The format of the tuple is (name, is_required).

        Attributes are collected, in this order, from the element's inline
        complexType, from the named complexType given by its 'type' attribute
        (when that resolves in the schema), and from the attributeGroups
        referenced by either of those.
        """
        type_attributes = []
        type_attribute_groups = []
        type_name = element.attrib.get('type')
        if type_name is not None:
            complex_type = self.get_schema_element('complexType', type_name)
            if complex_type is not None:
                type_attributes = self._find_in_complex_type(complex_type, '', 'xsd:attribute')
                type_attribute_groups = self._find_in_complex_type(complex_type, '', 'xsd:attributeGroup')

        inline_groups = self._find_in_complex_type(element, 'xsd:complexType/', 'xsd:attributeGroup')
        group_attributes = []
        for attribute_group in inline_groups + type_attribute_groups:
            group_definition = self.get_schema_element('attributeGroup', attribute_group.attrib['ref'])
            if group_definition is None:
                # Unresolvable group reference: skip it, in line with how
                # unresolvable complexType and attribute references are
                # tolerated, rather than failing on None.findall.
                continue
            group_attributes += group_definition.findall('xsd:attribute', namespaces=namespaces)

        inline_attributes = self._find_in_complex_type(element, 'xsd:complexType/', 'xsd:attribute')
        for attribute in inline_attributes + type_attributes + group_attributes:
            # In XSD, 'use' is set where the attribute is used (a local
            # declaration or a ref), so read it before swapping in the
            # referenced definition.
            use = attribute.get('use')
            if 'ref' in attribute.attrib:
                referenced_attribute = self.get_schema_element('attribute', attribute.get('ref'))
                if referenced_attribute is not None:
                    attribute = referenced_attribute
                    if use is None:
                        use = attribute.get('use')
            # An unresolved reference (e.g. 'xml:lang') has no name, so it is
            # reported under its ref value.
            yield attribute.get('name') or attribute.get('ref'), use == 'required'

    def has_simple_content(self, element):
        """
        Return True if the element's complexType, whether inline or named by
        its 'type' attribute, contains an xsd:simpleContent child.
        """
        type_name = element.attrib.get('type')
        if type_name is not None:
            complex_type = self.get_schema_element('complexType', type_name)
            if complex_type is not None and complex_type.findall('xsd:simpleContent', namespaces=namespaces):
                return True
        return bool(element.findall('xsd:complexType/xsd:simpleContent', namespaces=namespaces))

    def generate_paths(self, parent_name, parent_element=None, parent_path=''):
        """
        Recursively yield the flattened paths below a schema element.

        parent_name - name of the element to start from; only used to look
            the element up when parent_element is not supplied
        parent_element - the schema element itself, if already known
        parent_path - path prefix prepended to everything yielded

        Attributes are yielded as parent_path + '@' + name. Child elements
        with simple content are yielded as parent_path + name. Descendants
        are nested under name + '/', or name + '/0/' when the child may
        occur more than once.
        """
        if parent_element is None:
            parent_element = self.get_schema_element('element', parent_name)

        for name, _required in self.attribute_loop(parent_element):
            if name == 'xml:lang':
                # Namespaces not supported yet https://github.com/OpenDataServices/flatten-tool/issues/148
                # And no way to specify two narrative elements anyway https://github.com/OpenDataServices/cove/issues/777
                continue
            yield parent_path + '@' + name

        for name, element, _, minOccurs, maxOccurs in self.element_loop(parent_element):
            if element is None:
                element = self.get_schema_element('element', name)
            path = parent_path + name
            if self.has_simple_content(element):
                yield path
            if maxOccurs is None:
                # XSD defaults maxOccurs to 1 when the attribute is omitted;
                # int(None) would raise TypeError.
                repeated = False
            else:
                repeated = maxOccurs == 'unbounded' or int(maxOccurs) > 1
            path += '/0/' if repeated else '/'
            yield from self.generate_paths(name, element, path)


class XMLSchemaParser(object):
    """
    Parse the fields of an XML schema into a flattened structure.

    xml_schemas - the XML schemas, passed straight to
        XMLSchemaWalkerForTemplate; defaults to an empty list
    root_list_path - name handed to generate_paths as the element to start
        walking from; required
    """

    def __init__(self, xml_schemas=None, root_list_path=None):
        # NOTE(review): sub_sheets / main_sheet / sub_sheet_mapping presumably
        # mirror what SchemaParser exposes to create_template - confirm.
        self.sub_sheets = {}
        self.main_sheet = Sheet()
        self.sub_sheet_mapping = {}
        # A fresh list per instance: a `[]` default in the signature would be
        # one list object shared by every parser built without xml_schemas.
        self.xml_schemas = [] if xml_schemas is None else xml_schemas
        assert root_list_path is not None, 'root_list_path is required for XML template creation'
        self.root_list_path = root_list_path

    def parse(self):
        """Append every path generated from the root element to the main sheet."""
        walker = XMLSchemaWalkerForTemplate(self.xml_schemas)
        for path in walker.generate_paths(self.root_list_path):
            self.main_sheet.append(path)

0 comments on commit 56880b8

Please sign in to comment.