generated from ivoa-std/doc-template
-
Notifications
You must be signed in to change notification settings - Fork 2
/
RegTAP.tex
3612 lines (2830 loc) · 140 KB
/
RegTAP.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[11pt,a4paper]{ivoa}
\input tthdefs
\input gitmeta
\tolerance=6000
\hbadness=6000
\usepackage[utf8]{inputenc}
\usepackage{longtable}
\usepackage{listings}
\usepackage{todonotes}
% Listings setup: load the XML and SQL language definitions and enable
% the flexiblecolumns option for all listings.
\lstloadlanguages{XML,SQL}
\lstset{flexiblecolumns=true}
% Colours for the typographic conventions used in the text:
% rtcolor marks relational registry names (used by \rtent),
% tapcolor marks generic TAP names (used by \tapent).
\definecolor{rtcolor}{rgb}{0.15,0.4,0.3}
\definecolor{tapcolor}{rgb}{0.4,0.1,0.1}
% Two alternative definitions of \rtent follow, chosen by the tth
% conditional (defined outside this file, presumably in tthdefs and
% presumably true when the document is converted with tth -- TODO confirm).
\iftth
% First variant: typeset the argument verbatim in rtcolor typewriter.
\def\rtent#1{\texttt{\color{rtcolor}\verb|#1|}}
\else
\makeatletter
% Table-of-contents layout for subsection entries: level 2,
% 1.8em indent, 3em reserved for the section number.
\renewcommand\l@subsection{\@dottedtocline{2}{1.8em}{3em}}
% Catcode switches for the underscore: 12 makes it an ordinary
% printable character, 8 restores its subscript meaning.
\def\makeunderscoreletter{\catcode`\_=12}
\def\makeunderscoresubscript{\catcode`\_=8}
% \rtent{name}: typeset a relational registry name (table, column,
% function) in rtcolor typewriter.  The underscore catcode is changed
% BEFORE \rt@nt reads its argument, so names such as res_detail can be
% written without escaping the underscore.
% NOTE: catcodes are assigned when characters are read, so this only
% works where the argument has not been tokenized yet (e.g. not inside
% the argument of another macro).
\def\rtent{\makeunderscoreletter\relax\rt@nt}
% Helper for \rtent: prints the argument, then switches the underscore
% back to its subscript catcode.
\def\rt@nt#1{\texttt{\color{rtcolor} #1}\makeunderscoresubscript{}}
\makeatother
\fi
% \tapent{name}: typeset a generic TAP name in tapcolor typewriter.
% No catcode handling here; callers escape underscores themselves
% (e.g. \tapent{TAP\_SCHEMA}).
\newcommand{\tapent}[1]{\texttt{\color{tapcolor} #1}}
\ivoagroup{Registry}
\author[http://www.ivoa.net/cgi-bin/twiki/bin/view/IVOA/MarkusDemleitner]{Markus Demleitner}
\author[http://www.ivoa.net/cgi-bin/twiki/bin/view/IVOA/PaulHarrison]{Paul Harrison}
\author[http://www.ivoa.net/cgi-bin/twiki/bin/view/IVOA/MarcoMolinaro]{Marco Molinaro}
\author[http://www.ivoa.net/cgi-bin/twiki/bin/view/IVOA/GretchenGreene]{Gretchen Greene}
\author[http://www.ivoa.net/cgi-bin/twiki/bin/view/IVOA/TheresaDower]{Theresa Dower}
\author[http://wiki.ivoa.net/twiki/bin/view/IVOA/MenelaosPerdikeas]{Menelaos Perdikeas}
\editor{Markus Demleitner}
\previousversion[https://www.ivoa.net/documents/RegTAP/20240124]{PR-1.2-20240124}
\previousversion[https://www.ivoa.net/documents/RegTAP/20220519]{WD-1.2-20220519}
\previousversion[https://ivoa.net/documents/RegTAP/20191011]{REC-1.1}
\previousversion[https://ivoa.net/documents/RegTAP/20190911]{PR-20190911}
\previousversion[https://ivoa.net/documents/RegTAP/20190529]{PR-20190529}
\previousversion[https://ivoa.net/documents/RegTAP/20190326]{PR-20190326}
\previousversion[https://ivoa.net/documents/RegTAP/20180731]{PR-20180731}
\previousversion[https://ivoa.net/documents/RegTAP/20171206]{WD-20171206}
\previousversion[https://ivoa.net/documents/RegTAP/20141208]{REC-1.0}
\title{IVOA Registry Relational Schema}
\begin{document}
\begin{abstract}
Registries provide a mechanism with which VO applications can
discover and select resources -- first and foremost data and
services -- that are relevant for a particular scientific problem.
This specification defines an interface for searching this resource
metadata based on the IVOA's TAP protocol. It specifies a set of tables
that comprise a useful subset of the information contained in the
registry records, as well as the tables' data content in terms of the
XML VOResource data model. The general design of the system is geared
towards allowing easy authoring of queries.
\end{abstract}
\section{Introduction}
\label{intro}
In the Virtual Observatory (VO), registries provide a means for
discovering useful resources, i.e., data and services. Individual
publishers offer the descriptions for their resources (``resource
records'') in publishing registries. As of March 2024, there are
almost 29000 such resource records active within the VO, originating
from about 50 publishing registries.
The protocol spoken by these
publishing registries, OAI-PMH \citep{std:OAIPMH}, only allows restricting queries by
modification date and identifier and is hence not suitable for data discovery.
Even if it were, data discovery would at least be fairly time consuming if
each client had to query dozens or, potentially, hundreds of
publishing registries.
To enable efficient data discovery nevertheless, there are services
(``searchable registries'') harvesting the
resource records from the publishing registries and offering rich query
facilities to Registry clients.
Version 1.0 of the IVOA Registry
Interfaces specification \citep{2009ivoa.spec.1104B} defined, among other aspects of
the VO registry system, a standard interface for such services.
Built on SOAP and an early draft of an XML-based query language,
this first attempt was quickly obsoleted by parallel
developments in the VO. It was then decided to have searchable
registries specified outside of Registry Interfaces.
This document provides one such specification, based in particular on TAP \citep{2019ivoa.spec.0927D}
and ADQL \citep{2023ivoa.spec.1215M}. It follows the model of ObsCore
\citep{2017ivoa.spec.0509L} of defining a representation of a data model
within a relational database. In this case, the data model is a
simplification of the VO's resource metadata interchange representation,
the VOResource XML format \citep{2018ivoa.spec.0625P}. The simplification
yields a schema with 18 tables. For each table, \tapent{TAP\_SCHEMA} metadata is given together
with rules for how to fill these tables from VOResource-serialized
metadata records as well as conditions on foreign keys and
recommendations on indexes.
The resulting set of tables has a modest size by today's standards,
but is still non-trivial. The largest table, \makeunderscoreletter\rtent{table_column},
has about a million rows at the time of writing.
The architecture laid out here allows client applications to perform ``canned''
queries on behalf of their users as well as complex queries formulated
directly by advanced users, using the same TAP clients they employ to
query astronomical data servers.
\subsection{Terminology and Syntactic Conventions}
\label{terms}
The set of tables and their metadata specified here, together with
the mapping from VOResource (``ingestion rules'') is collectively
called ``relational registry schema'' or ``relational registry'' for
short, with a standard schema name of \tapent{rr}.
The specification additionally talks about how to embed these into TAP
services, gives additional user defined functions, talks about
discovering compliant services, etc. Since all this is tightly coupled
to the ``relational registry'' as defined above, we do not
introduce a new term for it. Hence, the entire standard is now known as
``IVOA registry relational schema''.
Historically, we intended to follow the ObsCore/ObsTAP model and
talked about RegTAP. As changing this acronym is technically painful
(e.g., identifiers and URLs would need to be adapted), we kept it even after
the distinction between the schema and its mapping on the one hand and
its combination with a TAP service on the other went away. This
means that the official acronym for ``IVOA registry relational schema'' is
RegTAP. This aesthetic defect seems preferable to causing actual
incompatibilities.
Since RegTAP mentions concepts from several different but related
domains, we try to give typographic hints as to the nature of entities
discussed:
\begin{itemize}
\item Names of tables, columns, and functions of the relational registry
are written in \rtent{green typewriter}.
\item Names coming from generic TAP are written in \tapent{brown
typewriter}.
\item VOResource concepts are written in \vorent{caps and small caps}
(where small caps correspond to lowercase letters in element names of
the XML serialisation).
\item XML literals (like tag, attribute or XSD type names or special
values) are written in \xmlel{cursive typewriter}.
\end{itemize}
\subsection{The Relational Registry within the VO Architecture}
\label{rolewithinivoa}
\begin{figure}[th]
\begin{center}
\includegraphics[width=0.95\textwidth]{role_diagram.pdf}
\end{center}
\caption{IVOA Architecture
diagram with the IVOA Registry Relational Schema (shown as
``RegTAP'') and the related standards.}
\end{figure}
This specification directly relates to other VO standards in the
following ways:
\begin{bigdescription}
\item[VOResource, v1.1 \citep{2018ivoa.spec.0625P}] This standard
sets the foundation for a formal definition of the data
model for resource records via its schema definition. This document
refers to concepts laid down there via xpaths \citep{std:XPATH}. Since
its version 1.1, RegTAP
incorporates the concepts from VOResource 1.1 but can represent
VOResource 1.0 instances (within the limits laid out below) as well.
\item[VODataService, v1.2 \citep{2021ivoa.spec.1102D}] VODataService
de\-scribes several concepts and resource types extending
VOResource's data model, including
tablesets, data services and data
collections. These concepts and types are reflected in the database
schema. Again xpaths link this specification and VODataService.
\item[Other Registry Extensions]Registry extensions are VO standards
defining how particular resources (e.g., Standards) or capabilities
(e.g., IVOA defined interfaces) are described. Most aspects
introduced by them are reflected in the \rtent{res_detail} table using
xpaths into the registry documents.
The present standard should not in general need updates
for registry extension updates. For completeness, we note the
versions current as of this specification: SimpleDALRegExt 1.2
\citep{2022ivoa.spec.0222D},
StandardsRegExt 1.0 \citep{2012ivoa.spec.0508H}, TAPRegExt 1.0
\citep{2012ivoa.spec.0827D}, Registry Interfaces 1.1
\citep{2018ivoa.spec.0723D}.
\item[TAP, v1.1 \citep{2019ivoa.spec.0927D}]
The queries against the schema defined in the present document, and the results of
these queries, will usually be transported using the Table Access
Protocol TAP. It also allows discovering
local additions to the registry relations via TAP's metadata publishing
mechanisms.
\item[IVOA Identifiers, v2.0 \citep{2016ivoa.spec.0523D}]IVOA identifiers are
essentially the primary keys within the VO
registry; as such, they are actual primary keys of the central table of
the relational registry. Also, the notion of an authority as laid down
in IVOA Identifiers plays an important role as publishing registries can
be viewed as a realization of a set of authorities.
\end{bigdescription}
This standard also relates to other IVOA standards:
\begin{bigdescription}
\item[ADQL 2.1 \citep{2023ivoa.spec.1215M}] The rules for ingestion are designed to allow
easy queries given the constraints of the IVOA Astronomical Data Query
Language. Also, we give some functions that extend ADQL using the
language's built-in facility for user-defined functions.
\end{bigdescription}
\section{Design Considerations}
\label{design}
In the design of the tables, the goal has been to preserve as much of
VOResource and its extensions, including the element names, as
possible.
An overriding consideration has been, however, to make natural joins
between the tables behave usefully, i.e., to actually combine rows
relevant to the same entity (resource, table, capability, etc.).
To disambiguate column names that name the same concept on different
entities (name, description, etc.) and would therefore interfere with
the natural join, a shortened tag for the source object
is prepended to the name. Thus, a \vorent{description} element within
a resource ends up in a column named
\rtent{res_description}, whereas the same element from a
\vorent{capability} becomes \rtent{cap_description}.
We further renamed some columns and most tables
with respect to their VOResource
counterparts to avoid clashes with reserved words in popular database
management systems. The alternatives would have been to either recommend
quoting them or burden ADQL translation layers with the task of
automatically converting them to delimited identifiers. Both
alternatives seemed more confusing and less robust than the renaming
proposed here.
Furthermore, camel-case identifiers have been converted to
underscore-separated ones (thus, \vorent{standardID} becomes
\rtent{standard_id}) to have all-lowercase column names; this saves
potential headache if users choose to reference the columns using SQL
delimited identifiers. Dashes in VOResource attribute names are
converted to underscores, too, with the exception of
\vorent{ivo-id}, which is just rendered \rtent{ivoid}.
Another design goal of this specification has been that different registries
operating on the same set of registry records will return identical responses
for most queries; hence, we try to avoid relying on features left not
defined by ADQL (e.g., the case sensitivity of string matches). However,
with a view to non-uniform support for information retrieval-type
queries in database systems, the \rtent{ivo_hasword} user defined
function is not fully specified here; queries employing it may yield
different results on different implementations, even if they operate on
the same set of resource records.
\section{Primary Keys}
\label{primarykeys}
The primary key in the Registry as an abstract concept is a resource
record's IVOID. Hence, for all tables having primary keys at all, the
\rtent{ivoid} column is part of their primary key. This
specification does not require implementations to actually declare
primary keys in the underlying database, and no aspect of user-visible
behavior depends on such explicit declarations; in particular, this
specification makes no requirements on the contents of
\tapent{tap\_schema.keys}.
We nevertheless make recommendations on explicit primary keys, as
we expect definitions according to our recommendations will enhance
robustness of services.
In several RegTAP tables -- \rtent{capability},
\rtent{res_schema}, \rtent{res_table}, and
\rtent{interface} -- artificial primary keys are necessary, as
in VOResource XML sibling elements are not otherwise distinguished. To
allow such artificial primary keys, a column is added to each table, the
name of which ends in \texttt{\_index} (\rtent{cap_index},
\rtent{schema_index}, \rtent{table_index}, and
\rtent{intf_index}).
The type and content of these \texttt{X\_index} columns are
implementation-defined, and clients must not make assumptions on their
content except that the pair \rtent{ivoid}, \texttt{X\_index} is a primary
key for the relation (plus, of course, that references from other tables
correctly resolve). In the tables of columns given below, the
\texttt{X\_index} columns have ``(key)'' given for type. Implementors
have to insert whatever ADQL type is appropriate for their
choice of \texttt{X\_index} implementation.
Obvious implementations for \texttt{X\_index} include having
\texttt{X\_index} enumerate the sibling elements or using some sort
of UUID.
\section{Notes on string handling}
\label{stringnorm}
In the interest of consistent behavior between different RegTAP
implementations regardless of their technology choices, this section
establishes some rules on the treatment of strings -- both those
obtained from attributes and those obtained from element
content -- during ingestion from VOResource XML to database
tables.
\subsection{Whitespace Normalization}
\label{whitenorm}
Most string-valued items in VOResource and extensions are of type
\texttt{xs:to\-ken}, with the clear intent that whitespace in them is
to be normalized in the sense of that XML schema type (i.e., all
whitespace is just a single blank, and there is no leading or trailing
whitespace). For the few exceptions
that actually are directly derived from xs:string (e.g.,
\vorent{vstd:EndorsedVersion}, \vorent{vs:Waveband}) it does not
appear that the intent regarding whitespace is different.
In order to provide reliable querying and simple rules for ingestors
even when these do not employ schema-aware XML parsers, this standard
requires that during ingestion, leading and trailing whitespace MUST be
removed from all strings; in particular, there are no strings consisting
exclusively of whitespace in RegTAP. The treatment of internal
whitespace is implementation-defined. This reflects the expectation
that, wherever multi-word items are queried, whitespace-ignoring
constraints will be used (e.g., LIKE-based regular expressions or the
\rtent{ivo_hasword} user defined function defined below).
\subsection{NULL/Empty String Normalization}
\label{nullnorm}
While empty strings and NULL values are not usually well
distinguished in VO practice -- as reflected in the conventional
TABLEDATA and BINARY serializations of VOTable -- , the distinction
must be strictly maintained in the database tables to ensure
reproducible queries across different RegTAP implementations.
Ingestors therefore MUST turn empty strings (which, by section \ref{whitenorm}, include strings consisting of whitespace
only in VOResource's XML serialization) into NULL values in the
database. Clients expressing constraints on the presence (or absence)
of some information must therefore do so using SQL's \texttt{IS NOT NULL}
(or \texttt{IS NULL}) operators.
\subsection{Case Normalization}
\label{casenorm}
ADQL 2.0 has no operators for case-insensitive matching of strings
(ILIKE, required by this version of RegTAP, was only defined in ADQL
2.1).
Mainly for this reason, RegTAP 1.0 required most columns
containing values not usually intended for display to be
converted to lower case on ingestion. This also somewhat reduces the
likelihood that matches are missed because of different capitalisation,
since queries disregarding capitalisation variations will yield empty
(rather than partial) results.
In the table descriptions below, there are
explicit requirements on case normalization near the end of each
section. This is particularly important when the entities to be
compared are defined to be case-insensitive (e.g., UCDs, IVOIDs).
Client software that can inspect user-provided arguments (e.g., when
filling template queries) should also convert the respective fields to
lower case.
This conversion MUST cover all ASCII letters, i.e., A through Z.
The conversion SHOULD take place according to
algorithm R2 in section 3.13, ``Default Case Algorithms'' of the Unicode
Standard
\citep{std:UNICODE}. In practice, non-ASCII characters are not expected
to occur in columns for which lowercasing is required.
Analogously, case-insensitive comparisons as required by some of the
user-defined functions for the relational registry MUST compare
the ASCII letters without regard for case. They SHOULD compare according
to D144 in the Unicode Standard.
Columns intended for presentation are not case-normalised. When
matching against these, queries should use case-insensitive matching
using ADQL 2.1's ILIKE or, equivalently, the \verb|ivo_nocasematch| user
defined function required by RegTAP.
\subsection{Non-ASCII Characters}
\label{utfreq}
Neither TAP nor ADQL mention non-ASCII in service parameters -- in
particular the queries -- or returned values. For RegTAP, that is
unfortunate, as several columns will contain relevant non-ASCII
characters. Columns for which extra care is necessary include all
descriptions, \rtent{res_title} and \rtent{creator_seq} in
\rtent{rr.resource}, as well as \rtent{role_name} and
\rtent{street_address} in \rtent{rr.res_role}.
RegTAP implementations SHOULD be able to faithfully represent all
characters defined in the latest version of the Unicode standard
\citep{std:UNICODE} at
any given time and allow querying using them (having support for UTF-8
in the database should cover this requirement) for at least the fields
mentioned above.
On VOResource ingestion, non-ASCII characters that a service cannot
faithfully store MUST be replaced by a question mark character (``?'').
RegTAP services MUST interpret incoming ADQL as encoded in UTF-8,
again replacing unsupported characters with question marks.
We leave character replacement on result generation unspecified, as
best-effort representations (e.g., ``Angstrom'' instead of ``Ångström'')
should not impact interoperability but significantly improve user
experience over consistent downgrading. In VOTable output,
implementations SHOULD support full Unicode in at least the fields
enumerated above. Clients are advised to retrieve results in VOTable or
other encoding-aware formats.
Note that at least up to VOTable 1.5, non-ASCII in char-typed fields, while
supported by most clients in TABLEDATA serialization, is technically
illegal; it is essentially undefined in other serializations. To
produce standards-compliant VOTables, columns containing non-ASCII must
be of type unicodeChar.
\subsection{Vocabulary considerations}
\label{sect:vocab-use}
Since version 1.1, VOResource employs RDF vocabularies to control terms
used in several places; in version 1.2, this concerns
\vorent{content/con\-tentLevel}, \vorent{content/type},
\vorent{content/subject},
\vorent{date/role}, \vorent{content/relationship/relationshipType}.
These vocabularies are available from the IVOA vocabulary
repository\footnote{\url{https://www.ivoa.net/rdf}} as specified by
Vocabularies in the VO, Version 2 \citep{2023ivoa.spec.0206D}. The
relevant vocabulary URIs are given in the VOResource specification and
\xmlel{xs:documentation} elements in the schema file.
For RegTAP, these vocabulary resources are important because the
VOResource relationship types and date roles contain some deprecated
terms kept for compatibility with VOResource 1.0, together with guidance
what to use instead. In order to simplify the usage of
vocabulary-controlled RegTAP columns, services MUST translate such deprecated
terms when the vocabularies give replacements (i.e., when the terms appear as subjects
of \emph{ivoasem:useInstead} triples).
Since the vocabularies are expected to develop independently of their
originating standards, RegTAP service operators furthermore SHOULD regularly
revisit IVOA vocabularies to see if further translations must be done.
In VO practice, many resource records still use subject identifiers that
are not taken from the IVOA UAT\footnote{\url{http://www.ivoa.net/rdf/uat}}.
Where only the lexical form of the identifier is wrong, RegTAP operators
are free to correct the syntax; otherwise, subject identifiers should be
ingested as given by the data providers even if they are not drawn from
the UAT.
\section{QNames in VOResource attributes}
\label{qnameatts}
VOResource and its extensions make use of XML QNames in attribute
values, most prominently in \texttt{xsi:type}. The standard
representation of these QNames in XML instance documents makes use of an
abbreviated notation employing prefixes declared using the xmlns mechanism
as discussed in \citet{std:XMLNS}. Within an ADQL-exposed database, no
standard mechanism exists that could provide a similar mapping of URLs
and abbreviations. The correct way to handle this problem would thus be
to have full QNames in the database (e.g.,
\verb|{http://www.ivoa.net/xml/ConeSearch/v1.0}ConeSearch| for the
canonical \vorent{cs:ConeSearch}). This, of course, would make for
excessively tedious and error-prone querying.
For various reasons, VOResource authors have always been encouraged
to use a set of ``standard'' prefixes. This allows an easy and, to users,
unsurprising exit from the problem of the missing xmlns declarations:
For the representation of QNames within the database, these recommended
prefixes are mandatory in RegTAP. Future VOResource extensions define their
mandatory prefixes themselves.
As described in the IVOA endorsed Note ``XML schema versioning
policies'' \citep{2018ivoa.spec.0529H}, minor-version updates to XML
schemas do not change the namespace URIs. Before the adoption of that
note, some schemas introduced namespace URIs that did change on minor
versions. For consistency, and because there should not really be
discovery use cases based on minor versions of XML schemas, all
namespace URIs for the same major version of a standard have the same
canonical prefix -- e.g., the schema URIs for both SSAP namespaces that
SimpleDALRegExt has defined are mapped to \texttt{ssap:}.
For reference, table~\ref{tab:prefixmap}
lists the XML namespace URIs and their canonical prefixes
for schemata widely used in the VO Registry.
\begin{table}
\begin{tabular}{ll}
\sptablerule
cs&http://www.ivoa.net/xml/ConeSearch/v1.0\\
dc&http://purl.org/dc/elements/1.1/\\
oai&http://www.openarchives.org/OAI/2.0/\\
ri&http://www.ivoa.net/xml/RegistryInterface/v1.0\\
sia&http://www.ivoa.net/xml/SIA/v1.0\\
sia&http://www.ivoa.net/xml/SIA/v1.1\\
slap&http://www.ivoa.net/xml/SLAP/v1.0\\
ssap&http://www.ivoa.net/xml/SSA/v1.0\\
ssap&http://www.ivoa.net/xml/SSA/v1.1\\
tr&http://www.ivoa.net/xml/TAPRegExt/v1.0\\
vg&http://www.ivoa.net/xml/VORegistry/v1.0\\
vr&http://www.ivoa.net/xml/VOResource/v1.0\\
vs&http://www.ivoa.net/xml/VODataService/v1.0\\
vs&http://www.ivoa.net/xml/VODataService/v1.1\\
vstd&http://www.ivoa.net/xml/StandardsRegExt/v1.0\\
xsi&http://www.w3.org/2001/XMLSchema-instance\\
\sptablerule
\end{tabular}
\caption{The canonical prefix mapping in the VO Registry as of the
publication of this specification.}
\label{tab:prefixmap}
\end{table}
\section{Xpaths}
\label{vorutypes}
This specification piggybacks on top of the well-established
VOResource standard. This means that it does not define a full data model,
but rather something like a reasonably query-friendly view of a partial
representation of one. The link between the actual data model, i.e.,
VOResource and its extensions as defined by the XML Schema documents, and
the fields within this database schema, is provided by
xpaths, which are here slightly abbreviated for both brevity and
generality.
All xpaths given in this specification are assumed to be relative to
the enclosing \vorent{vr:Resource} element; these are called
``resource xpaths'' in the following. If resource xpaths are to be
applied to an OAI-PMH response, the Xpath expression
\texttt{*/*/*/oai:metadata/ri:Resource} must be prepended to them,
with the canonical prefixes from section \ref{qnameatts} implied. The resource xpaths themselves
largely do not need explicit namespaces since VOResource elements are by
default unqualified. Elements and attributes from non-VOResource
schemata in such resource xpaths have the canonical namespace prefixes, which in this
specification only applies to several \texttt{xsi:type} attribute
names.
Some tables draw data from several different VOResource elements.
For those, we have introduced an extended syntax with additional
metacharacters \verb$($, \verb$)$, and \verb$|$,
where the vertical bar denotes an
alternative and the parentheses grouping. For instance, our notation
\texttt{/(tableset/schema/|)table/} corresponds to the two xpaths
\texttt{/table} and \texttt{/tableset/schema/table}.
Within the Virtual Observatory, the link between data models and
concrete data representations is usually made using utypes.
Since VOResource is directly modelled
in XML Schema, the choice of XPath as the bridging formalism is
compelling, though, and utypes themselves are not necessary for the
operation of a TAP service containing the relational registry.
TAP, however, offers fields for utypes in its \tapent{TAP\_SCHEMA}. Since they
are not otherwise required, this specification takes the liberty of
using them to denote the xpaths.
In the metadata for tables and columns below, the utypes given are
obtained from the xpaths by simply prepending them with
\texttt{xpath:}. To avoid repetition, we allow relative xpaths:
when the xpath in a column utype does not start with a slash, it is
understood that it must be concatenated with the table utype to obtain
the full xpath.
For illustration, if a table has a utype of
$$\texttt{xpath:/capability/interface/}$$ and a column within this table
has a utype of $$\texttt{xpath:accessURL/@use},$$ the resulting resource
xpath would come out to be
$$\texttt{/capability/interface/accessURL/@use};$$ to match this in an
OAI-PMH response, the XPath would be
$$\texttt{\small
*/*/*/oai:metadata/ri:Resource/capability/interface/accessURL/@use}.$$
While clients MUST NOT rely on these utypes in either
\tapent{TAP\_SCHEMA} or the
metadata delivered with TAP replies, service operators SHOULD provide them, in
particular when there are local extensions to the relational registry in their
services. Giving xpaths for extra columns and tables helps human
interpretation of them at least when the defining schema files are
available.
Resource xpaths are also used in the \rtent{res_detail} table (section
\ref{table_res_detail}). These are normal xpaths
(although again understood relative to the enclosing Resource element),
which, in particular, means that they are case sensitive. On the other
hand, to clients they are simply opaque strings, i.e., clients cannot
just search for any xpaths into VOResource within \rtent{res_detail}.
Non-normatively, we give an XSLT
sheet\footnote{\auxiliaryurl{makeutypes.xslt}} producing resource xpaths
for suitable VOResource extensions. It is, however, not fully general,
as it will only notice direct subclasses of VOResource's
\vorent{Resource}, \vorent{Capability}, and \vorent{Interface} classes.
If extensions derive from other extensions' subclasses of these classes,
the stylesheet would need to be amended.
\section{Discovering Relational Registries}
\label{registration}
The relational registry can be part of any TAP service. The presence
of the tables discussed here is indicated by declaring support for the
data model \texttt{Registry 1.2} with the IVOID
$$\texttt{ivo://ivoa.net/std/regtap\#1.2}$$ in the service's
capabilities as governed by TAPRegExt \citep{2012ivoa.spec.0827D}. Technically, this
entails adding
\begin{verbatim}
<dataModel ivo-id="ivo://ivoa.net/std/regtap#1.2"
>Registry 1.2</dataModel>
\end{verbatim}
as a child of the capability element with the type
\vorent{tr:TableAccess}.
A client that knows the access URL of one TAP service containing
a relational
registry can thus discover all other services exposing one. The
``Find all TAP endpoints offering the
relational registry'' example (sect.~\ref{ex:find-regtap})
shows a query that does
this.
Services implementing this data model that do not (strive to) offer
the full data content of the VO registry (like domain-specific
registries or experimental systems) MUST NOT declare the above data
model, so as not to invite clients expecting the VO registry to send
queries to them.
Section 5.2 of Registry Interfaces 1.1 additionally requires full RegTAP
services to register a \xmlel{vg:Registry}-typed record with a (possibly
auxiliary) TAP capability. This record is being used by the RofR, and
it opens up a migration path to a data-based discovery
pattern\footnote{This would look for schema utypes and appears
desirable to enable multiple instances of a data model within one TAP
service; it is expected that the recommended discovery pattern
in RegTAP 1.3 will be updated accordingly.}.
\section{RegTAP Tables}
\label{vortables}
All tables making up the RegTAP schema are in the \tapent{rr} schema.
In both \tapent{TAP\_SCHEMA} and the VODataService tableset, the
\tapent{rr} schema
MUST be associated with a \tapent{utype} matching the data model
identifier given in sect.~\ref{registration}, i.e.,
$$\texttt{ivo://ivoa.net/std/regtap\#1.2}.$$
In the following table descriptions, the names of tables
(cf.~Table~\ref{table:dm}) and columns
are normative and MUST be used as given, and all-lowercase. The utypes
given in the table descriptions are formed as discussed
in section~\ref{vorutypes} and are subject to the requirements given
there. All columns defined in
this document MUST have a 1 in the \tapent{std} column of the
\tapent{TAP\_SCHEMA.table\_columns} table. Unless otherwise
specified, all values of ucd and unit in
\tapent{TAP\_SCHEMA.table\_columns} are NULL for columns defined here.
Descriptions are not normative (as given, they usually are taken from
the schema files of VOResource and its extensions with slight
redaction). Registry operators MAY provide additional columns in their
tables, but they MUST provide all columns given in this
specification.
Many of the columns specified below are defined as having a ``string''
data type. This is to be translated into arrays of \texttt{char} or
\texttt{unicodeChar} on VOTable output depending on the service
operators' decisions as to the representation of non-ASCII data in the
database. For requirements and recommendations regarding national
characters in RegTAP, see Sect.~\ref{utfreq}. The length of these
arrays is not defined by this standard, and no artificial
length limits should be imposed by implementations.
Some of the types are given as ``datatype+xtype''. In these cases, the
xtype MUST be given in VOTable output, and the serialisation rules from
DALI \citep{2017ivoa.spec.0517D} apply.
All table descriptions start out with brief remarks on the
relationship of the table to the VOResource XML data model. Then, the
columns are described in a selection of \tapent{TAP\_SCHEMA} metadata. For each
table, recommendations on explicit primary and foreign keys as well as
indexed columns are given, where it is understood that primary and
foreign keys are already indexed in order to allow efficient joins;
these parts are not normative, but operators should ensure decent
performance for queries assuming the presence of the given indexes and
relationships. Finally, miscellaneous normative requirements, typically
on case normalization, are given.
\begin{figure}
\includegraphics[width=\textwidth]{schema.pdf}
\caption{A sketch of the
Relational Registry schema.
Only the columns considered
most interesting for client use are shown. Arrows indicate foreign
key-like relationships.}
\end{figure}
% GENERATED: gettables.sh
\begin{table}[t]
\small
\hbox to\hsize{\hss
\begin{tabular}{p{0.35\textwidth}p{0.64\textwidth}}
\sptablerule
\textbf{Name and UType}&\textbf{Description}\\
\sptablerule
rr.alt\_identifier\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/(curation/creator/|)altIdentifier}&
An alternate identifier associated with this record.\\
rr.capability\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/capability/}&
Pieces of behaviour of a resource.\\
rr.interface\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/capability/interface/}&
Information on access modes of a capability.\\
rr.intf\_param\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/capability/interface/param/}&
Input parameters for services.\\
rr.relationship\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/content/relationship/}&
Relationships between resources (like mirroring, derivation, serving
a data collection).\\
rr.res\_date\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/curation/}&
A date associated with an event in the life cycle of the resource.\\
rr.res\_detail\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily }&
XPath-value pairs for members of resource or capability and their
derivations that are less used and/or from VOResource extensions.\\
rr.res\_role\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily }&
Entities (persons or organizations) operating on resources: creators,
contacts, publishers, contributors.\\
rr.res\_schema\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/tableset/schema/}&
Sets of tables related to resources.\\
rr.res\_subject\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/content/}&
Topics, object types, or other descriptive keywords about the
resource.\\
rr.res\_table\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/(tableset/schema/|)table/}&
(Relational) tables that are part of schemata or resources.\\
rr.resource\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/}&
The resources (like services, data collections, organizations)
present in this registry.\\
rr.stc\_spatial\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/coverage/spatial}&
The spatial coverage of resources.\\
rr.stc\_spectral\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/coverage/spectral}&
The spectral coverage of resources, given as one or more intervals.\\
rr.stc\_temporal\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/coverage/temporal}&
The temporal coverage of resources, given as one or more intervals.\\
rr.table\_column\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/(tableset/schema/|)table/column/}&
Metadata on columns of a resource's tables.\\
rr.tap\_table\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily }&
TAP-queriable tables.\\
rr.validation\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:/(capability/|)validationLevel}&
Validation levels for resources and capabilities.\\
\sptablerule
\end{tabular}\hss}
\caption{The tables making up the TAP data model \texttt{Registry 1.2}}
\label{table:dm}
\end{table}
% /GENERATED
\subsection{The resource Table}
\label{table_resource}
The \rtent{rr.resource} table contains most atomic members of
\rtent{vr:Resource} that have a 1:1 relationship to the resource
itself. Members of derived types are, in general, handled through
the \rtent{res_detail}
table even if 1:1 (see section~\ref{table_res_detail}). The
\rtent{content_level}, \rtent{content_type}, and \rtent{waveband}
members are 1:n but still appear
here. If there are multiple values, they are concatenated with hash
characters (\#). Use the \rtent{ivo_hashlist_has} ADQL extension
function to check for the presence of a single value. This convention
saves on the number of tables while not complicating common queries significantly.
In VOResource documents, multiple \vorent{rights} elements are allowed
on a single record. This is mainly for compatibility with DataCite, and
multiple \vorent{rights} elements are discouraged by the VOResource
specification at least for use within the VO. RegTAP uses that freedom
to include \rtent{rights} and \rtent{rights_uri} columns in
\rtent{rr.resource} directly. These columns must be populated,
respectively, with the content and the value of the rightsURI attribute
of the \emph{first} \vorent{rights} element within a resource record
(falling back to NULL). RegTAP services may provide all \vorent{rights}
and \vorent{rightsURI} values through \rtent{rr.res_detail} (see
sect.~\ref{table_res_detail}).
A local addition is the \rtent{creator_seq} column. It contains
all content of the \vorent{name} elements below a resource element's
\vorent{curation} child's \vorent{creator} children, concatenated with a
sequence of semicolon and blank characters (``\mbox{\texttt{; }}''). The
individual parts must be concatenated preserving the sequence of the XML
elements. The resulting string is primarily intended for display
purposes (``author list'') and is hence not case-normalized. It was
added since the equivalent of an author list is expected to be a
metadatum that is displayed fairly frequently, but also since the
sequence of author names is generally considered significant. The
\rtent{res_role} table, on the other hand, does not allow recovering
the input sequence of the rows belonging to one resource.
The \rtent{res_type} column reflects the lower-cased value of
the \vorent{ri:Resource} element's \texttt{xsi:type} attribute,
where the canonical prefixes (cf.~sect.~\ref{qnameatts})
are used. While custom or experimental
VOResource extensions may lead to more or less arbitrary strings in that
column, VOResource and its IVOA-recommended extensions at the time of
writing define the following values for \rtent{res_type}:
\begin{description}
\item[vg:authority]A naming authority; as described in the IVOA
Identifiers specification \citep{2016ivoa.spec.0523D}, these records
are used to guarantee global uniqueness of IVOIDs.
\item[vg:registry]A registry. This can be a publishing registry (which
has at least one capability element of type \xmlel{vg:Harvest}), or a
searchable registry (like a RegTAP service). See Registry Interfaces
1.1 on how to apply this resource type.
\item[vr:organisation]The main purpose of an organisation as a registered resource is to
be referenced by IVOID as a publisher of other resources.
\item[vr:resource]Any entity or component of a VO application that is describable and
identifiable by an IVOA identifier; while it is technically possible to
publish such records, the authors of such records should probably be
asked to use a more specific type.
\item[vr:service]A resource that can be invoked by a client to perform some action on
its behalf.
\item[vs:catalogservice]A service that interacts with one or more
specified tables.
\item[vs:catalogresource] A resource accessible through collective services
(which would typically be declared through auxiliary capabilities) or non-IVOA protocols
(typical example: A set of tables accessible within a larger TAP
service).
\item[vs:dataservice]A service for accessing astronomical data; publishers choosing
this over \vorent{vs:CatalogService} probably intend to communicate
that the resource does not have an intrinsically tabular structure.
\item[vs:dataresource] A non-tabular resource accessible through collective
services (which would typically be declared through auxiliary
capabilities) or non-IVOA protocols.
\item[vs:datacollection] A resource type intended by VODataService
version 1.1 to be used for data-only resources. Data providers should
use \vorent{vs:CatalogResource} or \vorent{vs:DataResource} instead.
\item[vstd:standard]A description of a standard specification.
\end{description}
The \vorent{status} attribute of \vorent{vr:Resource} is
considered an implementation detail of the XML serialization and is not
reflected here. Neither \vorent{inactive} nor \vorent{deleted}
records may be kept in the \rtent{resource} table. Since all
other tables in the relational registry should keep a foreign key on the
\rtent{ivoid} column, this implies that only metadata on
\vorent{active} records
is being kept in the relational registry. In other words, users can
expect a resource to exist and work if they find it in a relational
registry.
% GENERATED: maketable.sh rr.resource
\begin{inlinetable}
\renewcommand*{\arraystretch}{1.2}
\small
\begin{tabular}{p{0.28\textwidth}p{0.2\textwidth}p{0.66\textwidth}}
\sptablerule
\multicolumn{3}{l}{\textit{Column names, utypes, datatypes, and descriptions for the rr.resource table}}\\
\sptablerule
\baselineskip=9pt\relax ivoid\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:identifier}&
\footnotesize string&
Unambiguous reference to the resource conforming to the IVOA standard for identifiers.\\
\baselineskip=9pt\relax res\_type\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:@xsi:type}&
\footnotesize string&
Resource type (something like vg:authority, vs:catalogservice, etc).\\
\baselineskip=9pt\relax created\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:@created}&
\footnotesize character[19]\hfil\break+timestamp&
The UTC date and time this resource metadata description was created.\\
\baselineskip=9pt\relax short\_name\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:shortName}&
\footnotesize string&
A short name or abbreviation given to something, for presentation in space-constrained fields (up to 16 characters).\\
\baselineskip=9pt\relax res\_title\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:title}&
\footnotesize string&
The full name given to the resource.\\
\baselineskip=9pt\relax updated\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:@updated}&
\footnotesize character[19]\hfil\break+timestamp&
The UTC date this resource metadata description was last updated.\\
\baselineskip=9pt\relax content\_level\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/contentLevel}&
\footnotesize string&
A hash-separated list of content levels specifying the intended audience.\\
\baselineskip=9pt\relax res\_description\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/description}&
\footnotesize string&
An account of the nature of the resource.\\
\baselineskip=9pt\relax reference\_url\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/referenceURL}&
\footnotesize string&
URL pointing to a human-readable document describing this resource.\\
\baselineskip=9pt\relax creator\_seq\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:curation/creator/name}&
\footnotesize string&
The creator(s) of the resource in the order given by the resource record author, separated by semicolons.\\
\baselineskip=9pt\relax content\_type\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/type}&
\footnotesize string&
A hash-separated list of natures or genres of the content of the resource.\\
\baselineskip=9pt\relax source\_format\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/source/@format}&
\footnotesize string&
The format of source\_value. This, in particular, can be ``bibcode''.\\
\baselineskip=9pt\relax source\_value\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:content/source}&
\footnotesize string&
A bibliographic reference from which the present resource is derived or extracted.\\
\baselineskip=9pt\relax res\_version\hfil\break
\makebox[0pt][l]{\scriptsize\ttfamily xpath:curation/version}&
\footnotesize string&