diff --git a/.Rbuildignore b/.Rbuildignore index 22362b5..e9411ec 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^CODE_OF_CONDUCT\.md$ ^LICENSE\.md$ ^\.github$ +^Meta$ diff --git a/.gitignore b/.gitignore index 8ec251f..c881e3d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ .RData .Ruserdata eurlex.Rproj +/doc/ +/Meta/ diff --git a/doc/eurlexpkg.R b/doc/eurlexpkg.R index fb4df0b..2c2632a 100644 --- a/doc/eurlexpkg.R +++ b/doc/eurlexpkg.R @@ -93,13 +93,13 @@ rec_eurovoc %>% ## ----getdatapur, message = FALSE, warning=FALSE, error=FALSE------------------ # the function is not vectorized by default -elx_fetch_data(results$work[1],"title") +elx_fetch_data(url = results$work[1], type = "title") # we can use purrr::map() to play that role library(purrr) dir_titles <- results[1:10,] %>% # take the first 10 directives only to save time - mutate(title = map_chr(work,elx_fetch_data, "title")) %>% + mutate(title = map_chr(work, elx_fetch_data, "title")) %>% as_tibble() %>% select(celex, title) diff --git a/doc/eurlexpkg.Rmd b/doc/eurlexpkg.Rmd index 05b1e3d..0403b3c 100644 --- a/doc/eurlexpkg.Rmd +++ b/doc/eurlexpkg.Rmd @@ -181,13 +181,13 @@ One of the most sought-after data in the Eur-Lex dataverse is the text. It is po ```{r getdatapur, message = FALSE, warning=FALSE, error=FALSE} # the function is not vectorized by default -elx_fetch_data(results$work[1],"title") +elx_fetch_data(url = results$work[1], type = "title") # we can use purrr::map() to play that role library(purrr) dir_titles <- results[1:10,] %>% # take the first 10 directives only to save time - mutate(title = map_chr(work,elx_fetch_data, "title")) %>% + mutate(title = map_chr(work, elx_fetch_data, "title")) %>% as_tibble() %>% select(celex, title) @@ -262,6 +262,6 @@ dirs_1970_title %>% I use term-frequency inverse-document frequency (tf-idf) to weight the importance of the words in the wordcloud. 
If we used pure frequencies, the wordcloud would largely consist of words conveying little meaning ("the", "and", ...). -This is an extremely basic application of the `eurlex` package. Much more sophisticated methods can be used to analyse both the content and metadata of European Union legislation. If the package is useful for your research, please consider citing the [accompanying paper](https://www.tandfonline.com/doi/full/10.1080/2474736X.2020.1870150).^[Michal Ovádek (2021) Facilitating access to data on European Union laws, Political Research Exchange, 3:1, DOI: [10.1080/2474736X.2020.1870150](https://www.tandfonline.com/doi/full/10.1080/2474736X.2020.1870150)] +This is an extremely basic application of the `eurlex` package. Much more sophisticated methods can be used to analyse both the content and metadata of European Union legislation. If the package is useful for your research, please cite the [accompanying paper](https://www.tandfonline.com/doi/full/10.1080/2474736X.2020.1870150).^[Michal Ovádek (2021) Facilitating access to data on European Union laws, Political Research Exchange, 3:1, DOI: [10.1080/2474736X.2020.1870150](https://www.tandfonline.com/doi/full/10.1080/2474736X.2020.1870150)] diff --git a/doc/eurlexpkg.html b/doc/eurlexpkg.html index 1fd2889..8390ddc 100644 --- a/doc/eurlexpkg.html +++ b/doc/eurlexpkg.html @@ -14,30 +14,49 @@
eurlex
package
elx_make_query(): Generate SPARQL queries
The function elx_make_query
takes as its first argument the type of resource to be retrieved from the semantic database that powers Eur-Lex (and other publications) called Cellar.
library(eurlex)
-library(dplyr) # my preference, not needed for the package
-
-query_dir <- elx_make_query(resource_type = "directive")
library(eurlex)
+library(dplyr) # my preference, not needed for the package
+
+query_dir <- elx_make_query(resource_type = "directive")
Currently, it is possible to choose from among a host of resource types, including directives, regulations and even case law (see function description for the full list). It is also possible to manually specify a resource type from the eligible list.1
The choice of resource type is then reflected in the SPARQL query generated by the function:
-query_dir %>%
- cat() # for nicer printing
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>)
-#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
-
-elx_make_query(resource_type = "caselaw") %>%
- cat()
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
-
-elx_make_query(resource_type = "manual", manual_type = "SWD") %>%
- cat()
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>)
-#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
query_dir %>%
+ cat() # for nicer printing
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>)
+#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
+
+elx_make_query(resource_type = "caselaw") %>%
+cat()
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
+
+elx_make_query(resource_type = "manual", manual_type = "SWD") %>%
+cat()
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>)
+#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
There are various ways of querying the same information in the Cellar database due to the existence of several overlapping classes and identifiers describing the same resources. The queries generated by the function should offer a reliable way of obtaining exhaustive results, as they have been validated by the helpdesk of the Publications Office. At the same time, it is always possible there will be issues either on the query or the database side; please report any you encounter through GitHub.
The other arguments in elx_make_query()
relate to additional metadata to be returned. The results include by default the CELEX number and exclude corrigenda (corrections of errors in legislation). Other data needs to be opted into. Make sure to select ones that are logically compatible (e.g. case law does not have a legal basis). More options should be added in the future.
Note that the availability of data for each variable might have an impact on the results. The data frame returned by the query might be shrunk to the size of the variable with the most missing data. It is recommended to always compare results from a desired query to a minimal query requesting only CELEX IDs.
-elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) %>%
- cat()
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>)
-#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_in-force ?force.} }
-
-# minimal query: elx_make_query(resource_type = "directive")
-
-elx_make_query(resource_type = "recommendation", include_date = TRUE, include_lbs = TRUE) %>%
- cat()
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex str(?date) ?lbs ?lbcelex ?lbsuffix where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/RECO>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DEC>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DIR>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_OPIN>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_RES>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_REG>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_RECO>||
-#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DRAFT>)
-#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_based_on_resource_legal ?lbs.
-#> ?lbs cdm:resource_legal_id_celex ?lbcelex.
-#> OPTIONAL{?bn owl:annotatedSource ?work.
-#> ?bn owl:annotatedProperty <http://publications.europa.eu/ontology/cdm#resource_legal_based_on_resource_legal>.
-#> ?bn owl:annotatedTarget ?lbs.
-#> ?bn annot:comment_on_legal_basis ?lbsuffix}} }
-
-# minimal query: elx_make_query(resource_type = "recommendation")
elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) %>%
+cat()
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>)
+#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_in-force ?force.} }
+
+# minimal query: elx_make_query(resource_type = "directive")
+
+elx_make_query(resource_type = "recommendation", include_date = TRUE, include_lbs = TRUE) %>%
+cat()
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex str(?date) ?lbs ?lbcelex ?lbsuffix where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/RECO>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DEC>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DIR>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_OPIN>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_RES>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_REG>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_RECO>||
+#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DRAFT>)
+#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_based_on_resource_legal ?lbs.
+#> ?lbs cdm:resource_legal_id_celex ?lbcelex.
+#> OPTIONAL{?bn owl:annotatedSource ?work.
+#> ?bn owl:annotatedProperty <http://publications.europa.eu/ontology/cdm#resource_legal_based_on_resource_legal>.
+#> ?bn owl:annotatedTarget ?lbs.
+#> ?bn annot:comment_on_legal_basis ?lbsuffix}} }
+
+# minimal query: elx_make_query(resource_type = "recommendation")
You can also decide to not specify any resource types, in which case all types of documents will be returned. As there are over a million documents with a CELEX identifier, this is likely not efficient for a majority of users. But since version 0.3.5 it is possible to request documents belonging to a particular “sector” or directory code.
-# request documents from directory 18 ("Common Foreign and Security Policy")
-# and sector 3 ("Legal acts")
-
-elx_make_query(resource_type = "any",
- directory = "18",
- sector = 3) %>%
- cat()
-#> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
-#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
-#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
-#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
-#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
-#> select distinct ?work ?type ?celex where{
-#> VALUES (?value)
-#> { (<http://publications.europa.eu/resource/authority/fd_555/18>)
-#> (<http://publications.europa.eu/resource/authority/dir-eu-legal-act/18>)
-#> }
-#> {?work cdm:resource_legal_is_about_concept_directory-code ?value.
-#> }
-#> UNION
-#> {?work cdm:resource_legal_is_about_concept_directory-code ?directory.
-#> ?value skos:narrower+ ?directory.
-#> }
-#>
-#> ?work cdm:resource_legal_id_sector ?sector.
-#> FILTER(str(?sector)='3')
-#>
-#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
# request documents from directory 18 ("Common Foreign and Security Policy")
+# and sector 3 ("Legal acts")
+
+elx_make_query(resource_type = "any",
+directory = "18",
+ sector = 3) %>%
+ cat()
+ #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
+#> PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+#> PREFIX dc:<http://purl.org/dc/elements/1.1/>
+#> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
+#> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+#> PREFIX owl:<http://www.w3.org/2002/07/owl#>
+#> select distinct ?work ?type ?celex where{
+#> VALUES (?value)
+#> { (<http://publications.europa.eu/resource/authority/fd_555/18>)
+#> (<http://publications.europa.eu/resource/authority/dir-eu-legal-act/18>)
+#> }
+#> {?work cdm:resource_legal_is_about_concept_directory-code ?value.
+#> }
+#> UNION
+#> {?work cdm:resource_legal_is_about_concept_directory-code ?directory.
+#> ?value skos:narrower+ ?directory.
+#> }
+#>
+#> ?work cdm:resource_legal_id_sector ?sector.
+#> FILTER(str(?sector)='3')
+#>
+#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }
Now that we have a query, we are ready to run it.
elx_run_query(): Execute SPARQL queries
elx_run_query()
sends SPARQL queries to a pre-specified endpoint. The function takes the query string as the main argument, which means you can manually pass it any working SPARQL query (relevant to official EU publications).
results <- elx_run_query(query = query_dir)
-
-# the functions are compatible with piping
-#
-# elx_make_query("directive") %>%
-# elx_run_query()
as_tibble(results)
-#> # A tibble: 4,335 x 3
-#> work type celex
-#> <chr> <chr> <chr>
-#> 1 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31979L~
-#> 2 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31989L~
-#> 3 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31984L~
-#> 4 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31966L~
-#> # ... with 4,331 more rows
results <- elx_run_query(query = query_dir)
+
+# the functions are compatible with piping
+#
+# elx_make_query("directive") %>%
+# elx_run_query()
as_tibble(results)
+#> # A tibble: 4,383 x 3
+#> work type celex
+#> <chr> <chr> <chr>
+#> 1 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31979L~
+#> 2 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31989L~
+#> 3 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31984L~
+#> 4 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31966L~
+#> # ... with 4,379 more rows
The function outputs a data.frame
where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several minutes, depending also on your connection.
The first column always contains the unique URI of a “work” (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each “work” but the most useful one is CELEX, retrieved by default.2
One column you should always pay attention to is type
(as in resource_type
). The URIs contained there reflect the FILTER argument in the SPARQL query, which is manually pre-specified. All resources are indexed as being of one type or another. For example, when retrieving directives, the results are going to return also delegated directives, which might not be desirable, depending on your needs. You can filter results by type
to make the necessary adjustments. The queries are expansive by default in the spirit of erring on the side of over-inclusiveness rather than vice versa.
head(results$type,5)
-#> [1] "http://publications.europa.eu/resource/authority/resource-type/DIR"
-#> [2] "http://publications.europa.eu/resource/authority/resource-type/DIR"
-#> [3] "http://publications.europa.eu/resource/authority/resource-type/DIR"
-#> [4] "http://publications.europa.eu/resource/authority/resource-type/DIR"
-#> [5] "http://publications.europa.eu/resource/authority/resource-type/DIR"
-
-results %>%
- distinct(type)
-#> # A tibble: 3 x 1
-#> type
-#> <chr>
-#> 1 http://publications.europa.eu/resource/authority/resource-type/DIR
-#> 2 http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL
-#> 3 http://publications.europa.eu/resource/authority/resource-type/DIR_DEL
head(results$type,5)
+#> [1] "http://publications.europa.eu/resource/authority/resource-type/DIR"
+#> [2] "http://publications.europa.eu/resource/authority/resource-type/DIR"
+#> [3] "http://publications.europa.eu/resource/authority/resource-type/DIR"
+#> [4] "http://publications.europa.eu/resource/authority/resource-type/DIR"
+#> [5] "http://publications.europa.eu/resource/authority/resource-type/DIR"
+
+results %>%
+ distinct(type)
+ #> # A tibble: 3 x 1
+#> type
+#> <chr>
+#> 1 http://publications.europa.eu/resource/authority/resource-type/DIR
+#> 2 http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL
+#> 3 http://publications.europa.eu/resource/authority/resource-type/DIR_DEL
The data is returned in the long format, which means that rows are recycled up to the length of the variable with the most data points. For example, if 20 directives are returned, each with two legal bases, the resulting data.frame
will have 40 rows. Some variables, such as dates, unexpectedly contain several entries for some documents. You should always check the number of unique identifiers in the results instead of assuming that each row is a unique observation.
EuroVoc is a multilingual thesaurus, keywords from which are used to describe the content of European Union documents. Most resource types that can be retrieved with the pre-defined queries in this package can be accompanied by EuroVoc keywords and these can be retrieved as other variables.
-
-rec_eurovoc <- elx_make_query("recommendation", include_eurovoc = TRUE, limit = 10) %>%
- elx_run_query() # truncated results for sake of the example
-
-rec_eurovoc %>%
- select(celex, eurovoc)
-#> # A tibble: 10 x 2
-#> celex eurovoc
-#> <chr> <chr>
-#> 1 32012H0090 http://eurovoc.europa.eu/1425
-#> 2 31962H0816 http://eurovoc.europa.eu/1004
-#> 3 31974H0435 http://eurovoc.europa.eu/1085
-#> 4 31996H0592 http://eurovoc.europa.eu/1076
-#> # ... with 6 more rows
+rec_eurovoc <- elx_make_query("recommendation", include_eurovoc = TRUE, limit = 10) %>%
+ elx_run_query() # truncated results for sake of the example
+
+rec_eurovoc %>%
+ select(celex, eurovoc)
+ #> # A tibble: 10 x 2
+#> celex eurovoc
+#> <chr> <chr>
+#> 1 32012H0090 http://eurovoc.europa.eu/1425
+#> 2 31962H0816 http://eurovoc.europa.eu/1004
+#> 3 31974H0435 http://eurovoc.europa.eu/1085
+#> 4 31996H0592 http://eurovoc.europa.eu/1076
+#> # ... with 6 more rows
By default, the endpoint returns the EuroVoc concept codes rather than the labels (keywords). The function elx_label_eurovoc()
needs to be called to obtain a look-up table with the labels.
eurovoc_lookup <- elx_label_eurovoc(uri_eurovoc = rec_eurovoc$eurovoc)
-
-print(eurovoc_lookup)
-#> # A tibble: 9 x 2
-#> eurovoc labels
-#> <chr> <chr>
-#> 1 http://eurovoc.europa.eu/1085 France
-#> 2 http://eurovoc.europa.eu/1442 food inspection
-#> 3 http://eurovoc.europa.eu/1076 form
-#> 4 http://eurovoc.europa.eu/1318 Germany
-#> # ... with 5 more rows
eurovoc_lookup <- elx_label_eurovoc(uri_eurovoc = rec_eurovoc$eurovoc)
+
+print(eurovoc_lookup)
+#> # A tibble: 9 x 2
+#> eurovoc labels
+#> <chr> <chr>
+#> 1 http://eurovoc.europa.eu/1085 France
+#> 2 http://eurovoc.europa.eu/1442 food inspection
+#> 3 http://eurovoc.europa.eu/1076 form
+#> 4 http://eurovoc.europa.eu/1318 Germany
+#> # ... with 5 more rows
The results include labels only for unique identifiers, but with dplyr::left_join()
it is straightforward to append the labels to the entire dataset.
rec_eurovoc %>%
- left_join(eurovoc_lookup)
-#> Joining, by = "eurovoc"
-#> # A tibble: 10 x 5
-#> work type celex eurovoc labels
-#> <chr> <chr> <chr> <chr> <chr>
-#> 1 http://publications.euro~ http://publications.e~ 32012~ http://euro~ consumer~
-#> 2 http://publications.euro~ http://publications.e~ 31962~ http://euro~ welfare
-#> 3 http://publications.euro~ http://publications.e~ 31974~ http://euro~ France
-#> 4 http://publications.euro~ http://publications.e~ 31996~ http://euro~ form
-#> # ... with 6 more rows
rec_eurovoc %>%
+ left_join(eurovoc_lookup)
+ #> Joining, by = "eurovoc"
+#> # A tibble: 10 x 5
+#> work type celex eurovoc labels
+#> <chr> <chr> <chr> <chr> <chr>
+#> 1 http://publications.euro~ http://publications.e~ 32012~ http://euro~ consumer~
+#> 2 http://publications.euro~ http://publications.e~ 31962~ http://euro~ welfare
+#> 3 http://publications.euro~ http://publications.e~ 31974~ http://euro~ France
+#> 4 http://publications.euro~ http://publications.e~ 31996~ http://euro~ form
+#> # ... with 6 more rows
As elsewhere in the API, we can tap into the multilingual nature of EU documents also when it comes to the EuroVoc keywords. Moreover, most concepts in the thesaurus are associated with alternative labels; these can be returned as well (separated by a comma).
-eurovoc_lookup <- elx_label_eurovoc(uri_eurovoc = rec_eurovoc$eurovoc,
- alt_labels = TRUE,
- language = "sk")
-
-rec_eurovoc %>%
- left_join(eurovoc_lookup) %>%
- select(celex, eurovoc, labels)
-#> Joining, by = "eurovoc"
-#> # A tibble: 10 x 3
-#> celex eurovoc labels
-#> <chr> <chr> <chr>
-#> 1 32012H0~ http://eurovoc.euro~ informácie pre spotrebitela,vzdelávanie spotreb~
-#> 2 31962H0~ http://eurovoc.euro~ blahobyt
-#> 3 31974H0~ http://eurovoc.euro~ Francúzska republika,Francúzsko
-#> 4 31996H0~ http://eurovoc.euro~ formulár
-#> # ... with 6 more rows
eurovoc_lookup <- elx_label_eurovoc(uri_eurovoc = rec_eurovoc$eurovoc,
+ alt_labels = TRUE,
+ language = "sk")
+
+rec_eurovoc %>%
+ left_join(eurovoc_lookup) %>%
+ select(celex, eurovoc, labels)
+ #> Joining, by = "eurovoc"
+#> # A tibble: 10 x 3
+#> celex eurovoc labels
+#> <chr> <chr> <chr>
+#> 1 32012H0090 http://eurovoc.europa.eu/1425 informácie pre spotrebitela,vzdeláva~
+#> 2 31962H0816 http://eurovoc.europa.eu/1004 blahobyt
+#> 3 31974H0435 http://eurovoc.europa.eu/1085 Francúzska republika,Francúzsko
+#> 4 31996H0592 http://eurovoc.europa.eu/1076 formulár
+#> # ... with 6 more rows
elx_fetch_data(): Fire GET requests
A core contribution of the SPARQL requests is that we obtain a comprehensive list of identifiers that we can subsequently use to obtain more data relating to the document in question. While the results of the SPARQL queries are useful also for webscraping (with the rvest
package), the function elx_fetch_data()
enables us to fire GET requests to retrieve data on documents with known identifiers (including Cellar URI).
Among the most sought-after data in the Eur-Lex dataverse is the text. It is now possible to automate the pipeline for downloading html and plain texts from Eur-Lex. Similarly, you can retrieve the title of the document. For both you can also specify the desired language (English by default). Other metadata might be added in the future.
-# the function is not vectorized by default
-elx_fetch_data(results$work[1],"title")
-#> [1] "Council Directive 79/173/EEC of 6 February 1979 on the programme for the acceleration and guidance of collective irrigation works in Corsica"
-
-# we can use purrr::map() to play that role
-library(purrr)
-
-dir_titles <- results[1:10,] %>% # take the first 10 directives only to save time
- mutate(title = map_chr(work,elx_fetch_data, "title")) %>%
- as_tibble() %>%
- select(celex, title)
-
-print(dir_titles)
-#> # A tibble: 10 x 2
-#> celex title
-#> <chr> <chr>
-#> 1 31979L0173 Council Directive 79/173/EEC of 6 February 1979 on the programme f~
-#> 2 31989L0194 Council Directive 89/194/EEC of 13 March 1989 amending Directive 6~
-#> 3 31984L0378 Council Directive 84/378/EEC of 28 June 1984 amending the Annexes ~
-#> 4 31966L0683 Commission Directive 66/683/EEC of 7 November 1966 eliminating all~
-#> # ... with 6 more rows
# the function is not vectorized by default
+elx_fetch_data(url = results$work[1], type = "title")
+#> [1] "Council Directive 79/173/EEC of 6 February 1979 on the programme for the acceleration and guidance of collective irrigation works in Corsica"
+
+# we can use purrr::map() to play that role
+library(purrr)
+
+dir_titles <- results[1:10,] %>% # take the first 10 directives only to save time
+ mutate(title = map_chr(work, elx_fetch_data, "title")) %>%
+ as_tibble() %>%
+ select(celex, title)
+
+print(dir_titles)
+#> # A tibble: 10 x 2
+#> celex title
+#> <chr> <chr>
+#> 1 31979L0173 Council Directive 79/173/EEC of 6 February 1979 on the programme f~
+#> 2 31989L0194 Council Directive 89/194/EEC of 13 March 1989 amending Directive 6~
+#> 3 31984L0378 Council Directive 84/378/EEC of 28 June 1984 amending the Annexes ~
+#> 4 31966L0683 Commission Directive 66/683/EEC of 7 November 1966 eliminating all~
+#> # ... with 6 more rows
Note that text requests are by far the most time-intensive; requesting the full text for thousands of documents is liable to extend the run-time into hours. Texts are retrieved from html by priority, but methods for pdfs and .docs are also implemented.3 The function even handles multi-document resources (by pasting them together).
In this section I showcase a simple application of eurlex
on making overviews of EU legislation. First, we collate data on directives.
dirs <- elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) %>%
- elx_run_query() %>%
- rename(date = `callret-3`)
dirs <- elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) %>%
+ elx_run_query() %>%
+ rename(date = `callret-3`)
Let’s calculate the proportion of directives currently in force in the entire set of directives ever adopted. This variable offers a particularly good demonstration of the usefulness of the package to retrieve EU law data, because it changes every day, as new acts enter into force and old ones drop out. Regularly scraping webpages for this purpose and at this scale is simply impractical and disproportionate.
- - +library(ggplot2)
+
+dirs %>%
+ count(force) %>%
+ ggplot(aes(x = force, y = n)) +
+ geom_col()
Directives become naturally outdated with time. It might be all the more interesting to see which older acts are thus still surviving.
-dirs %>%
- filter(!is.na(force)) %>%
- mutate(date = as.Date(date)) %>%
- ggplot(aes(x = date, y = celex)) +
- geom_point(aes(color = force), alpha = 0.1) +
- theme(axis.text.y = element_blank(),
- axis.line.y = element_blank(),
- axis.ticks.y = element_blank())
dirs %>%
+ filter(!is.na(force)) %>%
+ mutate(date = as.Date(date)) %>%
+ ggplot(aes(x = date, y = celex)) +
+ geom_point(aes(color = force), alpha = 0.1) +
+ theme(axis.text.y = element_blank(),
+ axis.line.y = element_blank(),
+ axis.ticks.y = element_blank())
We want to know a bit more about the directives from 1970s that are still in force today. Their titles could give us a clue.
-dirs_1970_title <- dirs %>%
- filter(between(as.Date(date), as.Date("1970-01-01"), as.Date("1980-01-01")),
- force == "true") %>%
- mutate(title = map_chr(work,elx_fetch_data,"title")) %>%
- as_tibble()
-
-print(dirs_1970_title)
-#> # A tibble: 67 x 6
-#> work type celex date force title
-#> <chr> <chr> <chr> <chr> <chr> <chr>
-#> 1 http://publications~ http://publicatio~ 31975~ 1975~ true Council Directive ~
-#> 2 http://publications~ http://publicatio~ 31977~ 1977~ true First Commission D~
-#> 3 http://publications~ http://publicatio~ 31977~ 1977~ true Council Directive ~
-#> 4 http://publications~ http://publicatio~ 31973~ 1973~ true Council Directive ~
-#> # ... with 63 more rows
dirs_1970_title <- dirs %>%
+ filter(between(as.Date(date), as.Date("1970-01-01"), as.Date("1980-01-01")),
+ force == "true") %>%
+ mutate(title = map_chr(work,elx_fetch_data,"title")) %>%
+ as_tibble()
+
+print(dirs_1970_title)
+#> # A tibble: 62 x 6
+#> work type celex date force title
+#> <chr> <chr> <chr> <chr> <chr> <chr>
+#> 1 http://publications~ http://publicatio~ 31975~ 1975~ true Council Directive ~
+#> 2 http://publications~ http://publicatio~ 31977~ 1977~ true First Commission D~
+#> 3 http://publications~ http://publicatio~ 31977~ 1977~ true Council Directive ~
+#> 4 http://publications~ http://publicatio~ 31973~ 1973~ true Council Directive ~
+#> # ... with 58 more rows
I will use the tidytext
package to get a quick idea of what the legislation is about.
library(tidytext)
-library(wordcloud)
-
-dirs_1970_title %>%
- select(celex,title) %>%
- unnest_tokens(word, title) %>%
- count(celex, word, sort = TRUE) %>%
- filter(!grepl("\\d", word)) %>%
- bind_tf_idf(word, celex, n) %>%
- with(wordcloud(word, tf_idf, max.words = 40, scale = c(1.8,0.1)))
library(tidytext)
+library(wordcloud)
+
+dirs_1970_title %>%
+ select(celex,title) %>%
+ unnest_tokens(word, title) %>%
+ count(celex, word, sort = TRUE) %>%
+ filter(!grepl("\\d", word)) %>%
+ bind_tf_idf(word, celex, n) %>%
+ with(wordcloud(word, tf_idf, max.words = 40, scale = c(1.8,0.1)))
I use term-frequency inverse-document frequency (tf-idf) to weight the importance of the words in the wordcloud. If we used pure frequencies, the wordcloud would largely consist of words conveying little meaning (“the”, “and”, …).
-This is an extremely basic application of the eurlex
package. Much more sophisticated methods can be used to analyse both the content and metadata of European Union legislation. If the package is useful for your research, please consider citing the accompanying paper.4
This is an extremely basic application of the eurlex
package. Much more sophisticated methods can be used to analyse both the content and metadata of European Union legislation. If the package is useful for your research, please cite the accompanying paper.4
Note, however, that not all resource types will work properly with the pre-specified query.↩
Occasionally, you may encounter legal acts without CELEX numbers, especially when digging through older legislation. It is good to report these to the Eur-Lex helpdesk.↩
It is worth pointing out that the html and pdf contents of older case law differ. Whereas typically the html file is only going to contain a summary and grounds of a judgment, the pdf should also contain background to the dispute.↩
Michal Ovádek (2021) Facilitating access to data on European Union laws, Political Research Exchange, 3:1, DOI: 10.1080/2474736X.2020.1870150↩
Note, however, that not all resource types will work properly with the pre-specified query.↩︎
Occasionally, you may encounter legal acts without CELEX numbers, especially when digging through older legislation. It is good to report these to the Eur-Lex helpdesk.↩︎
It is worth pointing out that the html and pdf contents of older case law differ. Whereas typically the html file is only going to contain a summary and grounds of a judgment, the pdf should also contain background to the dispute.↩︎
Michal Ovádek (2021) Facilitating access to data on European Union laws, Political Research Exchange, 3:1, DOI: 10.1080/2474736X.2020.1870150↩︎
as_tibble(results)
-#> # A tibble: 4,367 x 3
+#> # A tibble: 4,382 x 3
#> work type celex
#> <chr> <chr> <chr>
#> 1 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31979L~
#> 2 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31989L~
#> 3 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31984L~
#> 4 http://publications.europa.eu/resourc~ http://publications.europa.eu/~ 31966L~
-#> # ... with 4,363 more rows
The function outputs a data.frame
where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several minutes, depending also on your connection.
The first column always contains the unique URI of a “work” (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each “work” but the most useful one is CELEX, retrieved by default.2
One column you should always pay attention to is type
(as in resource_type
). The URIs contained there reflect the FILTER argument in the SPARQL query, which is manually pre-specified. All resources are indexed as being of one type or another. For example, when retrieving directives, the results are going to return also delegated directives, which might not be desirable, depending on your needs. You can filter results by type
to make the necessary adjustments. The queries are expansive by default in the spirit of erring on the side of over-inclusiveness rather than vice versa.
Retrieve data on European Union law in R with pre-defined SPARQL and -REST queries.
-Retrieve data on European Union law in R with pre-defined SPARQL and REST queries.
+The eurlex
R package attempts to significantly reduce
-the overhead associated with using SPARQL and REST APIs made available
-by the EU Publication Office and other EU institutions. Compared to pure
-web-scraping, the package provides more efficient and transparent access
-to data on European Union laws and policies.
See the vignette -for a walkthrough on how to use the package. Check function -documentation for most up-to-date overview of features. Example use -cases are shown in this paper.
+ +The eurlex
R package attempts to significantly reduce the overhead associated with using SPARQL and REST APIs made available by the EU Publication Office and other EU institutions. Compared to pure web-scraping, the package provides more efficient and transparent access to data on European Union laws and policies.
See the vignette for a walkthrough on how to use the package. Check function documentation for the most up-to-date overview of features. Example use cases are shown in this paper.
Install from CRAN via install.packages("eurlex")
.
The development version is available via
-remotes::install_github("michalovadek/eurlex")
.
The development version is available via remotes::install_github("michalovadek/eurlex")
.
Michal Ovádek (2021) Facilitating access to data on European -Union laws, Political Research Exchange, 3:1, DOI: 10.1080/2474736X.2020.1870150
+Michal Ovádek (2021) Facilitating access to data on European Union laws, Political Research Exchange, 3:1, DOI: 10.1080/2474736X.2020.1870150
The eurlex
package currently envisions the typical
-use-case to consist of getting bulk information about EU legislation
-into R as fast as possible. The package contains three core functions to
-achieve that objective: elx_make_query()
to create
-pre-defined or customized SPARQL queries; elx_run_query()
-to execute the pre-made or any other manually input query; and
-elx_fetch_data()
to fire GET requests for certain metadata
-to the REST API.
The function elx_make_query
takes as its first argument
-the type of resource to be retrieved (such as “directive”) from the
-semantic database that powers Eur-Lex (and other publications) called
-Cellar. If you are familiar with SPARQL, you can always specify your own
-queries and execute them with elx_run_query()
.
elx_run_query()
executes SPARQL queries on a
-pre-specified endpoint of the EU Publication Office. It outputs a
-data.frame
where each column corresponds to one of the
-requested variables, while the rows accumulate observations of the
-resource type satisfying the query criteria. Obviously, the more data is
-to be returned, the longer the execution time, varying from a few
-seconds to several hours, depending also on your connection. The first
-column always contains the unique URI of a “work” (legislative act or
-court judgment) which identifies each resource in Cellar. Several
-human-readable identifiers are normally associated with each “work” but
-the most useful one is CELEX,
-retrieved by default.
For the moment, it is recommended to retrieve metadata one variable -at a time. For example, if you wish to obtain the legal bases of -directives and the date of transposition, you should run separate -calls:
+The eurlex
package currently envisions the typical use-case to consist of getting bulk information about EU legislation into R as fast as possible. The package contains three core functions to achieve that objective: elx_make_query()
to create pre-defined or customized SPARQL queries; elx_run_query()
to execute the pre-made or any other manually input query; and elx_fetch_data()
to fire GET requests for certain metadata to the REST API.
The function elx_make_query
takes as its first argument the type of resource to be retrieved (such as “directive”) from the semantic database that powers Eur-Lex (and other publications) called Cellar. If you are familiar with SPARQL, you can always specify your own queries and execute them with elx_run_query()
.
elx_run_query()
executes SPARQL queries on a pre-specified endpoint of the EU Publication Office. It outputs a data.frame
where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several hours, depending also on your connection. The first column always contains the unique URI of a “work” (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each “work” but the most useful one is CELEX, retrieved by default.
For the moment, it is recommended to retrieve metadata one variable at a time. For example, if you wish to obtain the legal bases of directives and the date of transposition, you should run separate calls:
ids <- elx_make_query("directive") %>%
-elx_run_query()
lbs <- elx_make_query("directive", include_lbs = TRUE)
-%>% elx_run_query()
dates <- elx_make_query("directive", include_date_transpos
-= TRUE) %>% elx_run_query()
ids %>% dplyr::left_join(lbs) %>%
-dplyr::left_join(dates)
ids <- elx_make_query("directive") %>% elx_run_query()
lbs <- elx_make_query("directive", include_lbs = TRUE) %>% elx_run_query()
dates <- elx_make_query("directive", include_date_transpos = TRUE) %>% elx_run_query()
ids %>% dplyr::left_join(lbs) %>% dplyr::left_join(dates)
rather than elx_make_query("directive", include_lbs = TRUE,
-include_date_transpos = TRUE)
. This approach is usually faster
-and should also make it easier to understand the returned data frame(s),
-especially when some variables contain missing or duplicated data.
-Always keep an eye on whether the work
and
-celex
columns identify rows uniquely or not.
One of the main contributions of the SPARQL requests is that we
-obtain a comprehensive list of identifiers that we can subsequently use
-to obtain more data relating to the document in question. While the
-results of the SPARQL queries are useful also for webscraping (with the
-rvest
package), the function elx_fetch_data()
-enables us to fire GET requests to retrieve data on documents with known
-identifiers (including Cellar URI). The function currently enables
-downloading the title and the full text of a document in all available
-languages.
rather than elx_make_query("directive", include_lbs = TRUE, include_date_transpos = TRUE)
. This approach is usually faster and should also make it easier to understand the returned data frame(s), especially when some variables contain missing or duplicated data. Always keep an eye on whether the work
and celex
columns identify rows uniquely or not.
One of the main contributions of the SPARQL requests is that we obtain a comprehensive list of identifiers that we can subsequently use to obtain more data relating to the document in question. While the results of the SPARQL queries are useful also for webscraping (with the rvest
package), the function elx_fetch_data()
enables us to fire GET requests to retrieve data on documents with known identifiers (including Cellar URI). The function currently enables downloading the title and the full text of a document in all available languages.
This package nor its author are in any way affiliated with the EU -Publications Office. Please refer to the applicable data -reuse policies.
-Please consider contributing to the maintenance and development of -the package by reporting bugs or suggesting new features.
+Neither this package nor its author is in any way affiliated with the EU Publications Office. Please refer to the applicable data reuse policies.
+Please consider contributing to the maintenance and development of the package by reporting bugs or suggesting new features.
elx_download_xml()
but instead of saving to path gives
-access to XML notice in R
-include_
-options in elx_make_query()
+elx_fetch_data(type = "notice", notice = c("tree","branch", "object"))
now mirrors the behaviour of elx_download_xml()
but instead of saving to path gives access to XML notice in R
include_
options in elx_make_query()
elx_download_xml()
parameter checkingelx_download_xml(notice = "object")
now retrieves
-metadata correctlyelx_download_xml(notice = "object")
now retrieves metadata correctly
elx_download_xml()
+elx_download_xml()
elx_make_query(include_ecli = TRUE)
+elx_make_query(include_ecli = TRUE)
elx_run_query()
now fails gracefully in presence of internet/server problems
elx_fetch_data()
now automatically fixes urls with
-parentheses (e.g. “32019H1115(01)” used to fail)elx_fetch_data()
now automatically fixes urls with parentheses (e.g. “32019H1115(01)” used to fail)
elx_parse_xml
no longer an exported function
it is now possible to select all resource types available with
-elx_make_query(resource_type = "any")
. Since there are
-nearly 1 million CELEX codes, use with discretion and expect long
-execution times
results can be restricted to a particular directory code with
-elx_make_query(directory = "18")
(directory code “18”
-denotes Common Foreign and Security Policy)
results can be restricted to a particular sector with
-elx_make_query(sector = 2)
(sector code 2 denotes EU
-international agreements)
new feature: request date of court case submission
-elx_make_query(include_date_lodged = TRUE)
new feature: request type of court procedure and outcome
-elx_make_query(include_court_procedure = TRUE)
new feature: request directory code of legal act
-elx_make_query(include_directory = TRUE)
elx_curia_list()
has a new default parameter
-parse = TRUE
which creates separate columns for
-ecli
, see_case
, appeal
applying
-regular expressions on case_info
it is now possible to select all resource types available with elx_make_query(resource_type = "any")
. Since there are nearly 1 million CELEX codes, use with discretion and expect long execution times
results can be restricted to a particular directory code with elx_make_query(directory = "18")
(directory code “18” denotes Common Foreign and Security Policy)
results can be restricted to a particular sector with elx_make_query(sector = 2)
(sector code 2 denotes EU international agreements)
new feature: request date of court case submission elx_make_query(include_date_lodged = TRUE)
new feature: request type of court procedure and outcome elx_make_query(include_court_procedure = TRUE)
new feature: request directory code of legal act elx_make_query(include_directory = TRUE)
elx_curia_list()
has a new default parameter parse = TRUE
which creates separate columns for ecli
, see_case
, appeal
applying regular expressions on case_info
elx_fetch_data(type = "notice", notice = c("tree","branch", "object"))
now mirrors the behaviour of elx_download_xml()
but instead of saving to path gives access to XML notice in R
include_
options in elx_make_query()
+elx_download_xml()
parameter checkingelx_download_xml(notice = "object")
now retrieves metadata correctlyelx_download_xml()
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index a04e0d9..5ad73b2 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -1,7 +1,7 @@
-pandoc: 2.17.0.1
+pandoc: 2.14.0.3
pkgdown: 2.0.2
pkgdown_sha: ~
articles:
eurlexpkg: eurlexpkg.html
-last_built: 2022-03-31T18:05Z
+last_built: 2022-03-31T20:47Z
diff --git a/docs/reference/elx_council_votes.html b/docs/reference/elx_council_votes.html
index 242f609..a56d08b 100644
--- a/docs/reference/elx_council_votes.html
+++ b/docs/reference/elx_council_votes.html
@@ -1,99 +1,98 @@
-
-elx_council_votes.Rd
Executes a SPARQL query to the Council's endpoint.
-elx_council_votes()
A data frame with Council votes on EU acts.
-# \donttest{
-votes <- elx_council_votes()
-# }
-
elx_council_votes.Rd
Executes a SPARQL query to the Council's endpoint.
+elx_council_votes()
A data frame with Council votes on EU acts.
+# \donttest{
+votes <- elx_council_votes()
+# }
+
elx_curia_list.Rd
Harvests data from lists of EU court cases from curia.europa.eu. -CELEX identifiers are extracted from hyperlinks where available.
-elx_curia_list(
- data = c("all", "ecj_old", "ecj_new", "gc_all", "cst_all"),
- parse = TRUE
-)
Data to be scraped from four separate lists of cases maintained by Curia, defaults to "all" -which contains cases from Court of Justice, General Court and Civil Service Tribunal.
If `TRUE`, references to cases and appeals are parsed out from `case_info` into separate columns
A data frame containing case identifiers and information as character columns. Where the case id -contains a hyperlink to Eur-Lex, the CELEX identifier is retrieved as well. Hyperlinks to Eur-Lex -disappeared from more recent cases.
-# \donttest{
-elx_curia_list(data = "cst_all")
-#> # A tibble: 1,759 x 6
-#> case_id case_id_celex case_info ecli see_case appeal
-#> <chr> <chr> <chr> <chr> <chr> <chr>
-#> 1 F-1/05 * NA Judgment of 26 October 2006, ~ ECLI~ NA T-404~
-#> 2 F-1/05 NA Order of 22 May 2007, Landgre~ ECLI~ NA NA
-#> 3 F-1/05 INT NA Order of 13 July 2007, Landgr~ ECLI~ NA NA
-#> 4 F-1/05 NA Order of 9 November 2010, Lan~ ECLI~ NA NA
-#> 5 F-2/05 NA Removed from the register on ~ ECLI~ NA NA
-#> 6 F-3/05 NA Order of 15 May 2006, Schmit ~ ECLI~ NA NA
-#> 7 F-4/05 NA Removed from the register on ~ ECLI~ NA NA
-#> 8 F-5/05 * NA Judgment of 28 April 2009, Vi~ ECLI~ NA T-261~
-#> 9 F-6/05 NA Removed from the register on ~ ECLI~ NA NA
-#> 10 F-7/05 NA Schmit / Commission (F-7/05) ~ NA F-5/05 NA
-#> # ... with 1,749 more rows
-# }
-
elx_curia_list.Rd
Harvests data from lists of EU court cases from curia.europa.eu. +CELEX identifiers are extracted from hyperlinks where available.
+elx_curia_list(
+ data = c("all", "ecj_old", "ecj_new", "gc_all", "cst_all"),
+ parse = TRUE
+)
Data to be scraped from four separate lists of cases maintained by Curia, defaults to "all" +which contains cases from Court of Justice, General Court and Civil Service Tribunal.
If `TRUE`, references to cases and appeals are parsed out from `case_info` into separate columns
A data frame containing case identifiers and information as character columns. Where the case id +contains a hyperlink to Eur-Lex, the CELEX identifier is retrieved as well. Hyperlinks to Eur-Lex +disappeared from more recent cases.
+# \donttest{
+elx_curia_list(data = "cst_all")
+#> # A tibble: 1,759 x 6
+#> case_id case_id_celex case_info ecli see_case appeal
+#> <chr> <chr> <chr> <chr> <chr> <chr>
+#> 1 F-1/05 * NA Judgment of 26 October 200~ ECLI:EU~ NA T-404~
+#> 2 F-1/05 NA Order of 22 May 2007, Land~ ECLI:EU~ NA NA
+#> 3 F-1/05 INT NA Order of 13 July 2007, Lan~ ECLI:EU~ NA NA
+#> 4 F-1/05 NA Order of 9 November 2010, ~ ECLI:EU~ NA NA
+#> 5 F-2/05 NA Removed from the register ~ ECLI:EU~ NA NA
+#> 6 F-3/05 NA Order of 15 May 2006, Schm~ ECLI:EU~ NA NA
+#> 7 F-4/05 NA Removed from the register ~ ECLI:EU~ NA NA
+#> 8 F-5/05 * NA Judgment of 28 April 2009,~ ECLI:EU~ NA T-261~
+#> 9 F-6/05 NA Removed from the register ~ ECLI:EU~ NA NA
+#> 10 F-7/05 NA Schmit / Commission (F-7/0~ NA F-5/05 NA
+#> # ... with 1,749 more rows
+# }
+
elx_download_xml.Rd
Downloads an XML notice of a given type associated with a Cellar resource.
-A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.
A character string with the name where the downloaded file is saved.
The type of notice requested controls what kind of metadata are returned.
The priority language in which the data will be attempted to be retrieved, in ISO 639 2-char code
If data not available in `language_1`, try `language_2`
If data not available in `language_2`, try `language_3`
A character string specifying the mode with which to write the file. Useful values are "w", "wb" (binary), "a" (append) and "ab".
Path of downloaded file (invisibly) if server validates request (http status code has to be 200). For more information about notices, see Cellar documentation.
-To retrieve all identifiers associated with a url, use elx_fetch_data(type = "ids").
-# \donttest{
-elx_download_xml(url = "http://publications.europa.eu/resource/celex/32014R0001", notice = "object")
-# }
-
elx_download_xml.Rd
Downloads an XML notice of a given type associated with a Cellar resource.
+A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.
A character string with the name where the downloaded file is saved.
The type of notice requested controls what kind of metadata are returned.
The priority language in which the data will be attempted to be retrieved, in ISO 639 2-char code
If data not available in `language_1`, try `language_2`
If data not available in `language_2`, try `language_3`
A character string specifying the mode with which to write the file. Useful values are "w", "wb" (binary), "a" (append) and "ab".
Path of downloaded file (invisibly) if server validates request (http status code has to be 200). For more information about notices, see Cellar documentation.
+To retrieve all identifiers associated with a url, use elx_fetch_data(type = "ids").
+# \donttest{
+elx_download_xml(url = "http://publications.europa.eu/resource/celex/32014R0001", notice = "object")
+# }
+
elx_fetch_data.Rd
Wraps httr::GET with pre-specified headers and parses retrieved data.
-A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.
The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text. When type = "notice", the results return an XML notice associated with the url.
If type = "notice", controls what kind of metadata are returned by the notice.
The priority language in which the data will be attempted to be retrieved, in ISO 639 2-char code
If data not available in `language_1`, try `language_2`
If data not available in `language_2`, try `language_3`
If TRUE, text includes tags showing where pages ("---pagebreak---", for pdfs) and documents ("---documentbreak---") were concatenated
A character vector of length one containing the result. When `type = "text"`, named character vector where the name contains the source of the text.
-# \donttest{
-elx_fetch_data(url = "http://publications.europa.eu/resource/celex/32014R0001", type = "title")
-#> [1] "Commission Delegated Regulation (EU) No 1/2014 of 28 August 2013 establishing Annex III to Regulation (EU) No 978/2012 of the European Parliament and of the Council applying a scheme of generalised tariff preferences"
-# }
-
elx_fetch_data.Rd
Wraps httr::GET with pre-specified headers and parses retrieved data.
+A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.
The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text. When type = "notice", the results return an XML notice associated with the url.
If type = "notice", controls what kind of metadata are returned by the notice.
The priority language in which the data will be attempted to be retrieved, in ISO 639 2-char code
If data not available in `language_1`, try `language_2`
If data not available in `language_2`, try `language_3`
If TRUE, text includes tags showing where pages ("---pagebreak---", for pdfs) and documents ("---documentbreak---") were concatenated
A character vector of length one containing the result. When `type = "text"`, named character vector where the name contains the source of the text.
+# \donttest{
+elx_fetch_data(url = "http://publications.europa.eu/resource/celex/32014R0001", type = "title")
+#> [1] "Commission Delegated Regulation (EU) No 1/2014 of 28 August 2013 establishing Annex III to Regulation (EU) No 978/2012 of the European Parliament and of the Council applying a scheme of generalised tariff preferences"
+# }
+
elx_label_eurovoc.Rd
Create a look-up table with labels for EuroVoc concept URIs. Only unique identifiers are returned.
-elx_label_eurovoc(uri_eurovoc = "", alt_labels = FALSE, language = "en")
Character vector with valid EuroVoc URIs
If `TRUE`, results include comma-separated alternative labels in addition to the preferred label
Language in which to return the labels, in ISO 639 2-char code
A `tibble` containing EuroVoc unique concept identifiers and labels.
-elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", alt_labels = TRUE, language = "fr")
-#> # A tibble: 1 x 2
-#> eurovoc labels
-#> <chr> <chr>
-#> 1 http://eurovoc.europa.eu/5760 oiseau,oiseau migrateur,rapace
-elx_label_eurovoc(uri_eurovoc = c("http://eurovoc.europa.eu/5760","http://eurovoc.europa.eu/576"))
-#> # A tibble: 2 x 2
-#> eurovoc labels
-#> <chr> <chr>
-#> 1 http://eurovoc.europa.eu/5760 bird
-#> 2 http://eurovoc.europa.eu/576 private law
-
elx_label_eurovoc.Rd
Create a look-up table with labels for EuroVoc concept URIs. Only unique identifiers are returned.
+elx_label_eurovoc(uri_eurovoc = "", alt_labels = FALSE, language = "en")
Character vector with valid EuroVoc URIs
If `TRUE`, results include comma-separated alternative labels in addition to the preferred label
Language in which to return the labels, in ISO 639 2-char code
A `tibble` containing EuroVoc unique concept identifiers and labels.
+elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", alt_labels = TRUE, language = "fr")
+#> # A tibble: 1 x 2
+#> eurovoc labels
+#> <chr> <chr>
+#> 1 http://eurovoc.europa.eu/5760 oiseau,oiseau migrateur,rapace
+elx_label_eurovoc(uri_eurovoc = c("http://eurovoc.europa.eu/5760","http://eurovoc.europa.eu/576"))
+#> # A tibble: 2 x 2
+#> eurovoc labels
+#> <chr> <chr>
+#> 1 http://eurovoc.europa.eu/5760 bird
+#> 2 http://eurovoc.europa.eu/576 private law
+
elx_make_query.Rd
Generates pre-defined or manual SPARQL queries to retrieve document ids from Cellar. -List of available resource types: http://publications.europa.eu/resource/authority/resource-type . -Note that not all resource types are compatible with default parameter values.
-elx_make_query(
- resource_type = c("any", "directive", "regulation", "decision", "recommendation",
- "intagr", "caselaw", "manual", "proposal", "national_impl"),
- manual_type = "",
- directory = NULL,
- sector = NULL,
- include_corrigenda = FALSE,
- include_celex = TRUE,
- include_lbs = FALSE,
- include_date = FALSE,
- include_date_force = FALSE,
- include_date_endvalid = FALSE,
- include_date_transpos = FALSE,
- include_date_lodged = FALSE,
- include_force = FALSE,
- include_eurovoc = FALSE,
- include_author = FALSE,
- include_citations = FALSE,
- include_court_procedure = FALSE,
- include_directory = FALSE,
- include_sector = FALSE,
- include_ecli = FALSE,
- include_judge_rapporteur = FALSE,
- include_advocate_general = FALSE,
- include_court_formation = FALSE,
- include_court_scholarship = FALSE,
- order = FALSE,
- limit = NULL
-)
Type of resource to be retrieved via SPARQL query
Define manually the type of resource to be retrieved
Restrict the results to a given directory code
Restrict the results to a given sector code
If `TRUE`, results include corrigenda
If `TRUE`, results include CELEX identifier for each resource URI
If `TRUE`, results include legal bases of legislation
If `TRUE`, results include document date
If `TRUE`, results include date of entry into force
If `TRUE`, results include date of end of validity
If `TRUE`, results include date of transposition deadline for directives
If `TRUE`, results include date a court case was lodged with the court
If `TRUE`, results include whether legislation is in force
If `TRUE`, results include EuroVoc descriptors of subject matter
If `TRUE`, results include document author(s)
If `TRUE`, results include citations (CELEX-labelled)
If `TRUE`, results include type of court procedure and outcome
If `TRUE`, results include the Eur-Lex directory code
If `TRUE`, results include the Eur-Lex sector code
If `TRUE`, results include the ECLI identifier for court documents
If `TRUE`, results include the Judge-Rapporteur
If `TRUE`, results include the Advocate General
If `TRUE`, results include the court formation
If `TRUE`, results include court-curated relevant scholarship
If `TRUE`, order results by document date (the generated query appends `order by str(?date)`)
Limit the number of results, for testing purposes mainly
A character string containing the SPARQL query
-elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE)
-#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_in-force ?force.} }"
-elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE)
-#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/REG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_FINANC>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_DEL>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} } order by str(?date)"
-elx_make_query(resource_type = "caselaw")
-#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }"
-elx_make_query(resource_type = "manual", manual_type = "SWD")
-#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }"
-
elx_make_query.Rd
Generates pre-defined or manual SPARQL queries to retrieve document ids from Cellar. +List of available resource types: http://publications.europa.eu/resource/authority/resource-type . +Note that not all resource types are compatible with default parameter values.
+elx_make_query(
+ resource_type = c("any", "directive", "regulation", "decision", "recommendation",
+ "intagr", "caselaw", "manual", "proposal", "national_impl"),
+ manual_type = "",
+ directory = NULL,
+ sector = NULL,
+ include_corrigenda = FALSE,
+ include_celex = TRUE,
+ include_lbs = FALSE,
+ include_date = FALSE,
+ include_date_force = FALSE,
+ include_date_endvalid = FALSE,
+ include_date_transpos = FALSE,
+ include_date_lodged = FALSE,
+ include_force = FALSE,
+ include_eurovoc = FALSE,
+ include_author = FALSE,
+ include_citations = FALSE,
+ include_court_procedure = FALSE,
+ include_directory = FALSE,
+ include_sector = FALSE,
+ include_ecli = FALSE,
+ include_judge_rapporteur = FALSE,
+ include_advocate_general = FALSE,
+ include_court_formation = FALSE,
+ include_court_scholarship = FALSE,
+ order = FALSE,
+ limit = NULL
+)
Type of resource to be retrieved via SPARQL query
Define manually the type of resource to be retrieved
Restrict the results to a given directory code
Restrict the results to a given sector code
If `TRUE`, results include corrigenda
If `TRUE`, results include CELEX identifier for each resource URI
If `TRUE`, results include legal bases of legislation
If `TRUE`, results include document date
If `TRUE`, results include date of entry into force
If `TRUE`, results include date of end of validity
If `TRUE`, results include date of transposition deadline for directives
If `TRUE`, results include date a court case was lodged with the court
If `TRUE`, results include whether legislation is in force
If `TRUE`, results include EuroVoc descriptors of subject matter
If `TRUE`, results include document author(s)
If `TRUE`, results include citations (CELEX-labelled)
If `TRUE`, results include type of court procedure and outcome
If `TRUE`, results include the Eur-Lex directory code
If `TRUE`, results include the Eur-Lex sector code
If `TRUE`, results include the ECLI identifier for court documents
If `TRUE`, results include the Judge-Rapporteur
If `TRUE`, results include the Advocate General
If `TRUE`, results include the court formation
If `TRUE`, results include court-curated relevant scholarship
If `TRUE`, order results by document date (the generated query appends `order by str(?date)`)
Limit the number of results, for testing purposes mainly
A character string containing the SPARQL query
+elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE)
+#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} OPTIONAL{?work cdm:work_date_document ?date.} OPTIONAL{?work cdm:resource_legal_in-force ?force.} }"
+elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE)
+#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/REG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_FINANC>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_DEL>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} } order by str(?date)"
+elx_make_query(resource_type = "caselaw")
+#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }"
+elx_make_query(resource_type = "manual", manual_type = "SWD")
+#> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} OPTIONAL{?work cdm:resource_legal_id_celex ?celex.} }"
+
elx_run_query.Rd
Executes cURL request to a pre-defined endpoint of the EU Publications Office. -Relies on elx_make_query to generate valid SPARQL queries. -Results are capped at 1 million rows.
-elx_run_query(
- query = "",
- endpoint = "http://publications.europa.eu/webapi/rdf/sparql"
-)
A valid SPARQL query specified by `elx_make_query()` or manually
SPARQL endpoint
A data frame containing the results of the SPARQL query. -Column `work` contains the Cellar URI of the resource.
-# \donttest{
-elx_run_query(elx_make_query("directive", include_force = TRUE))
-#> # A tibble: 4,382 x 4
-#> work type celex force
-#> <chr> <chr> <chr> <chr>
-#> 1 http://publications.europa.eu/resource/cellar/469391ea-6c7~ http~ 3197~ false
-#> 2 http://publications.europa.eu/resource/cellar/e8fcaf0d-443~ http~ 3198~ false
-#> 3 http://publications.europa.eu/resource/cellar/52639f5f-eca~ http~ 3198~ false
-#> 4 http://publications.europa.eu/resource/cellar/c7560407-689~ http~ 3196~ true
-#> 5 http://publications.europa.eu/resource/cellar/803aa7a4-5a2~ http~ 3199~ false
-#> 6 http://publications.europa.eu/resource/cellar/a9ab7f4b-063~ http~ 3199~ false
-#> 7 http://publications.europa.eu/resource/cellar/d83c00d6-946~ http~ 3198~ false
-#> 8 http://publications.europa.eu/resource/cellar/311441f3-787~ http~ 3196~ false
-#> 9 http://publications.europa.eu/resource/cellar/eebd7224-5f8~ http~ 3197~ false
-#> 10 http://publications.europa.eu/resource/cellar/f2e14ae4-6ba~ http~ 3198~ false
-#> # ... with 4,372 more rows
-# }
-
elx_run_query.Rd
Executes cURL request to a pre-defined endpoint of the EU Publications Office. +Relies on elx_make_query to generate valid SPARQL queries. +Results are capped at 1 million rows.
+elx_run_query(
+ query = "",
+ endpoint = "http://publications.europa.eu/webapi/rdf/sparql"
+)
A valid SPARQL query specified by `elx_make_query()` or manually
SPARQL endpoint
A data frame containing the results of the SPARQL query. +Column `work` contains the Cellar URI of the resource.
+# \donttest{
+elx_run_query(elx_make_query("directive", include_force = TRUE))
+#> # A tibble: 4,382 x 4
+#> work type celex force
+#> <chr> <chr> <chr> <chr>
+#> 1 http://publications.europa.eu/res~ http://publications.europa.e~ 31979~ false
+#> 2 http://publications.europa.eu/res~ http://publications.europa.e~ 31989~ false
+#> 3 http://publications.europa.eu/res~ http://publications.europa.e~ 31984~ false
+#> 4 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ true
+#> 5 http://publications.europa.eu/res~ http://publications.europa.e~ 31993~ false
+#> 6 http://publications.europa.eu/res~ http://publications.europa.e~ 31992~ false
+#> 7 http://publications.europa.eu/res~ http://publications.europa.e~ 31983~ false
+#> 8 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ false
+#> 9 http://publications.europa.eu/res~ http://publications.europa.e~ 31974~ false
+#> 10 http://publications.europa.eu/res~ http://publications.europa.e~ 31982~ false
+#> # ... with 4,372 more rows
+# }
+
- All functions- - |
- |
---|---|
- - | -Retrieve Council votes on EU acts |
-
- - | -Scrape list of court cases from Curia |
-
- - | -Download XML notice associated with a URL |
-
- - | -Retrieve additional data on EU documents |
-
- - | -Label EuroVoc concepts |
-
- - | -Create SPARQL queries |
-
- - | -Execute SPARQL queries |
-
+ All functions+ + |
+ |
---|---|
+ + | +Retrieve Council votes on EU acts |
+
+ + | +Scrape list of court cases from Curia |
+
+ + | +Download XML notice associated with a URL |
+
+ + | +Retrieve additional data on EU documents |
+
+ + | +Label EuroVoc concepts |
+
+ + | +Create SPARQL queries |
+
+ + | +Execute SPARQL queries |
+