diff --git a/DESCRIPTION b/DESCRIPTION index 9d01c1c..42d0d60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: eurlex Type: Package Title: Retrieve Data on European Union Law -Version: 0.3.1 +Version: 0.3.2 Authors@R: c(person(given = "Michal", family = "Ovadek", role = c("aut", "cre", "cph"), diff --git a/NEWS.md b/NEWS.md index b5479ba..7c09feb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# eurlex 0.3.2 + +## Major changes + +- improvement to legal basis harvesting thanks to help from Eur-Lex insiders +- legal basis results are now slightly more comprehensive and correct +- legal basis results now include a new column detailing the "suffix" (paragraph, subparagraph, etc.) in string form + +## Minor changes + +- minor updates to documentation + # eurlex 0.3.1 ## Minor changes diff --git a/R/elx_council_votes.R b/R/elx_council_votes.R index bb08bd7..9683b5e 100644 --- a/R/elx_council_votes.R +++ b/R/elx_council_votes.R @@ -3,6 +3,8 @@ #' Executes a SPARQL query to the Council's endpoint. #' #' @importFrom rlang .data +#' @return +#' A data frame with Council votes on EU acts. #' @export #' @examples #' \donttest{ diff --git a/R/elx_curia_list.R b/R/elx_curia_list.R index eee4868..6ec1e3c 100644 --- a/R/elx_curia_list.R +++ b/R/elx_curia_list.R @@ -5,7 +5,8 @@ #' #' @param data Data to be scraped from four separate lists of cases maintained by Curia, defaults to "all" #' which contains cases from Court of Justice, General Court and Civil Service Tribunal. -#' +#' @return +#' A data frame containing case identifiers and information as character columns. #' @importFrom rlang .data #' @export #' @examples diff --git a/R/elx_fetch_data.R b/R/elx_fetch_data.R index d0cf1b7..bd45ad8 100644 --- a/R/elx_fetch_data.R +++ b/R/elx_fetch_data.R @@ -2,12 +2,14 @@ #' #' Wraps httr::GET with pre-specified headers to retrieve data. #' -#' @param url A valid url based on a resource identifier such as CELEX or Cellar URI. 
+#' @param url A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI. #' @param type The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text. #' @param language_1 The priority language in which the data will be attempted to be retrieved, in ISO 639 2-char code #' @param language_2 If data not available in `language_1`, try `language_2` #' @param language_3 If data not available in `language_2`, try `language_3` #' @param include_breaks If TRUE, text includes tags showing where pages ("---pagebreak---", for pdfs) and documents ("---documentbreak---") were concatenated +#' @return +#' A character vector of length one containing the result. #' @export #' @examples #' \donttest{ diff --git a/R/elx_label_eurovoc.R b/R/elx_label_eurovoc.R index b262d62..fff2bb4 100644 --- a/R/elx_label_eurovoc.R +++ b/R/elx_label_eurovoc.R @@ -5,6 +5,8 @@ #' @param uri_eurovoc Character vector with valid EuroVoc URIs #' @param alt_labels If `TRUE`, results include comma-separated alternative labels in addition to the preferred label #' @param language Language in which to return the labels, in ISO 639 2-char code +#' @return +#' A `tibble` containing EuroVoc unique concept identifiers and labels. 
#' @export #' @examples #' elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", alt_labels = TRUE, language = "fr") diff --git a/R/elx_make_query.R b/R/elx_make_query.R index 3dbef93..7b1a3c0 100644 --- a/R/elx_make_query.R +++ b/R/elx_make_query.R @@ -19,6 +19,8 @@ #' @param include_eurovoc If `TRUE`, results include EuroVoc descriptors of subject matter #' @param order Order results by ids #' @param limit Limit the number of results, for testing purposes mainly +#' @return +#' A character string containing the SPARQL query #' @export #' @examples #' elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) @@ -45,6 +47,7 @@ elx_make_query <- function(resource_type = c("directive","regulation","decision" } query <- "PREFIX cdm: + PREFIX annot: PREFIX skos: PREFIX dc: PREFIX xsd: @@ -88,7 +91,7 @@ elx_make_query <- function(resource_type = c("directive","regulation","decision" stop("Legal basis variable incompatible with requested resource type", call. = TRUE) } - query <- paste(query, "?lbs ?lbcelex", sep = " ") + query <- paste(query, "?lbs ?lbcelex ?lbsuffix", sep = " ") } @@ -267,8 +270,12 @@ elx_make_query <- function(resource_type = c("directive","regulation","decision" if (include_lbs == TRUE & resource_type!="caselaw"){ - query <- paste(query, "?work cdm:resource_legal_based_on_resource_legal ?lbs. - ?lbs cdm:resource_legal_id_celex ?lbcelex.", + query <- paste(query, "OPTIONAL{?work cdm:resource_legal_based_on_resource_legal ?lbs. + ?lbs cdm:resource_legal_id_celex ?lbcelex. + OPTIONAL{?bn owl:annotatedSource ?work. + ?bn owl:annotatedProperty . + ?bn owl:annotatedTarget ?lbs. 
+ ?bn annot:comment_on_legal_basis ?lbsuffix}}", sep = " ") } diff --git a/R/elx_run_query.R b/R/elx_run_query.R index 3b8ae3e..1e77ad6 100644 --- a/R/elx_run_query.R +++ b/R/elx_run_query.R @@ -5,6 +5,9 @@ #' #' @param query A valid SPARQL query specified by `elx_make_query` or manually #' @param endpoint SPARQL endpoint +#' @return +#' A data frame containing the results of the SPARQL query. +#' Column `work` contains the Cellar URI of the resource. Rows with even one missing variable are dropped. #' @export #' @examples #' \donttest{ diff --git a/README.md b/README.md index aa5c261..c602e14 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ The `eurlex` package currently envisions the typical use-case to consist of gett The function `elx_make_query` takes as its first argument the type of resource to be retrieved (such as "directive") from the semantic database that powers Eur-Lex (and other publications) called Cellar. If you are familiar with SPARQL, you can always specify your own queries and execute them with `elx_run_query()`. -`elx_run_query()` executes SPARQL queries on a pre-specified endpoint of the EU Publication Office. It outputs a `data.frame` where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several minutes, depending also on your connection. The first column always contains the unique URI of a "work" (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each "work" but the most useful one is CELEX, retrieved by default. +`elx_run_query()` executes SPARQL queries on a pre-specified endpoint of the EU Publication Office. 
It outputs a `data.frame` where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several minutes, depending also on your connection. The first column always contains the unique URI of a "work" (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each "work" but the most useful one is [CELEX](https://eur-lex.europa.eu/content/tools/TableOfSectors/types_of_documents_in_eurlex.html), retrieved by default. For the moment, it is recommended to retrieve metadata one variable at a time. For example, if you wish to obtain the legal bases of directives and the date of transposition, you should run separate calls: diff --git a/docs/404.html b/docs/404.html index 88c1da8..77109d3 100644 --- a/docs/404.html +++ b/docs/404.html @@ -79,7 +79,7 @@ eurlex - 0.3.1 + 0.3.2 diff --git a/docs/articles/eurlexpkg.html b/docs/articles/eurlexpkg.html index 83abfcf..01dc747 100644 --- a/docs/articles/eurlexpkg.html +++ b/docs/articles/eurlexpkg.html @@ -39,7 +39,7 @@ eurlex - 0.3.1 + 0.3.2 @@ -118,6 +118,7 @@

query_dir %>%
   glue::as_glue() # for nicer printing
 #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#>   PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
 #>   PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
 #>   PREFIX dc:<http://purl.org/dc/elements/1.1/>
 #>   PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
@@ -131,6 +132,7 @@ 

elx_make_query(resource_type = "caselaw") %>% glue::as_glue() #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#> +#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#> #> PREFIX skos:<http://www.w3.org/2004/02/skos/core#> #> PREFIX dc:<http://purl.org/dc/elements/1.1/> #> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> @@ -150,6 +152,7 @@

elx_make_query(resource_type = "manual", manual_type = "SWD") %>% glue::as_glue() #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#> +#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#> #> PREFIX skos:<http://www.w3.org/2004/02/skos/core#> #> PREFIX dc:<http://purl.org/dc/elements/1.1/> #> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> @@ -163,6 +166,7 @@

elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE) %>%
   glue::as_glue()
 #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
+#>   PREFIX annot: <http://publications.europa.eu/ontology/annotation#>
 #>   PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
 #>   PREFIX dc:<http://purl.org/dc/elements/1.1/>
 #>   PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
@@ -178,12 +182,13 @@ 

elx_make_query(resource_type = "recommendation", include_date = TRUE, include_lbs = TRUE) %>% glue::as_glue() #> PREFIX cdm: <http://publications.europa.eu/ontology/cdm#> +#> PREFIX annot: <http://publications.europa.eu/ontology/annotation#> #> PREFIX skos:<http://www.w3.org/2004/02/skos/core#> #> PREFIX dc:<http://purl.org/dc/elements/1.1/> #> PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> #> PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> #> PREFIX owl:<http://www.w3.org/2002/07/owl#> -#> select distinct ?work ?type ?celex str(?date) ?lbs ?lbcelex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/RECO>|| +#> select distinct ?work ?type ?celex str(?date) ?lbs ?lbcelex ?lbsuffix where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/RECO>|| #> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DEC>|| #> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DIR>|| #> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_OPIN>|| @@ -191,8 +196,12 @@

#> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_REG>|| #> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_RECO>|| #> ?type=<http://publications.europa.eu/resource/authority/resource-type/RECO_DRAFT>) -#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. ?work cdm:work_date_document ?date. ?work cdm:resource_legal_based_on_resource_legal ?lbs. -#> ?lbs cdm:resource_legal_id_celex ?lbcelex. } +#> FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. ?work cdm:work_date_document ?date. OPTIONAL{?work cdm:resource_legal_based_on_resource_legal ?lbs. +#> ?lbs cdm:resource_legal_id_celex ?lbcelex. +#> OPTIONAL{?bn owl:annotatedSource ?work. +#> ?bn owl:annotatedProperty <http://publications.europa.eu/ontology/cdm#resource_legal_based_on_resource_legal>. +#> ?bn owl:annotatedTarget ?lbs. +#> ?bn annot:comment_on_legal_basis ?lbsuffix}} } # minimal query: elx_make_query(resource_type = "recommendation")

Now that we have a query, we are ready to run it.

diff --git a/docs/articles/eurlexpkg_files/figure-html/wordcloud-1.png b/docs/articles/eurlexpkg_files/figure-html/wordcloud-1.png index f17af7d..a17a045 100644 Binary files a/docs/articles/eurlexpkg_files/figure-html/wordcloud-1.png and b/docs/articles/eurlexpkg_files/figure-html/wordcloud-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 528c863..c1b5d22 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -79,7 +79,7 @@ eurlex - 0.3.1 + 0.3.2

diff --git a/docs/authors.html b/docs/authors.html index 36a3321..0e4e86c 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -79,7 +79,7 @@ eurlex - 0.3.1 + 0.3.2 diff --git a/docs/index.html b/docs/index.html index 0a0a89c..33468f0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -38,7 +38,7 @@ eurlex - 0.3.1 + 0.3.2 @@ -100,8 +100,16 @@

The eurlex R package attempts to significantly reduce the overhead associated with using SPARQL and REST APIs made available by the EU Publication Office. Although at present it does not offer access to the same array of information as comprehensive web scraping might, the package provides simpler, more efficient and transparent access to data on European Union law.

The eurlex package currently envisions the typical use-case to consist of getting bulk information about EU legislation into R as fast as possible. The package contains three core functions to achieve that objective: elx_make_query() to create pre-defined or customized SPARQL queries; elx_run_query() to execute the pre-made or any other manually input query; and elx_fetch_data() to fire GET requests for certain metadata to the REST API.

The function elx_make_query takes as its first argument the type of resource to be retrieved (such as “directive”) from the semantic database that powers Eur-Lex (and other publications) called Cellar. If you are familiar with SPARQL, you can always specify your own queries and execute them with elx_run_query().

-

elx_run_query() executes SPARQL queries on a pre-specified endpoint of the EU Publication Office. It outputs a data.frame where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Obviously, the more data is to be returned, the longer the execution time, varying from a few seconds to several minutes, depending also on your connection. The first column always contains the unique URI of a “work” (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each “work” but the most useful one is CELEX, retrieved by default.

-

The core contribution of the SPARQL requests is that we obtain a comprehensive list of identifiers that we can subsequently use to obtain more data relating to the document in question. While the results of the SPARQL queries are useful also for webscraping (with the rvest package), the function elx_fetch_data() enables us to fire GET requests to retrieve data on documents with known identifiers (including Cellar URI). The function currently enables downloading the title and the full text (where available in html) of a document.

+

elx_run_query() executes SPARQL queries on a pre-specified endpoint of the EU Publication Office. It outputs a data.frame where each column corresponds to one of the requested variables, while the rows accumulate observations of the resource type satisfying the query criteria. Naturally, the more data is requested, the longer the execution time, which varies from a few seconds to several minutes, depending also on your connection. The first column always contains the unique URI of a “work” (legislative act or court judgment) which identifies each resource in Cellar. Several human-readable identifiers are normally associated with each “work” but the most useful one is CELEX, retrieved by default.

+

For the moment, it is recommended to retrieve metadata one variable at a time. For example, if you wish to obtain the legal bases of directives and the date of transposition, you should run separate calls:

+
    +
  1. ids <- elx_make_query("directive") %>% elx_run_query()
  2. +
  3. lbs <- elx_make_query("directive", include_lbs = TRUE) %>% elx_run_query()
  4. +
  5. dates <- elx_make_query("directive", include_date_transpos = TRUE) %>% elx_run_query()
  6. +
  7. ids %>% dplyr::left_join(lbs) %>% dplyr::left_join(dates)
  8. +
+

rather than elx_make_query("directive", include_lbs = TRUE, include_date_transpos = TRUE). The reason is that observations with missing data on any variable are currently dropped entirely when several variables are requested in a single query. By separating the calls, you can at least identify the missing data.

+

One of the main contributions of the SPARQL requests is that we obtain a comprehensive list of identifiers that we can subsequently use to obtain more data relating to the document in question. While the results of the SPARQL queries are also useful for web scraping (with the rvest package), the function elx_fetch_data() enables us to fire GET requests to retrieve data on documents with known identifiers (including Cellar URI). The function currently enables downloading the title and the full text of a document in all available languages.

See the vignette for a walkthrough on how to use the package.

diff --git a/docs/news/index.html b/docs/news/index.html index a971187..8f74832 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -79,7 +79,7 @@ eurlex - 0.3.1 + 0.3.2
@@ -129,14 +129,35 @@

Changelog

-
-

-eurlex 0.3.1 Unreleased +
+

+eurlex 0.3.2 Unreleased

+
+

+Major changes

+
    +
  • improvement to legal basis harvesting thanks to help from Eur-Lex insiders
  • +
  • legal basis results are now slightly more comprehensive and correct
  • +
  • legal basis results now include a new column detailing the “suffix” (paragraph, subparagraph, etc.) in string form
  • +
+

Minor changes

    +
  • minor updates to documentation
  • +
+
+
+
+

+eurlex 0.3.1 2020-09-11 +

+
+

+Minor changes

+
  • elx_fetch_data() now prefers CELEX-based URLs (instead of Cellar URIs) as input, as they appear to yield fewer missing documents
@@ -146,9 +167,9 @@

eurlex 0.3.0 Unreleased

-
+

-Major changes

+Major changes

  • elx_fetch_data("text") now retrieves plain text from html, pdf and MS Word documents
  • @@ -162,9 +183,9 @@

    eurlex 0.2.3 Unreleased

    -
    +

    -Minor changes

    +Minor changes
    • fixed serious bugs in elx_curia_list()
    • @@ -177,9 +198,9 @@

      eurlex 0.2.2 Unreleased

      -
      +

      -Major changes

      +Major changes
      • elx_council_votes() made fully operational
      • @@ -190,9 +211,9 @@

        eurlex 0.2.1 2020-08-19

        -
        +

        -Minor changes

        +Minor changes
        • optimization, reducing dependencies, etc.
        @@ -202,9 +223,9 @@

        eurlex 0.2.0 Unreleased

        -
        +

        -Major changes

        +Major changes
        • addition of proposals and national implementing laws to possible SPARQL queries
        • EuroVoc topics, retrievable in all EU languages, can now be included in SPARQL results
        • @@ -212,9 +233,9 @@

        • added elx_curia_list() to retrieve full list of EU court cases
        -
        +

        -Minor changes

        +Minor changes
        • switch from XML to xml2
        • SPARQL package dependency removed
        • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 66492ad..d894b88 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,5 +3,5 @@ pkgdown: 1.5.1 pkgdown_sha: ~ articles: eurlexpkg: eurlexpkg.html -last_built: 2020-09-10T22:55Z +last_built: 2020-09-24T22:38Z diff --git a/docs/reference/elx_council_votes.html b/docs/reference/elx_council_votes.html index 90718ea..7197eea 100644 --- a/docs/reference/elx_council_votes.html +++ b/docs/reference/elx_council_votes.html @@ -80,7 +80,7 @@ eurlex - 0.3.1 + 0.3.2
        @@ -138,6 +138,9 @@

        Retrieve Council votes on EU acts

        elx_council_votes()
        +

        Value

        + +

        A data frame with Council votes on EU acts.

        Examples

        # \donttest{ diff --git a/docs/reference/elx_curia_list.html b/docs/reference/elx_curia_list.html index 8a29965..8dde22f 100644 --- a/docs/reference/elx_curia_list.html +++ b/docs/reference/elx_curia_list.html @@ -81,7 +81,7 @@ eurlex - 0.3.1 + 0.3.2
        @@ -149,24 +149,27 @@

        Arg +

        Value

        + +

        A data frame containing case identifiers and information as character columns.

        Examples

        # \donttest{ -elx_curia_list(data = "cst_all")
        #> # A tibble: 1,759 x 3 +elx_curia_list(data = "cst_all")
        #> # A tibble: 1,759 x 3 #> case_id case_info linked_celex -#> <chr> <chr> <chr> -#> 1 F-1/05 * Judgment of 26 October 2006, Landgren / ETF (F-1/05, ~ NA -#> 2 F-1/05 Order of 22 May 2007, Landgren / ETF (F-1/05, ECR-SC ~ NA -#> 3 F-1/05 I~ Order of 13 July 2007, Landgren / ETF (F-1/05 INT, EC~ NA -#> 4 F-1/05 Order of 9 November 2010, Landgren / ETF (F-1/05, unp~ NA -#> 5 F-2/05 Removed from the register on 18 June 2008, Kröppelin ~ NA -#> 6 F-3/05 Order of 15 May 2006, Schmit / Commission (F-3/05, EC~ NA -#> 7 F-4/05 Removed from the register on 18 June 2008, Huober / C~ NA -#> 8 F-5/05 * Judgment of 28 April 2009, Violetti and others / Comm~ NA -#> 9 F-6/05 Removed from the register on 18 June 2008, Kröppelin ~ NA -#> 10 F-7/05 Schmit / Commission (F-7/05) , see Case F-5/05 NA -#> # ... with 1,749 more rows
        # } -
        +#> <chr> <chr> <chr> +#> 1 F-1/05 * Judgment of 26 October 2006, Landgren / ETF (F-1/05, ~ <NA> +#> 2 F-1/05 Order of 22 May 2007, Landgren / ETF (F-1/05, ECR-SC ~ <NA> +#> 3 F-1/05 I~ Order of 13 July 2007, Landgren / ETF (F-1/05 INT, EC~ <NA> +#> 4 F-1/05 Order of 9 November 2010, Landgren / ETF (F-1/05, unp~ <NA> +#> 5 F-2/05 Removed from the register on 18 June 2008, Kröppelin ~ <NA> +#> 6 F-3/05 Order of 15 May 2006, Schmit / Commission (F-3/05, EC~ <NA> +#> 7 F-4/05 Removed from the register on 18 June 2008, Huober / C~ <NA> +#> 8 F-5/05 * Judgment of 28 April 2009, Violetti and others / Comm~ <NA> +#> 9 F-6/05 Removed from the register on 18 June 2008, Kröppelin ~ <NA> +#> 10 F-7/05 Schmit / Commission (F-7/05) , see Case F-5/05 <NA> +#> # ... with 1,749 more rows
        # } +
        @@ -149,7 +149,7 @@

        Arg url -

        A valid url based on a resource identifier such as CELEX or Cellar URI.

        +

        A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.

        type @@ -173,6 +173,9 @@

        Arg +

        Value

        + +

        A character vector of length one containing the result.

        Examples

        # \donttest{ diff --git a/docs/reference/elx_label_eurovoc.html b/docs/reference/elx_label_eurovoc.html index 6261e56..5096de6 100644 --- a/docs/reference/elx_label_eurovoc.html +++ b/docs/reference/elx_label_eurovoc.html @@ -80,7 +80,7 @@ eurlex - 0.3.1 + 0.3.2
        @@ -154,16 +154,19 @@

        Arg +

        Value

        + +

        A `tibble` containing EuroVoc unique concept identifiers and labels.

        Examples

        -
        elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", alt_labels = TRUE, language = "fr")
        #> # A tibble: 1 x 2 +
        elx_label_eurovoc(uri_eurovoc = "http://eurovoc.europa.eu/5760", alt_labels = TRUE, language = "fr")
        #> # A tibble: 1 x 2 #> eurovoc labels -#> <chr> <chr> -#> 1 http://eurovoc.europa.eu/5760 oiseau,oiseau migrateur,rapace
        elx_label_eurovoc(uri_eurovoc = c("http://eurovoc.europa.eu/5760","http://eurovoc.europa.eu/576"))
        #> # A tibble: 2 x 2 +#> <chr> <chr> +#> 1 http://eurovoc.europa.eu/5760 oiseau,oiseau migrateur,rapace
        elx_label_eurovoc(uri_eurovoc = c("http://eurovoc.europa.eu/5760","http://eurovoc.europa.eu/576"))
        #> # A tibble: 2 x 2 #> eurovoc labels -#> <chr> <chr> -#> 1 http://eurovoc.europa.eu/5760 bird -#> 2 http://eurovoc.europa.eu/576 private law
        +#> <chr> <chr> +#> 1 http://eurovoc.europa.eu/5760 bird +#> 2 http://eurovoc.europa.eu/576 private law
        @@ -213,9 +213,12 @@

        Arg +

        Value

        + +

        A character string containing the SPARQL query

        Examples

        -
        elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE)
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. ?work cdm:work_date_document ?date. ?work cdm:resource_legal_in-force ?force. }"
        elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE)
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/REG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_FINANC>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_DEL>) ?work cdm:resource_legal_id_celex ?celex. } order by str(?date)"
        elx_make_query(resource_type = "caselaw")
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) ?work cdm:resource_legal_id_celex ?celex. }"
        elx_make_query(resource_type = "manual", manual_type = "SWD")
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. }"
        +
        elx_make_query(resource_type = "directive", include_date = TRUE, include_force = TRUE)
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex str(?date) ?force where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/DIR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/DIR_DEL>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. ?work cdm:work_date_document ?date. ?work cdm:resource_legal_in-force ?force. }"
        elx_make_query(resource_type = "regulation", include_corrigenda = TRUE, order = TRUE)
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/REG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_IMPL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_FINANC>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/REG_DEL>) ?work cdm:resource_legal_id_celex ?celex. } order by str(?date)"
        elx_make_query(resource_type = "caselaw")
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type. FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_JUR>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/THIRDPARTY_PROCEED>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/GARNISHEE_ORDER>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/RULING>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/JUDG_EXTRACT>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/INFO_JUDICIAL>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/VIEW_AG>||\n ?type=<http://publications.europa.eu/resource/authority/resource-type/OPIN_AG>) ?work cdm:resource_legal_id_celex ?celex. }"
        elx_make_query(resource_type = "manual", manual_type = "SWD")
        #> [1] "PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>\n PREFIX annot: <http://publications.europa.eu/ontology/annotation#>\n PREFIX skos:<http://www.w3.org/2004/02/skos/core#>\n PREFIX dc:<http://purl.org/dc/elements/1.1/>\n PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>\n PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n PREFIX owl:<http://www.w3.org/2002/07/owl#>\n select distinct ?work ?type ?celex where{ ?work cdm:work_has_resource-type ?type.FILTER(?type=<http://publications.europa.eu/resource/authority/resource-type/SWD>) \n FILTER not exists{?work cdm:work_has_resource-type <http://publications.europa.eu/resource/authority/resource-type/CORRIGENDUM>} ?work cdm:resource_legal_id_celex ?celex. }"
        diff --git a/docs/reference/elx_run_query.html b/docs/reference/elx_run_query.html index 52ebae7..b1e6c98 100644 --- a/docs/reference/elx_run_query.html +++ b/docs/reference/elx_run_query.html @@ -81,7 +81,7 @@ eurlex - 0.3.1 + 0.3.2
      @@ -155,24 +155,28 @@

      Arg +

      Value

      + +

      A data frame containing the results of the SPARQL query. +Column `work` contains the Cellar URI of the resource. Rows with a missing value in any variable are dropped.

      Examples

      # \donttest{ -elx_run_query(elx_make_query("directive", include_force = TRUE))
      #> # A tibble: 4,192 x 4 +elx_run_query(elx_make_query("directive", include_force = TRUE))
      #> # A tibble: 4,192 x 4 #> work type celex force -#> <chr> <chr> <chr> <chr> -#> 1 http://publications.europa.eu/res~ http://publications.europa.e~ 31979~ false -#> 2 http://publications.europa.eu/res~ http://publications.europa.e~ 31989~ false -#> 3 http://publications.europa.eu/res~ http://publications.europa.e~ 31984~ false -#> 4 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ true -#> 5 http://publications.europa.eu/res~ http://publications.europa.e~ 31993~ false -#> 6 http://publications.europa.eu/res~ http://publications.europa.e~ 31992~ false -#> 7 http://publications.europa.eu/res~ http://publications.europa.e~ 31983~ false -#> 8 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ false -#> 9 http://publications.europa.eu/res~ http://publications.europa.e~ 31974~ false -#> 10 http://publications.europa.eu/res~ http://publications.europa.e~ 31982~ false -#> # ... with 4,182 more rows
      # } -
      +#> <chr> <chr> <chr> <chr> +#> 1 http://publications.europa.eu/res~ http://publications.europa.e~ 31979~ false +#> 2 http://publications.europa.eu/res~ http://publications.europa.e~ 31989~ false +#> 3 http://publications.europa.eu/res~ http://publications.europa.e~ 31984~ false +#> 4 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ true +#> 5 http://publications.europa.eu/res~ http://publications.europa.e~ 31993~ false +#> 6 http://publications.europa.eu/res~ http://publications.europa.e~ 31992~ false +#> 7 http://publications.europa.eu/res~ http://publications.europa.e~ 31983~ false +#> 8 http://publications.europa.eu/res~ http://publications.europa.e~ 31966~ false +#> 9 http://publications.europa.eu/res~ http://publications.europa.e~ 31974~ false +#> 10 http://publications.europa.eu/res~ http://publications.europa.e~ 31982~ false +#> # ... with 4,182 more rows
      # } +
    diff --git a/man/elx_council_votes.Rd b/man/elx_council_votes.Rd index f99cef1..f2658f3 100644 --- a/man/elx_council_votes.Rd +++ b/man/elx_council_votes.Rd @@ -6,6 +6,9 @@ \usage{ elx_council_votes() } +\value{ +A data frame with Council votes on EU acts. +} \description{ Executes a SPARQL query to the Council's endpoint. } diff --git a/man/elx_curia_list.Rd b/man/elx_curia_list.Rd index 6afb34a..5e39191 100644 --- a/man/elx_curia_list.Rd +++ b/man/elx_curia_list.Rd @@ -10,6 +10,9 @@ elx_curia_list(data = c("all", "ecj_old", "ecj_new", "gc_all", "cst_all")) \item{data}{Data to be scraped from four separate lists of cases maintained by Curia, defaults to "all" which contains cases from Court of Justice, General Court and Civil Service Tribunal.} } +\value{ +A data frame containing case identifiers and information as character columns. +} \description{ Harvests data from lists of EU court cases from curia.europa.eu. CELEX identifiers are extracted from hyperlinks where available. diff --git a/man/elx_fetch_data.Rd b/man/elx_fetch_data.Rd index 8582b1b..454d74e 100644 --- a/man/elx_fetch_data.Rd +++ b/man/elx_fetch_data.Rd @@ -14,7 +14,7 @@ elx_fetch_data( ) } \arguments{ -\item{url}{A valid url based on a resource identifier such as CELEX or Cellar URI.} +\item{url}{A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.} \item{type}{The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text.} @@ -26,6 +26,9 @@ elx_fetch_data( \item{include_breaks}{If TRUE, text includes tags showing where pages ("---pagebreak---", for pdfs) and documents ("---documentbreak---") were concatenated} } +\value{ +A character vector of length one containing the result. +} \description{ Wraps httr::GET with pre-specified headers to retrieve data. 
} diff --git a/man/elx_label_eurovoc.Rd b/man/elx_label_eurovoc.Rd index fab145e..ea39dc2 100644 --- a/man/elx_label_eurovoc.Rd +++ b/man/elx_label_eurovoc.Rd @@ -13,6 +13,9 @@ elx_label_eurovoc(uri_eurovoc = "", alt_labels = FALSE, language = "en") \item{language}{Language in which to return the labels, in ISO 639 2-char code} } +\value{ +A `tibble` containing EuroVoc unique concept identifiers and labels. +} \description{ Create a look-up table with labels for EuroVoc concept URIs. Only unique identifiers are returned. } diff --git a/man/elx_make_query.Rd b/man/elx_make_query.Rd index f09e91e..b6b510b 100644 --- a/man/elx_make_query.Rd +++ b/man/elx_make_query.Rd @@ -48,6 +48,9 @@ elx_make_query( \item{limit}{Limit the number of results, for testing purposes mainly} } +\value{ +A character string containing the SPARQL query. +} \description{ Generates pre-defined or manual SPARQL queries to retrieve document ids from Cellar. List of available resource types: http://publications.europa.eu/resource/authority/resource-type . diff --git a/man/elx_run_query.Rd b/man/elx_run_query.Rd index b6a2411..499d5b0 100644 --- a/man/elx_run_query.Rd +++ b/man/elx_run_query.Rd @@ -14,6 +14,10 @@ elx_run_query( \item{endpoint}{SPARQL endpoint} } +\value{ +A data frame containing the results of the SPARQL query. +Column `work` contains the Cellar URI of the resource. Rows with a missing value in any variable are dropped. +} \description{ Executes cURL request to a pre-defined endpoint of the EU Publications Office. Relies on elx_make_query to generate valid SPARQL queries