diff --git a/.env.example b/.env.example index d7be6c19..9194c7fc 100644 --- a/.env.example +++ b/.env.example @@ -2,7 +2,10 @@ #DC_WEB_LIB_REGISTER_SOURCES_BODS=../register-sources-bods #DC_WEB_LIB_REGISTER_SOURCES_OC=../register-sources-oc #DC_WEB_LIB_REGISTER_SOURCES_PSC=../register-sources-psc +#DC_WEB_LIB_REGISTER_SOURCES_SK=../register-sources-sk +#DC_WEB_LIB_REGISTER_SOURCES_DK=../register-sources-dk ADMIN_EMAILS= +BODS_EXPORT_AWS_REGION=eu-west-1 BODS_EXPORT_AWS_ACCESS_KEY_ID= BODS_EXPORT_AWS_SECRET_ACCESS_KEY= BODS_EXPORT_S3_BUCKET_NAME= diff --git a/.env.test b/.env.test index aa3042a6..be8e1a01 100644 --- a/.env.test +++ b/.env.test @@ -4,6 +4,7 @@ SITE_BASE_URL="http://0.0.0.0:3000" REDIS_URL="redis://localhost:6379/1" BODS_EXPORT_S3_BUCKET_NAME=test-export-bucket BODS_EXPORT_AWS_ACCESS_KEY_ID=test +BODS_EXPORT_AWS_REGION=eu-west-1 BODS_EXPORT_AWS_SECRET_ACCESS_KEY=test DEV_DATA_AWS_ACCESS_KEY_ID=test DEV_DATA_AWS_SECRET_ACCESS_KEY=test diff --git a/Gemfile b/Gemfile index 05bc3015..d8b7eb01 100644 --- a/Gemfile +++ b/Gemfile @@ -47,6 +47,8 @@ gem 'register_common', git: 'https://github.com/openownership/register-common.gi gem 'register_sources_oc', git: 'https://github.com/openownership/register-sources-oc.git' gem 'register_sources_psc', git: 'https://github.com/openownership/register-sources-psc.git' gem 'register_sources_bods', git: 'https://github.com/openownership/register-sources-bods.git' +gem 'register_sources_sk', git: 'https://github.com/openownership/register-sources-sk.git' +gem 'register_sources_dk', git: 'https://github.com/openownership/register-sources-dk.git' group :development, :test do gem 'byebug', '~> 11.1' diff --git a/Gemfile.lock b/Gemfile.lock index 76f7aea6..dc82df23 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,7 +19,7 @@ GIT GIT remote: https://github.com/openownership/register-sources-bods.git - revision: 9952fbb644b870ecb13899e03bb130762526bf6a + revision: 9fc7ec354ac58480556d0a44d581e9bffd4886a5 specs: 
register_sources_bods (0.1.0) activesupport (>= 6, < 8) @@ -30,6 +30,19 @@ GIT iso8601 xxhash +GIT + remote: https://github.com/openownership/register-sources-dk.git + revision: b97deb65255d825e11c94108785744936f3abec6 + specs: + register_sources_dk (0.1.0) + activesupport (>= 6, < 8) + countries (~> 4.0.1) + dry-struct (>= 1, < 2) + dry-types (>= 1, < 2) + elasticsearch (>= 7.10, < 8) + iso8601 + xxhash + GIT remote: https://github.com/openownership/register-sources-oc.git revision: 04ef5f2991164717d90f9f0499d082c296335e89 @@ -50,7 +63,7 @@ GIT GIT remote: https://github.com/openownership/register-sources-psc.git - revision: 7dcd200bddca4c99497d0676a9fd5bcacf807939 + revision: c9755e8e37bc095b2e580c2c14f9d65c5f33f5cc specs: register_sources_psc (0.1.0) activesupport (>= 6, < 8) @@ -61,6 +74,19 @@ GIT iso8601 xxhash +GIT + remote: https://github.com/openownership/register-sources-sk.git + revision: b96fbdaa1d9bb7103271b616fa1ba74582b44fe4 + specs: + register_sources_sk (0.1.0) + activesupport (>= 6, < 8) + countries (~> 4.0.1) + dry-struct (>= 1, < 2) + dry-types (>= 1, < 2) + elasticsearch (>= 7.10, < 8) + iso8601 + xxhash + GEM remote: https://rubygems.org/ specs: @@ -566,8 +592,10 @@ DEPENDENCIES redcarpet (~> 3.5) register_common! register_sources_bods! + register_sources_dk! register_sources_oc! register_sources_psc! + register_sources_sk! rexml (~> 3.2) roadie-rails (~> 2.3) rollbar (~> 3.3) diff --git a/app.json b/app.json index 6b933fec..bfb96c9e 100644 --- a/app.json +++ b/app.json @@ -17,6 +17,9 @@ "BODS_EXPORT_AWS_SECRET_ACCESS_KEY": { "required": true }, + "BODS_EXPORT_AWS_REGION": { + "required": true + }, "BODS_EXPORT_S3_BUCKET_NAME": { "required": true }, diff --git a/app/controllers/entities_controller.rb b/app/controllers/entities_controller.rb index 39a27221..79920994 100644 --- a/app/controllers/entities_controller.rb +++ b/app/controllers/entities_controller.rb @@ -76,10 +76,12 @@ def show @similar_people = entity.natural_person? ? 
similar_people(entity) : nil end - @data_source_names = DATA_SOURCE_REPOSITORY.data_source_names_for_entity(entity) + raw_records = RAW_DATA_RECORD_REPOSITORY.all_for_entity(entity) + @data_source_names = DATA_SOURCE_REPOSITORY.data_source_names_for_raw_records(raw_records) + unless @data_source_names.empty? - @newest_raw_record = RAW_DATA_RECORD_REPOSITORY.newest_for_entity(entity).data.notified_on # .updated_at - @raw_record_count = RAW_DATA_RECORD_REPOSITORY.all_for_entity(entity).size + @newest_raw_record = RAW_DATA_RECORD_REPOSITORY.newest_for_entity_date(entity) + @raw_record_count = raw_records.size end # Conversion @@ -103,6 +105,8 @@ def show entity.merged_entities.map(&:bods_statement) ].compact.flatten.uniq { |s| s.statementID } + statements = BodsStatementSorter.new.sort_statements(statements) + render json: JSON.pretty_generate(statements.as_json) end end @@ -125,13 +129,13 @@ def raw end redirect_to_master_entity?(:raw, entity) && return @sentity = entity - @raw_data_records = RAW_DATA_RECORD_REPOSITORY.all_for_entity(entity) # .page(params[:page]).per(10) + @raw_data_records = RAW_DATA_RECORD_REPOSITORY.all_for_entity(entity) return if @raw_data_records.empty? 
@oc_data = get_opencorporates_company_hash(entity) || {} - @newest = RAW_DATA_RECORD_REPOSITORY.newest_for_entity(entity).data.notified_on # .updated_at - @oldest = RAW_DATA_RECORD_REPOSITORY.oldest_for_entity(entity).data.notified_on # created_at - @data_sources = DATA_SOURCE_REPOSITORY.all_for_entity(entity) + @newest = RAW_DATA_RECORD_REPOSITORY.newest_for_entity_date(entity) + @oldest = RAW_DATA_RECORD_REPOSITORY.oldest_for_entity_date(entity) + @data_sources = DATA_SOURCE_REPOSITORY.all_for_raw_records(@raw_data_records) end def opencorporates_additional_info diff --git a/app/controllers/relationships_controller.rb b/app/controllers/relationships_controller.rb index 82e022d9..5d79ddd0 100644 --- a/app/controllers/relationships_controller.rb +++ b/app/controllers/relationships_controller.rb @@ -2,8 +2,8 @@ class RelationshipsController < ApplicationController ENTITY_SERVICE = Rails.application.config.entity_service def show - target_entity = ENTITY_SERVICE.find(params[:entity_id]) - source_entity = resolve_master_entity(ENTITY_SERVICE.find(params[:id])) + target_entity = ENTITY_SERVICE.find_by_entity_id(params[:entity_id]) + source_entity = resolve_master_entity(ENTITY_SERVICE.find_by_entity_id(params[:id])) relationships = InferredRelationshipGraph2 .new(target_entity) diff --git a/app/decorators/entity_graph_decorator.rb b/app/decorators/entity_graph_decorator.rb index 8289c91b..b7eade67 100644 --- a/app/decorators/entity_graph_decorator.rb +++ b/app/decorators/entity_graph_decorator.rb @@ -42,7 +42,7 @@ def cytoscape_entity_node(node) entity = node.entity #.decorate(context: context) classes = entity.dissolution_date ? ['dissolved'] : [] tooltip = nil - unless false # entity.is_a? UnknownPersonsEntity + unless entity.unknown? 
tooltip = h.render( partial: 'entities/graph_tooltip', locals: { entity: entity }, diff --git a/app/helpers/entity_helper.rb b/app/helpers/entity_helper.rb index 06c2cfeb..24aa3bc7 100644 --- a/app/helpers/entity_helper.rb +++ b/app/helpers/entity_helper.rb @@ -1,6 +1,6 @@ module EntityHelper def entity_link(entity, &block) - if entity.master_entity.present? #entity.is_a?(CircularOwnershipEntity) || entity.is_a?(UnknownPersonsEntity) || entity.master_entity.present? + if entity.master_entity.present? || entity.unknown? #entity.is_a?(CircularOwnershipEntity) capture(&block) else link_to(entity_path(entity.id), &block) diff --git a/app/repositories/bods_export_repository.rb b/app/repositories/bods_export_repository.rb index 8bff8979..84da89e6 100644 --- a/app/repositories/bods_export_repository.rb +++ b/app/repositories/bods_export_repository.rb @@ -4,7 +4,7 @@ class BodsExportRepository def initialize(s3_adapter: nil, s3_bucket: nil, s3_prefix: nil) @s3_adapter = s3_adapter || Rails.application.config.s3_adapter @s3_bucket = s3_bucket || ENV.fetch('BODS_EXPORT_S3_BUCKET_NAME') - @s3_prefix = s3_prefix || ENV.fetch('BODS_EXPORT_S3_PREFIX', 'public/exports/') + @s3_prefix = s3_prefix || ENV.fetch('BODS_EXPORT_S3_PREFIX', 'exports/') end def completed_exports(limit: 5) @@ -20,11 +20,13 @@ def most_recent attr_reader :s3_adapter, :s3_bucket, :s3_prefix def list_all - s3_paths = s3_adapter.list_objects(s3_bucket: s3_bucket, s3_prefix: File.join(s3_prefix, 'statements.')) + s3_paths = s3_adapter.list_objects(s3_bucket: s3_bucket, s3_prefix: s3_prefix) s3_paths.sort.reverse.map do |s3_path| + matched = /ex(?<year>\d{4})(?<month>\d{2})(?<day>\d{2})/.match s3_path + next unless matched time = begin - Time.parse(s3_path.split('.')[1]) + Time.new(matched[:year], matched[:month], matched[:day]) rescue ArgumentError next end diff --git a/app/repositories/data_source_repository.rb b/app/repositories/data_source_repository.rb index a0989603..6391c5ad 100644 --- 
a/app/repositories/data_source_repository.rb +++ b/app/repositories/data_source_repository.rb @@ -1,3 +1,7 @@ +require 'register_sources_psc/structs/company_record' +require 'register_sources_dk/structs/record' +require 'register_sources_sk/structs/record' + class DataSourceRepository def all path = File.join(File.dirname(__FILE__), 'datasources.json') @@ -13,19 +17,29 @@ def find(id) end def find_many(ids) - all.filter { |data_source| ids.include? data_source.id } + all.filter { |data_source| ids.include?(data_source.id) || ids.include?(data_source.slug) } end def where_overview_present all.filter { |data_source| data_source.overview.present? } end - def data_source_names_for_entity(entity) - ["UK PSC Register"] # TODO: generate from sources of entity (or identifiers) + def data_source_names_for_raw_records(raw_records) + datasource_names = raw_records.map do |raw_record| + case raw_record + when RegisterSourcesDk::Deltagerperson + "Denmark Central Business Register (Centrale Virksomhedsregister [CVR])" + when RegisterSourcesPsc::CompanyRecord + "UK PSC Register" + when RegisterSourcesSk::Record + "Slovakia Public Sector Partners Register (Register partnerov verejného sektora)" + end + end.compact.uniq.sort end - def all_for_entity(entity) - data_source_names = data_source_names_for_entity(entity) - all.filter { |data_source| data_source_names.include? data_source.name } + def all_for_raw_records(raw_records) + datasource_names = data_source_names_for_raw_records(raw_records) + + all.filter { |data_source| datasource_names.include? 
data_source.name } end end diff --git a/app/repositories/datasources.json b/app/repositories/datasources.json index 21daf098..b3e0301d 100644 --- a/app/repositories/datasources.json +++ b/app/repositories/datasources.json @@ -1,97 +1,5 @@ { "datasources": [ - { - "_id": { - "$oid": "5d5fb1beeda3b30004498790" - }, - "_slugs": [ - "eiti-pilot-data-myanmar" - ], - "created_at": "2019-08-23T09:28:30.192Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Myanmar", - "name": "EITI pilot data - Myanmar", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.192Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=950147039" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498799" - }, - "_slugs": [ - "eiti-pilot-data-mali" - ], - "created_at": "2019-08-23T09:28:30.280Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Mali", - "name": "EITI pilot data - Mali", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.280Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=177493552" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b300044987a0" - }, - "_slugs": [ - "eiti-pilot-data-zambia" - ], - "created_at": "2019-08-23T09:28:30.364Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Zambia", - "name": "EITI pilot data - Zambia", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.364Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1198225001" - }, - { - "_id": { 
- "$oid": "5d5fb1beeda3b3000449878d" - }, - "_slugs": [ - "eiti-pilot-data-ghana" - ], - "created_at": "2019-08-23T09:28:30.161Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Ghana", - "name": "EITI pilot data - Ghana", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.161Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1926655631" - }, { "_id": { "$oid": "5d4d2f1d07963b0004c00778" @@ -114,396 +22,6 @@ "updated_at": "2019-11-01T14:21:25.809Z", "url": "https://rpvs.gov.sk/" }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498795" - }, - "_slugs": [ - "eiti-pilot-data-honduras" - ], - "created_at": "2019-08-23T09:28:30.246Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Honduras", - "name": "EITI pilot data - Honduras", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.246Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=793160552" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498797" - }, - "_slugs": [ - "eiti-pilot-data-madagascar" - ], - "created_at": "2019-08-23T09:28:30.263Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Madagascar", - "name": "EITI pilot data - Madagascar", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.263Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1184544716" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449879e" - }, - "_slugs": [ - "eiti-pilot-data-togo" - ], - "created_at": 
"2019-08-23T09:28:30.349Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Togo", - "name": "EITI pilot data - Togo", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.349Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=389809839" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449878e" - }, - "_slugs": [ - "eiti-pilot-data-indonesia" - ], - "created_at": "2019-08-23T09:28:30.170Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Indonesia", - "name": "EITI pilot data - Indonesia", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.170Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=538768558" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449879b" - }, - "_slugs": [ - "eiti-pilot-data-norway" - ], - "created_at": "2019-08-23T09:28:30.326Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Norway", - "name": "EITI pilot data - Norway", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.326Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1257742514" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498793" - }, - "_slugs": [ - "eiti-pilot-data-burkina-faso" - ], - "created_at": "2019-08-23T09:28:30.221Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Burkina Faso", - "name": "EITI pilot data - Burkina Faso", - "overview": null, - "timeline_url": null, - "types": [ - 
"thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.221Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1984808479" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498791" - }, - "_slugs": [ - "eiti-pilot-data-uk" - ], - "created_at": "2019-08-23T09:28:30.202Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - UK", - "name": "EITI pilot data - UK", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.202Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1503351240" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498792" - }, - "_slugs": [ - "eiti-pilot-data-afghanistan" - ], - "created_at": "2019-08-23T09:28:30.212Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Afghanistan", - "name": "EITI pilot data - Afghanistan", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.212Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=100859567" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498798" - }, - "_slugs": [ - "eiti-pilot-data-liberia" - ], - "created_at": "2019-08-23T09:28:30.271Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Liberia", - "name": "EITI pilot data - Liberia", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.271Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=680199964" - }, - { - "_id": { - "$oid": 
"5d5fb1beeda3b3000449879c" - }, - "_slugs": [ - "eiti-pilot-data-seychelles" - ], - "created_at": "2019-08-23T09:28:30.334Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Seychelles", - "name": "EITI pilot data - Seychelles", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.334Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1527384866" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449879d" - }, - "_slugs": [ - "eiti-pilot-data-tanzania" - ], - "created_at": "2019-08-23T09:28:30.342Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Tanzania", - "name": "EITI pilot data - Tanzania", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.342Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=859907622" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449878c" - }, - "_slugs": [ - "eiti-pilot-data-democratic-republic-of-congo" - ], - "created_at": "2019-08-23T09:28:30.150Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Democratic Republic of Congo", - "name": "EITI pilot data - Democratic Republic of Congo", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.150Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1443594667" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449878a" - }, - "_slugs": [ - "openownership-register" - ], - "created_at": "2019-08-23T09:28:30.123Z", - "current_statistic_types": [ - - ], - "data_availability": 
null, - "document_id": null, - "name": "OpenOwnership Register", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "selfDeclaration" - ], - "updated_at": "2019-08-23T09:28:30.123Z", - "url": "https://register.openownership.org" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449878f" - }, - "_slugs": [ - "eiti-pilot-data-nigeria" - ], - "created_at": "2019-08-23T09:28:30.179Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Nigeria", - "name": "EITI pilot data - Nigeria", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.179Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1307797548" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498794" - }, - "_slugs": [ - "eiti-pilot-data-cate-divoire" - ], - "created_at": "2019-08-23T09:28:30.235Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Côte d'Ivoire", - "name": "EITI pilot data - Côte d'Ivoire", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.235Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=150471739" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449879a" - }, - "_slugs": [ - "eiti-pilot-data-mauritania" - ], - "created_at": "2019-08-23T09:28:30.289Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Mauritania", - "name": "EITI pilot data - Mauritania", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.289Z", - "url": 
"https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=909011938" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b300044987a1" - }, - "_slugs": [ - "ua-edr-register" - ], - "created_at": "2019-08-23T09:28:30.372Z", - "current_statistic_types": [ - - ], - "data_availability": "The Unified State Register of Legal Entities, Individual Entrepreneurs and Public\nFormations has data publicly and freely available as bulk download, via a\n[daily zip file](https://data.gov.ua/dataset/1c7f3815-3259-45e0-bdf1-64dca07ddc10)\n(unstructured data).\n\nThe data is licensed under\n[Creative Commons Attribution (cc-by)](https://creativecommons.org/licenses/by/4.0/).\n\nIn order to conduct a record search via a\n[simple website interface](https://usr.minjust.gov.ua/), one has to file a\nrequest and pay a small fee.\n", - "document_id": "Ukraine EDR", - "name": "Ukraine Consolidated State Registry (Edinyy Derzhavnyj Reestr [EDR])", - "overview": "Beneficial ownership data in Ukraine has been publicly available as open data in\nthe Unified State Register of Legal Entities and Individual Entrepreneurs (USR)\nsince 2017. We regularly imported this data until September 2020, when a data\nformat change caused us to pause imports indefinitely.\n\nUkrainian law refers to ‘controllers’ (контролери) as opposed to ‘founders’\n(засновниками) to differentiate beneficial ownership from legal\nownership/shareholding. 
‘Founders’ can be legal entities or natural persons.\n\nA beneficial owner of a legal entity is a natural person who directly or\nindirectly:\n\n- Has the right to execute decisive influence on the management or economic\n activity of a legal entity\n- Has decisive influence on the composition and decisions of the entity’s\n managing body\n- Has at least 25% of shares or voting rights of the entity\n\nNominees and agents of such rights cannot be named as beneficial owners.\n\n## Exclusions and exemptions\n\nIf the ‘founders’ of a legal entity are both natural persons and the beneficial\nowners of the entity, then the law relieves them from the duty to separately\ndeclare beneficial ownership (as such information is collected about the founders\nalready).\n\nJoint stock companies and listed companies are not obliged to report their\nbeneficial ownership data in the USR, however they must report owners of 10% and\nabove of company shares to the Stock Market Infrastructure Development Agency\n([SMIDA](https://smida.gov.ua/)).\n\nFinally, the following entities are excluded from the scope of the law: civic\nformations, attorney associations, trade chambers, associations of co-owners of\napartment buildings, religious organisations, state authorities, local\nself-government bodies and their associations, state and municipal\nenterprises/establishments/organisations.\n", - "timeline_url": "https://twitter.com/sheislaurence/timelines/1111242328805650433?ref_src=twsrc%5Etfw", - "types": [ - "officialRegister" - ], - "updated_at": "2021-12-17T17:55:14.831Z", - "url": "https://data.gov.ua/dataset/1c7f3815-3259-45e0-bdf1-64dca07ddc10" - }, { "_id": { "$oid": "5ca49242b51e4f000403da6b" @@ -685,29 +203,6 @@ "updated_at": "2021-12-17T17:55:14.855Z", "url": "http://download.companieshouse.gov.uk/en_pscdata.html" }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449879f" - }, - "_slugs": [ - "eiti-pilot-data-trinidad-and-tobago" - ], - "created_at": "2019-08-23T09:28:30.357Z", - 
"current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Trinidad and Tobago", - "name": "EITI pilot data - Trinidad and Tobago", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.357Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1697865173" - }, { "_id": { "$oid": "5d4d2f1d07963b0004c00779" @@ -729,52 +224,6 @@ ], "updated_at": "2019-11-01T14:21:27.022Z", "url": "https://cvr.dk" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b3000449878b" - }, - "_slugs": [ - "eiti-pilot-data-cameroon" - ], - "created_at": "2019-08-23T09:28:30.137Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Cameroon", - "name": "EITI pilot data - Cameroon", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.137Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=1944716262" - }, - { - "_id": { - "$oid": "5d5fb1beeda3b30004498796" - }, - "_slugs": [ - "eiti-pilot-data-kyrgyz-republic" - ], - "created_at": "2019-08-23T09:28:30.255Z", - "current_statistic_types": [ - - ], - "data_availability": null, - "document_id": "EITI Structured Data - Kyrgyz Republic", - "name": "EITI pilot data - Kyrgyz Republic", - "overview": null, - "timeline_url": null, - "types": [ - "thirdParty", - "primaryResearch" - ], - "updated_at": "2019-08-23T09:28:30.255Z", - "url": "https://docs.google.com/spreadsheets/d/1OKl6oe6RbYicPIZEGljYZy29M06Pm4vGvGDoNzq6dV4/export?format=csv&gid=677766885" } ] } diff --git a/app/repositories/raw_data_record_repository.rb b/app/repositories/raw_data_record_repository.rb index e4244c24..fbb499ae 100644 --- a/app/repositories/raw_data_record_repository.rb +++ 
b/app/repositories/raw_data_record_repository.rb @@ -1,10 +1,20 @@ require 'ostruct' require 'register_sources_psc/repositories/company_record_repository' +require 'register_sources_sk/repositories/record_repository' +require 'register_sources_dk/repositories/deltagerperson_repository' require 'register_sources_bods/structs/identifier' class RawDataRecordRepository def initialize - @repository = RegisterSourcesPsc::Repositories::CompanyRecordRepository.new + @psc_repository = RegisterSourcesPsc::Repositories::CompanyRecordRepository.new + @sk_repository = RegisterSourcesSk::Repositories::RecordRepository.new + @dk_repository = RegisterSourcesDk::Repositories::DeltagerpersonRepository.new + + @repositories = [ + @psc_repository, + @sk_repository, + @dk_repository + ] end def all_for_entity(main_entity) @@ -37,22 +47,47 @@ def all_for_entity(main_entity) return [] if bods_identifiers.empty? - get_by_bods_identifiers(bods_identifiers.uniq) # .order_by(updated_at: :desc, created_at: :desc) + get_by_bods_identifiers(bods_identifiers.uniq).sort_by { |raw_record| raw_record_date(main_entity, raw_record) }.reverse + end + + def newest_for_entity_date(entity) + raw_record = all_for_entity(entity).first + raw_record_date(entity, raw_record) + end + + def oldest_for_entity_date(entity) + raw_record = all_for_entity(entity).last + raw_record_date(entity, raw_record) end def newest_for_entity(entity) - all_for_entity(entity).last + all_for_entity(entity).first end def oldest_for_entity(entity) - all_for_entity(entity).first + all_for_entity(entity).last end private - attr_reader :repository + attr_reader :psc_repository, :dk_repository, :sk_repository, :repositories def get_by_bods_identifiers(identifiers) - repository.get_by_bods_identifiers(identifiers) + repositories.map do |repository| + repository.get_by_bods_identifiers(identifiers) + end.flatten.compact + end + + def raw_record_date(entity, raw_record) + entity_date = (entity.bods_statement.source&.retrievedAt || 
entity.bods_statement.publicationDetails.publicationDate)&.to_date + + case raw_record + when RegisterSourcesDk::Deltagerperson + entity_date + when RegisterSourcesPsc::CompanyRecord + raw_record.data.notified_on.to_date + when RegisterSourcesSk::Record + entity_date + end end end diff --git a/app/service_objects/bods_statement_sorter.rb b/app/service_objects/bods_statement_sorter.rb new file mode 100755 index 00000000..19e83425 --- /dev/null +++ b/app/service_objects/bods_statement_sorter.rb @@ -0,0 +1,54 @@ +require 'register_sources_bods/enums/statement_types' + +class BodsStatementSorter + # accepts array of statements + # returns sorted by publication date, with any statements referred to by relationships + def sort_statements(statements) + statements = statements.sort_by { |statement| statement.publicationDetails&.publicationDate } + + statements_by_id = statements.to_h { |statement| [statement.statementID, statement] } + + used_ids = Set.new + new_statements = [] + + while new_statements.length < statements.length + current_new_statement_count = new_statements.length + + statements.each do |statement| + next if used_ids.include?(statement.statementID) + + replaced_ids = statement.replacesStatements || [] + + dependent_ids = + case statement.statementType + when RegisterSourcesBods::StatementTypes['personStatement'], RegisterSourcesBods::StatementTypes['entityStatement'] + [] + when RegisterSourcesBods::StatementTypes['ownershipOrControlStatement'] + [ + statement.subject&.describedByEntityStatement, + statement.interestedParty&.describedByEntityStatement, + statement.interestedParty&.describedByPersonStatement + ].compact + end + + all_dependent = (replaced_ids + dependent_ids).compact.uniq + + all_dependencies_satisfied = all_dependent.all? { |dependency_id| used_ids.include? 
dependency_id } + + next unless all_dependencies_satisfied + + new_statements << statement + used_ids << statement.statementID + end + + if current_new_statement_count == new_statements.count + # This only happens when the level limiting means that there are relationship statements + # without the entity dependency being in this statement list + # In this scenario, these relationship statements should be skipped, so just stop here + break + end + end + + new_statements + end +end diff --git a/app/views/data_sources/index.haml b/app/views/data_sources/index.haml index b215080d..3a277d99 100644 --- a/app/views/data_sources/index.haml +++ b/app/views/data_sources/index.haml @@ -9,4 +9,4 @@ - @data_sources.each do |data_source| %h2= data_source.name = sanitize data_source.short_overview - = link_to "Read more", "data_sources/#{data_source.id}", class: 'btn btn-primary' + = link_to "Read more", "data_sources/#{data_source.slug}", class: 'btn btn-primary' diff --git a/app/views/entities/show.html.haml b/app/views/entities/show.html.haml index d4f1e86e..2c0ec27e 100644 --- a/app/views/entities/show.html.haml +++ b/app/views/entities/show.html.haml @@ -30,7 +30,7 @@ .col-lg-4.header-sep .entity-header-meta.meta - if @sentity.natural_person? 
- - @sentity.country.try do |country_of_residence| + - @sentity.country_of_residence.try do |country_of_residence| %h6= t(".fields.country_of_residence") %p= country_of_residence - @sentity.date_of_birth.presence.try do |date_of_birth| diff --git a/app/views/pages/download.html.haml b/app/views/pages/download.html.haml index ce04bf2f..0f426cd6 100644 --- a/app/views/pages/download.html.haml +++ b/app/views/pages/download.html.haml @@ -30,7 +30,7 @@ %p Links to the most recent files we've exported: %ul - @exports.each do |export| - %li= link_to export.created_at.to_date, "https://oo-register-production.s3-eu-west-1.amazonaws.com/public/exports/statements.#{export.created_at.iso8601}.jsonl.gz", rel: 'nofollow' + %li= link_to export.created_at.to_date, "https://#{ENV.fetch('BODS_EXPORT_S3_BUCKET_NAME')}.s3.#{ENV.fetch('BODS_EXPORT_AWS_REGION', 'eu-west-1')}.amazonaws.com/#{export.s3_path}", rel: 'nofollow' %h2{ id: 'what-is-it' } What is this data? %p @@ -260,7 +260,7 @@ %h5.card-title Latest data %p Exported: #{@exports.first.created_at.to_date} %p - = link_to 'https://oo-register-production.s3-eu-west-1.amazonaws.com/public/exports/statements.latest.jsonl.gz', rel: 'nofollow', class: 'btn btn-primary', id: 'latest-data' do + = link_to "https://#{ENV.fetch('BODS_EXPORT_S3_BUCKET_NAME')}.s3.#{ENV.fetch('BODS_EXPORT_AWS_REGION', 'eu-west-1')}.amazonaws.com/#{@exports.first.s3_path}", rel: 'nofollow', class: 'btn btn-primary', id: 'latest-data' do Download   .card .card-block diff --git a/app/views/searches/show.html.haml b/app/views/searches/show.html.haml index dce1ea69..31f3b82a 100644 --- a/app/views/searches/show.html.haml +++ b/app/views/searches/show.html.haml @@ -26,17 +26,17 @@ %p= t(".data_sources_preface", count: number_with_delimiter(@legal_entity_count)) %ul.list-unstyled - if @data_sources['uk-psc-register'].present? 
- %li= link_to('UK PSC Register', data_source_path(@data_sources['uk-psc-register'])) + %li= link_to('UK PSC Register', data_source_path(@data_sources['uk-psc-register'].slug)) - else %li= link_to('UK PSC Register', 'https://find-and-update.company-information.service.gov.uk/', target: '_blank', rel: 'noopener') - if @data_sources['dk-cvr-register'].present? - %li= link_to('Denmark Central Business Register (CVR)', data_source_path(@data_sources['dk-cvr-register'])) + %li= link_to('Denmark Central Business Register (CVR)', data_source_path(@data_sources['dk-cvr-register'].slug)) - else %li= link_to('Denmark Central Business Register (CVR)', 'https://datacvr.virk.dk/', target: '_blank', rel: 'noopener') - if @data_sources['sk-rpvs-register'].present? - %li= link_to('Slovakia Public Sector Partners Register', data_source_path(@data_sources['sk-rpvs-register'])) + %li= link_to('Slovakia Public Sector Partners Register', data_source_path(@data_sources['sk-rpvs-register'].slug)) - else %li= link_to('Slovakia Public Sector Partners Register', 'https://rpvs.gov.sk/rpvs', target: '_blank', rel: 'noopener') diff --git a/app/views/shared/_raw_data_record.html.haml b/app/views/shared/_raw_data_record.html.haml index a94db05a..199297c1 100644 --- a/app/views/shared/_raw_data_record.html.haml +++ b/app/views/shared/_raw_data_record.html.haml @@ -1,4 +1,4 @@ -.row.raw_data_record{ id: "raw_data_record_#{raw_record.data.etag}" } +.row.raw_data_record{ id: "raw_data_record_#{raw_record.respond_to?(:etag) ? raw_record.etag : raw_record.data.etag}" } .col-lg-8 .raw-data-records %pre.pre-scrollable= JSON.pretty_generate(raw_record.to_h) @@ -7,4 +7,4 @@ .frame-wrap %h6= t("raw_data_records.show.fields.etag") %p - %code= raw_record.data.etag + %code= raw_record.respond_to?(:etag) ? 
raw_record.etag : raw_record.data.etag diff --git a/bin/configure-dev-lib b/bin/configure-dev-lib index 1add3bed..7e801744 100755 --- a/bin/configure-dev-lib +++ b/bin/configure-dev-lib @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -Eeuo pipefail -libs=(register-common register-sources-bods register-sources-oc register-sources-psc) +libs=(register-common register-sources-bods register-sources-oc register-sources-psc register-sources-sk register-sources-dk) echo -e "\n=== Configuring dev libs" diff --git a/docker-compose.yml b/docker-compose.yml index 36196cc4..034f2696 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,8 @@ services: - ${DC_WEB_LIB_REGISTER_SOURCES_BODS:-.dockerempty}:/home/x/lib/register-sources-bods - ${DC_WEB_LIB_REGISTER_SOURCES_OC:-.dockerempty}:/home/x/lib/register-sources-oc - ${DC_WEB_LIB_REGISTER_SOURCES_PSC:-.dockerempty}:/home/x/lib/register-sources-psc + - ${DC_WEB_LIB_REGISTER_SOURCES_DK:-.dockerempty}:/home/x/lib/register-sources-dk + - ${DC_WEB_LIB_REGISTER_SOURCES_SK:-.dockerempty}:/home/x/lib/register-sources-sk - ./app:/home/x/r/app - ./bin:/home/x/r/bin - ./config:/home/x/r/config