Skip to content

Working... Verificações de nomes

Peter edited this page Jun 1, 2020 · 9 revisions

As consultas abaixo permitem avaliar e normalizar nomes de via e códigos de estradas municipais.

Ver https://github.com/ppKrauss/sql-term/tree/master/src/sql_mode1

create schema working ;

CREATE VIEW working.vw_osm_roads_names AS
  -- para recuperar nomes de rua ou estrada 
  SELECT DISTINCT  
       tags->>'ref' as ref, 
       CASE WHEN tags->>'ref'!=tags->>'name' THEN tags->>'name' ELSE '' END as name
       -- dispensa coalesce pois "WHEN 1!=NULL THEN 1 ELSE 2" resulta em 2. 
  FROM planet_osm_roads 
  WHERE not(tags?'boundary')
;
CREATE VIEW working.vw_osm_cod_estrada AS
  -- Como não são padronizados, conferir o prefixo municipal conforme st_intersects(geom_segmento,geom_municipio).
  SELECT ref 
  FROM (
    select distinct upper(trim(regexp_split_to_table(ref,'[;/]'))) as ref  
    from working.vw_osm_roads_names
  ) t 
  WHERE ref ~ '^[A-Z]{2,3}\-\d+$'  -- municipais {3,3}
;
-- nomes de via, muitos errados,
-- select DISTINCT name from working.vw_osm_roads_names; -- "salida", "BR-123", 
-- Prospecção de prefixos:
-- select DISTINCT (regexp_split_to_array(name,'\s'))[1] from working.vw_osm_roads_names; -- "salida", "BR-123", 

-- Obtendo pelos pontos:
;
CREATE VIEW working.vw_osm_addrstreet  AS
 SELECT name
   FROM (
     SELECT DISTINCT tags->>'addr:street' AS name 
     FROM planet_osm_point
   ) t 
 WHERE name !~ '\d\d|[;/]' -- evita códigos como br-116 ou "nome; outro nome", etc.
;
 ---- FINAL 
CREATE FUNCTION working.via_name_split(name text) RETURNS text[] AS $f$
  SELECT regexp_match(
    $1, 
	'^(Acesso|Alameda|Alça|Anel|Avenida|Caminho|Córrego|Estrada|Estrada Municipal|Estrada Vicinal|Linha|Ponte|Rodoanel|Rodovia|Rodovia Estadual|Rodovia Municipal|Rodovia Vicinal|Rua|Saída|Trevo|Viaduto|Vicinal|Vila|Al\.?|Av\.?|Avnd\.?|Est\.?|Estr\.?|Pç\.?|Pça\.?|Pnte\.?|R\.?|Rod\.?|Rod\.? vic\.?|Trv\.?|Tv\.?)\s+(.+)$'
	)
$f$ LANGUAGE SQL IMMUTABLE;

CREATE VIEW working.vw_osm_via_name AS  
 SELECT p[1] AS via_name_prefix, p[2] as via_name
 FROM (
   SELECT DISTINCT  working.via_name_split(name) AS p
   FROM  working.vw_osm_addrstreet
   UNION 
   SELECT DISTINCT  working.via_name_split(name)
   FROM working.vw_osm_roads_names 
 ) t
 WHERE p IS NOT NULL;

-- por fim registram-se os nomes novos e velhos, agregando citação de fonte com 
-- SELECT tstore.upsert(via_name,x,y,z) FROM working.vw_osm_via_name;

para estudos de segunda palavra no prefixo, "estrada municipal", "estrada vicinal", etc. conferir em

CREATE VIEW working.vw_prefix_prospect AS  
 SELECT pref
 FROM (
   select DISTINCT (regexp_split_to_array(name,'\s'))[1]
   from working.vw_osm_roads_names
 ) t(pref)
 WHERE pref !~ '\d'

 UNION 
 
 SELECT pref
 FROM (
   select DISTINCT (regexp_split_to_array(name,'\s'))[1]
   from working.vw_osm_addrstreet
 ) t(pref)
 WHERE pref !~ '\d'
;

select DISTINCT a[2] as second_prefix 
from (
  select DISTINCT regexp_split_to_array(name,'\s') 
  from working.vw_osm_roads_names
) t(a) 
WHERE lower(a[1]) IN ('viaduto', 'ponte', 'saída', 'rua', 'caminho', 'linha', 'acesso', 'avenida', 'estrada', 'rodoanel', 'rodovia', 'vicinal', 'alameda', 'trevo', 'viaduto', 'pnte.', 'r.', 'av.', 'est.', 'rod.', 'ant.', 'rod. vic.', 'al.','pnte', 'r', 'av', 'est', 'rod', 'ant', 'rod. vic', 'al')
;

Preparo e upsert dos termos

CREATE EXTENSION IF NOT EXISTS unaccent; -- for unaccent()
CREATE EXTENSION IF NOT EXISTS fuzzyStrMatch; -- for metaphone() and levenshtein()
-- CREATE EXTENSION IF NOT EXISTS pgCrypto; -- for SHA1 and crc32.
CREATE EXTENSION IF NOT EXISTS file_fdw;
CREATE SERVER IF NOT EXISTS files FOREIGN DATA WRAPPER file_fdw;

DROP FOREIGN TABLE IF EXISTS tstore.ingest_ns_viaNamePrefix CASCADE;
CREATE FOREIGN TABLE tstore.ingest_ns_viaNamePrefix (
	nsid int,
	term text,
	fk_canonic text,
	is_canonic boolean,
	is_pref boolean
) SERVER files OPTIONS (
	filename '/tmp/pg_io/ns_viaNamePrefix.csv',
	format 'csv',
	header 'true'
);
----------

INSERT INTO tstore.source (name,jinfo) VALUES 
  ('OSM_stable2018-10-02','{"original_file":"brazil-latest.osm.pbf","SHA256d":"477b9c42709f3fd7f5e44dcb38752c1d4f4ef132b2cf39fa1015a742934b42db"}'),
  ('a4f_via_nameprefix2020-05-30','{"original_file":"ns_viaNamePrefix.csv"}')
; -- ... estranho, usando potencias de dois, deu  2 | osm_stable2018-10-02; 4 | a4f_via_nameprefix2020-05-30

-- Inserindo prefixos:
SELECT tstore.upsert(term,nsid,NULL,is_canonic,null,false, is_pref, 4) 
FROM tstore.ingest_ns_viaNamePrefix
WHERE NOT(is_canonic)
;
-- tlib.n2id(fk_canonic,1) conferir nsid
SELECT tstore.upsert(term, nsid,NULL, is_canonic, tlib.n2id(fk_canonic,1), false, is_pref, 4) 
FROM tstore.ingest_ns_viaNamePrefix
WHERE NOT(is_canonic) AND tlib.n2id(fk_canonic,1) IS NOT NULL
;
-----
CREATE TABLE tstore.nome_via (
 id  int  NOT null primary key,
 prefix_id  int NOT NULL  REFERENCES tstore.term(id), -- CHECK nsid=1
 name_id    int NOT NULL  REFERENCES tstore.term(id), -- CHECK nsid=2
 UNIQUE(prefix_id,name_id)
);

SELECT count(*) FROM (
    SELECT tstore.upsert(via_name,2 ,NULL, true,NULL, false, NULL,4) as name_id
    FROM working.vw_osm_via_name
    WHERE via_name IS NOT NULl AND via_name_prefix IS NOT NULL AND length(via_name)>1
) t where name_id is null
; --  1854

INSERT INTO tstore.nome_via(prefix_id,name_id)
  SELECT distinct prefix_id, name_id FROM (
    SELECT tlib.n2id(via_name_prefix,5) as prefix_id,
           tlib.n2id(via_name,2) as name_id
    FROM working.vw_osm_via_name
  ) t 
  WHERE prefix_id is not null AND name_id IS NOT NULL
;