-
Notifications
You must be signed in to change notification settings - Fork 5
Working... Verificações de nomes
Peter edited this page Jun 1, 2020
·
9 revisions
As consultas abaixo permitem avaliar e normalizar nomes de via e códigos de estradas municipais.
Ver https://github.com/ppKrauss/sql-term/tree/master/src/sql_mode1
create schema working ;
CREATE VIEW working.vw_osm_roads_names AS
-- para recuperar nomes de rua ou estrada
SELECT DISTINCT
tags->>'ref' as ref,
CASE WHEN tags->>'ref'!=tags->>'name' THEN tags->>'name' ELSE '' END as name
-- dispensa coalesce pois "WHEN 1!=NULL THEN 1 ELSE 2" resulta em 2.
FROM planet_osm_roads
WHERE not(tags?'boundary')
;
CREATE VIEW working.vw_osm_cod_estrada AS
-- Como não são padronizados, conferir o prefixo municipal conforme st_intersects(geom_segmento,geom_municipio).
SELECT ref
FROM (
select distinct upper(trim(regexp_split_to_table(ref,'[;/]'))) as ref
from working.vw_osm_roads_names
) t
WHERE ref ~ '^[A-Z]{2,3}\-\d+$' -- municipais {3,3}
;
-- nomes de via, muitos errados,
-- select DISTINCT name from working.vw_osm_roads_names; -- "salida", "BR-123",
-- Prospecção de prefixos:
-- select DISTINCT (regexp_split_to_array(name,'\s'))[1] from working.vw_osm_roads_names; -- "salida", "BR-123",
-- Obtendo pelos pontos:
;
CREATE VIEW working.vw_osm_addrstreet AS
SELECT name
FROM (
SELECT DISTINCT tags->>'addr:street' AS name
FROM planet_osm_point
) t
WHERE name !~ '\d\d|[;/]' -- evita códigos como br-116 ou "nome; outro nome", etc.
;
---- FINAL
CREATE FUNCTION working.via_name_split(name text) RETURNS text[] AS $f$
SELECT regexp_match(
$1,
'^(Acesso|Alameda|Alça|Anel|Avenida|Caminho|Córrego|Estrada|Estrada Municipal|Estrada Vicinal|Linha|Ponte|Rodoanel|Rodovia|Rodovia Estadual|Rodovia Municipal|Rodovia Vicinal|Rua|Saída|Trevo|Viaduto|Vicinal|Vila|Al\.?|Av\.?|Avnd\.?|Est\.?|Estr\.?|Pç\.?|Pça\.?|Pnte\.?|R\.?|Rod\.?|Rod\.? vic\.?|Trv\.?|Tv\.?)\s+(.+)$'
)
$f$ LANGUAGE SQL IMMUTABLE;
CREATE VIEW working.vw_osm_via_name AS
SELECT p[1] AS via_name_prefix, p[2] as via_name
FROM (
SELECT DISTINCT working.via_name_split(name) AS p
FROM working.vw_osm_addrstreet
UNION
SELECT DISTINCT working.via_name_split(name)
FROM working.vw_osm_roads_names
) t
WHERE p IS NOT NULL;
-- por fim registram-se os nomes novos e velhos, agregando citação de fonte com
-- SELECT tstore.upsert(via_name,x,y,z) FROM working.vw_osm_via_name;
para estudos de segunda palavra no prefixo, "estrada municipal", "estrada vicinal", etc. conferir em
CREATE VIEW working.vw_prefix_prospect AS
SELECT pref
FROM (
select DISTINCT (regexp_split_to_array(name,'\s'))[1]
from working.vw_osm_roads_names
) t(pref)
WHERE pref !~ '\d'
UNION
SELECT pref
FROM (
select DISTINCT (regexp_split_to_array(name,'\s'))[1]
from working.vw_osm_addrstreet
) t(pref)
WHERE pref !~ '\d'
;
select DISTINCT a[2] as second_prefix
from (
select DISTINCT regexp_split_to_array(name,'\s')
from working.vw_osm_roads_names
) t(a)
WHERE lower(a[1]) IN ('viaduto', 'ponte', 'saída', 'rua', 'caminho', 'linha', 'acesso', 'avenida', 'estrada', 'rodoanel', 'rodovia', 'vicinal', 'alameda', 'trevo', 'viaduto', 'pnte.', 'r.', 'av.', 'est.', 'rod.', 'ant.', 'rod. vic.', 'al.','pnte', 'r', 'av', 'est', 'rod', 'ant', 'rod. vic', 'al')
;
CREATE EXTENSION IF NOT EXISTS unaccent; -- for unaccent()
CREATE EXTENSION IF NOT EXISTS fuzzyStrMatch; -- for metaphone() and levenshtein()
-- CREATE EXTENSION IF NOT EXISTS pgCrypto; -- for SHA1 and crc32.
CREATE EXTENSION IF NOT EXISTS file_fdw;
CREATE SERVER IF NOT EXISTS files FOREIGN DATA WRAPPER file_fdw;
DROP FOREIGN TABLE IF EXISTS tstore.ingest_ns_viaNamePrefix CASCADE;
CREATE FOREIGN TABLE tstore.ingest_ns_viaNamePrefix (
nsid int,
term text,
fk_canonic text,
is_canonic boolean,
is_pref boolean
) SERVER files OPTIONS (
filename '/tmp/pg_io/ns_viaNamePrefix.csv',
format 'csv',
header 'true'
);
----------
INSERT INTO tstore.source (name,jinfo) VALUES
('OSM_stable2018-10-02','{"original_file":"brazil-latest.osm.pbf","SHA256d":"477b9c42709f3fd7f5e44dcb38752c1d4f4ef132b2cf39fa1015a742934b42db"}'),
('a4f_via_nameprefix2020-05-30','{"original_file":"ns_viaNamePrefix.csv"}')
; -- ... estranho, usando potencias de dois, deu 2 | osm_stable2018-10-02; 4 | a4f_via_nameprefix2020-05-30
-- Inserindo prefixos:
SELECT tstore.upsert(term,nsid,NULL,is_canonic,null,false, is_pref, 4)
FROM tstore.ingest_ns_viaNamePrefix
WHERE NOT(is_canonic)
;
-- tlib.n2id(fk_canonic,1) conferir nsid
SELECT tstore.upsert(term, nsid,NULL, is_canonic, tlib.n2id(fk_canonic,1), false, is_pref, 4)
FROM tstore.ingest_ns_viaNamePrefix
WHERE NOT(is_canonic) AND tlib.n2id(fk_canonic,1) IS NOT NULL
;
-----
CREATE TABLE tstore.nome_via (
id int NOT null primary key,
prefix_id int NOT NULL REFERENCES tstore.term(id), -- CHECK nsid=1
name_id int NOT NULL REFERENCES tstore.term(id), -- CHECK nsid=2
UNIQUE(prefix_id,name_id)
);
SELECT count(*) FROM (
SELECT tstore.upsert(via_name,2 ,NULL, true,NULL, false, NULL,4) as name_id
FROM working.vw_osm_via_name
WHERE via_name IS NOT NULl AND via_name_prefix IS NOT NULL AND length(via_name)>1
) t where name_id is null
; -- 1854
INSERT INTO tstore.nome_via(prefix_id,name_id)
SELECT distinct prefix_id, name_id FROM (
SELECT tlib.n2id(via_name_prefix,5) as prefix_id,
tlib.n2id(via_name,2) as name_id
FROM working.vw_osm_via_name
) t
WHERE prefix_id is not null AND name_id IS NOT NULL
;