Skip to content

LIXO, municipios

Peter edited this page Oct 29, 2018 · 12 revisions

A tabela primária de identificação de municípios é a http://datasets.ok.org.br/city-codes A partir dela simplesmente verificamos se há ou não consistência no OSM. A contagem de 5570 mostra que sim, há consistência.

name_path osm_id n tps km2 km
MG/SantaCruzMinas 315008 1 {ST_Polygon} 4 10
PA/Altamira 185554 1 {ST_Polygon} 159525 3296
PR/Curitiba 297514 1 {ST_Polygon} 434 125
RJ/AngraReis 2217370 134 {ST_Polygon} 814 605
SC/JaraguaSul 296625 1 {ST_Polygon} 531 177
SP/MonteiroLobato 298450 1 {ST_Polygon} 332 99
SP/SaoPaulo 298285 1 {ST_Polygon} 1524 328
name_path osm_id n tps km n_lines
MG/SantaCruzMinas 315008 1 {ST_LineString} 10 1
PA/Altamira 185554 1 {ST_LineString} 3296 30
PR/Curitiba 297514 1 {ST_LineString} 125 2
RJ/AngraReis 2217370 1 {ST_LineString} 605 137
SC/JaraguaSul 296625 1 {ST_LineString} 177 2
SP/MonteiroLobato 298450 1 {ST_LineString} 99 1
SP/SaoPaulo 298285 1 {ST_LineString} 328 4
CREATE SCHEMA IF NOT EXISTS stable; -- OSM BR Stable

-- os códigos IBGE de municipio são prefixados pela UF:
SELECT substr("idIBGE"::text,1,2) uf, count(*) n 
FROM mvw_br_city_codes group by 1 order by 1;

-- ... mas cuidado, os prefixos não podem ser cortados:
SELECT count(*) as duplicacoes, avg(n) multiplicidade_media 
FROM( 
  SELECT substr("idIBGE"::text,2,5)::int, count(*) n 
  FROM mvw_br_city_codes group by 1 having count(*)>2
) t; --       395 |   3.46 

-- ...e também cuidado com as lines que duplicam relations, 
-- option osm2pgsql "multi-geometry" só incide em relations
-- select count(*) n, osm_id from planet_osm_line group by  2  having count(*)>1;

SELECT count(*) 
FROM planet_osm_rels  
WHERE tags->>'IBGE:GEOCODIGO' IN (
 SELECT "idIBGE"::text from mvw_br_city_codes
); -- =  5570  -- indiferente AND tags->>'type'='boundary'

SELECT count(*) n, tags->>'admin_level' admin_level, 
       length(tags->>'IBGE:GEOCODIGO') cod_len
FROM planet_osm_rels 
WHERE tags->>'IBGE:GEOCODIGO' IN (
 SELECT "idIBGE"::text from mvw_br_city_codes
) GROUP BY 2,3
; -- =  5570 | 8 | 7

SELECT count(*) n, tags->>'admin_level' admin_level, 
       length(tags->>'IBGE:GEOCODIGO') cod_len
FROM planet_osm_polygon 
WHERE tags->>'IBGE:GEOCODIGO' IN (
 SELECT "idIBGE"::text from mvw_br_city_codes
) GROUP BY 2,3
; -- =  5570 | 8 | 7;     1 | NULL | 7.

-----
CREATE or replace FUNCTION stable.lexname_to_path( p_lexname text ) RETURNS text AS $f$
  SELECT string_agg(initcap(t),'') 
  FROM regexp_split_to_table(p_lexname, E'[\\.\\s]+') t
$f$ LANGUAGE SQL IMMUTABLE;


CREATE TABLE stable.city_test_names AS
  SELECT unnest(
    '{PR/Curitiba,SC/JaraguaSul,SP/MonteiroLobato,MG/SantaCruzMinas,SP/SaoPaulo,PA/Altamira,RJ/AngraReis}'::text[]
  ) name_path
;

CREATE or replace FUNCTION stable.getcity_rels_id(
  p_cod_ibge text  -- código IBGE do município, wikidata-id, lex-name ou path-name
  ,p_admin_level text default '8'
) RETURNS bigint AS $f$
 SELECT id
 FROM planet_osm_rels
 WHERE tags->>'admin_level'=p_admin_level AND CASE
	  WHEN substr(p_cod_ibge,1,1)='Q' THEN p_cod_ibge=tags->>'wikidata' 
	  WHEN substr(p_cod_ibge,3,1) IN (':','-',';') THEN (
	    SELECT "idIBGE"::text FROM mvw_br_city_codes 
	    WHERE upper(substr(p_cod_ibge,1,2))=state AND substr(lower(p_cod_ibge),4)="lexLabel"
	  ) = tags->>'IBGE:GEOCODIGO'
	  WHEN substr(p_cod_ibge,3,1)='/' THEN (
	    SELECT "idIBGE"::text FROM mvw_br_city_codes 
	    WHERE upper(substr(p_cod_ibge,1,2))=state AND substr(p_cod_ibge,4)=stable.lexname_to_path("lexLabel")
	  ) = tags->>'IBGE:GEOCODIGO'
	  WHEN length(p_cod_ibge)=7 THEN p_cod_ibge=tags->>'IBGE:GEOCODIGO'
	  ELSE p_cod_ibge::bigint = (tags->>'IBGE:GEOCODIGO')::bigint
  END
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.getcity_rels_id(
  p_cod_ibge bigint  -- código IBGE do município
  ,p_admin_level text default '8'
) RETURNS bigint AS $wrap$
   SELECT stable.getcity_rels_id($1::text,$2)
$wrap$ LANGUAGE SQL IMMUTABLE;


/*- - 
 select stable.getcity_rels_id('4304408');
 select stable.getcity_rels_id('rS-canEla'); -- case ins.
 select stable.getcity_rels_id('RS/Canela'); -- case sens.
 select stable.getcity_rels_id('Q995318');
 select stable.getcity_rels_id('0004304408');
 select stable.getcity_rels_id('SP/SaoCarlos');
*/

CREATE or replace FUNCTION stable.getcity_polygon_geom(
  p_cod_ibge text  -- código IBGE do município. Completo ou parcial.
  ,p_admin_level text default '8'
) RETURNS geometry AS $f$
 SELECT way
 FROM planet_osm_polygon
 WHERE osm_id = stable.getcity_rels_id(p_cod_ibge,$2)
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.getcity_line_geom(
  p_cod_ibge text  -- código IBGE do município. Completo ou parcial.
  ,p_admin_level text default '8'
) RETURNS geometry AS $f$
 SELECT way
 FROM planet_osm_line
 WHERE -osm_id = stable.getcity_rels_id(p_cod_ibge,$2)
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.osm_to_jsonb_remove_prefix(jsonb,text default 'name:') RETURNS text[] AS $f$
  SELECT COALESCE((
    SELECT array_agg(t) 
    FROM jsonb_object_keys($1) t 
    WHERE position($2 in t)=1
  ), '{}'::text[])
$f$ LANGUAGE SQL IMMUTABLE;

/*
-- tags essenciais:
SELECT munic.name_path, t2.id, 
       t2.tags 
        - stable.osm_to_jsonb_remove() 
        - stable.osm_to_jsonb_remove_prefix(t2.tags,'name:')
        - array['flag','website']
       AS tags_clean
FROM stable.city_test_names munic, LATERAL (
  SELECT id, tags 
  FROM planet_osm_rels 
  WHERE id=stable.getcity_rels_id(munic.name_path)
) t2
;

-- verificando lixo ST_CollectionExtract() nada... 
SELECT munic.name_path, t2.osm_id,name_path, t2.osm_id,st_astext(sc),ST_GeometryType(way)
FROM stable.city_test_names munic, LATERAL (
  SELECT -osm_id osm_id, ST_CollectionExtract(way,1) sc, way 
  FROM planet_osm_polygon WHERE -osm_id=stable.getcity_rels_id(munic.name_path)
) t2
;
-- verificando ST_dump() polygons... 
SELECT munic.name_path, t2.osm_id, t3.n, t3.tps, 
       round(st_area(t2.way,true)/1000000.0) km2, 
       round(st_perimeter(t2.way,true)/1000.0) km,
       (SELECT members->'admin_centre-n'->0 FROM planet_osm_rels WHERE id=t2.osm_id) admin_centre_node
FROM stable.city_test_names munic, 
LATERAL (
  SELECT -osm_id osm_id, way 
  FROM planet_osm_polygon 
  WHERE -osm_id=stable.getcity_rels_id(munic.name_path)
) t2,
LATERAL (
  SELECT array_agg(DISTINCT ST_GeometryType(tt.geom)) tps, COUNT(tt.geom) n 
  FROM ST_Dump(t2.way) tt
) t3; 

-- verificando ST_dump() lines...
SELECT munic.name_path, 
       t2.osm_id, t3.n, t3.tps, 
       round(sum(st_length(t2.way,true)/1000.0)) km, count(*) n_lines
FROM stable.city_test_names munic, 
LATERAL (
  SELECT -osm_id osm_id, way 
  FROM planet_osm_line
  WHERE -osm_id=stable.getcity_rels_id(munic.name_path)
) t2,
LATERAL (
  SELECT array_agg(DISTINCT ST_GeometryType(tt.geom)) tps, COUNT(tt.geom) n 
  FROM ST_Dump(t2.way) tt
) t3
GROUP BY 1,2,3,4
ORDER BY 1,2,3,4
;
*/

Inits

CREATE SCHEMA IF NOT EXISTS stable;

CREATE or replace FUNCTION stable.osm_to_jsonb_remove() RETURNS text[] AS $f$
   SELECT array['osm_uid','osm_user','osm_version','osm_changeset','osm_timestamp'];     
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.osm_to_jsonb(text[]) RETURNS JSONb AS $f$
   SELECT jsonb_object($1) - stable.osm_to_jsonb_remove()
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.osm_to_jsonb(hstore) RETURNS JSONb AS $f$
   SELECT hstore_to_jsonb_loose($1) - stable.osm_to_jsonb_remove()
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.jsonb_members(jsonb) RETURNS jsonb AS $f$
  SELECT jsonb_object_agg(tg,val)
  FROM (
    SELECT v ||'-'||substr(k,1,1) as tg,  jsonb_agg(substr(k,2)::bigint) as val
    FROM jsonb_each_text($1) t(k,v)
    GROUP BY 1
  ) t
$f$ LANGUAGE SQL IMMUTABLE;

alter table planet_osm_line alter column tags type jsonb USING stable.osm_to_jsonb(tags);
alter table planet_osm_polygon alter column tags type jsonb USING stable.osm_to_jsonb(tags); 
alter table planet_osm_rels alter column tags type jsonb USING stable.osm_to_jsonb(tags);
alter table planet_osm_rels alter column members type jsonb USING stable.jsonb_members(jsonb_object(members));

Teste da geometria

Conferir validade das áreas (buscar tabelas do IBGE e Embrapa) e uso de pontos interiores (conversão GeoJSON deveria excluir o ponto interior).