Skip to content

Commit

Permalink
Cria Base Ptio e adiciona 3 municípios do RJ
Browse files Browse the repository at this point in the history
  • Loading branch information
slfabio committed Sep 16, 2024
1 parent 576492e commit 07cc75b
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 0 deletions.
44 changes: 44 additions & 0 deletions data_collection/gazette/spiders/base/ptio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import dateparser
import scrapy

from gazette.items import Gazette
from gazette.spiders.base import BaseGazetteSpider


class BasePtioSpider(BaseGazetteSpider):
    """Base spider for gazette listing pages that use the "Ptio" layout.

    Subclasses are expected to provide ``BASE_URL`` (the first listing page).
    ``start_date`` and ``end_date`` are read from the instance to bound the
    collected period.
    """

    def start_requests(self):
        """Yield the request for the first listing page (``BASE_URL``)."""
        yield scrapy.Request(self.BASE_URL)

    def parse(self, response):
        """Yield one ``Gazette`` per listed edition, then follow pagination.

        Entries dated after ``end_date`` are skipped. The first entry dated
        before ``start_date`` ends the crawl (no further page is requested);
        this presumably relies on the listing being ordered newest first.
        Entries whose date, edition number or link cannot be extracted are
        logged and skipped instead of aborting the rest of the page.
        """
        for gazette_div in response.xpath("//div[@class='edicoes']"):
            raw_gazette_date = gazette_div.xpath(
                ".//div[@class='data-caderno hidden-phone']/text()"
            ).get()
            # dateparser.parse() rejects None and returns None for text it
            # cannot interpret, so both cases are handled before .date().
            # The language is pinned to Portuguese so that numeric dates are
            # read with that locale's day/month order instead of a guess.
            parsed_date = (
                dateparser.parse(raw_gazette_date, languages=["pt"])
                if raw_gazette_date
                else None
            )
            if parsed_date is None:
                self.logger.warning(
                    "Skipping entry with unparseable date %r at %s",
                    raw_gazette_date,
                    response.url,
                )
                continue
            gazette_date = parsed_date.date()

            if gazette_date > self.end_date:
                continue
            if gazette_date < self.start_date:
                return

            gazette_edition = gazette_div.xpath(
                ".//span[@class='edicao']/strong/text()"
            ).get()
            # The edition number is the second whitespace-separated token of
            # the label; dots are stripped from it.
            edition_tokens = (gazette_edition or "").split()
            if len(edition_tokens) < 2:
                self.logger.warning(
                    "Skipping entry dated %s with unexpected edition text %r at %s",
                    gazette_date,
                    gazette_edition,
                    response.url,
                )
                continue
            gazette_edition_number = edition_tokens[1].replace(".", "")

            sub_dir = gazette_div.xpath(".//button[1]/@href").get()
            # Only the query string of the button link is kept; it is then
            # resolved against the current page URL.
            _, separator, query = (sub_dir or "").partition("?")
            if not separator:
                self.logger.warning(
                    "Skipping entry dated %s with unexpected link %r at %s",
                    gazette_date,
                    sub_dir,
                    response.url,
                )
                continue
            gazette_url = response.urljoin(f"?{query}")

            yield Gazette(
                date=gazette_date,
                edition_number=gazette_edition_number,
                is_extra_edition=False,
                file_urls=[gazette_url],
                power="executive",
            )

        next_page = response.xpath(
            "//ul[@class='paginacao']//a[@class='proximo']/@href"
        )
        if next_page:
            yield scrapy.Request(response.urljoin(next_page.get()))
11 changes: 11 additions & 0 deletions data_collection/gazette/spiders/rj/rj_areal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from datetime import date

from gazette.spiders.base.ptio import BasePtioSpider


class RjArealSpider(BasePtioSpider):
    """Gazette spider configuration for Areal (RJ)."""

    # Spider identity.
    name = "rj_areal"
    TERRITORY_ID = "3300225"

    # Earliest gazette date this spider collects.
    start_date = date(2006, 8, 1)

    # Where to crawl.
    BASE_URL = "http://rj.portaldatransparencia.com.br/prefeitura/areal/"
    allowed_domains = ["portaldatransparencia.com.br"]
13 changes: 13 additions & 0 deletions data_collection/gazette/spiders/rj/rj_comendador_levy_gasparian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from datetime import date

from gazette.spiders.base.ptio import BasePtioSpider


class RjComendadorLevyGasparianSpider(BasePtioSpider):
    """Gazette spider configuration for Comendador Levy Gasparian (RJ)."""

    # Spider identity.
    name = "rj_comendador_levy_gasparian"
    TERRITORY_ID = "3300951"

    # Earliest gazette date this spider collects.
    start_date = date(2013, 11, 26)

    # Where to crawl.
    BASE_URL = (
        "http://rj.portaldatransparencia.com.br/prefeitura/comendadorlevygasparian/"
    )
    allowed_domains = ["portaldatransparencia.com.br"]
11 changes: 11 additions & 0 deletions data_collection/gazette/spiders/rj/rj_sapucaia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from datetime import date

from gazette.spiders.base.ptio import BasePtioSpider


class RjSapucaiaSpider(BasePtioSpider):
    """Gazette spider configuration for Sapucaia (RJ)."""

    # Spider identity.
    name = "rj_sapucaia"
    TERRITORY_ID = "3305406"

    # Earliest gazette date this spider collects.
    start_date = date(2019, 1, 16)

    # Where to crawl.
    BASE_URL = "http://rj.portaldatransparencia.com.br/prefeitura/sapucaia/"
    allowed_domains = ["portaldatransparencia.com.br"]

0 comments on commit 07cc75b

Please sign in to comment.