4.1 Revisão
Obtenção dos nossos dados
library(tidyverse)
library(esaj)
library(glue)
library(abjutils)
# Root folder for everything this script downloads and parses.
path <- "data-raw/camaras"
# `path` is nested, so missing parent folders must be created as well.
# Without `recursive = TRUE` the call fails when "data-raw" does not exist,
# and `showWarnings = FALSE` would hide that failure. Warning suppression is
# kept so that re-running with an already existing folder stays silent.
dir.create(path, showWarnings = FALSE, recursive = TRUE)
Quais câmaras vamos baixar? OBS: usa um pouco de `stringr` e `dplyr`, que veremos adiante.
# Table of courts, read from a cached rds file. An alternative source is kept
# here, commented out:
# camaras <- cjsg_table("courts")
camaras <- read_rds("data/cjsg_camaras.rds")

# Ids of the courts whose name contains "Câmara" and ends with
# "Direito Criminal".
id_camaras <- pull(
  filter(camaras, str_detect(court, "Câmara.*Direito Criminal$")),
  id
)
Quantas decisões no total?
# Peek at the search before downloading anything: empty query, restricted to
# the selected courts and to the given registration date window.
peek_cjsg(
  query = "",
  courts = id_camaras,
  registration_start = "2017-12-01",
  registration_end = "2018-01-18"
)
Baixando decisões: CJSG
# Folder that receives the downloaded CJSG result pages.
cjsg_path <- glue("{path}/cjsg")

# Download the search results for the selected courts and registration window.
# `max_page = Inf` places no upper bound on the number of pages.
# NOTE(review): `wait = 0.8` is presumably a pause between requests, in
# seconds; confirm against the esaj documentation.
download_cjsg(
  query = "",
  path = cjsg_path,
  courts = id_camaras,
  registration_start = "2017-12-01",
  registration_end = "2018-01-18",
  max_page = Inf,
  wait = 0.8
)

# Full paths of the files in that folder whose names match "page".
cjsg_files <- list.files(cjsg_path, pattern = "page", full.names = TRUE)
# Parse the downloaded pages into one table.
d_cjsg <- cjsg_files %>%
  parse_cjsg()

# Cache the parsed table as a bz2-compressed rds file, then load it back from
# that file.
write_rds(
  d_cjsg,
  glue("{path}/d_cjsg.rds"),
  compress = "bz2"
)
d_cjsg <- glue("{path}/d_cjsg.rds") %>%
  read_rds()

# Overview of the columns and their types.
glimpse(d_cjsg)
#> Observations: 11,731
#> Variables: 14
#> $ file <chr> "data-raw/camaras/cjsg/page100.html", "data-ra...
#> $ id_page <chr> "1981", "1982", "1983", "1984", "1985", "1986"...
#> $ id_decision <chr> "11094999", "11093733", "11093677", "11093270"...
#> $ id_lawsuit <chr> "0057003-20.2017.8.26.0000", "0052762-03.2017....
#> $ class_subject <chr> "Classe/Assunto:\n\t\t\t\t\t\t\t\t\t\t\t Habea...
#> $ district <chr> "Cosmópolis", "São Paulo", "Ribeirão Preto", "...
#> $ court <chr> "3ª Câmara de Direito Criminal", "3ª Câmara de...
#> $ dt_decision <chr> "19/12/2017", "19/12/2017", "19/12/2017", "14/...
#> $ dt_publication <chr> "19/12/2017", "19/12/2017", "19/12/2017", "19/...
#> $ dt_registration <chr> "19/12/2017", "19/12/2017", "19/12/2017", "19/...
#> $ rapporteur <chr> "Luiz Antonio Cardoso", "Luiz Antonio Cardoso"...
#> $ summary <chr> NA, NA, NA, "Execução Penal – Comutação de Pe...
#> $ txt_summary <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
#> $ result <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
Baixando processos: CPOSG
# Folder that receives the individual lawsuit pages (CPOSG).
cposg_path <- glue("{path}/cposg")

# Download each lawsuit once: drop rows without a lawsuit id, keep one row per
# distinct id, pass the ids through clean_id() and hand them to the downloader.
d_cjsg %>%
  drop_na(id_lawsuit) %>%
  distinct(id_lawsuit) %>%
  pull(id_lawsuit) %>%
  clean_id() %>%
  download_cposg(cposg_path)
# Full paths of everything stored in the CPOSG folder.
cposg_files <- list.files(cposg_path, full.names = TRUE)

# Build the parser one step at a time: start from an empty parser, then add
# the data, parts and decisions steps, in that order.
parser <- make_parser()
parser <- parse_data(parser)
parser <- parse_parts(parser)
parser <- parse_decisions(parser)
# This parser saves its intermediate results as rds files in `rds_path`.
rds_path <- glue("{path}/cposg_rds")
run_parser(cposg_files, parser, path = rds_path)
rds_files <- list.files(rds_path, full.names = TRUE)

# Read every intermediate file and stack the results by row. Then derive
# `id_lawsuit` from the leading digits of `id` via build_id() and move both id
# columns to the front.
d_cposg <- rds_files %>%
  map(read_rds) %>%
  bind_rows() %>%
  mutate(id_lawsuit = build_id(str_extract(id, "^[0-9]+"))) %>%
  select(id, id_lawsuit, everything())
# Cache the parsed table as a bz2-compressed rds file, then load it back from
# that file.
write_rds(
  d_cposg,
  glue("{path}/d_cposg.rds"),
  compress = "bz2"
)
d_cposg <- glue("{path}/d_cposg.rds") %>%
  read_rds()

# Overview of the columns and their types.
glimpse(d_cposg)
#> Observations: 11,762
#> Variables: 7
#> $ id <chr> "00000037120168260073", "00000040920178260142", "00...
#> $ id_lawsuit <chr> "0000003-71.2016.8.26.0073", "0000004-09.2017.8.26....
#> $ file <chr> "data-raw/camaras/cposg/00000037120168260073.html",...
#> $ hidden <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
#> $ data <list> [<# A tibble: 11 x 2, data value ...
#> $ parts <list> [<# A tibble: 3 x 4, id name ...
#> $ decisions <list> [<# A tibble: 1 x 2, date decision ...