-
Notifications
You must be signed in to change notification settings - Fork 0
Lembretes TSE
Peter edited this page Mar 21, 2017
·
2 revisions
There are 3520846 full-names, but 1492419 distinct. There are some little of names with problems, as "0DÁRICO", "0SEAS", and "ABRRAÃO".
Run first `` to go in bath.
python tse-getZips.py # or run in background "&"
python tse-makeCSV.py
cp *.csv /tmp # make a copy on folder with all permissions`
psql -h localhost -U postgres restest
Add the SQL commands:
CREATE EXTENSION file_fdw;
CREATE SERVER files FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE temp1 (id int, urlref text, filename text)
SERVER files OPTIONS (
filename '/tmp/tse-fontes.csv',
format 'csv',
header 'true'
);
CREATE FOREIGN TABLE temp2 (year int, state text, name text, bdate text, cpf text,fonte_id int)
SERVER files OPTIONS (
filename '/tmp/tse-FIM.csv',
format 'csv',
header 'true'
);
CREATE TABLE pubperson.source (
id bigserial NOT NULL PRIMARY KEY,
label text,
country char(2) NOT NULL DEFAULT 'br',
info JSONb,
UNIQUE(label)
);
CREATE TABLE pubperson.br_tse_person (
id bigserial NOT NULL PRIMARY KEY,
source_id bigint REFERENCES pubperson.source(id),
fullname text,
vatid bigint,
info JSONb
);
CREATE INDEX idx_tse_fullname ON pubperson.br_tse_person (fullname);
UPDATE pubperson.br_tse_person
SET source_id = src.id
FROM pubperson.source src
WHERE src.info->'id' = br_tse_person.info->'fonte_id'
;
CREATE TABLE pubperson.person (
id bigserial NOT NULL PRIMARY KEY,
source_id bigint REFERENCES pubperson.source(id),
fullname text,
vatid bigint,
info JSONb
);
CREATE INDEX idx_fullname ON pubperson.person (fullname);
--SELECT src.id as fonte_id, id
--FROM pubperson.br_tse_person p INNER JOIN pubperson.source src
-- ON src.info->'id' = p.info->'fonte_id'
CREATE MATERIALIZED VIEW pubperson.kx_firstname AS
SELECT regexp_replace(fullname,'\s.+$', '') AS word,
count(*) AS freq
FROM pubperson.br_tse_person
GROUP BY 1
ORDER BY 1
;
CREATE INDEX idx_firstname ON pubperson.kx_firstname (word);
INSERT INTO pubperson.source (label,info)
SELECT 'br:tse:'||regexp_replace(filename, '\.\w\w\w\w?$', ''),
jsonb_build_object('id',id, 'urlref',urlref, 'file',filename)
FROM temp1;
INSERT INTO pubperson.person (fullname,info)
SELECT name,
jsonb_build_object( 'bdate',bdate, 'fonte_id',fonte_id, 'year',year, 'state',state, 'cpf',cpf, 'cpfcheck',verify_cpf(cpf) )
FROM temp2;
-- need some minutes
-----
SELECT
FROM (SELECT info->>'bdate' FROM pubperson.br_tse_person t order by id DESC) t;
SELECT *,
CASE WHEN aux IS NULL THEN 'X'||(info->>'bdate') ELSE concat(aux[3],'-',aux[2],'-',aux[1]) END AS bdate
FROM (
SELECT *, (select a from
regexp_matches( regexp_replace(info->>'bdate','^(\d\d)/(\d\d)/(\d\d)$','\1\219\3') , '^(\d\d)(\d\d)(\d\d\d\d)$') t2(a)
) as aux
FROM pubperson.br_tse_person t order by id
) t3