Skip to content

Lembretes TSE

Peter edited this page Mar 21, 2017 · 2 revisions

There are 3520846 full-names, but 1492419 distinct. There are some little of names with problems, as "0DÁRICO", "0SEAS", and "ABRRAÃO".

Run first `` to go in bath.

python tse-getZips.py # or run in background "&"
python tse-makeCSV.py
cp *.csv /tmp  # make a copy on folder with all permissions`
psql -h localhost -U postgres restest

Add the SQL commands:

   CREATE EXTENSION file_fdw;
   CREATE SERVER files FOREIGN DATA WRAPPER file_fdw;

   CREATE FOREIGN TABLE temp1 (id int, urlref text, filename text) 
   SERVER files OPTIONS ( 
     filename '/tmp/tse-fontes.csv', 
       format 'csv', 
       header 'true'
   );


   CREATE FOREIGN TABLE temp2 (year int, state text, name text, bdate text, cpf text,fonte_id int) 
   SERVER files OPTIONS (
     filename '/tmp/tse-FIM.csv', 
       format 'csv', 
       header 'true'
   );

CREATE TABLE pubperson.source (
 id bigserial NOT NULL PRIMARY KEY,
 label text,
 country char(2) NOT NULL DEFAULT 'br',
 info JSONb,
 UNIQUE(label)
);

CREATE TABLE pubperson.br_tse_person (
 id bigserial NOT NULL PRIMARY KEY,
 source_id bigint REFERENCES pubperson.source(id),
 fullname text,
 vatid bigint,
 info JSONb
);
CREATE INDEX idx_tse_fullname ON pubperson.br_tse_person (fullname);

UPDATE pubperson.br_tse_person
SET source_id = src.id
FROM pubperson.source src
WHERE src.info->'id' = br_tse_person.info->'fonte_id'
;



CREATE TABLE pubperson.person (
 id bigserial NOT NULL PRIMARY KEY,
 source_id bigint REFERENCES pubperson.source(id),
 fullname text,
 vatid bigint,
 info JSONb
);
CREATE INDEX idx_fullname ON pubperson.person (fullname);




--SELECT src.id as fonte_id, id
--FROM pubperson.br_tse_person p INNER JOIN pubperson.source src 
--     ON src.info->'id' = p.info->'fonte_id'





CREATE MATERIALIZED VIEW pubperson.kx_firstname AS
 SELECT regexp_replace(fullname,'\s.+$', '') AS word, 
        count(*) AS freq
 FROM pubperson.br_tse_person
 GROUP BY 1
 ORDER BY 1
;
CREATE INDEX idx_firstname ON pubperson.kx_firstname (word);





INSERT INTO pubperson.source (label,info) 
   SELECT 'br:tse:'||regexp_replace(filename, '\.\w\w\w\w?$', ''), 
          jsonb_build_object('id',id, 'urlref',urlref, 'file',filename)
   FROM temp1;

INSERT INTO pubperson.person (fullname,info) 
   SELECT name, 
          jsonb_build_object( 'bdate',bdate, 'fonte_id',fonte_id,  'year',year, 'state',state, 'cpf',cpf,  'cpfcheck',verify_cpf(cpf) )
   FROM temp2;
-- need some minutes


-----

SELECT 
FROM (SELECT info->>'bdate' FROM  pubperson.br_tse_person t order by id DESC) t;

SELECT *, 
     CASE WHEN aux IS NULL THEN 'X'||(info->>'bdate') ELSE concat(aux[3],'-',aux[2],'-',aux[1]) END AS bdate
FROM (
  SELECT *, (select a from 
    regexp_matches( regexp_replace(info->>'bdate','^(\d\d)/(\d\d)/(\d\d)$','\1\219\3') ,  '^(\d\d)(\d\d)(\d\d\d\d)$')  t2(a) 
  ) as aux
  FROM pubperson.br_tse_person t order by id
) t3
Clone this wiki locally