Skip to content

Commit

Permalink
fix(pipeline): remove district codes from zdf
Browse files Browse the repository at this point in the history
Currently, zone_diffusion_code can contain district codes
introduced by geocoding. This is a side effect of our
previous work on district codes.

As a result, services are discoverable only by district
code, not by city code.

We can safely assume that, in real life, services are
never confined to districts. Therefore, we don't
want to allow a district zdf type.

This commit replaces district codes with city codes for the zdf.
  • Loading branch information
vmttn committed Oct 15, 2024
1 parent 64ec1ee commit cb73235
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 35 deletions.
15 changes: 14 additions & 1 deletion pipeline/dbt/models/intermediate/_models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ models:
severity: warn
- dbt_utils.not_empty_string
- dbt_utils.not_constant
- dbt_utils.expression_is_true:
expression: "!~ '^(751|693|132)'"
config:
severity: warn
- relationships:
to: ref('stg_decoupage_administratif__regions')
field: code
Expand Down Expand Up @@ -107,9 +111,18 @@ models:
- name: code_postal
data_tests:
- dbt_utils.not_empty_string
- name: code_insee
- name: code_commune
data_tests:
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__communes')
field: code
- name: code_arrondissement
data_tests:
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__arrondissements')
field: code
- name: latitude
- name: longitude
- name: score
Expand Down
13 changes: 12 additions & 1 deletion pipeline/dbt/models/intermediate/int__geocodages.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,18 @@ final AS (
geocodings.result_city AS "commune",
geocodings.result_name AS "adresse",
geocodings.result_postcode AS "code_postal",
geocodings.result_citycode AS "code_insee",
-- ban api returns district codes for Paris, Lyon and Marseille
-- replace them with actual city codes
CASE
WHEN LEFT(geocodings.result_citycode, 3) = '751' THEN '75056' -- Paris
WHEN LEFT(geocodings.result_citycode, 3) = '693' THEN '69123' -- Lyon
WHEN LEFT(geocodings.result_citycode, 3) = '132' THEN '13055' -- Marseille
ELSE geocodings.result_citycode
END AS "code_commune",
CASE
WHEN LEFT(geocodings.result_citycode, 3) = ANY(ARRAY['751', '693', '132'])
THEN geocodings.result_citycode
END AS "code_arrondissement",
geocodings.result_score AS "score",
geocodings.result_type AS "type",
geocodings.longitude AS "longitude",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ overriden_adresses AS (
COALESCE(geocodages.latitude, adresses.latitude) AS "latitude",
COALESCE(geocodages.commune, adresses.commune) AS "commune",
COALESCE(geocodages.code_postal, adresses.code_postal) AS "code_postal",
COALESCE(geocodages.code_insee, adresses.code_insee) AS "code_insee",
COALESCE(geocodages.code_commune, adresses.code_insee) AS "code_insee",
geocodages.score AS "score_geocodage"
FROM adresses
LEFT JOIN geocodages
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,66 @@ models:
field: code
- name: code_epci
data_tests:
- not_null:
config:
error_if: "!=98"
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__epcis')
field: code
- name: centre
data_tests:
- not_null
- dbt_utils.not_constant
- name: codes_postaux

- name: stg_decoupage_administratif__arrondissements
config:
indexes:
- columns: [code]
unique: true
- columns: [code_departement]
- columns: [code_region]
- columns: [code_commune]
columns:
- name: code
data_tests:
- unique
- not_null
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- name: nom
data_tests:
- not_null
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- name: code_region
data_tests:
- not_null
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__regions')
field: code
- name: code_departement
data_tests:
- not_null
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__departements')
field: code
- name: code_commune
data_tests:
- not_null
- dbt_utils.not_constant
- dbt_utils.not_empty_string
- relationships:
to: ref('stg_decoupage_administratif__communes')
field: code
- name: centre
data_tests:
- not_null
- dbt_utils.not_constant
- name: codes_postaux
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Staging model for municipal districts ("arrondissements").
-- Renames the camelCase source columns to snake_case and attaches the code of
-- the parent city ("code_commune") to every district row.
WITH source AS (
    -- NOTE(review): presumably expands to a SELECT over the raw
    -- decoupage_administratif.arrondissements source; confirm in the macro.
    {{ stg_source_header('decoupage_administratif', 'arrondissements') }}
),

final AS (
    SELECT
        src.code              AS "code",
        src.nom               AS "nom",
        src."codeRegion"      AS "code_region",
        src."codeDepartement" AS "code_departement",
        -- The parent city is derived from the first three characters of the
        -- district code. Any other prefix yields NULL (no ELSE branch).
        CASE LEFT(src.code, 3)
            WHEN '751' THEN '75056'  -- Paris
            WHEN '693' THEN '69123'  -- Lyon
            WHEN '132' THEN '13055'  -- Marseille
        END                   AS "code_commune",
        -- "centre" is parsed from GeoJSON into a geometry value.
        ST_GEOMFROMGEOJSON(src.centre) AS "centre",
        src."codesPostaux"    AS "codes_postaux"
    FROM source AS src
    ORDER BY src.code
)

SELECT * FROM final
Original file line number Diff line number Diff line change
@@ -1,39 +1,17 @@
WITH source_communes AS (
WITH source AS (
{{ stg_source_header('decoupage_administratif', 'communes') }}
),

source_arrondissements AS (
{{ stg_source_header('decoupage_administratif', 'arrondissements') }}
),

communes AS (
SELECT
source_communes.code AS "code",
source_communes.nom AS "nom",
source_communes."codeRegion" AS "code_region",
source_communes."codeDepartement" AS "code_departement",
source_communes."codeEpci" AS "code_epci",
ST_GEOMFROMGEOJSON(source_communes.centre) AS "centre",
source_communes."codesPostaux" AS "codes_postaux"
FROM source_communes
),

arrondissements AS (
SELECT
source_arrondissements.code AS "code",
source_arrondissements.nom AS "nom",
source_arrondissements."codeRegion" AS "code_region",
source_arrondissements."codeDepartement" AS "code_departement",
NULL AS "code_epci",
ST_GEOMFROMGEOJSON(source_arrondissements.centre) AS "centre",
source_arrondissements."codesPostaux" AS "codes_postaux"
FROM source_arrondissements
),

final AS (
SELECT * FROM communes
UNION ALL
SELECT * FROM arrondissements
SELECT
code AS "code",
nom AS "nom",
"codeRegion" AS "code_region",
"codeDepartement" AS "code_departement",
"codeEpci" AS "code_epci",
ST_GEOMFROMGEOJSON(centre) AS "centre",
"codesPostaux" AS "codes_postaux"
FROM source
ORDER BY code
)

Expand Down

0 comments on commit cb73235

Please sign in to comment.