Skip to content

Commit

Permalink
Merge pull request #15 from geocode-city/deploy
Browse files Browse the repository at this point in the history
Prepare for deployment
  • Loading branch information
lfborjas authored Jan 24, 2021
2 parents 2772f76 + 8001de8 commit 753954a
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 102 deletions.
44 changes: 40 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,40 @@ FROM haskell:8.8.4 as dependencies
RUN mkdir /opt/build
WORKDIR /opt/build

# Need postgresql-server-dev to compile postgresql packages
# Create the file repository configuration:
# From: https://docs.docker.com/engine/examples/postgresql_service/
# And: https://www.postgresql.org/download/linux/debian/

# RUN echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list
# This one's from:
#RUN apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys B97B0AFCAA1A47F044F244A07FCC7D46ACCC4CF8

#RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ precise-pgdg main" > /etc/apt/sources.list.d/pgdg.list
RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
&& curl -sSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -

# Install the PostgreSQL 12 server development headers (needed to compile the postgresql packages).
# To target a different PostgreSQL version, change the '-12' suffix of 'postgresql-server-dev-12':
RUN apt-get update && apt-get install -y postgresql-server-dev-12


COPY stack.yaml package.yaml stack.yaml.lock /opt/build/
# RUN stack build --system-ghc --only-dependencies -j1 servant
# compile some heavy-hitters with one thread to not overwhelm poor ol' docker
RUN stack build --system-ghc --only-dependencies -j1 servant servant-swagger
RUN stack build --system-ghc --only-dependencies

# ---- SECOND LAYER ---

FROM haskell:8.8.4 as build

RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
&& curl -sSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -

# Install libpq-dev (the PostgreSQL client library and its headers) rather than a full PostgreSQL server:
RUN apt-get update && apt-get install -y libpq-dev

COPY --from=dependencies /root/.stack /root/.stack
RUN mkdir -p /opt/geocode-city-api/bin

Expand All @@ -26,15 +52,25 @@ RUN stack --local-bin-path /opt/geocode-city-api/bin install

# Using multi-stage builds:
# https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#use-multi-stage-builds
FROM debian:latest
FROM debian:buster

# The debian:buster base image doesn't ship with gnupg or curl, so install them first.
RUN apt-get update
RUN apt-get install -y --no-install-recommends gnupg curl


RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
&& curl -ksSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -

# Install libpq-dev and the PostgreSQL 12 client tools.
# To target a different PostgreSQL version, change the '-12' suffix of 'postgresql-client-12':
RUN apt-get update && apt-get install -y libpq-dev postgresql-client-12

# Add user and setup path (for local testing, ignored by Heroku)
RUN adduser geocode-city-api
USER geocode-city-api

COPY --from=build /opt/geocode-city-api/bin /opt/geocode-city-api
COPY --from=build /opt/geocode-city-api/config /opt/geocode-city-api/config
COPY --from=build /opt/geocode-city-api/static /opt/geocode-city-api/static

WORKDIR /opt/geocode-city-api

Expand Down
72 changes: 72 additions & 0 deletions Notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Notes

## Future features

- [ ] Ability to have cities from sources other than `Geonames`. In terms of schemata, we can have e.g. `raw.openstreetmap` and then write a smart `insert ... select`, like we do in `seeds.sql`.
The API itself would need to expose the source in some way.
- [ ] Other search criteria for `/search`
- [ ] `/timezone` endpoint that, in addition to the timezone for a point/place, returns
daylight saving time info, offset, etc.

## SQL Notes

The base schema is based on this article, also part of the Art of PostgreSQL book: https://tapoueh.org/blog/2018/05/postgresql-data-types-point/

(Note: the following examples use the biggest dataset: all cities with a population above 500, which is ~196,000 rows when imported.)

We have some basic indices for trigram operations, which lead to decently fast queries (there's also an index for `alternatenames`, which can take 3x as long, but will have more hits):

```sql
geocode_city_dev=# select name, population from geocode.city where name %> 'teguc' order by population desc, name <-> 'teguc' limit 10;
name | population
---------------------+------------
Tegucigalpa | 850848
Teguise | 19418
Tegueste | 10666
Costa Teguise | 7629
Tegul’det | 4800
Teguajinal | 1117
Banjar Teguan | 0
Ji’ergele Teguoleng | 0
(8 rows)

geocode_city_dev=# explain analyze select name, population from geocode.city where name %> 'teguc' order by population desc, name <-> 'teguc' limit 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=734.54..734.56 rows=10 width=23) (actual time=0.560..0.563 rows=8 loops=1)
-> Sort (cost=734.54..735.03 rows=197 width=23) (actual time=0.560..0.561 rows=8 loops=1)
Sort Key: population DESC, ((name <-> 'teguc'::text))
Sort Method: quicksort Memory: 25kB
-> Bitmap Heap Scan on city (cost=77.52..730.28 rows=197 width=23) (actual time=0.488..0.549 rows=8 loops=1)
Recheck Cond: (name %> 'teguc'::text)
Heap Blocks: exact=6
-> Bitmap Index Scan on idx_city_autocomplete_faster (cost=0.00..77.47 rows=197 width=0) (actual time=0.460..0.460 rows=8 loops=1)
Index Cond: (name %> 'teguc'::text)
Planning Time: 0.121 ms
Execution Time: 0.593 ms
(11 rows)
```

and for reverse geocoding:

```sql
geocode_city_dev=# select name from geocode.city order by location <-> '(-87.2, 14.06)' limit 5;
name
-------------
Tegucigalpa
La Paz
Comayagua
Danlí
El Paraíso
(5 rows)

geocode_city_dev=# explain analyze select name from geocode.city order by location <-> '(-87.2, 14.06)' limit 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.25..8.27 rows=1 width=18) (actual time=0.696..0.735 rows=5 loops=1)
-> Index Scan using idx_location on city (cost=0.25..8.27 rows=1 width=18) (actual time=0.692..0.726 rows=5 loops=1)
Order By: (location <-> '(-87.2,14.06)'::point)
Planning Time: 10.290 ms
Execution Time: 0.854 ms
(5 rows)
```
98 changes: 31 additions & 67 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,78 +3,42 @@
![build](https://github.com/geocode-city/api/workflows/Haskell%20CI/badge.svg)


## Deployment

## Execute
I personally use the [Heroku Container Registry](https://devcenter.heroku.com/articles/container-registry-and-runtime); but instead of using [their base images](https://devcenter.heroku.com/articles/heroku-20-stack#heroku-20-docker-image) (which are rather large),
you'll see in the `Dockerfile` that I stubbornly use `haskell` base images to build dependencies,
and `debian:buster` for the final image -- which requires installing Postgres libraries at every step, and a very questionable `curl -k...`. I did, however, take some inspiration from the Heroku [image Dockerfiles](https://github.com/heroku/stack-images/blob/main/heroku-20/setup.sh).

On a solidly average machine (8GB RAM, 3GB allocated to Docker), a build takes ~30 mins from scratch, but under a minute if all dependencies have already been built and we're just recompiling the executables from Haskell source code. I try to keep dependencies small, but there are at least two heavy hitters, `lens` and `swagger2`: Docker with only 2GB was running out of memory trying to compile these behemoths, even
with the `-j1` flag sent to Stack!

For my setup, these commands do the trick:

```sh
heroku container:push web -a geocode-city
heroku container:release web -a geocode-city
```

If you choose to use Heroku, the `Dockerfile` should make the above commands work for you, too.

### Datastores

In Heroku, I use the `hobby-dev` Postgres add-on and the free `redis` add-on. One thing to note
is that, for the hyperloglog entries to "fall off" as we reach max memory, I'm using the `allkeys-lru` eviction policy, set with:

```sh
heroku redis:maxmemory REDIS-NAME --policy allkeys-lru -a geocode-city
```


## Development

### Execute

* Run `stack run` to run the server with the default config (see `Config.hs`). You can override with environment vars: `PORT` and `DATABASE_URL`.
* Run `stack run -- --migrate` to run any pending migrations. We endeavor to write idempotent migrations, so running it
_shouldn't_ affect an existing schema.

## Run tests
### Run tests

`stack test`

## Notes

### Queries

The base schema is based on this article, also part of the Art of PostgreSQL book: https://tapoueh.org/blog/2018/05/postgresql-data-types-point/

(Note: the following examples use the biggest dataset: all cities with >500 population, which is around ~196,000 rows when imported:)

We have some basic indices for trigram operations, which lead to decently fast queries (there's also an index for `alternatenames`, which can take 3x as long, but will have more hits):

```sql
geocode_city_dev=# select name, population from geocode.city where name %> 'teguc' order by population desc, name <-> 'teguc' limit 10;
name | population
---------------------+------------
Tegucigalpa | 850848
Teguise | 19418
Tegueste | 10666
Costa Teguise | 7629
Tegul’det | 4800
Teguajinal | 1117
Banjar Teguan | 0
Ji’ergele Teguoleng | 0
(8 rows)

geocode_city_dev=# explain analyze select name, population from geocode.city where name %> 'teguc' order by population desc, name <-> 'teguc' limit 10;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=734.54..734.56 rows=10 width=23) (actual time=0.560..0.563 rows=8 loops=1)
-> Sort (cost=734.54..735.03 rows=197 width=23) (actual time=0.560..0.561 rows=8 loops=1)
Sort Key: population DESC, ((name <-> 'teguc'::text))
Sort Method: quicksort Memory: 25kB
-> Bitmap Heap Scan on city (cost=77.52..730.28 rows=197 width=23) (actual time=0.488..0.549 rows=8 loops=1)
Recheck Cond: (name %> 'teguc'::text)
Heap Blocks: exact=6
-> Bitmap Index Scan on idx_city_autocomplete_faster (cost=0.00..77.47 rows=197 width=0) (actual time=0.460..0.460 rows=8 loops=1)
Index Cond: (name %> 'teguc'::text)
Planning Time: 0.121 ms
Execution Time: 0.593 ms
(11 rows)
```

and for reverse geocoding:

```sql
geocode_city_dev=# select name from geocode.city order by location <-> '(-87.2, 14.06)' limit 5;
name
-------------
Tegucigalpa
La Paz
Comayagua
Danlí
El Paraíso
(5 rows)

geocode_city_dev=# explain analyze select name from geocode.city order by location <-> '(-87.2, 14.06)' limit 5;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.25..8.27 rows=1 width=18) (actual time=0.696..0.735 rows=5 loops=1)
-> Index Scan using idx_location on city (cost=0.25..8.27 rows=1 width=18) (actual time=0.692..0.726 rows=5 loops=1)
Order By: (location <-> '(-87.2,14.06)'::point)
Planning Time: 10.290 ms
Execution Time: 0.854 ms
(5 rows)
```
5 changes: 4 additions & 1 deletion geocode-city-api.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: 5d0596bb832674fa5ce623837a77f59bb8ffde9e470b037e2efa79949410c210
-- hash: 90300272b0e50cb3dd8db4e56eb2983e4ba6f8bead284a517b5b839e47af464b

name: geocode-city-api
version: 0.1.0.0
Expand Down Expand Up @@ -70,6 +70,7 @@ library
, text
, time
, wai
, wai-cors
, warp
default-language: Haskell2010

Expand Down Expand Up @@ -105,6 +106,7 @@ executable geocode-city-api-exe
, text
, time
, wai
, wai-cors
, warp
default-language: Haskell2010

Expand Down Expand Up @@ -148,5 +150,6 @@ test-suite geocode-city-api-test
, text
, time
, wai
, wai-cors
, warp
default-language: Haskell2010
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies:
- containers
- lens
- hedis
- wai-cors

ghc-options:
- -Wall
Expand Down
29 changes: 29 additions & 0 deletions seeds/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,35 @@ and unzip.

For the format: http://download.geonames.org/export/dump/readme.txt

### Loading data for production

Currently, since the below scripts are a bit unwieldy to run in Heroku,
I simply created a backup of a populated DB and [imported it](https://devcenter.heroku.com/articles/heroku-postgres-import-export#import).

```sh
# Note that we exclude the `raw` schema, as it's only useful for imports. Migrations
# should be able to reconstitute it, sans data.
> pg_dump -Fc --no-acl --no-owner -Nraw geocode_city_dev > mydb.dump

> heroku pg:backups:restore -a geocode-city 'https://storage.googleapis.com/some-public-url' DATABASE_URL
▸ WARNING: Destructive Action
▸ This command will affect the app geocode-city
▸ To proceed, type geocode-city or re-run this command with --confirm geocode-city

> geocode-city
Starting restore of https://storage.googleapis.com/geocode-city-backups/mydb.dump to postgresql-lively-93516... done

Use Ctrl-C at any time to stop monitoring progress; the backup will continue restoring.
Use heroku pg:backups to check progress.
Stop a running restore with heroku pg:backups:cancel.

Restoring... done
```

Because the DB is tiny (10MB), it took a little under a minute. We should ideally have
a script to run in production to download the latest geonames definitions and load them
appropriately!


### Loading cities for dev

Expand Down
9 changes: 1 addition & 8 deletions src/Database/Queries.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{-# LANGUAGE QuasiQuotes #-}
module Database.Queries where

import Data.Time
--
import Database.PostgreSQL.Simple.Types (Only (..))
import Effects
import Import
Expand Down Expand Up @@ -49,13 +49,6 @@ cityCount :: Has Database sig m => m Int
cityCount = do
counts <- query_ "select count(geonameid) from geocode.city"
pure $ maybe 0 fromOnly (listToMaybe counts)

-- | Find the newest modification date as downloaded from Geonames.
--
-- Runs @select max(modification)@ against @raw.geonames@. Yields 'Nothing'
-- when the query returns no row, or when the single aggregate value is
-- itself absent (the 'Only' wraps a 'Maybe').
latestUpdate :: Has Database sig m => m (Maybe Day)
latestUpdate = do
updatedAts <- query_ "select max(modification) from raw.geonames"
-- Unwrap the first row (if any) and flatten the nested 'Maybe'.
pure $ fromOnly =<< listToMaybe updatedAts

-- | Given an API Key, find out if it exists and is enabled;
-- return status and current quota.
findApiKey :: Has Database sig m => Text -> m (Bool, Maybe Integer)
Expand Down
9 changes: 1 addition & 8 deletions src/Server/Handlers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,11 @@ import qualified Network.HTTP.Types as N
-- | Server implementation for the 'Service' API type: the handlers below are
-- combined with ':<|>'. The first alternative simply returns 'swaggerSpec';
-- the remaining ones are the handlers defined in this module.
service :: AppM sig m => ServerT Service m
service =
return swaggerSpec
:<|> stats
:<|> autoComplete
:<|> search
:<|> reverseGeocode

-- | Report dataset statistics for the given request key.
--
-- Calls 'checkUsage' on the key first, then fetches the city count and the
-- latest update via the query module, and returns them as a 'Stats' value
-- with the rate-limit headers attached.
stats :: (AppM sig m) => RequestKey -> m (RateLimited Stats)
stats apiKey = do
rateLimitInfo <- checkUsage apiKey
count <- Q.cityCount
update <- Q.latestUpdate
return $ addRateLimitHeaders rateLimitInfo $ Stats update count

-- | Autocomplete based on partial name match
autoComplete :: (AppM sig m) => RequestKey -> Text -> Maybe Int -> m (RateLimited [CityAutocomplete])
autoComplete apiKey q limit = do
rateLimitInfo <- checkUsage apiKey
Expand Down
10 changes: 9 additions & 1 deletion src/Server/Run.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import Server.Handlers (service)
import Server.Types (proxyService)
import Server.Auth (ApiKeyAuth, authContext)
import qualified Database.Redis as R
import Network.Wai.Middleware.Cors

-- | Build a wai app with a connection pool
application :: AppContext -> Application
Expand Down Expand Up @@ -71,4 +72,11 @@ start cfg = do
, ctxDatabasePool = pool
, ctxAnonAccess = if Production == appDeployEnv cfg then AlwaysDenyAnon else AlwaysAllowAnon
}
Warp.run (appPort cfg) (application env)
Warp.run (appPort cfg) (corsMiddleware $ application env)
where
corsMiddleware = cors $ const $ Just corsPolicy
corsPolicy =
simpleCorsResourcePolicy {
corsExposedHeaders = Just $ simpleResponseHeaders <> rateLimitingHeaders
}
rateLimitingHeaders = ["X-RateLimit-Limit", "X-RateLimit-Remaining", "X-RateLimit-Resets"]
Loading

0 comments on commit 753954a

Please sign in to comment.