-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcleanup_unused_api_indexes.py
executable file
·184 lines (152 loc) · 5.79 KB
/
cleanup_unused_api_indexes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python
"""
This script deletes unused indexes in the API cluster.
This reduces disk usage, and in our experience reduces CPU and memory pressure in the cluster.
This script will go through all the indexes in the cluster. If it thinks an index is safe
to delete (i.e. it's not being used in the API and is older than the API indexes), it will
ask you to confirm you want to delete it.
"""
import click
import json
import boto3
import httpx
import humanize
def get_session_with_role(role_arn):
    """
    Assume the IAM role identified by ``role_arn`` via STS and return a
    boto3.Session built from the temporary credentials in the response.
    """
    sts = boto3.client("sts")
    response = sts.assume_role(
        RoleArn=role_arn, RoleSessionName="AssumeRoleSession1"
    )
    temporary = response["Credentials"]

    # Collect the three parts of the temporary credentials, then hand them
    # to the Session constructor in one go.
    session_kwargs = {
        "aws_access_key_id": temporary["AccessKeyId"],
        "aws_secret_access_key": temporary["SecretAccessKey"],
        "aws_session_token": temporary["SessionToken"],
    }
    return boto3.Session(**session_kwargs)
def get_api_es_client(session):
    """
    Returns an HTTP client (httpx.Client) for the Elasticsearch cluster.

    The endpoint, username and password are read from the
    "elasticsearch/api_cleanup/credentials" secret in Secrets Manager,
    fetched with the given boto3 session.
    """
    secrets_manager = session.client("secretsmanager")
    secret = secrets_manager.get_secret_value(
        SecretId="elasticsearch/api_cleanup/credentials"
    )
    config = json.loads(secret["SecretString"])

    # The secret is a JSON object; requests are authenticated with HTTP
    # basic auth against the stored endpoint.
    endpoint = config["endpoint"]
    basic_auth = (config["username"], config["password"])
    return httpx.Client(base_url=endpoint, auth=basic_auth)
def list_indexes(es_client):
    """
    List the works-* and images-* indexes in the cluster.

    Returns a list of dicts with the keys "name", "size" and "doc_count"
    (the latter converted to an int), ordered by index name.  Raises if
    the cluster responds with an error status.
    """
    response = es_client.get(
        "/_cat/indices/works-*,images-*", params={"format": "json"}
    )
    response.raise_for_status()

    indexes = []
    for row in response.json():
        indexes.append(
            {
                "name": row["index"],
                "size": row["store.size"],
                "doc_count": int(row["docs.count"]),
            }
        )

    indexes.sort(key=lambda entry: entry["name"])
    return indexes
def get_index_name(api_url):
    """
    Return the name of the index that this instance of the API is reading from.

    ``api_url`` is the hostname of the API (no scheme), which is used to
    fetch ``/catalogue/v2/search-templates.json`` over HTTPS.  The index
    name is taken from the first entry in the response's "templates" list.

    Raises httpx.HTTPStatusError if the API responds with an error status.
    """
    search_templates_resp = httpx.get(
        f"https://{api_url}/catalogue/v2/search-templates.json"
    )

    # The value returned here decides which indexes are offered for deletion,
    # so fail loudly on an error response rather than trying to parse it.
    search_templates_resp.raise_for_status()

    return search_templates_resp.json()["templates"][0]["index"]
def maybe_cleanup_index(es_client, *, idx, prod_index_name, stage_index_name):
    """
    Decide if we're going to clean up this index, and if so, double-check with the user.

    Arguments:
      es_client        -- HTTP client for the cluster; its ``delete(path)`` method
                          is called to remove the index.
      idx              -- dict with "name", "size" and "doc_count" keys
                          describing the index under consideration.
      prod_index_name  -- name of the index the prod API reads from.
      stage_index_name -- name of the index the stage API reads from.

    The matching images index names are derived from the prod/stage names by
    replacing "works-" with "images-".

    Returns None.  Raises if the delete request comes back with an error
    status, so a failed deletion is never mistaken for a successful one.
    """
    name = idx["name"]

    if name.startswith("works-"):
        is_works = True
    elif name.startswith("images-"):
        is_works = False
    else:
        # Not an index this script manages; leave it alone without comment.
        return

    prod_images_index_name = prod_index_name.replace("works-", "images-")
    stage_images_index_name = stage_index_name.replace("works-", "images-")

    styled_name = click.style(name, "blue")
    doc_count = humanize.intcomma(idx["doc_count"])
    click.echo(f"\nConsidering {styled_name} ({doc_count} docs, {idx['size']})")

    not_deleted = click.style("not be deleted", "green")

    # We should never delete the prod or staging indexes -- this would cause an instant
    # outage in the API, which is very bad.
    if name in (prod_index_name, prod_images_index_name):
        click.echo(f"This index will {not_deleted} -- it is the prod API")
        return

    if name in (stage_index_name, stage_images_index_name):
        click.echo(f"This index will {not_deleted} -- it is the stage API")
        return

    # We consider deleting an index if it sorts lexicographically lower than both the
    # prod and staging APIs.  e.g. works-2001-01-01 < works-2002-02-02
    #
    # We skip indexes that sort higher, because this might be an index that we're currently
    # reindexing into, but we haven't pointed an API at yet.  In general, indexes go forward,
    # not backward.
    #
    # We still ask the user to confirm they really want to delete this index, just in case.
    # We should never offer to delete an index that would cause an outage, but they might
    # skip an index if, say, they've only just promoted a new index to prod and they
    # want to be able to roll back to this index.
    if is_works:
        live_names = (prod_index_name, stage_index_name)
    else:
        live_names = (prod_images_index_name, stage_images_index_name)

    if all(name < live_name for live_name in live_names):
        older = click.style("older", "red")
        if click.confirm(f"This index is {older} than the current APIs. Delete it?"):
            delete_resp = es_client.delete(f"/{name}")
            # Surface a failed deletion instead of silently moving on.
            delete_resp.raise_for_status()
        return

    # If we get this far, we're not going to delete this index.  Log and return.
    click.echo(f"This index will {not_deleted}")
def main():
    """
    Look up which indexes the prod and stage APIs are reading from, then walk
    every index in the cluster and offer to delete the ones that are unused.
    """
    session = get_session_with_role(
        role_arn="arn:aws:iam::760097843905:role/platform-developer"
    )
    es_client = get_api_es_client(session)

    prod_index_name = get_index_name("api.wellcomecollection.org")
    stage_index_name = get_index_name("api-stage.wellcomecollection.org")

    styled_prod = click.style(prod_index_name, "blue")
    click.echo("The prod API is reading from %s" % styled_prod)

    styled_stage = click.style(stage_index_name, "blue")
    click.echo("The stage API is reading from %s" % styled_stage)

    for candidate in list_indexes(es_client):
        maybe_cleanup_index(
            es_client,
            idx=candidate,
            prod_index_name=prod_index_name,
            stage_index_name=stage_index_name,
        )


if __name__ == "__main__":
    main()