-
Notifications
You must be signed in to change notification settings - Fork 1
/
ExecutionBroker.tex
2749 lines (2279 loc) · 100 KB
/
ExecutionBroker.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[11pt,a4paper]{ivoa}
\input tthdefs
\usepackage{xspace}
% Standard terms used throughout the document,
% defined as macro commands to maintain consistency
% and avoid repeated spelling mistakes.
% Using non-breaking space character.
% https://stackoverflow.com/a/1012891
\usepackage[super]{nth}
\newcommand{\xml} {XML}
\newcommand{\json} {JSON}
\newcommand{\yaml} {YAML}
\newcommand{\http} {HTTP}
\newcommand{\rest} {REST}
\newcommand{\datamodel} {data~model}
\newcommand{\webservice} {web service}
\newcommand{\webbrowser} {web browser}
\newcommand{\vo} {VO}
\newcommand{\vofull} {Virtual Observatory}
\newcommand{\ivoa} {IVOA}
\newcommand{\ivoafull} {International Virtual Observatory Alliance}
\newcommand{\uws} {UWS}
\newcommand{\vospace} {VOSpace}
\newcommand{\execworkerclass} {\textit{ExecutionWorker}}
\newcommand{\execbrokerclass} {\textit{ExecutionBroker}}
\newcommand{\execbrokerservice}[1] {\textit{ExecutionBroker~service#1}}
\newcommand{\execoffer}[1] {\textit{ExecutionBroker~offer#1}}
\newcommand{\execofferset}[1] {\textit{ExecutionBroker~offerset#1}}
\newcommand{\execsession}[1] {\textit{ExecutionBroker~session#1}}
\newcommand{\executionbroker} {\textit{Execution~Broker}}
\newcommand{\executionplanning} {\textit{Execution~Planning}}
\newcommand{\executable} {\textit{executable}}
\newcommand{\executablething}[1] {\textit{executable~thing#1}}
\newcommand{\excutabletask} {\textit{executable} task}
\newcommand{\metadoc} [1]{\textit{metadata document#1}}
%\newcommand{\execoffer}[1] {\textit{offer#1}}
\newcommand{\workerjob}[1] {\textit{session#1}}
\newcommand{\teardown} {tear-down}
\newcommand{\jupyter} {Jupyter}
\newcommand{\jupyterhub} {JupyterHub}
\newcommand{\binderhub} {BinderHub}
\newcommand{\jupyternotebook} {Jupyter notebook}
\newcommand{\esap} {ESAP}
\newcommand{\escape} {ESCAPE}
\newcommand{\datalake} {DataLake}
\newcommand{\rucio} {Rucio}
\newcommand{\python} {Python}
\newcommand{\pythonprogram} {Python program}
\newcommand{\apache} {Apache}
\newcommand{\spark} {Spark}
\newcommand{\pyspark} {PySpark}
\newcommand{\zeppelin} {Zeppelin}
\newcommand{\zeppelinnotebook} {Zeppelin notebook}
\newcommand{\oci} {OCI}
\newcommand{\ociruntime} {OCI runtime}
\newcommand{\ocicontainer} {OCI container}
\newcommand{\docker} {Docker}
\newcommand{\dockercompose} {Docker compose}
\newcommand{\dockerruntime} {Docker runtime}
\newcommand{\dockercontainer} {Docker container}
\newcommand{\singularity} {Singularity}
\newcommand{\singularitycontainer} {Singularity container}
\newcommand{\openstack} {Openstack}
\newcommand{\kubernetes} {Kubernetes}
\newcommand{\codeword}[1] {\texttt{#1}}
\newcommand{\footurl}[1] {\footnote{\url{#1}}}
\newcommand{\dataset}[1] {dataset#1}
\newcommand{\datascience} {data~science}
\newcommand{\scienceplatform}[1] {science~platform#1}
\newcommand{\science}[1] {science#1}
\newcommand{\scientist}[1] {scientist#1}
\newcommand{\cpu}[1] {CPU#1}
\newcommand{\gpu}[1] {GPU#1}
\newcommand{\nvidiagpu} {NVIDIA~AD104~GPU}
\newcommand{\scalable} {scalable}
% TODO add a citation for the YAML specification.
% https://yaml.org/spec/
\usepackage{listings}
\usepackage{xcolor}
%\colorlet{punct}{red!60!black}
\colorlet{numb}{magenta!60!black}
\definecolor{html-gray}{HTML}{EEEEEE}
\definecolor{light-gray}{gray}{0.95}
\definecolor{delim}{RGB}{20,105,176}
\lstset{
basicstyle=\small\ttfamily,
columns=fullflexible,
frame=none,
backgroundcolor=\color{light-gray},
stepnumber=1,
%numbers=left,
numbers=none,
numberstyle=\small,
numbersep=8pt,
%xleftmargin=\parindent,
xrightmargin=1cm,
showstringspaces=false,
keepspaces=true,
breaklines=true,
linewidth=14cm,
frame=none
}
% https://tex.stackexchange.com/questions/83085/how-to-improve-listings-display-of-json-files
% https://tex.stackexchange.com/a/83100
% https://tex.stackexchange.com/questions/10828/indent-a-code-listing-in-latex
% https://tex.stackexchange.com/a/10831
\lstdefinelanguage{json}{
literate=
*{0}{{{\color{numb}0}}}{1}
{1}{{{\color{numb}1}}}{1}
{2}{{{\color{numb}2}}}{1}
{3}{{{\color{numb}3}}}{1}
{4}{{{\color{numb}4}}}{1}
{5}{{{\color{numb}5}}}{1}
{6}{{{\color{numb}6}}}{1}
{7}{{{\color{numb}7}}}{1}
{8}{{{\color{numb}8}}}{1}
}
\lstdefinelanguage{yaml}{
literate=
*{0}{{{\color{numb}0}}}{1}
{1}{{{\color{numb}1}}}{1}
{2}{{{\color{numb}2}}}{1}
{3}{{{\color{numb}3}}}{1}
{4}{{{\color{numb}4}}}{1}
{5}{{{\color{numb}5}}}{1}
{6}{{{\color{numb}6}}}{1}
{7}{{{\color{numb}7}}}{1}
{8}{{{\color{numb}8}}}{1}
}
\hyphenation{Exe-cut-able-Thing}
\title{IVOA Execution Broker}
% see ivoatexDoc for what group names to use here; use \ivoagroup[IG] for
% interest groups.
\ivoagroup{GWS}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/DaveMorris]
{Dave Morris}
\author[http://www.ivoa.net/twiki/bin/view/IVOA/SaraBertocco]
{Sara Bertocco}
\editor[http://www.ivoa.net/twiki/bin/view/IVOA/DaveMorris]
{Dave Morris}
% \previousversion[????URL????]{????Concise Document Label????}
\previousversion{This is the first public release}
\begin{document}
\begin{abstract}
\label{abstract}
One of the long term goals of the \ivoa{} has been to enable users to
move the code to the data.
This is becoming more and more important as the size and complexity
of the \dataset{s} available in the virtual observatory increases.
%\citep{gaia-at-esac}
%\footurl{https://www.skao.int/en/explore/big-data}
%\footurl{https://www.lsst.org/scientists/keynumbers}
The \ivoa{} \executionbroker{} provides a step towards making this possible.
The \ivoa{} \executionbroker{} is designed to address a specific question:
given an executable thing, e.g.\ a \pythonprogram{} or \jupyternotebook{},
what facilities are available to run it?
To do this, the \ivoa{} \executionbroker{} specification defines
a \datamodel{} and \webservice{} API for describing executable things
and the resources needed to execute them.
Together these components enable a user to ask a simple question
\textit{``Where (and when) can I execute my program?''}
This in turn enables users to move code between \scienceplatform{s}.
Allowing them to develop their code on one platform and then apply it to a different
\dataset{} by sending it to execute on another platform.
\end{abstract}
\section*{Acknowledgments}
\label{acknowledgments}
The authors would like to thank all the participants in the IVOA and ESCAPE projects
who have contributed their ideas, critical reviews, and suggestions to this document.
\section*{Conformance-related definitions}
The words ``MUST'', ``SHALL'', ``SHOULD'', ``MAY'', ``RECOMMENDED'', and
``OPTIONAL'' (in upper or lower case) used in this document are to be
interpreted as described in IETF standard RFC2119 \citep{std:RFC2119}.
The \emph{Virtual Observatory (VO)} is a general term for a collection of
federated resources that can be used to conduct astronomical research,
education, and outreach.
The \href{https://www.ivoa.net}{International Virtual Observatory Alliance (IVOA)}
is a global collaboration of separately funded projects to develop standards and
infrastructure that enable VO applications.
\section{Introduction}
\label{introduction}
The \ivoa{} \executionbroker{} specification defines a \datamodel{} for describing executable tasks
and a \webservice{} interface for managing them.
Together these provide a common interface for service discovery, resource allocation
and execution scheduling across a heterogeneous federation of different types of
execution platform.
\begin{itemize}
\item \execbrokerclass{} \datamodel{} – a data model for describing execution sessions and their resource requirements.
\item \execbrokerclass{} \webservice{} – a \rest{} based web service to find execution platforms, allocate resources and schedule execution sessions.
\end{itemize}
\subsection{Role within the VO Architecture}
\label{subsec:ivoarole}
% As of ivoatex 1.2, the architecture diagram is generated by ivoatex in
% SVG; copy ivoatex/archdiag-full.xml to role_diagram.xml and throw out
% all lines not relevant to your standard.
% Notes don't generally need this. If you don't copy role_diagram.xml,
% you must remove role_diagram.pdf from SOURCES in the Makefile.
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{role_diagram.pdf}
\caption{Architecture diagram showing the \ivoa{} \executionbroker{}'s role in the \ivoa}
\label{fig:archdiag}
\end{figure}
The \ivoa{} Architecture\citep{2010ivoa.rept.1123A} provides a high-level view of how \ivoa{}
standards work together to connect users and applications with providers of data
and services.
Fig.~\ref{fig:archdiag} shows the role the \ivoa{} \executionbroker{} plays within this architecture.
In response to the increasing size and complexity of the next generation of science \dataset{s}
a number of \ivoa{} members are developing integrated \scienceplatform{s} which bring
together the \dataset{s} co-located with the compute resources needed to analyse
them.\footurl{https://data.lsst.cloud/}\footurl{https://rsp.lsst.io/index.html}
These \scienceplatform{s} make extensive use of the \ivoa{} data models and
vocabularies to describe their \dataset{s}, and use the \ivoa{} data access
services to find and access data from other data providers.
In addition, some of the \scienceplatform{s} use \ivoa{} \vospace{} services to manage
data transfers to and from local storage co-located with the compute resources.
However, to date the \ivoa{} does not provide any APIs or services that
enable \scienceplatform{s} to exchange the software used to analyse the data.
The \ivoa{} \executionbroker{} provides a step towards making this possible.
This places the \ivoa{} \executionbroker{} in the same region of the \ivoa{} architecture
as the \ivoa{} \vospace{} specification \citep{2009ivoa.specQ1007G},
providing an infrastructure level service that enables service discovery,
resource allocation and execution scheduling across a heterogeneous federation
of execution platforms.
\ivoa{} \executionbroker{} services may use the
\ivoa{} Single-Sign-On standard \citep{2017ivoa.spec.0524T}
for authentication (see section xx) %\ref{subsec:authentication}
and the
\ivoa{} Credential Delegation Protocol \citep{2010ivoa.spec.0218P}
for delegating credentials to other services.
\subsection{Executable things}
\label{executablething}
To understand the problem that the \ivoa{} \executionbroker{} is trying to solve
it is useful to describe what an \executablething{} is in this context.
In general terms, this document refers to something that can be executed, or run,
as an \executable{}.
To explain what this means we can start with a science domain function that we want to perform.
For example, the mathematical concept of the square root of a number.
We can calculate the square root of a positive number using the Newton–Raphson
algorithm\footurl{https://en.wikipedia.org/wiki/Newton\%27s_method}
which produces successively closer approximations to the result.
However, in the general case, this mathematical description of the algorithm would not be
considered to be an \executablething{}.
We can write a \pythonprogram{} to use this algorithm to calculate the square root of a number.
This is the first identifiable \executablething{} in our example.
To be able to use this \executablething{}, you would need a computing resource with the appropriate
hardware and software environment. In this case, a computing resource with the \python{} interpreter
installed along with the additional \python{} modules required by the program.
This environment is often referred to as the \python{} runtime.
In the context of \scienceplatform{s} and \datascience{}, a common pattern is to provide this environment
using a Docker\footurl{https://docs.docker.com/get-started/what-is-a-container/}
or OCI\footurl{https://opencontainers.org/} container
to package the \pythonprogram{} and \python{} runtime together as a single binary object.
This package, or container, is itself an \executablething{}. One which requires a different execution
environment than the original \pythonprogram{}.
The aim of containerization is to package software components together with all the libraries and dependencies
they need as a single binary object that interfaces with a standard execution environment,
referred to as the \textit{container runtime}.
To be able to use this \executablething{}, you would need a computing resource with the appropriate
hardware and software environment. In this case, a computing resource with the \docker{} or \ocicontainer{}
runtime installed.
We could also create a \jupyternotebook{} that demonstrates how to use our \pythonprogram{}.
This is the third \executablething{} in our example.
One which provides an interactive environment for the user to experiment with.
As before, to be able to use this \executablething{}, we would need a computing resource with
the appropriate hardware and software environment.
In this case, a computer with the \jupyternotebook{} platform installed along with all the \python{} modules
needed by our \pythonprogram{}.
In the context of \scienceplatform{s} and \datascience{}, a common pattern is to provide this environment as a \webservice{}
that allows the user to interact with the \jupyternotebook{} via a \webbrowser.
From one algorithm that implements a science domain function, we have created three different \executablething{s}.
A \pythonprogram{}, a \dockercontainer{} packaging the \pythonprogram{}, and an interactive \jupyternotebook{}
that demonstrates how to use the \pythonprogram{}.
Each of which requires a different computing environment to execute.
A basic \python{} runtime, the \dockerruntime{}, and a \jupyternotebook{} service.
We may also want to consider the data that we are applying the algorithm to and the compute resources that
will be needed to process it.
If we are running some small experiments to learn how to use the algorithm, then a basic computing
resource will probably be sufficient.
However, if we have a \dataset{} of ten million numbers that we want to process, then we may
need to consider adding extra storage to handle the input data and the results.
For a large \dataset{} it may also be worth using a \gpu{} to accelerate the calculation.
The \ivoa{} \executionbroker{} \datamodel{} provides a way to describe what each of these \executablething{s}
are and what resources are needed to execute them.
This can include things like the number of \cpu{} cores and the amount of memory it needs,
whether it needs a \gpu{}, the location of the input data, the storage space needed to perform
the calculation, and the storage space needed to save the results.
\section{Service interaction}
\label{service-interaction}
The interaction between a user, the client application they are using, and the services available in the \vo{}
can be described as a conversation between the client and one or more \execbrokerservice{s} to discover
where, how, and when, an \executablething{} can be executed.
\subsection{Discovery services}
\label{discovery-services}
The conversation starts at the discovery stage, where the user uses discovery services to
select the software and \dataset{s} that they want to work with.
\includegraphics[width=0.9\textwidth]{diagrams/data-discovery.pdf}
The detailed specification for the software and data discovery services are beyond the
scope of this document. However we can outline some general requirements for them.
In both cases, the discovery process should not depend on the technical details
of the software or the \dataset{s}, but on their science domain functionality and properties.
From a science user's perspective they want to be able to find software that implements
a particular clustering algorithm, or a \dataset{} that is indexed according to a particular
coordinate system.
The programming language the software is written in and the file format of the \dataset{}
are at best secondary criteria.
In our square root example, we would expect our user to use search terms like \textit{'square root'}
or \textit{'newton raphson'} to find the software they need.
We wouldn't expect them to start out looking for a \textit{'python'} or \textit{'docker'} as their key search terms.
Ideally, if the \executionbroker{} service functions as intended, a science user should not
need to know about programming languages, software packaging or file formats.
The \executionbroker{} service should hide as much as possible of the technical details,
enabling the science user to get on with science.
Another important consideration is these discovery services should be designed to be domain agnostic.
Meaning that it should be possible to swap out an astronomy based discovery service
for an equivalent biochemistry discovery service and although the domain specific
terms and vocabulary will be different, the technical details of the service interfaces
should be the same.
\subsubsection{Software discovery}
\label{software-discovery}
There are three main components involved in software discovery, the metadata schema for
describing the software, one or more search services, and the
repositories where the \executablething{s} are stored.
The vocabularies and schema need to be based on use cases that start by describing what the
\scientist{} wants to do, and from that derive what software tools they would need, and what terms
they would naturally use to describe them.
The \ivoa{} semantics and data modelling working groups have a lot of experience developing
vocabularies and data models to describe \science{} data products, and are well placed
to develop the vocabularies needed to describe astronomy software.
It is important to keep in mind that the requirement is not to model the technical properties
of the software itself e.g. what programming language it is written in or who funded the development.
The important things to model are the search terms that a \scientist{} is most likely to use to try to
find the software they need.
The second component is a searchable database that accepts a list of search terms and responds with a
list of \metadoc{s} that describe \executablething{s} that match the criteria.
Before we look in detail at the content of the \metadoc{s} it is worth looking at where the \metadoc{s} are
stored in relation to the search service and the repositories where the \executablething{s} are stored.
In one scenario, all of the components can be co-located by the same service.
The database of search terms, the \metadoc{s}, and the binary files containing the \executablething{s}
can all be hosted by the same service implementation.
TODO diagram
An alternative implementation could store them at different locations, using
existing off-the-shelf software and services to host them.
There are a number of widely available content management systems, both commercial
and open source, that would be capable of implementing the database of search terms.
If the \metadoc{s} are stored in the same database, then the response from a
database search could contain the \metadoc{s} themselves.
TODO diagram
database, results, contain \metadoc{s} from database
Alternatively, the \metadoc{s} could be stored at a separate location,
in an online git repository for example,
and the database search response simply contains a list of URLs that
point to the individual \metadoc{s}.
TODO diagram
database, results, links to \metadoc{s} in external repositories
The third part of the set is the binary image of the \executablething{}.
In most cases it would probably make sense for the \metadoc{} to reference
the \executablething{} as a binary file stored in an external repository
rather than trying to include the \executablething{} as a binary blob in
the database.
TODO diagram
database, results, links to \metadoc{s} with links to images
The system can use standard cryptographic signatures and checksums to ensure the validity
of the \metadoc{s} and the binary images they refer to even when they are stored and accessed
via external third-party services.
In summary, there are two things that need to be standardised for a software discovery service:
\begin{itemize}
\item The inputs to the discovery service, including the metadata vocabularies
used to describe the software in terms that make sense to the \scientist{}
looking for them. For example what algorithm it implements, the type of input data it
operates on, and the type of results it generates.
\item The outputs of the discovery service, including the \metadoc{s} defined by this
specification, that describe the binary images that package the software
as \executablething{s}.
\end{itemize}
The other components in the software discovery stack, the database of search terms, and
storage and access services for the \metadoc{s} and binary images, do not need to be
standardised at this stage.
\subsubsection{Data discovery}
\label{data-discovery}
TODO : update this with reference to \ivoa{} data product type.
\url{https://www.ivoa.net/rdf/product-type/2024-05-19/product-type.html}
\subsection{Execution broker}
\label{execution-broker-intro}
\subsubsection{OfferSet request}
\label{offerset-request}
Once the user has selected the \executablething{} they want to use and the
data they want to apply it to, the client combines this information to create a
complete description of the \execsession{} the user wants to execute, including
details of the executable, the compute, storage, and data resources it needs,
and a schedule describing when the user wants it to run.
\begin{lstlisting}[]
# ExecutionBroker OfferSet request.
executable:
....
resources:
....
schedule:
....
\end{lstlisting}
The client sends the \metadoc{} description to one or more \execbrokerclass{}
services to ask if they can meet the requirements and execute the \execsession{}.
Each \execbrokerservice{} evaluates the request and responds with a top level
\codeword{YES|NO} answer, and if
the answer is \codeword{YES}, a list of one or more \execoffer{s} describing how
the requested \execsession{} could be executed on the platform(s) represented by
that \execbrokerservice{}.
%\begin{lstlisting}[]
%Request - Can this platform execute <task> ?
%Response - YES, list of <offer>[]
%\end{lstlisting}
\includegraphics[width=0.9\textwidth]{diagrams/request-offers.pdf}
\subsubsection{Offerset response}
\label{offerset-response}
Each \execbrokerservice{} will respond to a request for offers with an \execofferset{}
containing some metadata about the \execofferset{} itself, and a list of \execoffer{}s
describing how the requested \execsession{} could be executed.
Each \execoffer{} in the list contains some metadata about the \execoffer{} itself,
including its UUID identifier and expiry time, followed by details of how and when the
\execsession{} would be executed.
\begin{lstlisting}[]
# ExecutionBroker OfferSet response.
result: YES
....
offers:
- uuid: "2e164a1b-7ff6-11ef-8412-4bc36fe2face"
href: "http://..../sessions/2e164a1b-7ff6-11ef-8412-4bc36fe2face"
state: 'OFFERED'
expires: "2023-09-18T07:05:21"
....
executable:
....
resources:
....
schedule:
....
- uuid: "2e16bf4c-7ff6-11ef-8412-4bc36fe2face"
href: "http://..../sessions/2e16bf4c-7ff6-11ef-8412-4bc36fe2face"
state: 'OFFERED'
expires: "2023-09-18T07:05:21"
....
executable:
....
resources:
....
schedule:
....
\end{lstlisting}
The user can choose to accept one of the \execoffer{s} from the list that best
fits their requirements, or they can reject the \execoffer{s} and make a new
request with different criteria.
Each of the \execoffer{s} in the \execofferset{} represent a temporary reservation
for the resources listed in the \execoffer{}.
This means these resources will not be available to other users while the \execoffer{s}
are still valid.
If the user does nothing, then \codeword{state} of each of the \execoffer{s} will
automatically be updated to \codeword{EXPIRED}, and their associated resources will
be released, when their expiry time is reached.
If the user accepts one of the \execoffer{s} in the \execofferset{} by updating
the \codeword{state} to \codeword{ACCEPTED}, the \execbrokerservice{} SHOULD
update the \codeword{state} of the other \execoffer{s} in the \execofferset{} to
\codeword{REJECTED} and release the associated resources.
TODO [accept/reject/expire state-transition diagram ]
\subsubsection{Update options}
\label{update-options}
Each \execsession{} has a unique URL that the client can use to monitor and update
its state.
The \execbrokerservice{} response for an \execsession{} includes a list of options
that the user may use to update or modify the \execsession{}.
Which options are available will depend on the current \codeword{state} of the
\execsession{} and the identity and permissions of the authenticated user.
If the \execsession{} is still being offered, then the list of available options
allow the user to accept or reject the offer by updating the \codeword{state} of
the \execsession{} to \codeword{ACCEPTED} or \codeword{REJECTED}.
\begin{lstlisting}[]
uuid: "2e164a1b-7ff6-11ef-8412-4bc36fe2face"
href: "http://..../sessions/2e164a1b-7ff6-11ef-8412-4bc36fe2face"
state: 'OFFERED'
expires: "2023-09-18T07:05:21"
....
....
options:
- type: "urn:enum-value-option"
path: "state"
values:
- "ACCEPTED"
- "REJECTED"
\end{lstlisting}
Once the \execoffer{} has been accepted and the \execsession{} has started to
execute, then the list of available options will only allow the user to cancel
the execution.
\begin{lstlisting}[]
uuid: "2e164a1b-7ff6-11ef-8412-4bc36fe2face"
href: "http://..../sessions/2e164a1b-7ff6-11ef-8412-4bc36fe2face"
state: 'ACCEPTED'
expires: "2023-09-18T07:05:21"
....
....
options:
- type: "urn:enum-value-option"
path: "state"
values:
- "CANCELLED"
\end{lstlisting}
Using a dynamic list of options in this way enables the \execbrokerservice{}
to communicate to the client what actions the user is able to take
over the lifetime of an \execsession{}.
\subsection{Session lifecycle}
\label{session-lifecycle}
A \workerjob{} in an \execworkerclass{} service goes through the following stages in its lifecycle.
\begin{itemize}
\item \codeword{OFFERED} The \workerjob{} is being offered.
\item \codeword{ACCEPTED} The \workerjob{} has been accepted.
\item \codeword{REJECTED} The \workerjob{} offer has been rejected.
\item \codeword{EXPIRED} The \workerjob{} offer has expired.
\item \codeword{WAITING} The \workerjob{} is waiting to activate.
\item \codeword{PREPARING} The resources are being prepared.
\item \codeword{READY} The \workerjob{} is ready to execute.
\item \codeword{RUNNING} The \workerjob{} is executing.
\item \codeword{RELEASING} The resources are being released.
\item \codeword{COMPLETED} The \workerjob{} has completed.
\item \codeword{CANCELLED} The \workerjob{} has been cancelled.
\item \codeword{FAILED} The \workerjob{} has failed.
\end{itemize}
When a \execbrokerclass{} creates a \workerjob{} in an \execworkerclass{} service the
\workerjob{} starts with the \codeword{phase} set to \codeword{WAITING}.
It is up to the \execworkerclass{} to select the right time to change the \workerjob{}
\codeword{phase} from \codeword{WAITING} to \codeword{PREPARING} and begin preparing the resources so that
the \workerjob{} is \codeword{READY} in time for the \codeword{starttime} declared
in the \execoffer{}.
If it will take 2 hours to transfer the data resources
from archive storage to live storage co-located with the compute resources,
then the \execworkerclass{} needs to start the \codeword{PREPARING} phase at least 2 hours
before the \codeword{starttime} declared in the \execoffer{}.
Once all the resources are ready, the \execworkerclass{} changes the \workerjob{}
\codeword{phase} to \codeword{READY} to indicate that all the resources
are ready and the \workerjob{} is waiting to start.
The \execworkerclass{} will then wait until the \codeword{starttime} declared in the \execoffer{}
at which point it will start executing the \workerjob{} and change the \workerjob{} \codeword{phase}
to \codeword{RUNNING}.
When the \workerjob{} finishes executing, because the \dockercontainer{} finished executing,
the user closed their \jupyternotebook{}, or the \codeword{maxduration} was reached,
the \execworkerclass{} will change the \workerjob{} \codeword{phase} to \codeword{RELEASING} and
begin the process of releasing the resources.
If the \workerjob{} includes some persistent storage that should last beyond the end of the \workerjob{},
then part of the \teardown{} process may involve transferring results from the \workerjob{}
onto the persistent storage before the local storage is released.
When the \teardown{} process completes, the \workerjob{} \codeword{phase} is changed to \codeword{COMPLETED}.
If an error occurs at any time in the process, the \workerjob{} \codeword{phase} is changed to \codeword{FAILED}.
This includes any errors that occur during the \teardown{} process; for example, because
the \execworkerclass{} was unable to transfer the results onto persistent storage.
Then the \workerjob{} \codeword{phase} is changed to \codeword{FAILED}, even if the main part of the
execution completed successfully.
This is because any workflow steps that follow this step will depend not only on the execution being
completed, but they also need the \teardown{} data transfers to complete so that the results from this step
are in the right place for the next step to be able to access them.
\section{The data model}
\label{data-model}
\subsection{Data curation roles}
\label{metadata-roles}
The full description of an \executablething{} will include several layers of metadata
provided by different actors playing different roles within the publishing process.
For our square root example we can identify a number of roles that would each provide
layers of the picture needed to fully describe an \executablething{}.
The players:
\begin{itemize}
\item The developer - The person who wrote the \pythonprogram{}
\item The packager - The person who packaged it in a \dockercontainer{}
\item The publisher - The person who published it in a discovery service
\item The user agent - The person who wants to use the software
\end{itemize}
\subsubsection{The developer}
\label{software-developer}
The first layer of metadata comes from the person who wrote the \pythonprogram{}.
They have detailed knowledge of what the software does, what execution environment it needs,
and what the inputs and outputs are.
For the square root example, it is a \pythonprogram{} which needs a platform with the \python{} runtime installed,
and a list of the \python{} libraries that the program relies on.
\begin{lstlisting}[]
executable:
type: uri:python-program
requirements:
    - numpy: ""
- astropy: ">= 6.1"
\end{lstlisting}
The developer also understands how much memory their program needs, whether it can make use of multiple cpu cores,
and whether it can make use of a \gpu{} accelerator.
\begin{lstlisting}[]
resources:
compute:
- type: uri:generic-compute
cores:
requested:
min: 4
memory:
requested:
min: 16
units: GiB
....
\end{lstlisting}
The developers also know about what inputs and outputs the program expects and what file
formats it can handle.
\begin{lstlisting}[]
executable:
type: uri:python-program
....
parameters:
- type: uri:param-file
name: "input data"
mode: readonly
description:
A table containing a list of numbers to be processed, formatted as
comma separated text (CSV) or an IVOA VOTable.
formats:
- type: uri:text-csv
....
- type: uri:votable
....
- type: uri:param-value
name: "input column name"
type: string
description:
The column name within the 'input data' to use.
\end{lstlisting}
\subsubsection{The packager}
\label{software-packager}
Although it is possible to publish our square root example as a stand-alone \pythonprogram{},
it is not easy to describe the installation process in sufficient detail for it to be
automatically deployed on a range of different platforms.
A more portable solution would be to package the \pythonprogram{} in a \dockercontainer{},
installing and configuring the software along with all of its dependencies inside the container.
This is often done as part of the software development process, but it is a separate
step that could be implemented by a different person.
To make this distinction clear we can refer to this person, or role, as 'the packager'.
In terms of the \metadoc{}, the packager changes the description of the \executablething{}
from a \pythonprogram{} to a \dockercontainer{}.
\begin{lstlisting}[]
executable:
type: uri:docker-container
repository: ghcr.io
image: ivoa/calycopis/java-builder
tag: 2024.08.30
....
\end{lstlisting}
Depending on how the software is packaged in the container they may also need to update
the description of the inputs and outputs,
and link them to specific locations in the filesystem.
\begin{lstlisting}[]
executable:
type: uri:docker-container
....
parameters:
- type: uri:data-file
name: "input data"
format:
- type: urn:ivoa-votable
filename: input-data.vot
....
resources:
compute:
- type: uri:generic-compute
volumes:
- type: uri:file-mount
parameter: "input data"
filepath: /data
mode: readonly
....
\end{lstlisting}
\subsubsection{The publisher}
\label{metadata-publisher}
This role represents the person who publishes metadata about the software in a discovery service.
Typically changes made at this level would include adding to the description
of what the software does, placing it in a particular context to aid discovery,
or modifying the execution environment to configure the software for a particular domain.
For example, this might include:
\begin{itemize}
\item A project specific discovery service that only includes software vetted by the project.
Execution platforms within the project would only accept curated \metadoc{s}
from that discovery service.
\item A domain specific discovery service that modifies the execution environment, optimising
the software for analysing a particular type of data.
\item A catalog of \metadoc{s} maintained as part of a university teaching course, modifying the
execution environment to integrate the software into the university system and setting
parameters to configure the software to match the course notes.
\end{itemize}
\subsubsection{The user}
\label{software-user}
The user, or the user's client agent, starts with an initial \metadoc{} from the
software discovery service and adds additional information describing how the user
wants to use the software.
Adding details of the data resources the user wants to use enables the \execbrokerservice{}
to transfer the data to local storage before the \execsession{} is started.
Including a value for the filesize enables the \execbrokerservice{} to estimate
how much local storage it will need to allocate
and how much time will be needed to transfer the data.
The \execbrokerservice{} can take this into account when calculating the start time of
the \execoffer{s} it makes, allowing enough time for the data transfers to complete
before the \execsession{} starts.
\begin{lstlisting}[]
resources:
data:
- type: uri:simple-data-resource
name: "input data"
    location: http://data.example.org/....
filesize:
value: 145
units: MiB
....
\end{lstlisting}
Linking the data resources with volumes on the corresponding compute resources enables
the \execbrokerservice{} to mount the data resources at the correct location in
the compute resource's filesystem.
\begin{lstlisting}[]
resources:
data:
- type: uri:simple-data-resource
name: input-data
....
compute:
- type: uri:generic-compute
....
volumes:
- resource: input-data
path: /data
mode: "ro"
\end{lstlisting}
The user can also update the compute resource requirements to reflect how they plan
to use the software.
In the case of our square root example, the compute resource requirements set by the
developers will reflect the original intent of simply demonstrating how to use the
\pythonprogram{}.
However, if the user intends to use the software to analyse a much larger \dataset{}
they can update the compute resource requirements to match their use case.
\begin{lstlisting}[]
resources:
compute:
- type: uri:generic-compute
cores:
requested:
# min: 4
min: 32
memory:
requested:
# min: 16
min: 128
units: GiB
\end{lstlisting}
TODO user provides the schedule ... when they want to run it.
\subsection{The \executable{}}
\label{executable}
At the simplest level the client just needs to check whether a platform is able to execute a particular
type of \executabletask{}.
For example, \textit{"Is this platform able to run a \jupyternotebook{}?"}
In order to do this, the request needs to specify the task type, e.g. \jupyternotebook{},
along with details about it, e.g. where to fetch the notebook from.
The information in this part of the \datamodel{} will be different for each type of \executable{}.
Rather than try to model every possible type of \executable{} in one large \datamodel{},
the \datamodel{} for each type is described in an extension to the core \datamodel{}.
To support this, the core \datamodel{} defines two fields:
\begin{itemize}
\item \codeword{type} - a URI identifying the type of \executable{}.
\item \codeword{spec} - a placeholder for type-specific details.
\end{itemize}
% Type URLs
% https://www.purl.org/ivoa.net/executable-types/example
% https://github.com/ivoa-std/ExecutionBroker/blob/main/types/executable-types/example-executable.md
\begin{lstlisting}[]
# ExecutionBroker client request.
request:
# Details of the executable.
executable:
# A URI identifying the type of executable.
type: "https://www.purl.org/ivoa.net/executable-types/example"
# The details, specific to the type of executable.
spec: {}
\end{lstlisting}
\subsubsection{\jupyternotebook{}}
\label{jupyternotebook}
The \datamodel{} for each type of \executable{} defines the metadata needed to
describe that particular type.
For example, the \datamodel{} for a \jupyternotebook{} needs to describe where
to fetch the source code for the notebook from.
% Type URLs
% https://www.purl.org/ivoa.net/executable-types/jupyter-notebook
% https://github.com/ivoa-std/ExecutionBroker/blob/main/types/executable-types/jupyter-notebook.md
\begin{lstlisting}[]
# ExecutionBroker client request.
request:
# Details of the executable.
executable:
# A URI identifying the type of executable.
type: "https://www.purl.org/ivoa.net/executable-types/jupyter-notebook"
# The details, specific to a Jupyter notebook.
spec:
    notebook: "https://.../example.ipynb"
\end{lstlisting}
It may also include a reference to a \codeword{requirements.txt} file that describes any additional \python{}
libraries needed to run the notebook.
\begin{lstlisting}[]
# ExecutionBroker client request.
request:
# Details of the executable.
executable:
# A URI identifying the type of executable.
type: "https://www.purl.org/ivoa.net/executable-types/jupyter-notebook"
# The details, specific to a Jupyter notebook.