From d0ed2274ad4899a664b2c3dd5d7aeb9924a77ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Garc=C3=ADa=20Crespo?= Date: Tue, 9 Jul 2024 09:43:27 +0000 Subject: [PATCH] Start validation work in shim API --- hack/ccp/Makefile | 2 +- hack/ccp/go.mod | 3 + hack/ccp/go.sum | 12 +- hack/ccp/internal/cmd/servercmd/server.go | 2 +- hack/ccp/internal/shim/gen/shim.go | 16 +- hack/ccp/internal/shim/middleware.go | 53 ++++ hack/ccp/internal/shim/shim.go | 32 ++- hack/ccp/internal/shim/shim_test.go | 170 ++++++++++-- .../internal/shim/testdata/invalid_avalon.csv | 5 + .../internal/shim/testdata/invalid_rights.csv | 3 + .../internal/shim/testdata/valid_avalon.csv | 5 + .../internal/shim/testdata/valid_rights.csv | 3 + hack/ccp/internal/shim/validation.go | 244 ++++++++++++++++++ 13 files changed, 525 insertions(+), 25 deletions(-) create mode 100644 hack/ccp/internal/shim/middleware.go create mode 100644 hack/ccp/internal/shim/testdata/invalid_avalon.csv create mode 100644 hack/ccp/internal/shim/testdata/invalid_rights.csv create mode 100644 hack/ccp/internal/shim/testdata/valid_avalon.csv create mode 100644 hack/ccp/internal/shim/testdata/valid_rights.csv create mode 100644 hack/ccp/internal/shim/validation.go diff --git a/hack/ccp/Makefile b/hack/ccp/Makefile index 07c1f9c9b..29003a83f 100644 --- a/hack/ccp/Makefile +++ b/hack/ccp/Makefile @@ -143,7 +143,7 @@ gen-web: npm --prefix=$(CURDIR)/web run build gen-shim: # @HELP Generate Go server boilerplate from OpenAPIv3 spec. -gen-shim: SPEC_COMMIT := b0b9af20 +gen-shim: SPEC_COMMIT := 65e58266 gen-shim: SPEC_URL := https://raw.githubusercontent.com/artefactual-labs/archivematica-api-specification/$(SPEC_COMMIT)/typespec/tsp-output/@typespec/openapi3/openapi.v1.yaml gen-shim: $(OAPI_CODEGEN) @echo "Downloading Archivematica API specification ($(SPEC_COMMIT))..." diff --git a/hack/ccp/go.mod b/hack/ccp/go.mod index 6f28a3ee2..881836cef 100644 --- a/hack/ccp/go.mod +++ b/hack/ccp/go.mod @@ -24,6 +24,7 @@ require ( github.com/gorilla/handlers v1.5.2 github.com/gorilla/mux v1.8.1 github.com/hashicorp/go-retryablehttp v0.7.7 + github.com/ikawaha/httpcheck v1.12.3 github.com/jellydator/ttlcache/v3 v3.2.0 github.com/mikespook/gearman-go v0.0.0-20220520031403-2a518e866145 github.com/oapi-codegen/runtime v1.1.1 @@ -83,6 +84,8 @@ require ( github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/iancoleman/orderedmap v0.2.0 // indirect github.com/invopop/yaml v0.2.0 // indirect + github.com/itchyny/gojq v0.12.16 // indirect + github.com/itchyny/timefmt-go v0.1.6 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/klauspost/compress v1.16.7 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect diff --git a/hack/ccp/go.sum b/hack/ccp/go.sum index b67a74626..fa362d25a 100644 --- a/hack/ccp/go.sum +++ b/hack/ccp/go.sum @@ -218,10 +218,16 @@ github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/iancoleman/orderedmap v0.2.0 h1:sq1N/TFpYH++aViPcaKjys3bDClUEU7s5B+z6jq8pNA= github.com/iancoleman/orderedmap v0.2.0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA= +github.com/ikawaha/httpcheck v1.12.3 h1:OCDe3yCTZWKhubT5rNOef26ulykUxQRx/GqSGm5lYMs= +github.com/ikawaha/httpcheck v1.12.3/go.mod h1:K+Oyw7JUcLcRGOvIj2kTEzwPpxMqpXaafHiKb7WsJ4Q= github.com/invopop/yaml v0.2.0 h1:7zky/qH+O0DwAyoobXUqvVBwgBFRxKoQ/3FjcVpjTMY= github.com/invopop/yaml v0.2.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps= +github.com/itchyny/gojq v0.12.16 h1:yLfgLxhIr/6sJNVmYfQjTIv0jGctu6/DgDoivmxTr7g= +github.com/itchyny/gojq v0.12.16/go.mod h1:6abHbdC2uB9ogMS38XsErnfqJ94UlngIJGlRAIj4jTM= +github.com/itchyny/timefmt-go v0.1.6 h1:ia3s54iciXDdzWzwaVKXZPbiXzxxnv1SPGFfM/myJ5Q= +github.com/itchyny/timefmt-go v0.1.6/go.mod h1:RRDZYC5s9ErkjQvTvvU7keJjxUYzIISJGxm9/mAERQg= github.com/jdkato/prose v1.2.1 h1:Fp3UnJmLVISmlc57BgKUzdjr0lOtjqTZicL3PaYy6cU= github.com/jdkato/prose v1.2.1/go.mod h1:AiRHgVagnEx2JbQRQowVBKjG0bcs/vtkGCH1dYAL1rA= github.com/jellydator/ttlcache/v3 v3.2.0 h1:6lqVJ8X3ZaUwvzENqPAobDsXNExfUJd61u++uW8a3LE= @@ -259,8 +265,8 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= -github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/microsoft/kiota-abstractions-go v1.6.0 h1:qbGBNMU0/o5myKbikCBXJFohVCFrrpx2cO15Rta2WyA= github.com/microsoft/kiota-abstractions-go v1.6.0/go.mod h1:7YH20ZbRWXGfHSSvdHkdztzgCB9mRdtFx13+hrYIEpo= @@ -340,6 +346,8 @@ github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSz github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= diff --git a/hack/ccp/internal/cmd/servercmd/server.go b/hack/ccp/internal/cmd/servercmd/server.go index 9ec639dd5..ccf2b926a 100644 --- a/hack/ccp/internal/cmd/servercmd/server.go +++ b/hack/ccp/internal/cmd/servercmd/server.go @@ -156,7 +156,7 @@ func (s *Server) Run() error { if s.config.shim.Enabled { s.logger.V(1).Info("Creating Archivematica API shim.") - s.shim = shim.NewServer(s.logger.WithName("shim"), s.config.shim) + s.shim = shim.NewServer(s.logger.WithName("shim"), s.config.shim, s.store) if err := s.shim.Run(); err != nil { return fmt.Errorf("error creating shim API: %v", err) } diff --git a/hack/ccp/internal/shim/gen/shim.go b/hack/ccp/internal/shim/gen/shim.go index 1451fef63..feac18ae0 100644 --- a/hack/ccp/internal/shim/gen/shim.go +++ b/hack/ccp/internal/shim/gen/shim.go @@ -2324,6 +2324,18 @@ func (response ValidateCreate400JSONResponse) VisitValidateCreateResponse(w http return json.NewEncoder(w).Encode(response.union) } +type ValidateCreate404JSONResponse struct { + Error bool `json:"error"` + Message string `json:"message"` +} + +func (response ValidateCreate404JSONResponse) VisitValidateCreateResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + // StrictServerInterface represents all server handlers. type StrictServerInterface interface { @@ -3252,8 +3264,8 @@ var swaggerSpec = []string{ "r5ZwbfxaOWHK7I04lDNEXB4rHZYS+VARbhL5IL+WQ2rpKrWDdd2b7M2TK6l6n+fpnVTLelObXmCFTkcf", "JCJTQplUCEIXfG0WJCiTQRFxALJ7x+7YTZ5ZmZMgBhSSBXIBcaZjjsEd66A/+cOfAxsgx25IlbYdfnJF", "F6JmHPW7BtIlfrrDxiHJtstqkeKf+YDNupGRZWcVqb/8K9GbOjdkurs8Of9fHR8KCer/YjXp/LQqqtkd", - "WS5lRCzK6fAXTla2unHt2RW43X2Kbe+207Ft0zz234RY3j+f0hZuJzO7Kt5L9sf98n757wAAAP//CLMK", - "0RdjAAA=", + "WS5lRCzK6fAXTla2unHt2RW43X2Kbe+207Ft0zz234RY3j9bqWxPLXmvx361vG6kcCmbYWbxOrY/7pf3", + "y38HAAD//50zbJkOZAAA", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/hack/ccp/internal/shim/middleware.go b/hack/ccp/internal/shim/middleware.go new file mode 100644 index 000000000..1764e06b2 --- /dev/null +++ b/hack/ccp/internal/shim/middleware.go @@ -0,0 +1,53 @@ +package shim + +import ( + "context" + "net/http" + + "github.com/artefactual/archivematica/hack/ccp/internal/shim/gen" + "github.com/artefactual/archivematica/hack/ccp/internal/store" +) + +const ( + archivematicaVersion = "dev" +) + +func infoMiddleware(store store.Store) func(next gen.StrictHandlerFunc, _ string) gen.StrictHandlerFunc { + pipelineID := "" + + return func(next gen.StrictHandlerFunc, _ string) gen.StrictHandlerFunc { + return func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (response interface{}, err error) { + if pipelineID == "" { + if id, err := store.ReadPipelineID(ctx); err != nil { + return nil, err + } else { + pipelineID = id.String() + } + } + w.Header().Set("X-Archivematica-Version", archivematicaVersion) + w.Header().Set("x-Archivematica-ID", pipelineID) + return next(ctx, w, r, request) + } + } +} + +type contextKey string + +const ( + requestContextKey contextKey = "requestObject" +) + +func contextMiddleware(next gen.StrictHandlerFunc, _ string) gen.StrictHandlerFunc { + return func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (response interface{}, err error) { + ctx = context.WithValue(ctx, requestContextKey, r) + return next(ctx, w, r, request) + } +} + +func requestFromContext(ctx context.Context) *http.Request { + if req, ok := ctx.Value(requestContextKey).(*http.Request); ok { + return req + } + + return nil +} diff --git a/hack/ccp/internal/shim/shim.go b/hack/ccp/internal/shim/shim.go index 60f82d09c..1ccbfa437 100644 --- a/hack/ccp/internal/shim/shim.go +++ b/hack/ccp/internal/shim/shim.go @@ -9,27 +9,35 @@ import ( "github.com/go-logr/logr" "github.com/artefactual/archivematica/hack/ccp/internal/shim/gen" + "github.com/artefactual/archivematica/hack/ccp/internal/store" ) type Server struct { logger logr.Logger config Config + store store.Store server *http.Server ln net.Listener } var _ gen.StrictServerInterface = (*Server)(nil) -func NewServer(logger logr.Logger, config Config) *Server { +func NewServer(logger logr.Logger, config Config, store store.Store) *Server { return &Server{ logger: logger, config: config, + store: store, } } func (s *Server) Run() error { + middleware := []gen.StrictMiddlewareFunc{ + infoMiddleware(s.store), + contextMiddleware, + } + s.server = &http.Server{ - Handler: gen.Handler(gen.NewStrictHandler(s, []gen.StrictMiddlewareFunc{})), + Handler: gen.Handler(gen.NewStrictHandler(s, middleware)), ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Minute, WriteTimeout: 5 * time.Minute, @@ -167,7 +175,25 @@ func (s *Server) TasksRead(ctx context.Context, request gen.TasksReadRequestObje } func (s *Server) ValidateCreate(ctx context.Context, request gen.ValidateCreateRequestObject) (gen.ValidateCreateResponseObject, error) { - return nil, nil + validator, err := loadValidator(request.Validator) + if err != nil { + return gen.ValidateCreate404JSONResponse{ + Error: true, + Message: err.Error(), + }, nil + } + + if err := validateContentType(requestFromContext(ctx)); err != nil { + return nil, err + } + + err = validator.validate(request.Body) + if err != nil { + // TODO: gen.ValidateCreate400JSONResponse doesn't seem workable atm. + return nil, err + } + + return gen.ValidateCreate200JSONResponse{Valid: true}, nil } func (s *Server) Close(ctx context.Context) error { diff --git a/hack/ccp/internal/shim/shim_test.go b/hack/ccp/internal/shim/shim_test.go index 530919beb..c35362325 100644 --- a/hack/ccp/internal/shim/shim_test.go +++ b/hack/ccp/internal/shim/shim_test.go @@ -4,36 +4,174 @@ import ( "context" "fmt" "net/http" + "os" + "path/filepath" "testing" "github.com/go-logr/logr" + "github.com/google/uuid" + "github.com/ikawaha/httpcheck" + "go.artefactual.dev/tools/mockutil" + "go.uber.org/mock/gomock" "gotest.tools/v3/assert" "github.com/artefactual/archivematica/hack/ccp/internal/shim" + "github.com/artefactual/archivematica/hack/ccp/internal/store/storemock" ) -func TestShim(t *testing.T) { - t.Parallel() +func setUpShimServer(t *testing.T) *httpcheck.Checker { + t.Helper() - srv := shim.NewServer(logr.Discard(), shim.Config{Addr: ":0"}) + store := storemock.NewMockStore(gomock.NewController(t)) + store.EXPECT().ReadPipelineID(mockutil.Context()).Return(uuid.MustParse("9db764ac-84da-4c5f-a90d-872d4be54c3f"), nil).AnyTimes() + srv := shim.NewServer(logr.Discard(), shim.Config{Addr: ":0"}, store) err := srv.Run() assert.NilError(t, err) + t.Cleanup(func() { srv.Close(context.Background()) }) - // Returns 200. - url := fmt.Sprintf("http://%s/api/administration/dips/atom/fetch_levels", srv.Addr()) - req, err := http.NewRequest(http.MethodGet, url, nil) - assert.NilError(t, err) - resp, err := http.DefaultClient.Do(req) - assert.NilError(t, err) - assert.Equal(t, resp.StatusCode, http.StatusOK) + return httpcheck.NewExternal(fmt.Sprintf("http://%s", srv.Addr())) +} - // Returns 404. - url = fmt.Sprintf("http://%s/api/NOTFOUND", srv.Addr()) - req, err = http.NewRequest(http.MethodGet, url, nil) - assert.NilError(t, err) - resp, err = http.DefaultClient.Do(req) +func readFile(t *testing.T, filename string) []byte { + t.Helper() + + blob, err := os.ReadFile(filepath.Join("testdata", filename)) assert.NilError(t, err) - assert.Equal(t, resp.StatusCode, http.StatusNotFound) + + return blob +} + +func TestShim(t *testing.T) { + t.Parallel() + + t.Run("Includes ID and Version headers", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodGet, "/api/ingest/completed"). + Check(). + HasStatus(http.StatusOK). + HasHeaders(map[string]string{ + "x-archivematica-version": "dev", + "x-archivematica-id": "9db764ac-84da-4c5f-a90d-872d4be54c3f", + }) + }) + + t.Run("Returns 404 if resource not found", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodGet, "/api/v0"). + Check(). + HasStatus(http.StatusNotFound) + }) + + t.Run("Returns 405 if method", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodGet, "/api/v2beta/validate/rights"). + Check(). + HasStatus(http.StatusMethodNotAllowed) + }) +} + +func TestShimAdministrationFetchLevelsOfDescription(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodGet, "/api/administration/dips/atom/fetch_levels"). + Check(). + HasStatus(http.StatusOK) +} + +func TestShimValidateCreate(t *testing.T) { + t.Parallel() + + t.Run("Validates Avalon CSV", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodPost, "/api/v2beta/validate/avalon"). + WithHeader("content-type", "text/csv; charset=utf-8"). + WithBody(readFile(t, "valid_avalon.csv")). + Check(). + HasStatus(http.StatusOK). + HasJSON( + map[string]any{ + "valid": true, + }, + ) + }) + + t.Run("Returns error during validation of Avalon CSV", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodPost, "/api/v2beta/validate/avalon"). + WithHeader("content-type", "text/csv; charset=utf-8"). + WithBody(readFile(t, "invalid_avalon.csv")). + Check(). + // TODO: should return JSON-encoded response with status code 400. + HasStatus(http.StatusInternalServerError). + HasString("manifest includes invalid metadata field: Bibliographic ID Lbl\n") + }) + + t.Run("Validates Rights CSV", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodPost, "/api/v2beta/validate/rights"). + WithHeader("content-type", "text/csv; charset=utf-8"). + WithBody(readFile(t, "valid_rights.csv")). + Check(). + HasStatus(http.StatusOK). + HasJSON( + map[string]any{ + "valid": true, + }, + ) + }) + + t.Run("Returns error during validation of Rights CSV", func(t *testing.T) { + t.Parallel() + + t.Skip("TODO") + }) + + t.Run("Fails is the validator is unknown", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodPost, "/api/v2beta/validate/unknown"). + Check(). + HasStatus(http.StatusNotFound). + HasJSON( + map[string]any{ + "error": true, + "message": "unknown validator, accepted values: avalon, rights", + }, + ) + }) + + t.Run("Fails if the content type is not the expected", func(t *testing.T) { + t.Parallel() + + c := setUpShimServer(t) + + c.Test(t, http.MethodPost, "/api/v2beta/validate/avalon"). + WithBody(readFile(t, "valid_avalon.csv")). + Check(). + HasStatus(http.StatusInternalServerError) // TODO: should be 400 + }) } diff --git a/hack/ccp/internal/shim/testdata/invalid_avalon.csv b/hack/ccp/internal/shim/testdata/invalid_avalon.csv new file mode 100644 index 000000000..2bc91368f --- /dev/null +++ b/hack/ccp/internal/shim/testdata/invalid_avalon.csv @@ -0,0 +1,5 @@ +Avalon Demo Batch,archivist1@example.com,,,,,,,,,,,,,,,,,,,,,, +Bibliographic ID,Bibliographic ID Lbl,Title,Creator,Contributor,Contributor,Contributor,Contributor,Contributor,Publisher,Date Created,Date Issued,Abstract,Topical Subject,Topical Subject,Publish,File,Skip Transcoding,Label,File,Skip Transcoding,Label,Note Type,Note +,,Symphony no. 3,"Mahler, Gustav, 1860-1911",,,,,,,,1996,,,,Yes,assets/agz3068a.wav,no,CD 1,,,,local,This was batch ingested without skip transcoding +,,Féte (Excerpt),"Langlais, Jean, 1907-1991","Young, Christopher C. (Christopher Clark)",,,,,William and Gayle Cook Music Library,,2010,"Recorded on May 2, 2010, Auer Concert Hall, Indiana University, Bloomington.",Organ music,,Yes,assets/OrganClip.mp4,yes,,,,,local,This was batch ingested with multiple quality level skip transcoding +,,Beginning Responsibility: Lunchroom Manners,Coronet Films,,,,,,Coronet Films,,1959,"The rude, clumsy puppet Mr. Bungle shows kids how to behave in the school cafeteria - the assumption being that kids actually want to behave during lunch. This film has a cult following since it appeared on a Pee Wee Herman HBO special.",Social engineering,Puppet theater,Yes,assets/lunchroom_manners_512kb.mp4,yes,Lunchroom 1,assets/lunchroom_manners_512kb.mp4,yes,Lunchroom Again,local,This was batch ingested with skip transcoding and with structure diff --git a/hack/ccp/internal/shim/testdata/invalid_rights.csv b/hack/ccp/internal/shim/testdata/invalid_rights.csv new file mode 100644 index 000000000..01a502217 --- /dev/null +++ b/hack/ccp/internal/shim/testdata/invalid_rights.csv @@ -0,0 +1,3 @@ +file,basis,status,determination_date,jurisdiction,start_date,end_date,terms,citation,note,grant_act,grant_restriction,grant_start_date,grant_end_date,grant_note,doc_id_type,doc_id_value,doc_id_role +objects/8e758e7545212966d0256a6ac70d81db6a6d6a6d_008.tif,copyright,copyrighted,2013-08-03,us,1964-01-01,2084-01-01,,,Work for hire - copyright term 120 years from date of creation. Copyright held by the Village Green Preservation Society.,,,,,,,, +objects/8e758e7545212966d0256a6ac70d81db6a6d6a6d_008.tif,policy,,,,1974-01-01,open,,,Village Green Preservation Society records are open.,disseminate,,2014-01-01,open,,,, diff --git a/hack/ccp/internal/shim/testdata/valid_avalon.csv b/hack/ccp/internal/shim/testdata/valid_avalon.csv new file mode 100644 index 000000000..b68d7080d --- /dev/null +++ b/hack/ccp/internal/shim/testdata/valid_avalon.csv @@ -0,0 +1,5 @@ +Avalon Demo Batch,archivist1@example.com,,,,,,,,,,,,,,,,,,,,,, +Bibliographic ID,Bibliographic ID Label,Title,Creator,Contributor,Contributor,Contributor,Contributor,Contributor,Publisher,Date Created,Date Issued,Abstract,Topical Subject,Topical Subject,Publish,File,Skip Transcoding,Label,File,Skip Transcoding,Label,Note Type,Note +,,Symphony no. 3,"Mahler, Gustav, 1860-1911",,,,,,,,1996,,,,yes,assets/agz3068a.wav,no,CD 1,,,,local,This was batch ingested without skip transcoding +,,Féte (Excerpt),"Langlais, Jean, 1907-1991","Young, Christopher C. (Christopher Clark)",,,,,William and Gayle Cook Music Library,,2010,"Recorded on May 2, 2010, Auer Concert Hall, Indiana University, Bloomington.",Organ music,,yes,assets/OrganClip.mp4,yes,,,,,local,This was batch ingested with multiple quality level skip transcoding +,,Beginning Responsibility: Lunchroom Manners,Coronet Films,,,,,,Coronet Films,,1959,"The rude, clumsy puppet Mr. Bungle shows kids how to behave in the school cafeteria - the assumption being that kids actually want to behave during lunch. This film has a cult following since it appeared on a Pee Wee Herman HBO special.",Social engineering,Puppet theater,yes,assets/lunchroom_manners_512kb.high.mp4,yes,Lunchroom 1,assets/lunchroom_manners_512kb.mp4,yes,Lunchroom Again,local,This was batch ingested with skip transcoding and with structure diff --git a/hack/ccp/internal/shim/testdata/valid_rights.csv b/hack/ccp/internal/shim/testdata/valid_rights.csv new file mode 100644 index 000000000..bcedf0253 --- /dev/null +++ b/hack/ccp/internal/shim/testdata/valid_rights.csv @@ -0,0 +1,3 @@ +file,basis,status,determination_date,jurisdiction,start_date,end_date,terms,citation,note,grant_act,grant_restriction,grant_start_date,grant_end_date,grant_note,doc_id_type,doc_id_value,doc_id_role +objects/45212966d0256a6ac70d81db_008.tif,copyright,copyrighted,2013-08-03,us,1964-01-01,2084-01-01,,,Work for hire - copyright term 120 years from date of creation. Copyright held by the Village Green Preservation Society.,,,,,,,, +objects/45212966d0256a6ac70d81db_008.tif,policy,,,,1974-01-01,open,,,Village Green Preservation Society records are open.,disseminate,allow,2014-01-01,open,,,, diff --git a/hack/ccp/internal/shim/validation.go b/hack/ccp/internal/shim/validation.go new file mode 100644 index 000000000..19d7e19d4 --- /dev/null +++ b/hack/ccp/internal/shim/validation.go @@ -0,0 +1,244 @@ +package shim + +import ( + "encoding/csv" + "fmt" + "io" + "mime" + "net/http" + "strings" + + "github.com/artefactual/archivematica/hack/ccp/internal/shim/gen" +) + +type validationError struct{} //nolint: unused + +func (err *validationError) Error() string { //nolint: unused + return "validation error" +} + +var validators = map[gen.V2BetaValidateValidator]validator{ + gen.V2BetaValidateValidatorAvalon: avalonValidator{}, + gen.V2BetaValidateValidatorRights: rightsValidator{}, +} + +var acceptedValidators = strings.Join([]string{ + string(gen.V2BetaValidateValidatorAvalon), + string(gen.V2BetaValidateValidatorRights), +}, ", ") + +func validateContentType(req *http.Request) error { + contentType := req.Header.Get("Content-Type") + if contentType == "" { + return fmt.Errorf("header Content-Type is missing") + } + + mimeType, params, err := mime.ParseMediaType(contentType) + if err != nil { + return fmt.Errorf("invalid Content-Type header: %v", err) + } + + if mimeType != "text/csv" || params["charset"] != "utf-8" { + return fmt.Errorf("content type should be \"text/csv; charset=utf-8\"") + } + + return nil +} + +func loadValidator(name gen.V2BetaValidateValidator) (validator, error) { + validator, ok := validators[name] + if !ok { + return nil, fmt.Errorf("unknown validator, accepted values: %s", acceptedValidators) + } + + return validator, nil +} + +type validator interface { + validate(r io.Reader) error +} + +type avalonValidator struct{} + +var _ validator = (*avalonValidator)(nil) + +func (v avalonValidator) validate(r io.Reader) error { + cr := csv.NewReader(r) + cr.Comma = ',' + + if adminData, err := cr.Read(); err != nil { + return err + } else if err := v.checkAdminData(adminData); err != nil { + return err + } + + var fileCols, opCols []int + if headerData, err := cr.Read(); err != nil { + return err + } else if err := v.checkHeaderData(headerData); err != nil { + return err + } else { + fileCols = v.fileColumns(headerData) + opCols = v.opColumns(headerData) + if err := v.checkFieldPairs(headerData); err != nil { + return err + } + } + + for { + row, err := cr.Read() + if err == io.EOF { + break + } + if err != nil { + return err + } + if err := v.checkFileExts(row, fileCols); err != nil { + return err + } + if err := v.checkOpFields(row, opCols); err != nil { + return err + } + } + + return nil +} + +func (v avalonValidator) checkAdminData(row []string) error { + if len(row) < 2 || row[0] == "" || row[1] == "" { + return fmt.Errorf("administrative data must include reference name and author") + } + return nil +} + +func (v avalonValidator) checkHeaderData(row []string) error { + allHeaders := []string{ + "Bibliographic ID", "Bibliographic ID Label", "Other Identifier", + "Other Identifier Type", "Title", "Creator", "Contributor", "Genre", + "Publisher", "Date Created", "Date Issued", "Abstract", "Language", + "Physical Description", "Related Item URL", "Related Item Label", + "Topical Subject", "Geographic Subject", "Temporal Subject", + "Terms of Use", "Table of Contents", "Statement of Responsibility", + "Note", "Note Type", "Publish", "Hidden", "File", "Label", "Offset", + "Skip Transcoding", "Absolute Location", "Date Ingested", + } + reqHeaders := []string{"Title", "Date Issued", "File"} + uniqueHeaders := []string{ + "Bibliographic ID", "Bibliographic ID Label", "Title", "Date Created", + "Date Issued", "Abstract", "Physical Description", "Terms of Use", + } + + headerSet := make(map[string]int) + for _, header := range row { + headerSet[strings.TrimSpace(header)]++ + } + + for _, header := range row { + if strings.TrimSpace(header) != header { + return fmt.Errorf("header fields cannot have leading or trailing blanks. Invalid field: %s", header) + } + } + + for _, header := range row { + found := false + for _, validHeader := range allHeaders { + if header == validHeader { + found = true + break + } + } + if !found { + return fmt.Errorf("manifest includes invalid metadata field: %s", header) + } + } + + for _, uniqueHeader := range uniqueHeaders { + if headerSet[uniqueHeader] > 1 { + return fmt.Errorf("a non-repeatable header field is repeated: %s", uniqueHeader) + } + } + + for _, reqHeader := range reqHeaders { + if headerSet[reqHeader] == 0 && headerSet["Bibliographic ID"] == 0 { + return fmt.Errorf("one of the required headers is missing: Title, Date Issued, File") + } + } + + return nil +} + +func (v avalonValidator) fileColumns(row []string) []int { + var columns []int + for i, field := range row { + if field == "File" { + columns = append(columns, i) + } + } + return columns +} + +func (v avalonValidator) opColumns(row []string) []int { + var columns []int + for i, field := range row { + if field == "Publish" || field == "Hidden" { + columns = append(columns, i) + } + } + return columns +} + +func (v avalonValidator) checkFieldPairs(row []string) error { + fieldSet := make(map[string]bool) + for _, field := range row { + fieldSet[field] = true + } + + pairs := [][]string{ + {"Other Identifier", "Other Identifier Type"}, + {"Related Item URL", "Related Item Label"}, + {"Note", "Note Type"}, + } + + for _, pair := range pairs { + if fieldSet[pair[0]] != fieldSet[pair[1]] { + return fmt.Errorf("%s field missing its required pair", pair[0]) + } + } + + return nil +} + +func (v avalonValidator) checkFileExts(row []string, fileCols []int) error { + for _, c := range fileCols { + if c >= len(row) { + continue + } + filepath := row[c] + periods := strings.Count(filepath, ".") + if periods > 1 && !strings.Contains(filepath, ".high.") && + !strings.Contains(filepath, ".medium.") && + !strings.Contains(filepath, ".low.") { + return fmt.Errorf("filepath %s contains more than one period", filepath) + } + } + return nil +} + +func (v avalonValidator) checkOpFields(row []string, opCols []int) error { + for _, c := range opCols { + if c >= len(row) { + continue + } + value := strings.ToLower(row[c]) + if value != "" && value != "yes" && value != "no" { + return fmt.Errorf("publish/hidden fields must have boolean value (yes or no). Value is %s", row[c]) + } + } + return nil +} + +type rightsValidator struct{} + +func (v rightsValidator) validate(r io.Reader) error { + return nil +}