diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index ff2da94..9b297df 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -2,8 +2,8 @@ package datasetIngestor import ( "bytes" - "github.com/paulscherrerinstitute/scicat/datasetUtils" "encoding/json" + "github.com/paulscherrerinstitute/scicat/datasetUtils" "io/ioutil" "log" "net" @@ -68,13 +68,18 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } // Use type assertion to convert the interface{} object to a map[string]interface{}. - metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. + metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. beamlineAccount = false + // Check scientificMetadata for illegal keys + if checkIllegalKeys(metaDataMap) { + panic("Metadata contains keys with illegal characters (., [], $, or <>).") + } + // If the user is not the ingestor, check whether any of the accessGroups equal the ownerGroup. Otherwise, check for beamline-specific accounts. if user["displayName"] != "ingestor" { // Check if the metadata contains the "ownerGroup" key. - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom + if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom validOwner := false // Iterate over accessGroups to validate the owner group. 
// checkIllegalKeys reports whether any key in metadata, at any nesting depth,
// contains a character rejected by containsIllegalCharacters.
//
// Only keys are inspected; string values may contain any characters. Nested
// objects (map[string]interface{}) and arrays ([]interface{}) are walked
// recursively, including arrays that sit directly inside other arrays.
func checkIllegalKeys(metadata map[string]interface{}) bool {
	for key, value := range metadata {
		if containsIllegalCharacters(key) || valueHasIllegalKeys(value) {
			return true
		}
	}
	return false
}

// valueHasIllegalKeys reports whether value is, or transitively contains, an
// object with an illegal key. Values that are neither maps nor slices cannot
// hold keys and therefore yield false.
func valueHasIllegalKeys(value interface{}) bool {
	switch v := value.(type) {
	case map[string]interface{}:
		return checkIllegalKeys(v)
	case []interface{}:
		// Recurse through this helper rather than checkIllegalKeys so that
		// array elements which are themselves arrays are also descended into.
		for _, item := range v {
			if valueHasIllegalKeys(item) {
				return true
			}
		}
	}
	return false
}

// containsIllegalCharacters reports whether s contains any of the characters
// '.', '[', ']', '<', '>' or '$'.
func containsIllegalCharacters(s string) bool {
	for _, r := range s {
		switch r {
		case '.', '[', ']', '<', '>', '$':
			return true
		}
	}
	return false
}
metadatafile3, user, accessGroups) +} + diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json index 06b802c..4912d85 100644 --- a/datasetIngestor/testdata/metadata-short.json +++ b/datasetIngestor/testdata/metadata-short.json @@ -1,8 +1,9 @@ { - "principalInvestigator":"egon.meier@psi.ch", + "principalInvestigator":"piemail@example.ch", "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", "sourceFolder": "/tmp/gnome", - "owner": "Andreas Menzel", + "owner": "first last", "type": "raw", - "ownerGroup": "p17880" + "ownerGroup": "group1" + } diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json index 100db21..036b05e 100644 --- a/datasetIngestor/testdata/metadata.json +++ b/datasetIngestor/testdata/metadata.json @@ -2,10 +2,11 @@ "creationLocation": "/PSI/SLS/CSAXS/SWISSFEL", "datasetName": "CMakeCache", "description": "", - "owner": "Ana Diaz", - "ownerEmail": "ana.diaz@psi.ch", - "ownerGroup": "p17301", - "principalInvestigator": "ana.diaz@psi.ch", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ { "sample": { diff --git a/datasetIngestor/testdata/metadata_illegal.json b/datasetIngestor/testdata/metadata_illegal.json new file mode 100644 index 0000000..accae75 --- /dev/null +++ b/datasetIngestor/testdata/metadata_illegal.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS", + "datasetName": "CMakeCache", + "description": "", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ + { + "sample": { + "description.": "It has an illegal characters in the key", + "name]": "same", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +}