From a523cab223d29c9a1c52ce07cb63ce7603237be6 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 22 Apr 2024 11:31:36 +0200 Subject: [PATCH 1/5] Check for rsync version --- datasetIngestor/syncDataToFileserver_unix.go | 57 ++++++++++++------- .../syncDataToFileserver_unix_test.go | 17 ++++++ 2 files changed, 53 insertions(+), 21 deletions(-) create mode 100644 datasetIngestor/syncDataToFileserver_unix_test.go diff --git a/datasetIngestor/syncDataToFileserver_unix.go b/datasetIngestor/syncDataToFileserver_unix.go index 268b01b..9ebf5ea 100644 --- a/datasetIngestor/syncDataToFileserver_unix.go +++ b/datasetIngestor/syncDataToFileserver_unix.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "strings" + version "github.com/mcuadros/go-version" ) // functionality needed for "de-central" data @@ -21,27 +22,41 @@ func SyncDataToFileserver(datasetId string, user map[string]string, RSYNCServer // append trailing slash to sourceFolder to indicate that the *contents* of the folder should be copied // no special handling for blanks in sourceFolder needed here fullSourceFolderPath := sourceFolder + "/" - // check if filelisting given - // rsync can create only one level deep directory structure, here we need more, therefore mkdir -p - // This code is no longer needed, sine Edgar has a new rrsync wrapper which craetes the needed directory - // cmd := exec.Command("/usr/bin/ssh",RSYNCServer,"mkdir","-p",destFolder) - // // show rsync's output - // cmd.Stdout = os.Stdout - // cmd.Stderr = os.Stderr - // - // fmt.Printf("Running %v.\n", cmd.Args) - // cmd.Run() - - cmd := exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxz", "--progress", "--msgs2stderr", fullSourceFolderPath, serverConnectString) - // // TODO: create folderstructure mkdir -p also for this case: - if absFileListing != "" { - cmd = exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxzr", "--progress", "--msgs2stderr", "--files-from", absFileListing, fullSourceFolderPath, serverConnectString) + + versionNumber, err := getRsyncVersion() + if err != nil { + log.Fatal("Error getting rsync version: ", err) } - // show rsync's output - // cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - log.Printf("Running %v.\n", cmd.Args) - err = cmd.Run() + + // Check rsync version and adjust command accordingly + var rsyncCmd *exec.Cmd + if version.Compare(versionNumber, "3.2.3", ">=") { + rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh", "-avxz", "--progress", "--stderr=error", fullSourceFolderPath, serverConnectString) + if absFileListing != "" { + rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh", "-avxzr", "--progress", "--stderr=error", "--files-from", absFileListing, fullSourceFolderPath, serverConnectString) + } + } else { + rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxz", "--progress", "--msgs2stderr", fullSourceFolderPath, serverConnectString) + if absFileListing != "" { + rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxzr", "--progress", "--msgs2stderr", "--files-from", absFileListing, fullSourceFolderPath, serverConnectString) + } + } + + // Show rsync's output + rsyncCmd.Stderr = os.Stderr + log.Printf("Running %v.\n", rsyncCmd.Args) + err = rsyncCmd.Run() return err } + +// Get rsync version +func getRsyncVersion() (string, error) { + cmd := exec.Command("/usr/bin/rsync", "--version") + output, err := cmd.Output() + if err != nil { + return "", err + } + version := strings.Split(string(output), "\n")[0] + versionNumber := strings.Split(version, " ")[2] + return versionNumber, nil +} diff --git a/datasetIngestor/syncDataToFileserver_unix_test.go b/datasetIngestor/syncDataToFileserver_unix_test.go new file mode 100644 index 0000000..5abc870 --- /dev/null +++ b/datasetIngestor/syncDataToFileserver_unix_test.go @@ -0,0 +1,17 @@ +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris + +package datasetIngestor + +import ( + "testing" +) + +func TestGetRsyncVersion(t *testing.T) { + version, err := getRsyncVersion() + if err != nil { + t.Errorf("getRsyncVersion() returned an error: %v", err) + } + if version == "" { + t.Error("getRsyncVersion() returned an empty string") + } +} From a2f176b7fb76b0156735fa88bcbce0e0c50a0615 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 22 Apr 2024 13:20:54 +0200 Subject: [PATCH 2/5] Fix the rsync version check and test --- datasetIngestor/syncDataToFileserver_unix.go | 15 ++++++-- .../syncDataToFileserver_unix_test.go | 6 +++ datasetUtils/getAvailableDatasets.go | 37 ++++++++++++++++++- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/datasetIngestor/syncDataToFileserver_unix.go b/datasetIngestor/syncDataToFileserver_unix.go index 9ebf5ea..7a761f7 100644 --- a/datasetIngestor/syncDataToFileserver_unix.go +++ b/datasetIngestor/syncDataToFileserver_unix.go @@ -10,6 +10,7 @@ import ( "os/exec" "strings" version "github.com/mcuadros/go-version" + "regexp" ) // functionality needed for "de-central" data @@ -44,7 +45,7 @@ func SyncDataToFileserver(datasetId string, user map[string]string, RSYNCServer // Show rsync's output rsyncCmd.Stderr = os.Stderr - log.Printf("Running %v.\n", rsyncCmd.Args) + log.Printf("Running: %v.\n", rsyncCmd.Args) err = rsyncCmd.Run() return err } @@ -56,7 +57,15 @@ func getRsyncVersion() (string, error) { if err != nil { return "", err } - version := strings.Split(string(output), "\n")[0] - versionNumber := strings.Split(version, " ")[2] + version := string(output) + + // Use a regular expression to find the version number. + // It will match the first occurrence of a string in the format "x.y.z" in the `version` string, where "x", "y", and "z" are one or more digits. + re := regexp.MustCompile(`\d+\.\d+\.\d+`) + versionNumber := re.FindString(version) + if versionNumber == "" { + return "", fmt.Errorf("could not find version number in rsync version string: %s", version) + } + return versionNumber, nil } diff --git a/datasetIngestor/syncDataToFileserver_unix_test.go b/datasetIngestor/syncDataToFileserver_unix_test.go index 5abc870..456f7f9 100644 --- a/datasetIngestor/syncDataToFileserver_unix_test.go +++ b/datasetIngestor/syncDataToFileserver_unix_test.go @@ -4,6 +4,7 @@ package datasetIngestor import ( "testing" + "regexp" ) func TestGetRsyncVersion(t *testing.T) { @@ -13,5 +14,10 @@ func TestGetRsyncVersion(t *testing.T) { } if version == "" { t.Error("getRsyncVersion() returned an empty string") + } else { + match, _ := regexp.MatchString(`^\d{1,2}\.\d{1,2}\.\d{1,2}$`, version) + if !match { + t.Error("getRsyncVersion() returned wrong version string format: ", version) + } } } diff --git a/datasetUtils/getAvailableDatasets.go b/datasetUtils/getAvailableDatasets.go index 87ad763..c1c43b7 100644 --- a/datasetUtils/getAvailableDatasets.go +++ b/datasetUtils/getAvailableDatasets.go @@ -5,6 +5,8 @@ import ( "log" "os/exec" "strings" + version "github.com/mcuadros/go-version" + "regexp" ) func GetAvailableDatasets(username string, RSYNCServer string, singleDatasetId string) []string { @@ -21,7 +23,21 @@ func GetAvailableDatasets(username string, RSYNCServer string, singleDatasetId s fmt.Printf("====== (only datasets highlighted in green will be retrieved)\n\n") fmt.Printf("====== If you can not find the dataset in this listing: may be you forgot\n") fmt.Printf("====== to start the necessary retrieve job from the the data catalog first ?\n\n") - cmd := exec.Command("rsync", "-e", "ssh -q", "--list-only", username+"@"+RSYNCServer+":retrieve/") + + // Get rsync version + versionNumber, err := getRsyncVersion() + if err != nil { + log.Fatal("Error getting rsync version: ", err) + } + + // Check rsync version and adjust command accordingly + var cmd *exec.Cmd + if version.Compare(versionNumber, "3.2.3", ">=") { + cmd = exec.Command("rsync", "-e", "ssh", "--list-only", username+"@"+RSYNCServer+":retrieve/") + } else { + cmd = exec.Command("rsync", "-e", "ssh -q", "--list-only", username+"@"+RSYNCServer+":retrieve/") + } + out, err := cmd.Output() if err != nil { log.Printf("Running %v.\n", cmd.Args) @@ -43,3 +59,22 @@ func GetAvailableDatasets(username string, RSYNCServer string, singleDatasetId s } return datasetList } + +// Get rsync version +func getRsyncVersion() (string, error) { + cmd := exec.Command("/usr/bin/rsync", "--version") + output, err := cmd.Output() + if err != nil { + return "", err + } + version := string(output) + + // Use a regular expression to find the version number + re := regexp.MustCompile(`\d+\.\d+\.\d+`) + versionNumber := re.FindString(version) + if versionNumber == "" { + return "", fmt.Errorf("could not find version number in rsync version string: %s", version) + } + + return versionNumber, nil +} From 9ae75a828a241c77f0b7c4c89f6738ce2253360b Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 22 Apr 2024 16:15:23 +0200 Subject: [PATCH 3/5] Make the if statement more compact --- datasetIngestor/syncDataToFileserver_unix.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/datasetIngestor/syncDataToFileserver_unix.go b/datasetIngestor/syncDataToFileserver_unix.go index 7a761f7..cba8d3b 100644 --- a/datasetIngestor/syncDataToFileserver_unix.go +++ b/datasetIngestor/syncDataToFileserver_unix.go @@ -31,16 +31,14 @@ func SyncDataToFileserver(datasetId string, user map[string]string, RSYNCServer // Check rsync version and adjust command accordingly var rsyncCmd *exec.Cmd + rsyncFlags := []string{"-e", "ssh", "-avxz", "--progress", "--stderr=error"} + if absFileListing != "" { + rsyncFlags = append(rsyncFlags, "-r", "--files-from", absFileListing) + } if version.Compare(versionNumber, "3.2.3", ">=") { - rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh", "-avxz", "--progress", "--stderr=error", fullSourceFolderPath, serverConnectString) - if absFileListing != "" { - rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh", "-avxzr", "--progress", "--stderr=error", "--files-from", absFileListing, fullSourceFolderPath, serverConnectString) - } + rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) } else { - rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxz", "--progress", "--msgs2stderr", fullSourceFolderPath, serverConnectString) - if absFileListing != "" { - rsyncCmd = exec.Command("/usr/bin/rsync", "-e", "ssh -q", "-avxzr", "--progress", "--msgs2stderr", "--files-from", absFileListing, fullSourceFolderPath, serverConnectString) - } + rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, "-q", "--msgs2stderr", fullSourceFolderPath, serverConnectString)...) } // Show rsync's output From 8f445191bcf7d543f44a2cc8fa13c30e1f48d2d4 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:52:16 +0200 Subject: [PATCH 4/5] Fix a bug where `msgs2stderr` and `stderr=error` were both present in cmd --- datasetIngestor/syncDataToFileserver_unix.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/datasetIngestor/syncDataToFileserver_unix.go b/datasetIngestor/syncDataToFileserver_unix.go index cba8d3b..7412193 100644 --- a/datasetIngestor/syncDataToFileserver_unix.go +++ b/datasetIngestor/syncDataToFileserver_unix.go @@ -31,14 +31,19 @@ func SyncDataToFileserver(datasetId string, user map[string]string, RSYNCServer // Check rsync version and adjust command accordingly var rsyncCmd *exec.Cmd - rsyncFlags := []string{"-e", "ssh", "-avxz", "--progress", "--stderr=error"} + rsyncFlags := []string{"-e", "ssh", "-avxz", "--progress"} if absFileListing != "" { rsyncFlags = append(rsyncFlags, "-r", "--files-from", absFileListing) } if version.Compare(versionNumber, "3.2.3", ">=") { + rsyncFlags = append(rsyncFlags, "--stderr=error") rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) + // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from --stderr=error } else { - rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, "-q", "--msgs2stderr", fullSourceFolderPath, serverConnectString)...) + rsyncFlags = append(rsyncFlags, "-q", "--msgs2stderr") + rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) + // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from -q --msgs2stderr + } // Show rsync's output From 4cc896ba4c51cef1149226752af7223402618cce Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:21:53 +0200 Subject: [PATCH 5/5] Create a separate function for building rsync cmd and test it --- datasetIngestor/syncDataToFileserver_unix.go | 34 ++++++------ .../syncDataToFileserver_unix_test.go | 55 +++++++++++++++++++ 2 files changed, 73 insertions(+), 16 deletions(-) diff --git a/datasetIngestor/syncDataToFileserver_unix.go b/datasetIngestor/syncDataToFileserver_unix.go index 7412193..8baa50c 100644 --- a/datasetIngestor/syncDataToFileserver_unix.go +++ b/datasetIngestor/syncDataToFileserver_unix.go @@ -29,22 +29,7 @@ func SyncDataToFileserver(datasetId string, user map[string]string, RSYNCServer log.Fatal("Error getting rsync version: ", err) } - // Check rsync version and adjust command accordingly - var rsyncCmd *exec.Cmd - rsyncFlags := []string{"-e", "ssh", "-avxz", "--progress"} - if absFileListing != "" { - rsyncFlags = append(rsyncFlags, "-r", "--files-from", absFileListing) - } - if version.Compare(versionNumber, "3.2.3", ">=") { - rsyncFlags = append(rsyncFlags, "--stderr=error") - rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) - // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from --stderr=error - } else { - rsyncFlags = append(rsyncFlags, "-q", "--msgs2stderr") - rsyncCmd = exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) - // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from -q --msgs2stderr - - } + rsyncCmd := buildRsyncCmd(versionNumber, absFileListing, fullSourceFolderPath, serverConnectString) // Show rsync's output rsyncCmd.Stderr = os.Stderr @@ -72,3 +57,20 @@ func getRsyncVersion() (string, error) { return versionNumber, nil } + +// Check rsync version and adjust command accordingly +func buildRsyncCmd(versionNumber, absFileListing, fullSourceFolderPath, serverConnectString string) *exec.Cmd { + rsyncFlags := []string{"-e", "ssh", "-avxz", "--progress"} + if absFileListing != "" { + rsyncFlags = append([]string{"-r", "--files-from", absFileListing}, rsyncFlags...) + } + if version.Compare(versionNumber, "3.2.3", ">=") { + rsyncFlags = append(rsyncFlags, "--stderr=error") + // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from --stderr=error + } else { + rsyncFlags = append(rsyncFlags, "-q", "--msgs2stderr") + // Full command: /usr/bin/rsync -e ssh -avxz --progress -r --files-from -q --msgs2stderr + } + rsyncCmd := exec.Command("/usr/bin/rsync", append(rsyncFlags, fullSourceFolderPath, serverConnectString)...) + return rsyncCmd +} diff --git a/datasetIngestor/syncDataToFileserver_unix_test.go b/datasetIngestor/syncDataToFileserver_unix_test.go index 456f7f9..72123c1 100644 --- a/datasetIngestor/syncDataToFileserver_unix_test.go +++ b/datasetIngestor/syncDataToFileserver_unix_test.go @@ -5,6 +5,7 @@ package datasetIngestor import ( "testing" "regexp" + "strings" ) func TestGetRsyncVersion(t *testing.T) { @@ -21,3 +22,57 @@ func TestGetRsyncVersion(t *testing.T) { } } } + +func TestBuildRsyncCmd(t *testing.T) { + tests := []struct { + name string + versionNumber string + absFileListing string + fullSourceFolder string + serverConnectStr string + expectedCmd string + }{ + { + name: "rsync version >= 3.2.3, absFileListing not empty", + versionNumber: "3.2.3", + absFileListing: "/path/to/file", + fullSourceFolder: "/source/folder", + serverConnectStr: "user@server:/dest/folder", + expectedCmd: "/usr/bin/rsync -r --files-from /path/to/file -e ssh -avxz --progress --stderr=error /source/folder user@server:/dest/folder", + }, + { + name: "rsync version < 3.2.3, absFileListing not empty", + versionNumber: "3.2.2", + absFileListing: "/path/to/file", + fullSourceFolder: "/source/folder", + serverConnectStr: "user@server:/dest/folder", + expectedCmd: "/usr/bin/rsync -r --files-from /path/to/file -e ssh -avxz --progress -q --msgs2stderr /source/folder user@server:/dest/folder", + }, + { + name: "rsync version >= 3.2.3, absFileListing empty", + versionNumber: "3.2.3", + absFileListing: "", + fullSourceFolder: "/source/folder", + serverConnectStr: "user@server:/dest/folder", + expectedCmd: "/usr/bin/rsync -e ssh -avxz --progress --stderr=error /source/folder user@server:/dest/folder", + }, + { + name: "rsync version < 3.2.3, absFileListing empty", + versionNumber: "3.2.2", + absFileListing: "", + fullSourceFolder: "/source/folder", + serverConnectStr: "user@server:/dest/folder", + expectedCmd: "/usr/bin/rsync -e ssh -avxz --progress -q --msgs2stderr /source/folder user@server:/dest/folder", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cmd := buildRsyncCmd(tt.versionNumber, tt.absFileListing, tt.fullSourceFolder, tt.serverConnectStr) + cmdStr := strings.Join(cmd.Args, " ") + if cmdStr != tt.expectedCmd { + t.Errorf("Expected command: %s, got: %s", tt.expectedCmd, cmdStr) + } + }) + } +}