From db97d998cf4d8b6c042529d8c655c3ea8a750117 Mon Sep 17 00:00:00 2001
From: Matthew Jadud
Date: Thu, 9 Jan 2025 09:24:40 -0500
Subject: [PATCH 01/39] CircleCI Commit

---
 .circleci/config.yml | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 .circleci/config.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 0000000..6229170
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,31 @@
+# Use the latest 2.1 version of CircleCI pipeline process engine.
+# See: https://circleci.com/docs/configuration-reference
+version: 2.1
+
+# Define a job to be invoked later in a workflow.
+# See: https://circleci.com/docs/jobs-steps/#jobs-overview & https://circleci.com/docs/configuration-reference/#jobs
+jobs:
+  say-hello:
+    # Specify the execution environment. You can specify an image from Docker Hub or use one of our convenience images from CircleCI's Developer Hub.
+    # See: https://circleci.com/docs/executor-intro/ & https://circleci.com/docs/configuration-reference/#executor-job
+    docker:
+      # Specify the version you desire here
+      # See: https://circleci.com/developer/images/image/cimg/base
+      - image: cimg/base:current
+
+    # Add steps to the job
+    # See: https://circleci.com/docs/jobs-steps/#steps-overview & https://circleci.com/docs/configuration-reference/#steps
+    steps:
+      # Checkout the code as the first step.
+      - checkout
+      - run:
+          name: "Say hello"
+          command: "echo Hello, World!"
+
+# Orchestrate jobs using workflows
+# See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
+workflows:
+  say-hello-workflow: # This is the name of the workflow, feel free to change it to better match your workflow.
+    # Inside the workflow, you define the jobs you want to run.
+    jobs:
+      - say-hello
\ No newline at end of file

From d858c68754bba64609707f4f9ff2772572e860bb Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 09:31:50 -0500
Subject: [PATCH 02/39] Trying to change the container

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 6229170..0a62432 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,7 +11,7 @@ jobs:
     docker:
       # Specify the version you desire here
       # See: https://circleci.com/developer/images/image/cimg/base
-      - image: cimg/base:current
+      - image: cloudfoundry/cflinuxfs4:latest
 
     # Add steps to the job
     # See: https://circleci.com/docs/jobs-steps/#steps-overview & https://circleci.com/docs/configuration-reference/#steps

From 6c0fc8e17cb6041de6579ae8a89d9098bed1f3c9 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 09:49:22 -0500
Subject: [PATCH 03/39] Attempting to set up env

---
 .circleci/config.yml | 55 ++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0a62432..57bd265 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,30 +2,61 @@
 # See: https://circleci.com/docs/configuration-reference
 version: 2.1
 
-# Define a job to be invoked later in a workflow.
-# See: https://circleci.com/docs/jobs-steps/#jobs-overview & https://circleci.com/docs/configuration-reference/#jobs
 jobs:
-  say-hello:
-    # Specify the execution environment. You can specify an image from Docker Hub or use one of our convenience images from CircleCI's Developer Hub.
-    # See: https://circleci.com/docs/executor-intro/ & https://circleci.com/docs/configuration-reference/#executor-job
+  install:
     docker:
-      # Specify the version you desire here
-      # See: https://circleci.com/developer/images/image/cimg/base
+      # Build against the CF image, for libc compatibiltiy reasons.
       - image: cloudfoundry/cflinuxfs4:latest
-
-    # Add steps to the job
-    # See: https://circleci.com/docs/jobs-steps/#steps-overview & https://circleci.com/docs/configuration-reference/#steps
     steps:
       # Checkout the code as the first step.
       - checkout
       - run:
           name: "Say hello"
           command: "echo Hello, World!"
+      - run:
+          name: "apt install packages"
+          command: |
+            sudo apt-get update \
+            && sudo apt-get install -y \
+            build-essential \
+            curl \
+            libpoppler-dev \
+            libpoppler-glib-dev \
+            poppler-utils \
+            software-properties-common \
+            tree \
+            wget
+      - run:
+          name: "install golang 1.23"
+          command: |
+            rm -rf /usr/local/go \
+            && wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz \
+            && tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
+      - run:
+          name: "install sqlc"
+          command: |
+            wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz \
+            && tar xvzf sqlc_1.27.0_linux_amd64.tar.gz \
+            && chmod 755 sqlc \
+            && mv sqlc /bin/sqlc
+      - run:
+          name: "install golang tools"
+          command: |
+            go install golang.org/x/tools/cmd/goimports@latest
+            go install golang.org/x/tools/cmd/stringer@latest
+            go install github.com/google/go-jsonnet/cmd/jsonnet@latest
+            go install github.com/google/go-jsonnet/cmd/jsonnet-lint@latest
+            go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
+      - run:
+          name: "install golanglint"
+          command: |
+            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
+            && sh -s -- -b /usr/local/go/bin v1.62.0
 
 # Orchestrate jobs using workflows
 # See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
 workflows:
-  say-hello-workflow: # This is the name of the workflow, feel free to change it to better match your workflow.
+  build-and-deploy: # This is the name of the workflow, feel free to change it to better match your workflow.
     # Inside the workflow, you define the jobs you want to run.
     jobs:
-      - say-hello
\ No newline at end of file
+      - install

From 2663da4051e0937fa6af405c9e3c00895820c21c Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 09:51:50 -0500
Subject: [PATCH 04/39] Removing &&

---
 .circleci/config.yml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 57bd265..fd248da 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -16,8 +16,8 @@ jobs:
       - run:
           name: "apt install packages"
          command: |
-            sudo apt-get update \
-            && sudo apt-get install -y \
+            sudo apt-get update
+            sudo apt-get install -y \
             build-essential \
             curl \
             libpoppler-dev \
@@ -29,16 +29,16 @@ jobs:
       - run:
           name: "install golang 1.23"
           command: |
-            rm -rf /usr/local/go \
-            && wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz \
-            && tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
+            rm -rf /usr/local/go
+            wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
+            tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
       - run:
           name: "install sqlc"
           command: |
-            wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz \
-            && tar xvzf sqlc_1.27.0_linux_amd64.tar.gz \
-            && chmod 755 sqlc \
-            && mv sqlc /bin/sqlc
+            wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
+            tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
+            chmod 755 sqlc
+            mv sqlc /bin/sqlc
       - run:
           name: "install golang tools"
           command: |

From 69cc862996fda4403477320c030574075685c21b Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 09:59:09 -0500
Subject: [PATCH 05/39] Using shorthand syntax

---
 .circleci/config.yml | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fd248da..57a8ccb 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,11 +11,9 @@ jobs:
       # Checkout the code as the first step.
       - checkout
       - run:
-          name: "Say hello"
-          command: "echo Hello, World!"
-      - run:
-          name: "apt install packages"
-          command: |
+          name: "Installing packages"
+          command: "echo installing packages..."
+      - run: |
           sudo apt-get update
           sudo apt-get install -y \
           build-essential \
@@ -26,30 +24,22 @@ jobs:
           software-properties-common \
           tree \
           wget
-      - run:
-          name: "install golang 1.23"
-          command: |
+      - run: |
          rm -rf /usr/local/go
          wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
          tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
-      - run:
-          name: "install sqlc"
-          command: |
+      - run: |
          wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
          tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
          chmod 755 sqlc
          mv sqlc /bin/sqlc
-      - run:
-          name: "install golang tools"
-          command: |
+      - run: |
          go install golang.org/x/tools/cmd/goimports@latest
          go install golang.org/x/tools/cmd/stringer@latest
          go install github.com/google/go-jsonnet/cmd/jsonnet@latest
          go install github.com/google/go-jsonnet/cmd/jsonnet-lint@latest
          go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
-      - run:
-          name: "install golanglint"
-          command: |
+      - run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
          && sh -s -- -b /usr/local/go/bin v1.62.0

From a28d004cae9dddbb3504f196896d7b7f81e074e1 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:09:21 -0500
Subject: [PATCH 06/39] Multiline commands

These are not nice in CircleCI/YAML.
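A note on the two styles in play in this patch. Inside a CircleCI run step, a YAML literal block (the "|" after "command:") hands the shell one script, so a single command continued with "\" and "&&" only works if every continuation line survives YAML's indentation handling. A minimal sketch of both forms, with illustrative package names rather than the project's full list:

      - run: |
          sudo apt-get update \
          && sudo apt-get install -y build-essential curl   # one continued command
      - run: |
          sudo apt-get update                 # or: one command per line;
          sudo apt-get install -y curl wget   # each line runs on its own

Reading the package list from a file, as the diff below does, sidesteps the continuation problem entirely.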
---
 .circleci/build-packages.txt |  8 ++++++++
 .circleci/config.yml         | 10 +---------
 2 files changed, 9 insertions(+), 9 deletions(-)
 create mode 100644 .circleci/build-packages.txt

diff --git a/.circleci/build-packages.txt b/.circleci/build-packages.txt
new file mode 100644
index 0000000..4cd96b6
--- /dev/null
+++ b/.circleci/build-packages.txt
@@ -0,0 +1,8 @@
+build-essential
+curl
+libpoppler-dev
+libpoppler-glib-dev
+poppler-utils
+software-properties-common
+tree
+wget
\ No newline at end of file
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 57a8ccb..0b87641 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -15,15 +15,7 @@ jobs:
           command: "echo installing packages..."
       - run: |
           sudo apt-get update
-          sudo apt-get install -y \
-          build-essential \
-          curl \
-          libpoppler-dev \
-          libpoppler-glib-dev \
-          poppler-utils \
-          software-properties-common \
-          tree \
-          wget
+          sudo apt-get -y install < .circleci/build-packages.txt
       - run: |
           rm -rf /usr/local/go
           wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
           tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz

From d4e4e72a8316b509ce3486067dd810db0e8c30ff Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:13:51 -0500
Subject: [PATCH 07/39] Trying a convenience image

Should be faster

---
 .circleci/config.yml | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0b87641..8e4aaa2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -5,8 +5,14 @@ version: 2.1
 jobs:
   install:
     docker:
-      # Build against the CF image, for libc compatibiltiy reasons.
-      - image: cloudfoundry/cflinuxfs4:latest
+      # We could build against teh cflinux4 image.
+      # However, we'll try using the default Ubuntu image,
+      # but we could run into libc compatibility problems.
+      # https://hub.docker.com/r/cloudfoundry/cflinuxfs4/tags
+      # - image: cloudfoundry/cflinuxfs4:latest
+      # We'll try a go image. That may, or may not,
+      # build a binary that plays well with CF/CGov.
+      - image: cimg/go:1.23.3
     steps:
       # Checkout the code as the first step.
       - checkout
@@ -16,16 +22,17 @@ jobs:
       - run: |
           sudo apt-get update
           sudo apt-get -y install < .circleci/build-packages.txt
-      - run: |
-          rm -rf /usr/local/go
-          wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
-          tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
+      # - run: |
+      #     rm -rf /usr/local/go
+      #     wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
+      #     tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
       - run: |
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
           chmod 755 sqlc
           mv sqlc /bin/sqlc
       - run: |
+          # PATH=$PATH:/usr/local/go/bin:/root/go/bin
           go install golang.org/x/tools/cmd/goimports@latest
           go install golang.org/x/tools/cmd/stringer@latest
           go install github.com/google/go-jsonnet/cmd/jsonnet@latest

From b29c15e6a3b767fa4b8d7a7e580baf6aef5d4f69 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:17:30 -0500
Subject: [PATCH 08/39] Using sudo

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8e4aaa2..2e80f18 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -30,7 +30,7 @@ jobs:
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
           chmod 755 sqlc
-          mv sqlc /bin/sqlc
+          sudo mv sqlc /bin/sqlc
       - run: |
           # PATH=$PATH:/usr/local/go/bin:/root/go/bin
           go install golang.org/x/tools/cmd/goimports@latest

From 5b9d5172a4092c44d1f6db81c231971083449a5b Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:27:46 -0500
Subject: [PATCH 09/39] jobs vs steps?

---
 .circleci/config.yml | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2e80f18..840bebf 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -5,7 +5,7 @@ version: 2.1
 jobs:
   install:
     docker:
-      # We could build against teh cflinux4 image.
+      # We could build against the cflinux4 image.
       # However, we'll try using the default Ubuntu image,
       # but we could run into libc compatibility problems.
       # https://hub.docker.com/r/cloudfoundry/cflinuxfs4/tags
@@ -22,10 +22,6 @@ jobs:
       - run: |
           sudo apt-get update
           sudo apt-get -y install < .circleci/build-packages.txt
-      # - run: |
-      #     rm -rf /usr/local/go
-      #     wget https://go.dev/dl/go1.23.3.linux-amd64.tar.gz
-      #     tar -C /usr/local -xzf go1.23.3.linux-amd64.tar.gz
       - run: |
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
           chmod 755 sqlc
@@ -41,11 +37,20 @@ jobs:
       - run: |
           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
           && sh -s -- -b /usr/local/go/bin v1.62.0
+  build:
+    docker:
+      - image: cimg/go:1.23.3
+    steps:
+      - checkout
+      - run:
+          name: "Building"
+          command: "echo building..."
+      - run: make build
 
 # Orchestrate jobs using workflows
 # See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
 workflows:
-  build-and-deploy: # This is the name of the workflow, feel free to change it to better match your workflow.
-    # Inside the workflow, you define the jobs you want to run.
+  build-and-deploy:
     jobs:
       - install
+      - build

From 6e8c1de281274e6e4e2ba6f68e420b8bf854190c Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:31:00 -0500
Subject: [PATCH 10/39] Jobs run in parallel

---
 .circleci/config.yml | 19 +++++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 840bebf..471b96d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -37,15 +37,15 @@ jobs:
       - run: |
           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
           && sh -s -- -b /usr/local/go/bin v1.62.0
-  build:
-    docker:
-      - image: cimg/go:1.23.3
-    steps:
-      - checkout
-      - run:
-          name: "Building"
-          command: "echo building..."
-      - run: make build
+  # build:
+  #   docker:
+  #     - image: cimg/go:1.23.3
+  #   steps:
+  #     - checkout
+  #     - run:
+  #       name: "Building"
+  #       command: "echo building..."
+  #     - run: make build
 
 # Orchestrate jobs using workflows
 # See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
@@ -53,4 +54,3 @@ workflows:
   build-and-deploy:
     jobs:
       - install
-      - build

From f3eebfe6e344a1c4a61a82cdf66f74bd3771efd5 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:48:36 -0500
Subject: [PATCH 11/39] Not installing packages...

---
 .circleci/build-packages.txt |  8 --------
 .circleci/config.yml         | 14 ++++++++++----
 2 files changed, 10 insertions(+), 12 deletions(-)
 delete mode 100644 .circleci/build-packages.txt

diff --git a/.circleci/build-packages.txt b/.circleci/build-packages.txt
deleted file mode 100644
index 4cd96b6..0000000
--- a/.circleci/build-packages.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-build-essential
-curl
-libpoppler-dev
-libpoppler-glib-dev
-poppler-utils
-software-properties-common
-tree
-wget
\ No newline at end of file
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 471b96d..edfe57b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,7 +3,7 @@
 version: 2.1
 
 jobs:
-  install:
+  setup:
     docker:
       # We could build against the cflinux4 image.
       # However, we'll try using the default Ubuntu image,
@@ -21,7 +21,14 @@ jobs:
           command: "echo installing packages..."
       - run: |
           sudo apt-get update
-          sudo apt-get -y install < .circleci/build-packages.txt
+          sudo apt-get -y install build-essential \
+            curl \
+            libpoppler-dev \
+            libpoppler-glib-dev \
+            poppler-utils \
+            software-properties-common \
+            tree \
+            wget
       - run: |
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
@@ -56,4 +63,4 @@ workflows:
   build-and-deploy:
     jobs:
-      - install
+      - build
\ No newline at end of file

From 8c5e58499040ce58cc09e952ced9bab2a170341d Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 10:50:50 -0500
Subject: [PATCH 12/39] Building?

---
 .circleci/config.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index edfe57b..be76846 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,7 +3,7 @@
 version: 2.1
 
 jobs:
-  setup:
+  setup-and-build:
     docker:
       # We could build against the cflinux4 image.
       # However, we'll try using the default Ubuntu image,
@@ -44,6 +44,8 @@ jobs:
       - run: |
           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
           && sh -s -- -b /usr/local/go/bin v1.62.0
+      - run: make build
+
   # build:
   #   docker:
   #     - image: cimg/go:1.23.3
  #   steps:
   #     - checkout
   #     - run:
   #       name: "Building"
   #       command: "echo building..."
   #     - run: make build
@@ -61,4 +63,4 @@ workflows:
   build-and-deploy:
     jobs:
-      - build
\ No newline at end of file
+      - setup-and-build
\ No newline at end of file

From a168ddf3b3bfbecf7bf1fa2cf5d9f5e26c04861d Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:02:15 -0500
Subject: [PATCH 13/39] Can I lint first?

---
 .circleci/config.yml | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index be76846..bb6c62f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,16 +3,25 @@
 version: 2.1
 
 jobs:
-  setup-and-build:
-    docker:
-      # We could build against the cflinux4 image.
-      # However, we'll try using the default Ubuntu image,
-      # but we could run into libc compatibility problems.
-      # https://hub.docker.com/r/cloudfoundry/cflinuxfs4/tags
-      # - image: cloudfoundry/cflinuxfs4:latest
-      # We'll try a go image. That may, or may not,
-      # build a binary that plays well with CF/CGov.
+  lint:
+    # We could build against the cflinux4 image.
+    # However, we'll try using the default Ubuntu image,
+    # but we could run into libc compatibility problems.
+    # https://hub.docker.com/r/cloudfoundry/cflinuxfs4/tags
+    # - image: cloudfoundry/cflinuxfs4:latest
+    # We'll try a go image. That may, or may not,
+    # build a binary that plays well with CF/CGov.
+    docker: &gobase
       - image: cimg/go:1.23.3
+    steps:
+      - checkout
+      - run: |
+          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
+          sh -s -- -b /usr/local/go/bin v1.62.0
+      - run: golint cmd/serve
+
+  setup-and-build:
+    docker: *gobase
     steps:
       # Checkout the code as the first step.
       - checkout
@@ -35,17 +44,15 @@ jobs:
           go install github.com/google/go-jsonnet/cmd/jsonnet-lint@latest
           go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
-      - run: |
-          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
-          && sh -s -- -b /usr/local/go/bin v1.62.0
       - run: make build
 
-
 # Orchestrate jobs using workflows
 # See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
 workflows:
   build-and-deploy:
     jobs:
-      - setup-and-build
\ No newline at end of file
+      - lint
+      - setup-and-build:
+          requires: [ lint ]
\ No newline at end of file

From 33a24a84f60201be49bb6d465f9d296ce4432c31 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:03:25 -0500
Subject: [PATCH 14/39] Wrong command

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index bb6c62f..dc91c81 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -18,7 +18,7 @@ jobs:
       - run: |
           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
           sh -s -- -b /usr/local/go/bin v1.62.0
-      - run: golint cmd/serve
+      - run: golangci-lint run
 
   setup-and-build:

From 3142c09340b00bed1ec01789eac6bba548a852ef Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:08:50 -0500
Subject: [PATCH 15/39] Need libraries in linting

---
 .circleci/config.yml | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index dc91c81..5994e9d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -15,9 +15,21 @@ jobs:
       - image: cimg/go:1.23.3
     steps:
       - checkout
+      - run: &common-installs
+          name: "common installs"
+          command: |
+            sudo apt-get update
+            sudo apt-get -y install build-essential \
+              curl \
+              libpoppler-dev \
+              libpoppler-glib-dev \
+              poppler-utils \
+              software-properties-common \
+              tree \
+              wget
       - run: |
           curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
-          sh -s -- -b /usr/local/go/bin v1.62.0
+          sh -s -- -b /usr/local/go/bin v1.63.4
       - run: golangci-lint run
 
   setup-and-build:
@@ -25,19 +37,11 @@ jobs:
     steps:
       # Checkout the code as the first step.
       - checkout
+      - run: *common-installs
       - run:
           name: "Installing packages"
           command: "echo installing packages..."
-      - run: |
-          sudo apt-get update
-          sudo apt-get -y install build-essential \
-            curl \
-            libpoppler-dev \
-            libpoppler-glib-dev \
-            poppler-utils \
-            software-properties-common \
-            tree \
-            wget
+
       - run: |
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz

From 3eabddb36ab8ebbf85612efe29b52bd8ba0bb6a0 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:18:18 -0500
Subject: [PATCH 16/39] Reorg for linting

---
 .circleci/config.yml | 80 +++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5994e9d..5e7b314 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,20 +2,10 @@
 # See: https://circleci.com/docs/configuration-reference
 version: 2.1
 
-jobs:
-  lint:
-    # We could build against the cflinux4 image.
-    # However, we'll try using the default Ubuntu image,
-    # but we could run into libc compatibility problems.
-    # https://hub.docker.com/r/cloudfoundry/cflinuxfs4/tags
-    # - image: cloudfoundry/cflinuxfs4:latest
-    # We'll try a go image. That may, or may not,
-    # build a binary that plays well with CF/CGov.
-    docker: &gobase
-      - image: cimg/go:1.23.3
+commands:
+  common-install:
     steps:
-      - checkout
-      - run: &common-installs
+      - run:
           name: "common installs"
           command: |
             sudo apt-get update
@@ -27,46 +17,54 @@ jobs:
             software-properties-common \
             tree \
             wget
-      - run: |
-          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
-          sh -s -- -b /usr/local/go/bin v1.63.4
-      - run: golangci-lint run
-
-  setup-and-build:
-    docker: *gobase
+  install-go-utilities:
+    parameters:
+      version:
+        default: 1.63.4
+        type: string
     steps:
-      # Checkout the code as the first step.
-      - checkout
-      - run: *common-installs
       - run:
-          name: "Installing packages"
-          command: "echo installing packages..."
-
+          name: "install golangci-lint"
+          command: |
+            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
+            sh -s -- -b /usr/local/go/bin v<< parameters.version >>
       - run: |
-          wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
-          tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
-          chmod 755 sqlc
-          sudo mv sqlc /bin/sqlc
-      - run: |
           go install golang.org/x/tools/cmd/goimports@latest
           go install golang.org/x/tools/cmd/stringer@latest
           go install github.com/google/go-jsonnet/cmd/jsonnet@latest
           go install github.com/google/go-jsonnet/cmd/jsonnet-lint@latest
           go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
+      - run: |
+          wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
+          tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
+          chmod 755 sqlc
+          sudo mv sqlc /bin/sqlc
+jobs:
+  lint:
+    docker: *gobase
+    steps:
+      - checkout
+      - common-install
+      - install-go-utilities
+      - run:
+          name: "prep the build"
+          command: |
+            make generate
+      - run:
+          name: "find the lint"
+          command: golangci-lint run
 
+  setup-and-build:
+    docker: &gobase
+      - image: cimg/go:1.23.3
+    steps:
+      # Checkout the code as the first step.
+      - checkout
+      - run: common-install
+      - run: install-go-utilities
       - run: make build
 
 # Orchestrate jobs using workflows
 # See: https://circleci.com/docs/workflows/ & https://circleci.com/docs/configuration-reference/#workflows
 workflows:

From 0d274befd998e7c70b5ebe6c94c8db01f142bb64 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:20:39 -0500
Subject: [PATCH 17/39] YAML anchors...

---
 .circleci/config.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5e7b314..e87a7ce 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -41,7 +41,8 @@ commands:
           sudo mv sqlc /bin/sqlc
 jobs:
   lint:
-    docker: *gobase
+    docker: &gobase
+      - image: cimg/go:1.23.3
     steps:
       - checkout
       - common-install
@@ -55,8 +56,7 @@ jobs:
           command: golangci-lint run
 
   setup-and-build:
-    docker: &gobase
-      - image: cimg/go:1.23.3
+    docker: *gobase
     steps:
       # Checkout the code as the first step.
       - checkout

From 90d76002e1dc4c9f1a561b20daa372b8c5ca18cb Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 11:37:47 -0500
Subject: [PATCH 18/39] Linting?
---
 .circleci/config.yml | 13 +++++++++----
 Makefile             |  7 +++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e87a7ce..dbe4a48 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,20 +28,24 @@ commands:
           command: |
             curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
             sh -s -- -b /usr/local/go/bin v<< parameters.version >>
-      - run: |
+      - run:
+          name: "install jsonnet"
+          command: |
           go install golang.org/x/tools/cmd/goimports@latest
           go install golang.org/x/tools/cmd/stringer@latest
           go install github.com/google/go-jsonnet/cmd/jsonnet@latest
           go install github.com/google/go-jsonnet/cmd/jsonnet-lint@latest
           go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
-      - run: |
+      - run:
+          name: "install sqlc"
+          command: |
           wget https://downloads.sqlc.dev/sqlc_1.27.0_linux_amd64.tar.gz
           tar xvzf sqlc_1.27.0_linux_amd64.tar.gz
           chmod 755 sqlc
           sudo mv sqlc /bin/sqlc
 jobs:
   lint:
-    docker: &gobase
+    docker:
       - image: cimg/go:1.23.3
     steps:
       - checkout
       - common-install
@@ -55,8 +59,9 @@ jobs:
           command: golangci-lint run
 
   setup-and-build:
-    docker: *gobase
+    docker:
+      - image: cimg/go:1.23.3
     steps:
       # Checkout the code as the first step.
       - checkout
diff --git a/Makefile b/Makefile
index eac6c9e..f3f82e5 100644
--- a/Makefile
+++ b/Makefile
@@ -9,12 +9,11 @@ clean:
 	rm -f cmd/*/service.exe
 
 .PHONY: generate
-generate:
+generate: config
 	cd internal/postgres ; make generate
-	# cd internal/postgres/search_db ; make generate
 
 .PHONY: config
-config:
+config: clean
 	cd config ; make all || exit 1
 
 docker:
@@ -23,7 +22,7 @@ docker:
 
 .PHONY: build
 # lint
-build: clean config generate
+build: generate
 	echo "build migrate"
 	cd cmd/migrate ; make build
 	echo "build admin"

From f302b545e9e344a1c4a61a82cdf66f74bd3771efd5 Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 14:05:12 -0500
Subject: [PATCH 19/39] Fixed all linting errors

Although this would be nice as a separate commit, I'm working
to get the automated builds going. It ended up in this branch.

This added `if err != nil` in many places.

---
 .circleci/.gitignore       |  1 +
 cmd/admin/main.go          |  6 ++--
 cmd/admin/queues.go        | 19 ----------
 cmd/entree/accept_logic.go |  7 +++-
 cmd/entree/crontab.go      | 73 +++++++++++++++++---------------------
 cmd/entree/entree_test.go  |  8 +++--
 cmd/entree/main.go         | 12 +++++--
 cmd/entree/work.go         |  2 +-
 cmd/extract/html.go        | 13 +++++--
 cmd/extract/main.go        |  6 ++--
 cmd/extract/pdf.go         | 14 ++++++--
 cmd/extract/queues.go      |  4 ---
 cmd/extract/work.go        |  8 +++--
 cmd/fetch/main.go          |  6 ++--
 cmd/fetch/work.go          |  5 ---
 cmd/fetch/work_support.go  | 13 +++++--
 cmd/pack/main.go           |  6 +++-
 cmd/pack/pdf.go            |  6 +++-
 cmd/serve/main.go          |  6 ++--
 cmd/validate/main.go       |  6 ++--
 cmd/validate/queues.go     | 12 -------
 21 files changed, 124 insertions(+), 109 deletions(-)
 create mode 100644 .circleci/.gitignore

diff --git a/.circleci/.gitignore b/.circleci/.gitignore
new file mode 100644
index 0000000..b133f0f
--- /dev/null
+++ b/.circleci/.gitignore
@@ -0,0 +1 @@
+process.yml
\ No newline at end of file
diff --git a/cmd/admin/main.go b/cmd/admin/main.go
index 6096e45..666e01c 100644
--- a/cmd/admin/main.go
+++ b/cmd/admin/main.go
@@ -130,6 +130,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
-
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/admin/queues.go b/cmd/admin/queues.go
index 2f124f7..b12ec87 100644
--- a/cmd/admin/queues.go
+++ b/cmd/admin/queues.go
@@ -1,28 +1,9 @@
 package main
 
 import (
-	"os"
-
-	common "github.com/GSA-TTS/jemison/internal/common"
 	"github.com/GSA-TTS/jemison/internal/queueing"
-	"github.com/jackc/pgx/v5"
-	"github.com/riverqueue/river"
-	"github.com/riverqueue/river/riverdriver/riverpgxv5"
-	"go.uber.org/zap"
 )
 
-// GLOBAL TO THE APP
-var insertClient *river.Client[pgx.Tx]
-
 func InitializeQueues() {
 	queueing.InitializeRiverQueues()
-
-	// Insert-only client
-	_, pool, _ := common.CommonQueueInit()
-	ic, err := river.NewClient(riverpgxv5.New(pool), &river.Config{})
-	if err != nil {
-		zap.L().Error("could not establish insert-only client")
-		os.Exit(1)
-	}
-	insertClient = ic
 }
diff --git a/cmd/entree/accept_logic.go b/cmd/entree/accept_logic.go
index 4098ab1..62ea656 100644
--- a/cmd/entree/accept_logic.go
+++ b/cmd/entree/accept_logic.go
@@ -82,7 +82,7 @@ func EvaluateEntree(ec *EntreeCheck) {
 	// Fetch will update a second time.
 	scheme := JDB.GetScheme(ec.Scheme)
 	next_fetch := JDB.GetNextFetch(ec.Host)
-	JDB.WorkDBQueries.UpdateGuestbookNextFetch(context.Background(),
+	_, err := JDB.WorkDBQueries.UpdateGuestbookNextFetch(context.Background(),
 		work_db.UpdateGuestbookNextFetchParams{
 			Scheme:   scheme,
 			Domain64: ec.Domain64,
@@ -95,6 +95,11 @@ func EvaluateEntree(ec *EntreeCheck) {
 		},
 	)
 
+	if err != nil {
+		zap.L().Error("failed to update guestbook next fetch",
+			zap.Int64("domain64", ec.Domain64), zap.String("path", ec.Path))
+	}
+
 	ChQSHP <- queueing.QSHP{
 		Queue:  "fetch",
 		Scheme: ec.Scheme,
diff --git a/cmd/entree/crontab.go b/cmd/entree/crontab.go
index 52454d5..92e0ec8 100644
--- a/cmd/entree/crontab.go
+++ b/cmd/entree/crontab.go
@@ -1,16 +1,7 @@
 package main
 
 import (
-	"fmt"
-
 	_ "embed"
-
-	"github.com/GSA-TTS/jemison/config"
-	"github.com/GSA-TTS/jemison/internal/queueing"
-
-	"github.com/robfig/cron"
-	"github.com/tidwall/gjson"
-	"go.uber.org/zap"
 )
 
 /*
@@ -24,35 +15,35 @@
 Month | Yes | 1-12 or JAN-DEC | * / , -
 Day of week | Yes | 0-6 or SUN-SAT | * / , - ?
 */
-func crontab(schedule string) {
-	c := cron.New()
-	// https://crontab.guru/#*_*_*_*_*
-	err := c.AddFunc("0 * * * * *", section("minutely", schedule))
-	if err != nil {
-		zap.L().Error("failed to add crontab in entree")
-	}
-	err = c.AddFunc("@hourly", section("hourly", schedule))
-	if err != nil {
-		zap.L().Error("failed to add crontab in entree")
-	}
-	c.Start()
-}
-
-func section(section string, schedule string) func() {
-	JSON := config.ReadJsonConfig(schedule)
-	return func() {
-		zap.L().Debug(section)
-		for _, site := range gjson.Get(JSON, section).Array() {
-			// Clear out any hall passes at this point.
-			HallPassLedger.Remove(site.Get("host").String())
-
-			zap.L().Debug(fmt.Sprintln(site))
-			ChQSHP <- queueing.QSHP{
-				Queue:  "fetch",
-				Scheme: site.Get("scheme").String(),
-				Host:   site.Get("host").String(),
-				Path:   site.Get("path").String(),
-			}
-		}
-	}
-}
+// func crontab(schedule string) {
+// 	c := cron.New()
+// 	// https://crontab.guru/#*_*_*_*_*
+// 	err := c.AddFunc("0 * * * * *", section("minutely", schedule))
+// 	if err != nil {
+// 		zap.L().Error("failed to add crontab in entree")
+// 	}
+// 	err = c.AddFunc("@hourly", section("hourly", schedule))
+// 	if err != nil {
+// 		zap.L().Error("failed to add crontab in entree")
+// 	}
+// 	c.Start()
+// }

+// func section(section string, schedule string) func() {
+// 	JSON := config.ReadJsonConfig(schedule)
+// 	return func() {
+// 		zap.L().Debug(section)
+// 		for _, site := range gjson.Get(JSON, section).Array() {
+// 			// Clear out any hall passes at this point.
+// 			HallPassLedger.Remove(site.Get("host").String())

+// 			zap.L().Debug(fmt.Sprintln(site))
+// 			ChQSHP <- queueing.QSHP{
+// 				Queue:  "fetch",
+// 				Scheme: site.Get("scheme").String(),
+// 				Host:   site.Get("host").String(),
+// 				Path:   site.Get("path").String(),
+// 			}
+// 		}
+// 	}
+// }
diff --git a/cmd/entree/entree_test.go b/cmd/entree/entree_test.go
index 02a5813..868d1de 100644
--- a/cmd/entree/entree_test.go
+++ b/cmd/entree/entree_test.go
@@ -167,9 +167,13 @@ func TestSetGuestbookFetchToYesterdayForHost2(t *testing.T) {
 	defer conn.Close(ctx)
 
 	// Delete everything from the guestbook for this test.
-	conn.Exec(ctx, "TRUNCATE guestbook")
+	_, err := conn.Exec(ctx, "TRUNCATE guestbook")
 
-	_, err := conn.Exec(ctx,
+	if err != nil {
+		t.Error(err)
+	}
+
+	_, err = conn.Exec(ctx,
 		`INSERT INTO guestbook
 		(scheme, domain64, path, last_updated, last_fetched, next_fetch)
 		VALUES ($1, $2, $3, $4, $5, $6)
diff --git a/cmd/entree/main.go b/cmd/entree/main.go
index 9d5dfda..9c16eda 100644
--- a/cmd/entree/main.go
+++ b/cmd/entree/main.go
@@ -39,7 +39,7 @@ func main() {
 			zap.L().Error("could not get Domain64 for FQDN", zap.String("fqdn", fqdn))
 		} else {
 			zap.L().Debug("inserting fqdn/d64 to hosts", zap.String("fqdn", fqdn), zap.Int64("d64", d64))
-			JDB.WorkDBQueries.UpsertUniqueHost(context.Background(),
+			_, err := JDB.WorkDBQueries.UpsertUniqueHost(context.Background(),
 				work_db.UpsertUniqueHostParams{
 					Domain64: pgtype.Int8{
 						Valid: true,
@@ -51,6 +51,10 @@ func main() {
 						Time:  time.Now().Add(30 * 24 * time.Hour),
 					},
 				})
+
+			if err != nil {
+				zap.L().Error("error upserting domain64 value", zap.Int64("domain64", d64))
+			}
 		}
 	}
@@ -63,6 +67,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
-
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/entree/work.go b/cmd/entree/work.go
index 31ea73a..8bec3fe 100644
--- a/cmd/entree/work.go
+++ b/cmd/entree/work.go
@@ -22,7 +22,7 @@ func (w *EntreeWorker) Work(ctx context.Context, job *river.Job[common.EntreeArgs]) error {
 	// !fullCrawl & !pass: check
 	// !fullCrawl & pass: fetch the page now
 
-	kind := "NOT_VALID_KIND"
+	var kind string
 	if job.Args.FullCrawl {
 		kind = "full"
 	} else {
diff --git a/cmd/extract/html.go b/cmd/extract/html.go
index 4dcea5a..2805328 100644
--- a/cmd/extract/html.go
+++ b/cmd/extract/html.go
@@ -111,7 +111,12 @@ func extractHtml(obj *kv.S3JSON) {
 	raw_key := obj.Key.Copy()
 	raw_key.Extension = util.Raw
 	zap.L().Debug("looking up raw key", zap.String("raw_key", raw_key.Render()))
-	s3.S3ToFile(raw_key, rawFilename)
+	err := s3.S3ToFile(raw_key, rawFilename)
+	if err != nil {
+		zap.L().Error("could not create tempfile from s3",
+			zap.String("raw_key", raw_key.Render()),
+			zap.String("rawfile", rawFilename))
+	}
 	rawFile, err := os.Open(rawFilename)
 	if err != nil {
 		zap.L().Error("cannot open tempfile", zap.String("filename", rawFilename))
@@ -159,7 +164,11 @@ func extractHtml(obj *kv.S3JSON) {
 	}
 	new_obj.Set("headers", string(jsonString))
 	new_obj.Set("body", content)
-	new_obj.Save()
+	err = new_obj.Save()
+
+	if err != nil {
+		zap.L().Error("could not save object", zap.String("key", new_obj.Key.Render()))
+	}
 
 	// Enqueue next steps
 	ChQSHP <- queueing.QSHP{
diff --git a/cmd/extract/main.go b/cmd/extract/main.go
index a92d152..c0ed23b 100644
--- a/cmd/extract/main.go
+++ b/cmd/extract/main.go
@@ -24,6 +24,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, routers)
-
+	err := http.ListenAndServe(":"+env.Env.Port, routers)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/extract/pdf.go b/cmd/extract/pdf.go
index 01e5252..c037a4c 100644
--- a/cmd/extract/pdf.go
+++ b/cmd/extract/pdf.go
@@ -20,8 +20,12 @@ func extractPdf(obj *kv.S3JSON) {
 	// s3 := kv.NewS3(ThisServiceName)
 	raw_copy := obj.Key.Copy()
 	raw_copy.Extension = util.Raw
-	obj.S3.S3ToFile(raw_copy, tempFilename)
-
+	err := obj.S3.S3ToFile(raw_copy, tempFilename)
+	if err != nil {
+		zap.L().Error("could not copy s3 object to file",
+			zap.String("raw_copy", raw_copy.Render()),
+			zap.String("tempFilename", tempFilename))
+	}
 	defer func() {
 		err := os.Remove(tempFilename)
 		if err != nil {
@@ -80,7 +84,11 @@ func extractPdf(obj *kv.S3JSON) {
 			obj.Key.Path,
 			obj.GetJSON(),
 		)
-		new_obj.Save()
+		err = new_obj.Save()
+		if err != nil {
+			zap.L().Error("could not save object to s3",
+				zap.String("key", new_obj.Key.Render()))
+		}
 		page.Close()
 
 		// e.Stats.Increment("page_count")
diff --git a/cmd/extract/queues.go b/cmd/extract/queues.go
index 22551ae..2def7af 100644
--- a/cmd/extract/queues.go
+++ b/cmd/extract/queues.go
@@ -18,9 +18,6 @@ import (
 // One pool of connections for River.
 
 // The work client, doing the work of `extract`
-var packClient *river.Client[pgx.Tx]
-var packPool *pgxpool.Pool
-
 type ExtractWorker struct {
 	river.WorkerDefaults[common.ExtractArgs]
 }
@@ -33,7 +30,6 @@ func InitializeQueues() {
 	var err error
 
 	ctx, extractPool, workers := common.CommonQueueInit()
-	_, packPool, _ = common.CommonQueueInit()
 
 	zap.L().Debug("initialized common queues")
diff --git a/cmd/extract/work.go b/cmd/extract/work.go
index d8dd101..f6e39e4 100644
--- a/cmd/extract/work.go
+++ b/cmd/extract/work.go
@@ -51,8 +51,12 @@ func (w *ExtractWorker) Work(ctx context.Context, job *river.Job[common.ExtractArgs]) error {
 		util.ToScheme(job.Args.Scheme),
 		job.Args.Host,
 		job.Args.Path)
-	s3json.Load()
-
+	err := s3json.Load()
+	if err != nil {
+		zap.L().Error("could not load s3 JSON",
+			zap.String("host", job.Args.Host),
+			zap.String("path", job.Args.Path))
+	}
 	extract(s3json)
 
 	zap.L().Debug("extraction finished")
diff --git a/cmd/fetch/main.go b/cmd/fetch/main.go
index 8c6096f..03daf8f 100644
--- a/cmd/fetch/main.go
+++ b/cmd/fetch/main.go
@@ -66,6 +66,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
-
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go
index 3d856f4..256cbe8 100644
--- a/cmd/fetch/work.go
+++ b/cmd/fetch/work.go
@@ -3,7 +3,6 @@ package main
 import (
 	"context"
 	"fmt"
-	"math/rand/v2"
 	"net/url"
 	"regexp"
 	"strconv"
@@ -59,10 +58,6 @@ func InfoFetchCount() {
 	}
 }
 
-func randRange(min, max int) int64 {
-	return int64(rand.IntN(max-min)) + int64(min)
-}
-
 func stripHostToAscii(host string) string {
 	reg, _ := regexp.Compile("[^a-z]")
 	result := reg.ReplaceAllString(strings.ToLower(host), "")
diff --git a/cmd/fetch/work_support.go b/cmd/fetch/work_support.go
index 49b8317..c353d1e 100644
--- a/cmd/fetch/work_support.go
+++ b/cmd/fetch/work_support.go
@@ -54,7 +54,11 @@ func chunkwiseSHA1(filename string) []byte {
 		}
 		chunk := buf[0:n]
 		// https://pkg.go.dev/crypto/sha1#example-New
-		io.Writer.Write(h, chunk)
+		_, err = io.Writer.Write(h, chunk)
+		if err != nil {
+			zap.L().Error("did not write SHA bytes successfully",
+				zap.Int("h.Size", h.Size()), zap.String("chunk", string(chunk)))
+		}
 	}
 
 	return h.Sum(nil)
@@ -167,7 +171,12 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, error) {
 
 	// Stream that file over to S3
 	s3 := kv.NewS3(ThisServiceName)
-	s3.FileToS3(key, tempFilename, util.GetMimeType(contentType))
+	err = s3.FileToS3(key, tempFilename, util.GetMimeType(contentType))
+	if err != nil {
+		zap.L().Error("could not send file to S3",
+			zap.String("key", key.Render()),
+			zap.String("tempFilename", tempFilename))
+	}
 	response := make(map[string]string)
 
 	// Copy in all of the response headers.
diff --git a/cmd/pack/main.go b/cmd/pack/main.go
index d35a7a3..8adf022 100644
--- a/cmd/pack/main.go
+++ b/cmd/pack/main.go
@@ -8,6 +8,7 @@ import (
 	"github.com/GSA-TTS/jemison/internal/env"
 	"github.com/GSA-TTS/jemison/internal/postgres"
 	"github.com/GSA-TTS/jemison/internal/queueing"
+	"go.uber.org/zap"
 )
 
 var ThisServiceName = "pack"
@@ -34,5 +35,8 @@ func main() {
 	go queueing.ClearCompletedPeriodically()
 
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/pack/pdf.go b/cmd/pack/pdf.go
index 26770d8..9138ba2 100644
--- a/cmd/pack/pdf.go
+++ b/cmd/pack/pdf.go
@@ -67,5 +67,9 @@ func packPdf(s3json *kv.S3JSON) {
 		Tag:     "body",
 		Content: s3json.GetString("content"),
 	})
-
+	if err != nil {
+		zap.L().Error("could not insert raw content",
+			zap.Int64("domain64", d64),
+			zap.String("path", s3json.GetString("path")))
+	}
 }
diff --git a/cmd/serve/main.go b/cmd/serve/main.go
index 4a27089..23ccdd6 100644
--- a/cmd/serve/main.go
+++ b/cmd/serve/main.go
@@ -157,6 +157,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
-
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/validate/main.go b/cmd/validate/main.go
index 1e29d9c..70b9e2c 100644
--- a/cmd/validate/main.go
+++ b/cmd/validate/main.go
@@ -28,6 +28,8 @@ func main() {
 	zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port))
 	// Local and Cloud should both get this from the environment.
-	http.ListenAndServe(":"+env.Env.Port, engine)
-
+	err := http.ListenAndServe(":"+env.Env.Port, engine)
+	if err != nil {
+		zap.Error(err)
+	}
 }
diff --git a/cmd/validate/queues.go b/cmd/validate/queues.go
index 58df10a..22decea 100644
--- a/cmd/validate/queues.go
+++ b/cmd/validate/queues.go
@@ -15,9 +15,6 @@ import (
 	"go.uber.org/zap"
 )
 
-// GLOBAL TO THE APP
-var insertClient *river.Client[pgx.Tx]
-
 type FetchWorker struct {
 	river.WorkerDefaults[common.FetchArgs]
 }
@@ -77,13 +74,4 @@ func (w ValidateFetchWorker) Work(ctx context.Context, job *river.Job[common.ValidateFetchArgs]) error {
 func InitializeQueues() {
 	queueing.InitializeRiverQueues()
 	initX(ThisServiceName, common.ValidateFetchQueue, ValidateFetchWorker{})
-
-	// Insert-only client
-	_, pool, _ := common.CommonQueueInit()
-	ic, err := river.NewClient(riverpgxv5.New(pool), &river.Config{})
-	if err != nil {
-		zap.L().Error("could not establish insert-only client")
-		os.Exit(1)
-	}
-	insertClient = ic
 }

From 5c4b36f4f6b3f15aa841d379b28a76d59523f5ac Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 14:22:55 -0500
Subject: [PATCH 20/39] Still trying to lint

This moves linting into the container in the makefile.
Also, more code with linting issues that I forgot to commit.
Want to see this work in CircleCI.
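Why the config change below matters: in the earlier lint step, the installer was written as two separate lines inside the literal block, so curl streamed the install script to stdout while the following sh -s invocation read an empty stdin, and golangci-lint was never actually installed. Piping the download into the shell makes it one pipeline. A minimal sketch of the corrected step, matching the shape of the diff that follows:

      - run:
          name: "install golangci-lint"
          command: |
            # Pipe the installer into sh; -b chooses the install directory.
            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
              | sh -s -- -b /usr/local/go/bin v<< parameters.version >>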
---
 .circleci/config.yml                |  3 +--
 Dockerfile.build                    |  3 ++-
 Dockerfile.dev                      |  2 +-
 Makefile                            | 15 ++++++++-------
 internal/common/domain64_test.go    |  3 +++
 internal/env/env.go                 | 14 ++++++++++----
 internal/env/zap.go                 |  2 +-
 internal/kv/s3json_test.go          |  1 +
 internal/queueing/generic_insert.go |  2 +-
 internal/queueing/periodic_clear.go |  5 ++++-
 10 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index dbe4a48..0a44035 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -26,8 +26,7 @@ commands:
       - run:
           name: "install golangci-lint"
           command: |
-            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh
-            sh -s -- -b /usr/local/go/bin v<< parameters.version >>
+            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/go/bin v<< parameters.version >>
       - run:
           name: "install jsonnet"
           command: |
diff --git a/Dockerfile.build b/Dockerfile.build
index f5db6a2..c92b457 100644
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -2,4 +2,5 @@ FROM jemison/dev
 
 WORKDIR /app
 
-ENTRYPOINT ["make", "build"]
+ENTRYPOINT ["make"]
+CMD ["build"]
diff --git a/Dockerfile.dev b/Dockerfile.dev
index 44e9590..5042568 100644
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@@ -64,7 +64,7 @@ RUN cp /root/go/bin/stringer /usr/local/go/bin/stringer
 WORKDIR /golanglint
 # binary will be $(go env GOPATH)/bin/golangci-lint
-RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/go/bin v1.62.0
+RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/go/bin v1.63.4
 
 ENTRYPOINT ["tree /"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index f3f82e5..4316337 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,6 @@ docker:
 	docker build -t jemison/build -f Dockerfile.build .
 
 .PHONY: build
-# lint
 build: generate
 	echo "build migrate"
 	cd cmd/migrate ; make build
@@ -41,8 +40,14 @@ build: generate
 	# cd cmd/validate ; make build
 	echo "build walk"
 	cd cmd/walk ; make build
-	echo "copy assets"
-	cd assets ; rm -rf static/assets ; unzip -qq static.zip
+
+.PHONY: lint
+lint: generate
+	golangci-lint run -v
+
+.PHONY: containerlint
+containerlint:
+	docker run -v ${PWD}:/app -t jemison/build lint
 
 .PHONY: up
 up: build
@@ -110,7 +115,3 @@ terraform: delete_all
 docker_full_clean:
 	-docker stop $(docker ps -a -q)
 	-docker rm $(docker ps -a -q)
-
-.PHONY: lint
-lint:
-	-golangci-lint run -v
\ No newline at end of file
diff --git a/internal/common/domain64_test.go b/internal/common/domain64_test.go
index bbf3dc0..c2d92bc 100644
--- a/internal/common/domain64_test.go
+++ b/internal/common/domain64_test.go
@@ -97,11 +97,13 @@ var d64 string = `
 
 func TestUnmarshal(t *testing.T) {
 	b := []byte(d64)
+	//nolint:all
 	NewTLD64s(b)
 }
 
 func TestCheckEdu(t *testing.T) {
 	b := []byte(d64)
+	//nolint:all
 	d, err := NewTLD64s(b)
 	if err != nil {
 		t.Error(err)
@@ -114,6 +116,7 @@ func TestCheckEdu(t *testing.T) {
 
 func TestCountEdu(t *testing.T) {
 	b := []byte(d64)
+	//nolint:all
 	d, err := NewTLD64s(b)
 	if err != nil {
 		t.Error(err)
diff --git a/internal/env/env.go b/internal/env/env.go
index 1c99a58..6bd65d9 100644
--- a/internal/env/env.go
+++ b/internal/env/env.go
@@ -129,11 +129,13 @@ func InitGlobalEnv(this_service string) {
 	}
 
 	// Grab the PORT in the cloud and locally from os.Getenv()
-	viper.BindEnv("PORT")
+	err := viper.BindEnv("PORT")
+	if err != nil {
+		zap.L().Fatal("ENV could not bind env", zap.String("err", err.Error()))
+	}
 
 	//err := viper.ReadInConfig()
-	err := viper.ReadConfig(config.GetYamlFileReader(configName + ".yaml"))
-
+	err = viper.ReadConfig(config.GetYamlFileReader(configName + ".yaml"))
 	if err != nil {
 		log.Fatal("ENV cannot load in the config file ", viper.ConfigFileUsed())
 	}
@@ -154,7 +156,11 @@ func InitGlobalEnv(this_service string) {
 	// with everything in the rgiht places.
 	if IsContainerEnv() || IsLocalTestEnv() {
 		ContainerEnv := container_env{}
-		viper.Unmarshal(&ContainerEnv)
+		err := viper.Unmarshal(&ContainerEnv)
+		if err != nil {
+			log.Println("ENV could not unmarshal VCAP_SERVICES to new")
+			log.Fatal(err)
+		}
 		Env.VcapServices = ContainerEnv.VcapServices
 	}
diff --git a/internal/env/zap.go b/internal/env/zap.go
index 1896827..64ebd88 100644
--- a/internal/env/zap.go
+++ b/internal/env/zap.go
@@ -38,7 +38,7 @@ func createLogger(this_service string) *zap.Logger {
 	s, _ := Env.GetUserService(this_service)
 	level := s.GetParamString("debug_level")
 
-	zap_level := zap.InfoLevel
+	var zap_level zapcore.Level
 	switch level {
 	case "debug":
 		zap_level = zap.DebugLevel
diff --git a/internal/kv/s3json_test.go b/internal/kv/s3json_test.go
index 9b4d705..65c24fb 100644
--- a/internal/kv/s3json_test.go
+++ b/internal/kv/s3json_test.go
@@ -52,6 +52,7 @@ func TestGetFromBytes(t *testing.T) {
 func TestSave(t *testing.T) {
 	setup()
 	s3json := NewFromBytes("fetch", util.HTTPS, "search.gov", "/", []byte(`{"a": 3, "b": 5}`))
+	//nolint:all
 	s3json.Save()
 	assert.Equal(t, int64(3), s3json.GetInt64("a"))
 }
diff --git a/internal/queueing/generic_insert.go b/internal/queueing/generic_insert.go
index e4c6f3f..aa2830a 100644
--- a/internal/queueing/generic_insert.go
+++ b/internal/queueing/generic_insert.go
@@ -50,7 +50,7 @@ func Enqueue(ch_qshp <-chan QSHP) {
 		qshp := <-ch_qshp
 		ctx, tx := common.CtxTx(pool)
 
-		queue_to_match := "NONE"
+		var queue_to_match string
 		if strings.HasPrefix(qshp.Queue, "fetch") {
 			queue_to_match = "fetch"
 		} else {
diff --git a/internal/queueing/periodic_clear.go b/internal/queueing/periodic_clear.go
index cfea5db..987e431 100644
--- a/internal/queueing/periodic_clear.go
+++ b/internal/queueing/periodic_clear.go
@@ -17,6 +17,9 @@ func ClearCompletedPeriodically() {
 		<-ticker.C
 		zap.L().Warn("clearing completed queue")
 		ctx := context.Background()
-		pool.Exec(ctx, "DELETE FROM river_job WHERE state='completed'")
+		_, err := pool.Exec(ctx, "DELETE FROM river_job WHERE state='completed'")
+		if err != nil {
+			zap.L().Error("failed to periodically delete jobs")
+		}
 	}
 }

From 6d9c514b0a70236293f4f181e1ea591531f7bdcb Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 14:27:20 -0500
Subject: [PATCH 21/39] Path/sudo fix

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0a44035..b2cd413 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -26,7 +26,7 @@ commands:
       - run:
           name: "install golangci-lint"
           command: |
-            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/go/bin v<< parameters.version >>
+            curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v<< parameters.version >>

From 957df695c4b9f2ff9b4e1e14df41e2f806be145b Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Thu, 9 Jan 2025 14:31:28 -0500
Subject: [PATCH 22/39] Timeout/verbose

---
 .circleci/config.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index b2cd413..50ba021 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,7 +56,8 @@ jobs:
           make generate
       - run:
           name: "find the lint"
-          command: golangci-lint run
+          command: golangci-lint run -v
+          no_output_timeout: 5m
 
   setup-and-build:
     docker:

From c5882d0e9220df7979f929783995fb1a9e2cbd1f Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Fri, 10 Jan 2025 06:50:05 -0500
Subject: [PATCH 23/39] More linting

Should we use *all* of the lints?
Either way, lots of formatting.

---
 cmd/admin/main.go                 | 16 ++++--
 cmd/entree/accept_logic.go        | 29 +++++++---
 cmd/entree/entree_test.go         |  1 +
 cmd/entree/ledger.go              |  1 +
 cmd/entree/main.go                |  9 ++--
 cmd/entree/queues.go              |  6 +--
 cmd/entree/work.go                | 27 +++++-----
 cmd/extract/extract_test.go       |  1 +
 cmd/extract/html.go               | 21 ++++++--
 cmd/extract/main.go               |  2 +
 cmd/extract/pdf.go                | 16 +++---
 cmd/extract/queues.go             |  5 +-
 cmd/extract/work.go               |  6 ++-
 cmd/fetch/api.go                  |  3 +-
 cmd/fetch/host_gateway.go         |  3 ++
 cmd/fetch/main.go                 |  8 ++-
 cmd/fetch/queues.go               | 89 +++++++++++++++++++------------
 cmd/fetch/work.go                 | 43 ++++++++++-----
 cmd/fetch/work_support.go         | 51 ++++++++++++++----
 cmd/pack/queues.go                |  2 +-
 cmd/walk/work.go                  |  1 +
 config/domain64.go                |  2 +
 config/services/fetch.libsonnet   |  8 +++
 go.mod                            |  1 -
 internal/env/env.go               |  9 ++++
 internal/env/gin.go               |  1 +
 internal/postgres/postgres.go     | 62 ++++++++++++++++-----
 internal/util/key_utilities.go    |  7 +++
 internal/util/string_utilities.go |  1 +
 29 files changed, 310 insertions(+), 121 deletions(-)

diff --git a/cmd/admin/main.go b/cmd/admin/main.go
index 666e01c..452286d 100644
--- a/cmd/admin/main.go
+++ b/cmd/admin/main.go
@@ -9,11 +9,11 @@ import (
 	"github.com/GSA-TTS/jemison/internal/env"
 	"github.com/GSA-TTS/jemison/internal/queueing"
 	"github.com/gin-gonic/gin"
-
 	"go.uber.org/zap"
 )
 
 var ThisServiceName = "admin"
+
 var ChQSHP = make(chan queueing.QSHP)
 
 type FetchRequestInput struct {
@@ -29,9 +29,12 @@ func FetchRequestHandler(c *gin.Context) {
 	if err := c.BindJSON(&fri); err != nil {
 		return
 	}
+
 	if fri.ApiKey == os.Getenv("API_KEY") || true {
-		zap.L().Debug("fetch enqueue", zap.String("host", fri.Host), zap.String("path", fri.Path))
-		//queueing.InsertFetch(fri.Scheme, fri.Host, fri.Path)
+		zap.L().Debug("fetch enqueue",
+			zap.String("host", fri.Host),
+			zap.String("path", fri.Path))
+
 		ChQSHP <- queueing.QSHP{
 			Queue:  "fetch",
 			Scheme: fri.Scheme,
@@ -46,12 +49,14 @@ func FetchRequestHandler(c *gin.Context) {
 
 func EntreeRequestHandler(c *gin.Context) {
 	var fri FetchRequestInput
+
 	full := c.Param("fullorone")
 	hallPass := c.Param("hallpass")
 
 	if err := c.BindJSON(&fri); err != nil {
 		return
 	}
+
 	if fri.ApiKey == os.Getenv("API_KEY") || true {
 		hallPassB := false
 		fullB := false
@@ -59,6 +64,7 @@ func EntreeRequestHandler(c *gin.Context) {
 		if hallPass == "pass" {
 			hallPassB = true
 		}
+
 		if full == "full" {
 			fullB = true
 		}
@@ -89,8 +95,8 @@ func PackRequestHandler(c *gin.Context) {
 	if err := c.BindJSON(&fri); err != nil {
 		return
 	}
-	if fri.ApiKey == os.Getenv("API_KEY") || true {
 
+	if fri.ApiKey == os.Getenv("API_KEY") || true {
 		zap.L().Debug("pack enqueue",
 			zap.String("host", fri.Host))
 
@@ -118,10 +124,10 @@ func main() {
 		v1.PUT("/fetch", FetchRequestHandler)
 		v1.PUT("/entree/:fullorone/:hallpass", EntreeRequestHandler)
 		v1.PUT("/pack", PackRequestHandler)
-		// v1.GET("/jobs", JobCountHandler)
 	}
 
 	log.Println("environment initialized")
+
 	go queueing.Enqueue(ChQSHP)
 
 	// // Init a cache for the workers
diff --git a/cmd/entree/accept_logic.go b/cmd/entree/accept_logic.go
index 62ea656..83c49be 100644
--- a/cmd/entree/accept_logic.go
+++ b/cmd/entree/accept_logic.go
@@ -1,3 +1,4 @@
+//nolint:godox
 package main
 
 import (
@@ -11,10 +12,14 @@ import (
 	"go.uber.org/zap"
 )
 
+const SINGLE_PASS = "single"
+
+const FULL_PASS = "full"
+
 // The front line of questions involve whether or not
 // it is a single URL and if there is a hall pass.
 
-// FIXME: add the host_id here. Pass it through
+// FIXME: add the host_id here. Pass it through.
 type EntreeCheck struct {
 	// "full" or "single"
 	Kind string
@@ -32,6 +37,7 @@ func NewEntreeCheck(kind, scheme, host, path string, hallPass bool) (*EntreeCheck, error) {
 	// host_id, err := JDB.WorkDBQueries.GetHostId(ctx, host)
 	d64, err := config.FQDNToDomain64(host)
 	if err != nil {
+		//nolint:wrapcheck
 		return nil, err
 	}
@@ -51,24 +57,29 @@ func EvaluateEntree(ec *EntreeCheck) {
 	if IsSingleWithPass(ec) {
 		zap.L().Debug("is-single-with-pass",
 			zap.String("host", ec.Host), zap.String("path", ec.Path))
+
 		it_shall_pass = true
 	} else if IsSingleNoPass(ec) {
 		zap.L().Debug("is-single-no-pass",
 			zap.String("host", ec.Host), zap.String("path", ec.Path))
+
 		it_shall_pass = true
 	} else if IsFullWithPass(ec) {
 		zap.L().Debug("is-full-with-pass",
 			zap.String("host", ec.Host), zap.String("path", ec.Path))
 		SetHostNextFetchToYesterday(ec)
 		SetGuestbookFetchToYesterdayForHost(ec)
+
 		it_shall_pass = true
 	} else if IsFullNoPass(ec) {
 		zap.L().Debug("is-full-no-pass",
 			zap.String("host", ec.Host), zap.String("path", ec.Path))
+
 		it_shall_pass = true
 	} else {
 		zap.L().Debug("no entree evaluation criteria met",
 			zap.String("host", ec.Host), zap.String("path", ec.Path))
+
 		it_shall_pass = false
 	}
@@ -82,6 +93,7 @@ func EvaluateEntree(ec *EntreeCheck) {
 	// Fetch will update a second time.
 	scheme := JDB.GetScheme(ec.Scheme)
 	next_fetch := JDB.GetNextFetch(ec.Host)
+
 	_, err := JDB.WorkDBQueries.UpdateGuestbookNextFetch(context.Background(),
 		work_db.UpdateGuestbookNextFetchParams{
 			Scheme:   scheme,
@@ -94,7 +106,6 @@ func EvaluateEntree(ec *EntreeCheck) {
 			},
 		},
 	)
-
 	if err != nil {
 		zap.L().Error("failed to update guestbook next fetch",
 			zap.Int64("domain64", ec.Domain64), zap.String("path", ec.Path))
@@ -119,7 +130,7 @@ func EvaluateEntree(ec *EntreeCheck) {
 func IsSingleWithPass(ec *EntreeCheck) bool {
 	// This just allows us to queue this onward to `fetch`.
 	// Fetch will handle guestbook updates.
-	return ec.Kind == "single" && ec.HallPass
+	return ec.Kind == SINGLE_PASS && ec.HallPass
 }
 
 // A single URL with no pass is most likely a URL
@@ -129,7 +140,7 @@ func IsSingleWithPass(ec *EntreeCheck) bool {
 // - Fetch the page
 // - Update last_fetch in guestbook
 func IsSingleNoPass(ec *EntreeCheck) bool {
-	return ec.Kind == "single" && !ec.HallPass && CheckIfIsInGuestbook(ec)
+	return ec.Kind == SINGLE_PASS && !ec.HallPass && CheckIfIsInGuestbook(ec)
 }
 
 func CheckIfIsInGuestbook(ec *EntreeCheck) bool {
@@ -152,7 +163,7 @@ func CheckIfIsInGuestbook(ec *EntreeCheck) bool {
 // - Set last_fetch in guestbook
 // - Reset next_fetch in hosts table after completion
 func IsFullWithPass(ec *EntreeCheck) bool {
-	return ec.Kind == "full" && ec.HallPass
+	return ec.Kind == FULL_PASS && ec.HallPass
 }
 
 // This is probably a nightly enqueue.
@@ -160,24 +171,27 @@ func IsFullWithPass(ec *EntreeCheck) bool {
 // Possible side-effects:
 // - None. It runs on what is in the DBs.
 func IsFullNoPass(ec *EntreeCheck) bool {
-	return ec.Kind == "full" && !ec.HallPass && CheckIfAfterHostNextFetch(ec)
+	return ec.Kind == FULL_PASS && !ec.HallPass && CheckIfAfterHostNextFetch(ec)
 }
 
 // Support functions
 func isInGuestbook(ec *EntreeCheck) bool {
 	ctx := context.Background()
+
 	b, err := JDB.WorkDBQueries.CheckEntryExistsInGuestbook(ctx, ec.Domain64)
 	if err != nil {
 		zap.L().Fatal("could not check if in guestbook",
 			zap.Int64("domain64", ec.Domain64),
 			zap.String("domain64_hex", config.Dec64ToHex(ec.Domain64)))
 	}
+
 	return b
 }
 
 func CheckIfAfterGuestbookNextFetch(ec *EntreeCheck) bool {
 	ctx := context.Background()
+
 	entry, err := JDB.WorkDBQueries.GetGuestbookEntry(ctx, work_db.GetGuestbookEntryParams{
 		Domain64: ec.Domain64,
 		Path:     ec.Path,
@@ -193,6 +207,7 @@ func CheckIfAfterGuestbookNextFetch(ec *EntreeCheck) bool {
 
 func CheckIfAfterHostNextFetch(ec *EntreeCheck) bool {
 	ctx := context.Background()
+
 	ts, err := JDB.WorkDBQueries.GetHostNextFetch(ctx, ec.Domain64)
 	if err != nil {
 		// If it isn't in the host table, then return false
@@ -204,6 +219,7 @@ func CheckIfAfterHostNextFetch(ec *EntreeCheck) bool {
 
 func SetHostNextFetchToYesterday(ec *EntreeCheck) {
 	ctx := context.Background()
+
 	err := JDB.WorkDBQueries.SetHostNextFetchToYesterday(ctx, ec.Domain64)
 	if err != nil {
 		zap.L().Error("could not set host fetch to yesterday",
@@ -213,6 +229,7 @@ func SetHostNextFetchToYesterday(ec *EntreeCheck) {
 
 func SetGuestbookFetchToYesterdayForHost(ec *EntreeCheck) {
 	ctx := context.Background()
+
 	err := JDB.WorkDBQueries.SetGuestbookFetchToYesterdayForHost(ctx, ec.Domain64)
 	if err != nil {
 		zap.L().Fatal("could not set guestbook to yesterday for host",
diff --git a/cmd/entree/entree_test.go b/cmd/entree/entree_test.go
index 868d1de..c2b3d8a 100644
--- a/cmd/entree/entree_test.go
+++ b/cmd/entree/entree_test.go
@@ -1,3 +1,4 @@
+//nolint:all
 package main
 
 import (
diff --git a/cmd/entree/ledger.go b/cmd/entree/ledger.go
index e7007e2..29214d1 100644
--- a/cmd/entree/ledger.go
+++ b/cmd/entree/ledger.go
@@ -11,6 +11,7 @@ func NewSafeLedger() *SafeLedger {
 	l := &SafeLedger{
 		Ledger: make(map[string]bool),
 	}
+
 	return l
 }
diff --git a/cmd/entree/main.go b/cmd/entree/main.go
index 9c16eda..410c924 100644
--- a/cmd/entree/main.go
+++ b/cmd/entree/main.go
@@ -17,6 +17,7 @@ import (
 )
 
 var ThisServiceName = "entree"
+
 var ChQSHP = make(chan queueing.QSHP)
 
 var JDB *postgres.JemisonDB
@@ -58,9 +59,11 @@ func main() {
 		}
 	}
 
-	// FIXME: This would pre-load the crontab with
-	// values based on the Domain64 JSON.
-	//crontab(env.Env.AllowedHosts)
+	/*
+		// FIXME: This would pre-load the crontab with
+		// values based on the Domain64 JSON.
+		//crontab(env.Env.AllowedHosts)
+	*/
 
 	go queueing.Enqueue(ChQSHP)
 
diff --git a/cmd/entree/queues.go b/cmd/entree/queues.go
index e80eee9..a9c0e34 100644
--- a/cmd/entree/queues.go
+++ b/cmd/entree/queues.go
@@ -17,8 +17,9 @@ import (
 // GLOBAL TO THE APP
 // One pool of connections for River.
 
-// The work client, doing the work of `Entree`
+// The work client, doing the work of `Entree`.
 var EntreePool *pgxpool.Pool
+
 var EntreeClient *river.Client[pgx.Tx]
 
 type EntreeWorker struct {
@@ -49,7 +50,6 @@ func InitializeQueues() {
 		},
 		Workers: workers,
 	})
-
 	if err != nil {
 		zap.L().Error("could not establish worker pool")
 		log.Println(err)
@@ -59,6 +59,6 @@ func InitializeQueues() {
 	// Start the work clients
 	if err := EntreeClient.Start(ctx); err != nil {
 		zap.L().Error("workers are not the means of production. exiting.")
-		os.Exit(42)
+		os.Exit(1)
 	}
 }
diff --git a/cmd/entree/work.go b/cmd/entree/work.go
index 8bec3fe..12ff469 100644
--- a/cmd/entree/work.go
+++ b/cmd/entree/work.go
@@ -9,19 +9,18 @@ import (
 	"go.uber.org/zap"
 )
 
-// entree-1 | {"level":"fatal","timestamp":"2024-11-27T13:42:14.431Z","caller":"work_db/custom.go:29","msg":"could not connect to DB1","pid":13,"stacktrace":"github.com/GSA-TTS/jemison/internal/work_db/work_db.GetWorkDbQueryContext\n\t/home/jadudm/git/search/jemison/app/internal/work_db/work_db/custom.go:29\ngithub.com/GSA-TTS/jemison/internal/work_db/work_db.UpdateNextFetch\n\t/home/jadudm/git/search/jemison/app/internal/work_db/work_db/custom.go:36\nmain.EvaluateEntree\n\t/home/jadudm/git/search/jemison/app/cmd/entree/accept_logic.go:81\nmain.(*EntreeWorker).Work\n\t/home/jadudm/git/search/jemison/app/cmd/entree/work.go:45\ngithub.com/riverqueue/river.(*wrapperWorkUnit[...]).Work\n\t/home/jadudm/go/pkg/mod/github.com/riverqueue/river@v0.13.0/work_unit_wrapper.go:30\ngithub.com/riverqueue/river.(*jobExecutor).execute.func2\n\t/home/jadudm/go/pkg/mod/github.com/riverqueue/river@v0.13.0/job_executor.go:216\ngithub.com/riverqueue/river.(*jobExecutor).execute\n\t/home/jadudm/go/pkg/mod/github.com/riverqueue/river@v0.13.0/job_executor.go:239\ngithub.com/riverqueue/river.(*jobExecutor).Execute\n\t/home/jadudm/go/pkg/mod/github.com/riverqueue/river@v0.13.0/job_executor.go:157"}
-
+// We're the new front door.
+// When a request comes in, we will run the algorithm described in
+// docs/design_entree.md.
+
+/*
+// Matrix
+// fullCrawl & !pass: check every timeout in the domain.
+// fullCrawl & pass: re-crawl the whole domain now.
+// !fullCrawl & !pass: check
+// !fullCrawl & pass: fetch the page now
+*/
 func (w *EntreeWorker) Work(ctx context.Context, job *river.Job[common.EntreeArgs]) error {
-	// We're the new front door.
-	// When a request comes in, we will run the algorithm described in
-	// docs/design_entree.md.
-
-	// Matrix
-	// fullCrawl & !pass: check every timeout in the domain.
-	// fullCrawl & pass: re-crawl the whole domain now.
-	// !fullCrawl & !pass: check
-	// !fullCrawl & pass: fetch the page now
 	var kind string
 	if job.Args.FullCrawl {
 		kind = "full"
	} else {
@@ -32,10 +31,11 @@ func (w *EntreeWorker) Work(ctx context.Context, job *river.Job[common.EntreeArgs]) error {
 	// In case we don't have clean URLs...
 	if len(job.Args.Path) > 0 {
 		path := strings.TrimSpace(job.Args.Path)
-		//path = util.TrimSuffix(path, "/")
+
 		if path == "" {
 			path = "/"
 		}
+
 		ec, err := NewEntreeCheck(kind, job.Args.Scheme, job.Args.Host, path, job.Args.HallPass)
 		if err != nil {
 			// If we cannot create a new EC object, we probably couldn't find the host.
 			// requeue the job, and we don't want to proceed.
return nil } + EvaluateEntree(ec) } else { zap.L().Debug("skipping zero-length path", zap.String("host", job.Args.Host)) diff --git a/cmd/extract/extract_test.go b/cmd/extract/extract_test.go index 63b4810..a3483a1 100644 --- a/cmd/extract/extract_test.go +++ b/cmd/extract/extract_test.go @@ -1,3 +1,4 @@ +//nolint:all package main import ( diff --git a/cmd/extract/html.go b/cmd/extract/html.go index 2805328..5493532 100644 --- a/cmd/extract/html.go +++ b/cmd/extract/html.go @@ -24,6 +24,7 @@ import ( func scrape_sel(sel *goquery.Selection) string { txt := sel.Text() repl := strings.ToLower(txt) + return util.CollapseWhitespace(repl) } @@ -31,11 +32,13 @@ func _getTitle(doc *goquery.Document) string { // Some pages are just really malformed. // It turns out there are title tags elsewhere in the doc. title := "" + doc.Find("title").Each(func(ndx int, sel *goquery.Selection) { if title == "" { title = scrape_sel(sel) } }) + return util.CollapseWhitespace(title) } @@ -54,11 +57,14 @@ func _getHeaders(doc *goquery.Document) map[string][]string { "h8", } { accum := make([]string, 0) + doc.Find(tag).Each(func(ndx int, sel *goquery.Selection) { accum = append(accum, util.CollapseWhitespace(scrape_sel(sel))) }) + headers[tag] = accum } + return headers } @@ -71,6 +77,7 @@ func _getBodyContent(doc *goquery.Document) string { doc.Find(".usa-footer").Remove() content := "" + for _, elem := range []string{ "p", "li", @@ -101,9 +108,9 @@ func _getBodyContent(doc *goquery.Document) string { // * title: string // * headers: []string (as JSON) // * body : string - +// +//nolint:funlen func extractHtml(obj *kv.S3JSON) { - // rawFilename := obj.GetString("raw") rawFilename := uuid.NewString() // The file is not in this service... it's in the `fetch` bucket.` s3 := kv.NewS3("fetch") @@ -111,17 +118,21 @@ func extractHtml(obj *kv.S3JSON) { raw_key := obj.Key.Copy() raw_key.Extension = util.Raw zap.L().Debug("looking up raw key", zap.String("raw_key", raw_key.Render())) + err := s3.S3ToFile(raw_key, rawFilename) if err != nil { zap.L().Error("could not create tempfile from s3", zap.String("raw_key", raw_key.Render()), zap.String("rawfile", rawFilename)) } + rawFile, err := os.Open(rawFilename) if err != nil { zap.L().Error("cannot open tempfile", zap.String("filename", rawFilename)) + return } + defer func() { rawFile.Close() os.Remove(rawFilename) @@ -132,6 +143,7 @@ func extractHtml(obj *kv.S3JSON) { zap.L().Error("cannot create new doc from raw file", zap.String("rawFilename", rawFilename), zap.String("rawKey", raw_key.Render())) + return } @@ -160,12 +172,14 @@ func extractHtml(obj *kv.S3JSON) { jsonString, err := json.Marshal(headers) if err != nil { zap.L().Error("could not marshal headers to JSON", zap.String("title", title)) + return } + new_obj.Set("headers", string(jsonString)) new_obj.Set("body", content) - err = new_obj.Save() + err = new_obj.Save() if err != nil { zap.L().Error("could not save object", zap.String("key", new_obj.Key.Render())) } @@ -177,5 +191,4 @@ func extractHtml(obj *kv.S3JSON) { Host: obj.Key.Host, Path: obj.Key.Path, } - } diff --git a/cmd/extract/main.go b/cmd/extract/main.go index c0ed23b..34df22c 100644 --- a/cmd/extract/main.go +++ b/cmd/extract/main.go @@ -11,6 +11,7 @@ import ( ) var ThisServiceName = "extract" + var ChQSHP = make(chan queueing.QSHP) func main() { @@ -19,6 +20,7 @@ func main() { log.Println("environment initialized") routers := common.InitializeAPI() + go queueing.Enqueue(ChQSHP) zap.L().Info("listening to the music of the spheres", diff --git 
a/cmd/extract/pdf.go b/cmd/extract/pdf.go index c037a4c..ac6f126 100644 --- a/cmd/extract/pdf.go +++ b/cmd/extract/pdf.go @@ -1,3 +1,4 @@ +//nolint:godox package main import ( @@ -14,18 +15,19 @@ import ( "go.uber.org/zap" ) +//nolint:funlen func extractPdf(obj *kv.S3JSON) { - //rawFilename := obj.GetString("raw") tempFilename := uuid.NewString() - // s3 := kv.NewS3(ThisServiceName) raw_copy := obj.Key.Copy() raw_copy.Extension = util.Raw + err := obj.S3.S3ToFile(raw_copy, tempFilename) if err != nil { zap.L().Error("could not copy s3 object to file", zap.String("raw_copy", raw_copy.Render()), zap.String("tempFilename", tempFilename)) } + defer func() { err := os.Remove(tempFilename) if err != nil { @@ -46,6 +48,7 @@ func extractPdf(obj *kv.S3JSON) { // Give up on big files. // FIXME: we need to clean up the bucket, too, and delete PDFs there zap.L().Debug("file too large, not processing") + return } @@ -55,12 +58,13 @@ func extractPdf(obj *kv.S3JSON) { zap.L().Warn("poppler failed to open pdf", zap.String("raw_filename", tempFilename), zap.String("key", obj.Key.Render())) + return } else { // Pull the metadata out, and include in every object. info := doc.Info() - for page_no := 0; page_no < doc.GetNPages(); page_no++ { + for page_no := 0; page_no < doc.GetNPages(); page_no++ { page_number_anchor := fmt.Sprintf("#page=%d", page_no+1) copied_key := obj.Key.Copy() copied_key.Path = copied_key.Path + page_number_anchor @@ -84,13 +88,14 @@ func extractPdf(obj *kv.S3JSON) { obj.Key.Path, obj.GetJSON(), ) + err = new_obj.Save() if err != nil { zap.L().Error("could not save object to s3", zap.String("key", new_obj.Key.Render())) } + page.Close() - // e.Stats.Increment("page_count") // Enqueue next steps ChQSHP <- queueing.QSHP{ @@ -106,7 +111,4 @@ func extractPdf(obj *kv.S3JSON) { } doc.Close() - - //e.Stats.Increment("document_count") - } diff --git a/cmd/extract/queues.go b/cmd/extract/queues.go index 2def7af..0490876 100644 --- a/cmd/extract/queues.go +++ b/cmd/extract/queues.go @@ -24,11 +24,13 @@ type ExtractWorker struct { func InitializeQueues() { var extractClient *river.Client[pgx.Tx] + var extractPool *pgxpool.Pool queueing.InitializeRiverQueues() var err error + ctx, extractPool, workers := common.CommonQueueInit() zap.L().Debug("initialized common queues") @@ -50,7 +52,6 @@ func InitializeQueues() { }, Workers: workers, }) - if err != nil { zap.L().Error("could not establish worker pool") log.Println(err) @@ -60,6 +61,6 @@ func InitializeQueues() { // Start the work clients if err := extractClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. exiting.") - os.Exit(42) + os.Exit(1) } } diff --git a/cmd/extract/work.go b/cmd/extract/work.go index f6e39e4..558a3e6 100644 --- a/cmd/extract/work.go +++ b/cmd/extract/work.go @@ -1,3 +1,4 @@ +//nolint:godox package main import ( @@ -33,7 +34,7 @@ func extract(obj *kv.S3JSON) { if !isTooLarge(obj) { extractPdf(obj) } else { - //FIXME DELETE THIS THING + // FIXME: This should be deleted at this point, if we get here. 
zap.L().Error("s3json object too large", zap.String("host", obj.Key.Host), zap.String("path", obj.Key.Path)) } @@ -42,7 +43,6 @@ func extract(obj *kv.S3JSON) { } func (w *ExtractWorker) Work(ctx context.Context, job *river.Job[common.ExtractArgs]) error { - zap.L().Info("extracting", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) @@ -51,12 +51,14 @@ func (w *ExtractWorker) Work(ctx context.Context, job *river.Job[common.ExtractA util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path) + err := s3json.Load() if err != nil { zap.L().Error("could not load s3 JSON", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) } + extract(s3json) zap.L().Debug("extraction finished") diff --git a/cmd/fetch/api.go b/cmd/fetch/api.go index acd5c7b..7f76dd2 100644 --- a/cmd/fetch/api.go +++ b/cmd/fetch/api.go @@ -24,7 +24,6 @@ func FetchRequestHandler(c *gin.Context) { if err := c.BindJSON(&fri); err != nil { return } - //zap.L().Debug("api checking key", zap.String("api-key", fri.ApiKey)) if fri.ApiKey == os.Getenv("API_KEY") { zap.L().Debug("api enqueue", zap.String("host", fri.Host), zap.String("path", fri.Path)) @@ -54,7 +53,7 @@ func FetchRequestHandler(c *gin.Context) { } func SitemapRequestHandler(c *gin.Context) { - + // pass } func ExtendApi(r *gin.Engine) { diff --git a/cmd/fetch/host_gateway.go b/cmd/fetch/host_gateway.go index 3a3492a..376d66f 100644 --- a/cmd/fetch/host_gateway.go +++ b/cmd/fetch/host_gateway.go @@ -52,7 +52,9 @@ func (hsm *HostGateway) GoodToGo(host string) bool { // We have not seen this host before // Therefore, add them to the map, and they're good to go. zap.L().Debug("gateway: host never seen before") + hsm.last[host] = time.Now() + return true } } @@ -61,6 +63,7 @@ func (hsm *HostGateway) HostExists(host string) bool { hsm.m.RLock() defer hsm.m.RUnlock() _, ok := hsm.last[host] + return ok } diff --git a/cmd/fetch/main.go b/cmd/fetch/main.go index 03daf8f..b5b2d28 100644 --- a/cmd/fetch/main.go +++ b/cmd/fetch/main.go @@ -15,9 +15,11 @@ import ( ) var PoliteSleep int64 + var ThisServiceName = "fetch" var RetryClient *http.Client + var Gateway *HostGateway var JDB *postgres.JemisonDB @@ -28,6 +30,10 @@ var Workers *river.Workers var MaxFilesize int64 +const BYTES_PER_KB = 1024 + +const KB_PER_MB = 1024 + func main() { env.InitGlobalEnv(ThisServiceName) InitializeQueues() @@ -49,7 +55,7 @@ func main() { // Pre-compute/lookup the sleep duration for backoff PoliteSleep = service.GetParamInt64("polite_sleep") // 1024KB * 1024B => MB - MaxFilesize = service.GetParamInt64("max_filesize_mb") * 1024 * 1024 + MaxFilesize = service.GetParamInt64("max_filesize_mb") * BYTES_PER_KB * KB_PER_MB logger_level := service.GetParamString("debug_level") if logger_level != "debug" { diff --git a/cmd/fetch/queues.go b/cmd/fetch/queues.go index 4349640..99e130d 100644 --- a/cmd/fetch/queues.go +++ b/cmd/fetch/queues.go @@ -22,12 +22,21 @@ import ( // GLOBAL TO THE APP // One pool of connections for River. -// The work client, doing the work of `fetch` +const ROUNDROBIN = "round_robin" + +const OPD = "one_per_domain" + +const SIMPLE = "simple" + +// The work client, doing the work of `fetch`. 
 var FetchPool *pgxpool.Pool
+
 var FetchClient *river.Client[pgx.Tx]
+
 var FetchQueues map[string]river.QueueConfig

 var RoundRobinWorkerPool atomic.Int64
+
 var RoundRobinSize int64

 var QueueingModel string
@@ -37,17 +46,25 @@ type FetchWorker struct {
 }

 func oneQueuePerHost(workers *river.Workers, workerCount int64) {
+	fetchService, err := env.Env.GetUserService(ThisServiceName)
+	if err != nil {
+		zap.L().Error("could not fetch service config")
+		log.Println(err)
+		os.Exit(1)
+	}

 	MainQueue := make(map[string]river.QueueConfig)
 	MainQueue[ThisServiceName] = river.QueueConfig{MaxWorkers: int(workerCount)}
 	FetchQueues = make(map[string]river.QueueConfig)
+
 	for _, host := range config.GetListOfHosts(env.Env.AllowedHosts) {
 		asciiHost := stripHostToAscii(host)
 		asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost)
 		zap.L().Info("setting up queue", zap.String("queue_name", asciiQueueName))
+
 		FetchQueues[asciiQueueName] = river.QueueConfig{
-			MaxWorkers: 30,
+			MaxWorkers: int(workerCount),
 		}
 	}

@@ -65,25 +82,25 @@ func oneQueuePerHost(workers *river.Workers, workerCount int64) {
 	hostsFetchClient, err := river.NewClient(riverpgxv5.New(FetchPool), &river.Config{
 		Queues:            FetchQueues,
 		Workers:           workers,
-		FetchCooldown:     500 * time.Millisecond,
-		FetchPollInterval: 1000 * time.Millisecond,
+		FetchCooldown:     time.Duration(fetchService.GetParamInt64("fetch_cooldown_ms")) * time.Millisecond,
+		FetchPollInterval: time.Duration(fetchService.GetParamInt64("fetch_poll_interval_ms")) * time.Millisecond,
 	})
 	if err != nil {
-		zap.L().Error("could not establish hosts fetch client")
-		log.Println(err)
-		os.Exit(1)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("could not establish hosts fetch client")
 	}

 	// Start the work clients
 	ctx := context.Background()
 	if err := mainFetchClient.Start(ctx); err != nil {
-		zap.L().Error("could not launch main fetch client.", zap.String("err", err.Error()))
-		os.Exit(42)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("could not launch main fetch client")
 	}
+
 	ctx = context.Background()
 	if err := hostsFetchClient.Start(ctx); err != nil {
-		zap.L().Error("could not launch hosts client", zap.String("err", err.Error()))
-		os.Exit(42)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("could not launch hosts client")
 	}
 }

@@ -102,23 +119,23 @@ func roundRobinQueues(workers *river.Workers, workerCount int64) {
 		},
 		Workers: workers,
 	})
-
 	if err != nil {
-		zap.L().Error("could not establish main worker pool")
-		log.Println(err)
-		os.Exit(1)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("could not establish main worker pool")
 	}
+
 	FetchClient = fetchClient

 	// Start the work clients
 	if err := fetchClient.Start(context.Background()); err != nil {
-		zap.L().Error("workers are not the means of production. exiting.")
-		os.Exit(42)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("workers are not the means of production")
 	}

 	// start the round-robin queues
 	for n := range workerCount {
 		queueName := fmt.Sprintf("%s-%d", ThisServiceName, n)
+
 		fetchClient, err = river.NewClient(riverpgxv5.New(FetchPool), &river.Config{
 			Queues: map[string]river.QueueConfig{
 				queueName: {MaxWorkers: int(workerCount)},
 			},
 			Workers: workers,
 		})
 		if err != nil {
-			zap.L().Error("could not establish worker pool", zap.Int64("pool number", n))
-			log.Println(err)
-			os.Exit(1)
+			zap.L().Error(err.Error())
+			zap.L().Fatal("could not establish worker pool")
 		}
 		// Start the work clients
 		if err := fetchClient.Start(context.Background()); err != nil {
-			zap.L().Error("workers are not the means of production. exiting.")
-			os.Exit(42)
+			zap.L().Error(err.Error())
+			zap.L().Fatal("workers are not the means of production")
 		}
 	}
 }

 func simpleQueue(workers *river.Workers, workerCount int64) {
-
 	MainQueue := make(map[string]river.QueueConfig)
 	MainQueue[ThisServiceName] = river.QueueConfig{MaxWorkers: int(workerCount)}

 	mainFetchClient, err := river.NewClient(riverpgxv5.New(FetchPool), &river.Config{
 		Queues:  MainQueue,
 		Workers: workers,
 	})
 	if err != nil {
-		zap.L().Error("could not establish main fetch client")
-		log.Println(err)
-		os.Exit(1)
+		zap.L().Error(err.Error())
+		zap.L().Fatal("could not establish main fetch client")
 	}

 	// Start the work clients
 	ctx := context.Background()
 	if err := mainFetchClient.Start(ctx); err != nil {
 		zap.L().Error("could not launch main fetch client.", zap.String("err", err.Error()))
-		os.Exit(42)
+		zap.L().Fatal("exiting")
 	}
 }

 func InitializeQueues() {
-	//var fetchClient *river.Client[pgx.Tx]
 	queueing.InitializeRiverQueues()

 	_, fP, workers := common.CommonQueueInit()
@@ -179,22 +192,28 @@ func InitializeQueues() {
 		log.Println(err)
 		os.Exit(1)
 	}
+
 	workerCount := fetchService.GetParamInt64("workers")
 	queueModel := fetchService.GetParamString("queue_model")

 	switch queueModel {
-	case "round_robin":
-		QueueingModel = "round_robin"
+	case ROUNDROBIN:
+		QueueingModel = ROUNDROBIN
+
 		roundRobinQueues(workers, workerCount)
-	case "one_per_domain":
-		QueueingModel = "one_per_domain"
+	case OPD:
+		QueueingModel = OPD
+
 		oneQueuePerHost(workers, workerCount)
-	case "simple":
-		QueueingModel = "simple"
+	case SIMPLE:
+		QueueingModel = SIMPLE
+
 		simpleQueue(workers, workerCount)
 	default:
 		zap.L().Warn("falling through to default simple queueing model")
-		QueueingModel = "simple"
+
+		QueueingModel = SIMPLE
+
 		simpleQueue(workers, workerCount)
 	}
 }
diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go
index 256cbe8..7fa3db9 100644
--- a/cmd/fetch/work.go
+++ b/cmd/fetch/work.go
@@ -2,6 +2,7 @@ package main

 import (
 	"context"
+	_ "embed"
 	"fmt"
 	"net/url"
 	"regexp"
@@ -11,8 +12,6 @@ import (
 	"sync/atomic"
 	"time"

-	_ "embed"
-
 	"github.com/GSA-TTS/jemison/config"
 	common "github.com/GSA-TTS/jemison/internal/common"
 	filter "github.com/GSA-TTS/jemison/internal/filtering"
@@ -25,34 +24,41 @@ import (
 	"go.uber.org/zap"
 )

-// ///////////////////////////////////
-// GLOBALS
 var LastHitMap sync.Map
+
 var LastBackoffMap sync.Map

 var fetchCount atomic.Int64

+const SECONDS_PER_MINUTE = 60
+
 func InfoFetchCount() {
 	// Probably should be a config value.
- ticker := time.NewTicker(60 * time.Second) + ticker := time.NewTicker(SECONDS_PER_MINUTE * time.Second) recent := []int64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0} last := int64(0) ndx := 0 + for { // Wait for the ticker <-ticker.C + cnt := fetchCount.Load() + diff := cnt - last recent[ndx] = diff + if last != 0 { var total int64 = 0 for _, num := range recent { total += num } + zap.L().Info("pages fetched", zap.Int64("pages", cnt), zap.Int64("ppm (5m avg)", total/int64(len(recent)))) } + ndx = (ndx + 1) % len(recent) last = cnt } @@ -61,11 +67,14 @@ func InfoFetchCount() { func stripHostToAscii(host string) string { reg, _ := regexp.Compile("[^a-z]") result := reg.ReplaceAllString(strings.ToLower(host), "") + return result } -func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs]) error { +const THREE_SECONDS = 3 +//nolint:cyclop,funlen +func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs]) error { u := url.URL{ Scheme: job.Args.Scheme, Host: job.Args.Host, @@ -78,9 +87,11 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] } // Have we seen them before? + //nolint:nestif if Gateway.HostExists(job.Args.Host) { // If we have, and it is too soon, send them to their queue. zap.L().Debug("host exists") + if !Gateway.GoodToGo(job.Args.Host) { zap.L().Debug("not good to go") @@ -88,7 +99,7 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // If it is "simple" or "round_robin", we do nothing. // If it is "one_per_domain", we need to do something fancy. - if QueueingModel == "one_per_domain" { + if QueueingModel == OPD { asciiHost := stripHostToAscii(job.Args.Host) asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) ChQSHP <- queueing.QSHP{ @@ -108,8 +119,9 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // We queued them elsewhere, so this job is done and done right. return nil } - zap.L().Debug("good to go") + // If they are good to go, just let them run through and be worked. + zap.L().Debug("good to go") } else { // They do not exist. So, we should add them in to the gateway, // and then requeue, so that they are in their own queue. 
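// (Editorial example: under the one_per_domain model above, a host that is
// not yet good-to-go is parked on its own queue. stripHostToAscii strips
// everything outside [a-z] from the lowercased host, so for "search.gov":
//
//	asciiHost := stripHostToAscii("search.gov")          // -> "searchgov"
//	asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) // -> "fetch-searchgov"
// )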
@@ -117,7 +129,7 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] zap.String("host", job.Args.Host)) Gateway.GoodToGo(job.Args.Host) - if QueueingModel == "one_per_domain" { + if QueueingModel == OPD { asciiHost := stripHostToAscii(job.Args.Host) asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) ChQSHP <- queueing.QSHP{ @@ -158,6 +170,7 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] strings.Contains(err.Error(), common.FileTooSmallToProcess.String()) { // Return nil, because we want to consume the job, but not requeue it zap.L().Info("common file error", zap.String("type", err.Error())) + return nil } @@ -170,11 +183,12 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // Update the guestbook lastModified := time.Now() + if v, ok := page_json["last-modified"]; ok { - //layout := "2006-01-02 15:04:05" t, err := time.Parse(time.RFC1123, v) if err != nil { zap.L().Warn("could not convert last-modified") + lastModified = time.Now() } else { lastModified = t @@ -191,8 +205,10 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] scheme := JDB.GetScheme("https") contentType := JDB.GetContentType(page_json["content-type"]) + if err != nil { zap.L().Error("could not fetch page scheme") + scheme = 1 } @@ -220,7 +236,7 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] LastFetched: pgtype.Timestamp{ Valid: true, InfinityModifier: 0, - Time: JDB.InThePast(3 * time.Second), + Time: JDB.InThePast(THREE_SECONDS * time.Second), }, NextFetch: pgtype.Timestamp{ Valid: true, @@ -228,7 +244,6 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] Time: next_fetch, }, }) - if err != nil { zap.L().Error("could not store guestbook id", zap.Int64("domain64", d64), @@ -246,11 +261,13 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] ) err = cloudmap.Save() // We get an error if we can't write to S3 - // This is pretty catestrophic. + // This is pretty catastrophic. if err != nil { zap.L().Error("could not Save() s3json k/v", zap.String("key", cloudmap.Key.Render()), ) + + //nolint:wrapcheck return err } diff --git a/cmd/fetch/work_support.go b/cmd/fetch/work_support.go index c353d1e..7cefa9e 100644 --- a/cmd/fetch/work_support.go +++ b/cmd/fetch/work_support.go @@ -19,16 +19,20 @@ import ( "go.uber.org/zap" ) +// This could become a constant in the config. +// But, it is not likely something we want to change. +const CHUNKSIZE = 4 * 1024 + func host_and_path(job *river.Job[common.FetchArgs]) string { var u url.URL u.Scheme = job.Args.Scheme u.Host = job.Args.Host u.Path = job.Args.Path + return u.String() } func chunkwiseSHA1(filename string) []byte { - // Open the file for reading. tFile, err := os.Open(filename) if err != nil { @@ -38,10 +42,11 @@ func chunkwiseSHA1(filename string) []byte { // Compute the SHA1 going chunk-by-chunk h := sha1.New() reader := bufio.NewReader(tFile) - // FIXME: make this a param in the config. 
- chunkSize := 4 * 1024 + + chunkSize := CHUNKSIZE bytesRead := 0 buf := make([]byte, chunkSize) + for { n, err := reader.Read(buf) bytesRead += n @@ -50,9 +55,12 @@ func chunkwiseSHA1(filename string) []byte { if err != io.EOF { zap.L().Error("chunk error reading") } + break } + chunk := buf[0:n] + // https://pkg.go.dev/crypto/sha1#example-New _, err = io.Writer.Write(h, chunk) if err != nil { @@ -70,16 +78,24 @@ func getUrlToFile(u url.URL) (string, int64, []byte, error) { zap.L().Error("cannot GET content", zap.String("url", u.String()), ) + + //nolint:wrapcheck return "", 0, nil, err } + zap.L().Debug("successful GET response") + // Create a temporary file to download the HTML to. temporaryFilename := uuid.NewString() + outFile, err := os.Create(temporaryFilename) if err != nil { zap.L().Error("cannot create temporary file", zap.String("filename", temporaryFilename)) + + //nolint:wrapcheck return "", 0, nil, err } + defer outFile.Close() // Copy the Get Reader to a file Writer @@ -90,16 +106,27 @@ func getUrlToFile(u url.URL) (string, int64, []byte, error) { zap.L().Error("could not copy GET to file", zap.String("url", u.String()), zap.String("filename", temporaryFilename)) + + //nolint:wrapcheck return "", 0, nil, err } + getResponse.Body.Close() + // Now, it is in a file. // Compute the SHA1 theSHA := chunkwiseSHA1(temporaryFilename) + return temporaryFilename, bytesRead, theSHA, nil } -func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, error) { +const TOO_SHORT = 20 + +//nolint:cyclop,funlen +func fetch_page_content(job *river.Job[common.FetchArgs]) ( + map[string]string, + error, +) { u := url.URL{ Scheme: job.Args.Scheme, Host: job.Args.Host, @@ -108,12 +135,14 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, er headResp, err := RetryClient.Head(u.String()) if err != nil { + //nolint:wrapcheck return nil, err } // Get a clean mime type right away contentType := util.CleanMimeType(headResp.Header.Get("content-type")) log.Debug("checking HEAD MIME type", zap.String("content-type", contentType)) + if !util.IsSearchableMimeType(contentType) { return nil, fmt.Errorf( common.NonIndexableContentType.String()+ @@ -122,11 +151,11 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, er // Make sure we don't fetch things that are too big. size_string := headResp.Header.Get("content-length") + size, err := strconv.Atoi(size_string) if err != nil { // Could not extract a size header... 
} else { - // FIXME: Make this a constant if int64(size) > MaxFilesize { return nil, fmt.Errorf( common.FileTooLargeToFetch.String()+ @@ -139,22 +168,26 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, er if err != nil { return nil, err } + key := util.CreateS3Key(util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path, util.Raw) if bytesRead > MaxFilesize { zap.L().Warn("file too large", - zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) + zap.String("host", job.Args.Host), + zap.String("path", job.Args.Path)) + err := os.Remove(tempFilename) if err != nil { zap.L().Error("could not delete temp file that is too big...") } + return nil, fmt.Errorf( common.FileTooLargeToFetch.String()+ " file is too large: %d %s%s", bytesRead, job.Args.Host, job.Args.Path) } // Don't bother in case it came in at zero length - if bytesRead < 100 { + if bytesRead < TOO_SHORT { return nil, fmt.Errorf( common.FileTooSmallToProcess.String()+ " file is too small: %d %s%s", bytesRead, job.Args.Host, job.Args.Path) @@ -172,6 +205,7 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, er // Stream that file over to S3 s3 := kv.NewS3(ThisServiceName) err = s3.FileToS3(key, tempFilename, util.GetMimeType(contentType)) + if err != nil { zap.L().Error("could not send file to S3", zap.String("key", key.Render()), @@ -196,9 +230,6 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) (map[string]string, er response[k] = v } - // FIXME - // There is a texinfo standard library for normalizing content types. - // Consider using it. I want a simplified string, not utf-8 etc. response["content-type"] = contentType zap.L().Debug("content read", diff --git a/cmd/pack/queues.go b/cmd/pack/queues.go index 2a94b1e..a97b305 100644 --- a/cmd/pack/queues.go +++ b/cmd/pack/queues.go @@ -53,6 +53,6 @@ func InitializeQueues() { // Start the work clients if err := packClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. 
exiting.") - os.Exit(42) + os.Exit(1) } } diff --git a/cmd/walk/work.go b/cmd/walk/work.go index 1cf139d..8daeeea 100644 --- a/cmd/walk/work.go +++ b/cmd/walk/work.go @@ -159,6 +159,7 @@ func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { err = filter.IsReject(lu) if err != nil { + //nolint:wrapcheck return "", err } diff --git a/config/domain64.go b/config/domain64.go index 99948c3..fa985ab 100644 --- a/config/domain64.go +++ b/config/domain64.go @@ -60,6 +60,7 @@ func FQDNToDomain64(fqdn string) (int64, error) { hex := gjson.GetBytes(cached_file, tld+".FQDNToDomain64."+escaped).String() value, err := strconv.ParseInt(hex, 16, 64) if err != nil { + //nolint:wrapcheck return 0, err } return int64(value), nil @@ -105,6 +106,7 @@ func GetAllFQDNToDomain64() map[string]int64 { func HexToDec64(hex string) (int64, error) { value, err := strconv.ParseInt(hex, 16, 64) if err != nil { + //nolint:wrapcheck return 0, err } return value, nil diff --git a/config/services/fetch.libsonnet b/config/services/fetch.libsonnet index 2899e6d..adeb3f2 100644 --- a/config/services/fetch.libsonnet +++ b/config/services/fetch.libsonnet @@ -32,6 +32,14 @@ local parameters = [ 'max_filesize_mb', { cf: 10, container: 20 }, ], + [ + 'fetch_cooldown_ms', + { cf: 500, container: 500 }, + ], + [ + 'fetch_poll_interval_ms', + { cf: 1000, container: 1000 }, + ], ] + B.parameters; { diff --git a/go.mod b/go.mod index cb1219f..6c60323 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,6 @@ require ( github.com/pingcap/log v1.1.0 github.com/riverqueue/river v0.13.0 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.13.0 - github.com/robfig/cron v1.2.0 github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 github.com/tidwall/gjson v1.18.0 diff --git a/internal/env/env.go b/internal/env/env.go index 6bd65d9..4cdc199 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -108,6 +108,7 @@ func InitGlobalEnv(this_service string) { if IsContainerEnv() { log.Println("IsContainerEnv") viper.SetConfigName("container") + configName = "container" } @@ -116,12 +117,14 @@ func InitGlobalEnv(this_service string) { viper.AddConfigPath("../../config") viper.AddConfigPath("config") viper.SetConfigName("localhost") + configName = "localhost" } if IsCloudEnv() { log.Println("IsCloudEnv") viper.SetConfigName("cf") + configName = "cf" // https://github.com/spf13/viper/issues/1706 // https://github.com/spf13/viper/issues/1671 @@ -156,21 +159,25 @@ func InitGlobalEnv(this_service string) { // with everything in the rgiht places. 
if IsContainerEnv() || IsLocalTestEnv() { ContainerEnv := container_env{} + err := viper.Unmarshal(&ContainerEnv) if err != nil { log.Println("ENV could not unmarshal VCAP_SERVICES to new") log.Fatal(err) } + Env.VcapServices = ContainerEnv.VcapServices } if IsCloudEnv() { new_vcs := make(map[string][]Service, 0) + err := json.Unmarshal([]byte(os.Getenv("VCAP_SERVICES")), &new_vcs) if err != nil { log.Println("ENV could not unmarshal VCAP_SERVICES to new") log.Fatal(err) } + Env.VcapServices = new_vcs } @@ -191,6 +198,7 @@ func InitGlobalEnv(this_service string) { if err != nil { log.Println("could not get service for ", this_service) } + Env.AllowedHosts = s.GetParamString("allowed_hosts") log.Println("Setting Schedule: ", Env.AllowedHosts) @@ -212,6 +220,7 @@ func (e *env) GetDatabaseUrl(name string) (string, error) { params, ), nil } + if IsCloudEnv() { return db.CredentialString("uri"), nil } diff --git a/internal/env/gin.go b/internal/env/gin.go index eabdb1d..cc71be9 100644 --- a/internal/env/gin.go +++ b/internal/env/gin.go @@ -9,6 +9,7 @@ import ( func SetGinReleaseMode(this_service string) { s, _ := Env.GetUserService(this_service) + level := s.GetParamString("debug_level") if level == "debug" { zap.L().Info("setting gin debug level to debug") diff --git a/internal/postgres/postgres.go b/internal/postgres/postgres.go index d972fa1..d70e0ca 100644 --- a/internal/postgres/postgres.go +++ b/internal/postgres/postgres.go @@ -23,7 +23,6 @@ type JemisonDB struct { } func NewJemisonDB() *JemisonDB { - jdb := JemisonDB{ Config: make(map[string]*pgxpool.Config), Pool: make(map[string]*pgxpool.Pool), @@ -50,7 +49,6 @@ func NewJemisonDB() *JemisonDB { jdb.Config[db_name] = cfg jdb.Pool[db_name] = pool - } jdb.WorkDBQueries = work_db.New(jdb.Pool[env.JemisonWorkDatabase]) @@ -61,10 +59,15 @@ func NewJemisonDB() *JemisonDB { func Config(db_string string) *pgxpool.Config { const defaultMaxConns = int32(100) + const defaultMinConns = int32(0) + const defaultMaxConnLifetime = time.Hour + const defaultMaxConnIdleTime = time.Minute * 30 + const defaultHealthCheckPeriod = time.Minute + const defaultConnectTimeout = time.Second * 5 dbConfig, err := pgxpool.ParseConfig(db_string) @@ -85,49 +88,82 @@ func Config(db_string string) *pgxpool.Config { // The cache is a key/value store, so prepend // keys to avoid collisions. It should be impossible, // but still... that's the convention of these functions. 
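//
// (Editorial example: the prefix partitions one sync.Map into namespaces, so
// a scheme entry can never collide with a content-type entry -- the stored
// values here are illustrative only:
//
//	jdb.constCache.Store("scheme:"+"https", int32(1))
//	jdb.constCache.Store("contenttype:"+"text/html", 2)
// )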
+//
+//nolint:gosec
 func (jdb *JemisonDB) GetScheme(scheme string) int32 {
 	if val, ok := jdb.constCache.Load("scheme:" + scheme); ok {
-		return val.(int32)
+		v, assert_ok := val.(int32)
+		if !assert_ok {
+			zap.L().Error("could not convert scheme integer")
+		}
+
+		return v
 	} else {
 		scheme_int := config.GetScheme(scheme)
+		// This is a guaranteed safe conversion
 		jdb.constCache.Store("scheme:"+scheme, int32(scheme_int))
+
 		return int32(scheme_int)
 	}
 }

 func (jdb *JemisonDB) GetContentType(ct string) int {
 	if val, ok := jdb.constCache.Load("contenttype:" + ct); ok {
-		return val.(int)
+		v, assert_ok := val.(int)
+		if !assert_ok {
+			zap.L().Error("could not convert content type integer")
+		}
+
+		return v
 	} else {
 		ct_int := config.GetContentType(ct)
 		jdb.constCache.Store("contenttype:"+ct, ct_int)
+
 		return ct_int
 	}
 }

+const HOURS_PER_DAY = 24
+
+const DAYS_PER_WEEK = 7
+
+const DAYS_PER_BIWEEK = 14
+
+const DAYS_PER_MONTH = 30
+
+const DAYS_PER_QUARTER = 3 * 30
+
+const DAYS_PER_BIANNUM = 6 * 30
+
+const DAYS_PER_ANNUM = 12 * 30
+
 func (jdb *JemisonDB) GetNextFetch(fqdn string) time.Time {
+	var delta time.Duration
 	schedule := config.GetSchedule(fqdn)
-	delta := time.Duration(30 * 24 * time.Hour)
+
 	switch schedule {
 	case config.Daily:
-		delta = time.Duration(24 * time.Hour)
+		delta = time.Duration(HOURS_PER_DAY * time.Hour)
 	case config.Weekly:
-		delta = time.Duration(7 * 24 * time.Hour)
+		delta = time.Duration(DAYS_PER_WEEK * HOURS_PER_DAY * time.Hour)
 	case config.BiWeekly:
-		delta = time.Duration(14 * 24 * time.Hour)
+		delta = time.Duration(DAYS_PER_BIWEEK * HOURS_PER_DAY * time.Hour)
 	case config.Monthly:
-		// pass
+		delta = time.Duration(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour)
 	case config.Quarterly:
-		delta = time.Duration(3 * 30 * 24 * time.Hour)
+		delta = time.Duration(DAYS_PER_QUARTER * HOURS_PER_DAY * time.Hour)
 	case config.BiAnnually:
-		delta = time.Duration(6 * 30 * 24 * time.Hour)
+		delta = time.Duration(DAYS_PER_BIANNUM * HOURS_PER_DAY * time.Hour)
 	case config.Annually:
-		delta = time.Duration(12 * 30 * 24 * time.Hour)
+		delta = time.Duration(DAYS_PER_ANNUM * HOURS_PER_DAY * time.Hour)
 	default:
-		// pass
+		// Default to monthly.
+		delta = time.Duration(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour)
 	}
+
 	next_fetch := time.Now().Add(delta)
+
 	return next_fetch
 }
diff --git a/internal/util/key_utilities.go b/internal/util/key_utilities.go
index 5376b7c..9fcc4c3 100644
--- a/internal/util/key_utilities.go
+++ b/internal/util/key_utilities.go
@@ -1,6 +1,11 @@ package util

 import (
+	// SHA1 is a weak cipher. We could use sha256.
+	// However, we're not using it for crypto purposes,
+	// just for generating a temporary name. We do not
+	// expect collisions.
+ //nolint:gosec "crypto/sha1" "fmt" "strings" @@ -54,8 +59,10 @@ type Key struct { Extension Extension } +//nolint:gosec func (k *Key) SHA1() string { sha := fmt.Sprintf("%x", sha1.Sum([]byte(k.Host+k.Path))) + return sha } diff --git a/internal/util/string_utilities.go b/internal/util/string_utilities.go index 662c40f..ec9d470 100644 --- a/internal/util/string_utilities.go +++ b/internal/util/string_utilities.go @@ -107,6 +107,7 @@ func TrimSuffix(s, suffix string) string { func CanonicalizeURL(s string) (string, error) { u, err := url.Parse(s) if err != nil { + //nolint:wrapcheck return "", err } u.Host = strings.ToLower(u.Host) From df5c1172d129c105f14bdb065d53923129c28806 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Fri, 10 Jan 2025 07:02:51 -0500 Subject: [PATCH 24/39] Linting --- .golangci.yml | 55 +++++++++++++++++++++++++ internal/common/api.go | 69 ++++++++++++++++++++++++-------- internal/common/domain64_test.go | 4 +- internal/common/stats_test.go | 1 + internal/kv/s3.go | 2 + internal/util/memuse.go | 6 ++- 6 files changed, 117 insertions(+), 20 deletions(-) create mode 100644 .golangci.yml diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..47d0e3e --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,55 @@ +linters: + enable-all: true + disable: + - exportloopref + - depguard + - tagalign + - exhaustruct + depguard: + # Rules to apply. + # + # Variables: + # - File Variables + # you can still use and exclamation mark ! in front of a variable to say not to use it. + # Example !$test will match any file that is not a go test file. + # + # `$all` - matches all go files + # `$test` - matches all go test files + # + # - Package Variables + # + # `$gostd` - matches all of go's standard library (Pulled from `GOROOT`) + # + # Default: Only allow $gostd in all files. + rules: + main: + list-mode: lax + # List of file globs that will match this list of settings to compare against. + # Default: $all + files: + - $all + # List of allowed packages. + allow: + - $gostd + - github.com/GSA-TTS/jemison/config + # Packages that are not allowed where the value is a suggestion. + deny: + - pkg: "github.com/sirupsen/logrus" + desc: not allowed + - pkg: "github.com/pkg/errors" + desc: Should be replaced by standard lib errors package + +run: + timeout: 5m + issues-exit-code: 2 + concurrency: 4 + allow-parallel-runners: true + +output: + formats: + - format: json + path: stderr + - format: checkstyle + path: report.xml + - format: colored-line-number + show-stats: true diff --git a/internal/common/api.go b/internal/common/api.go index ab24d30..16e9619 100644 --- a/internal/common/api.go +++ b/internal/common/api.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/gin-gonic/gin" + "go.uber.org/zap" ) func Heartbeat(c *gin.Context) { @@ -17,21 +18,22 @@ func Heartbeat(c *gin.Context) { func InitializeAPI() *gin.Engine { router := gin.Default() router.GET("/heartbeat", Heartbeat) + return router } type StatsInput struct{} + type StatsResponse struct { Stats map[string]int64 `json:"stats"` } -// FIXME Switch to a concurrency-safe map library... 
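// (Editorial note: sync.Map is the concurrency-safe store the removed FIXME
// asked about; typical use, given the BaseStats methods defined below --
//
//	stats := NewBaseStats("extract")
//	stats.Increment("page_count")
//	_ = stats.Get("page_count") // 1
// )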
 type StatsMap = sync.Map

 type Stats interface {
-	Set(string, int64)
-	Increment(string)
-	Get(string) int64
+	Set(key string, value int64)
+	Increment(key string)
+	Get(key string) int64
 	GetAll() StatsMap
 }
@@ -58,7 +60,6 @@ func StatsHandler(stats_base string) func(c *gin.Context) {
 			"stats":    b.GetAll(),
 			"response": "ok",
 		})
-
 	}
 }

@@ -66,50 +67,86 @@ func NewBaseStats(service string) *BaseStats {
 	if all_the_stats == nil {
 		all_the_stats = &AllStats{}
 	}
+
 	if _, ok := all_the_stats.services.Load(service); !ok {
 		all_the_stats.services.Store(service, &BaseStats{})
 	}

 	v, _ := all_the_stats.services.Load(service)
-	return v.(*BaseStats)
+
+	bs, ok := v.(*BaseStats)
+	if !ok {
+		zap.L().Error("could not cast basestats")
+	}
+
+	return bs
 }

-// extract | fatal error: concurrent map writes
 func (e *BaseStats) Set(key string, val int64) {
 	e.stats.Store(key, val)
 }

 func (e *BaseStats) HasKey(key string) bool {
 	_, ok := e.stats.Load(key)
+
 	return ok
 }

 func (e *BaseStats) Get(key string) int64 {
-	v, _ := e.stats.Load(key)
-	return v.(int64)
+	val, _ := e.stats.Load(key)
+
+	v, ok := val.(int64)
+	if !ok {
+		zap.L().Error("could not cast int64")
+	}
+
+	return v
 }

 func (e *BaseStats) GetAll() map[string]int64 {
-	copy := make(map[string]int64, 0)
+	a_copy := make(map[string]int64, 0)
+
 	e.stats.Range(func(key any, v any) bool {
-		copy[key.(string)] = v.(int64)
+		val, ok := v.(int64)
+		if !ok {
+			zap.L().Error("could not cast int64")
+		}
+
+		k, ok := key.(string)
+		if !ok {
+			zap.L().Error("could not cast string")
+		}
+
+		a_copy[k] = val
+
 		return true
 	})
-	return copy
+
+	return a_copy
 }

 func (e *BaseStats) Increment(key string) {
 	if val, ok := e.stats.Load(key); ok {
-		e.Set(key, val.(int64)+1)
+		v, ok := val.(int64)
+		if !ok {
+			zap.L().Error("could not cast int64")
+		}
+
+		e.Set(key, v+1)
 	} else {
 		e.Set(key, 1)
 	}
 }

-func (e *BaseStats) Sum(key string, v int64) {
+func (e *BaseStats) Sum(key string, incr int64) {
 	if val, ok := e.stats.Load(key); ok {
-		e.Set(key, val.(int64)+v)
+		v, ok := val.(int64)
+		if !ok {
+			zap.L().Error("cannot cast int64")
+		}
+
+		e.Set(key, v+incr)
 	} else {
-		e.Set(key, v)
+		e.Set(key, incr)
 	}
 }
diff --git a/internal/common/domain64_test.go b/internal/common/domain64_test.go
index c2d92bc..0f53433 100644
--- a/internal/common/domain64_test.go
+++ b/internal/common/domain64_test.go
@@ -1,3 +1,4 @@
+//nolint:all
 package common

 import (
@@ -97,13 +98,11 @@ var d64 string = `
 func TestUnmarshal(t *testing.T) {
 	b := []byte(d64)
-	//nolint:all
 	NewTLD64s(b)
 }

 func TestCheckEdu(t *testing.T) {
 	b := []byte(d64)
-	//nolint:all
 	d, err := NewTLD64s(b)
 	if err != nil {
 		t.Error(err)
@@ -116,7 +115,6 @@ func TestCountEdu(t *testing.T) {
 	b := []byte(d64)
-	//nolint:all
 	d, err := NewTLD64s(b)
 	if err != nil {
 		t.Error(err)
diff --git a/internal/common/stats_test.go b/internal/common/stats_test.go
index 7c857b8..c76555e 100644
--- a/internal/common/stats_test.go
+++ b/internal/common/stats_test.go
@@ -1,3 +1,4 @@
+//nolint:all
 package common

 import (
diff --git a/internal/kv/s3.go b/internal/kv/s3.go
index 213692b..6471aea 100644
--- a/internal/kv/s3.go
+++ b/internal/kv/s3.go
@@ -166,11 +166,13 @@ func (s3 *S3) List(prefix string) ([]*ObjInfo, error) {
 	})

 	objects := make([]*ObjInfo, 0)
+
 	for object := range objectCh {
 		if object.Err != nil {
 			fmt.Println(object.Err)
 			return nil, object.Err
 		}
+
 		objects = append(objects, NewObjInfo(object.Key, object.Size))
 	}
 	return objects, nil
diff --git a/internal/util/memuse.go b/internal/util/memuse.go
index 004727b..15a3ecc
100644
--- a/internal/util/memuse.go
+++ b/internal/util/memuse.go
@@ -9,7 +9,9 @@ import (
 // of garage collection cycles completed.
 func PrintMemUsage() {
 	var m runtime.MemStats
+
 	runtime.ReadMemStats(&m)
+
 	// For info on each, see: https://golang.org/pkg/runtime/#MemStats
 	fmt.Printf("Alloc = %v MiB", bToMb(m.Alloc))
 	fmt.Printf("\tTotalAlloc = %v MiB", bToMb(m.TotalAlloc))
@@ -17,6 +19,8 @@ func PrintMemUsage() {
 	fmt.Printf("\tNumGC = %v\n", m.NumGC)
 }

+const BYTES_PER_SI = 1024
+
 func bToMb(b uint64) uint64 {
-	return b / 1024 / 1024
+	return ((b / BYTES_PER_SI) / BYTES_PER_SI)
 }

From 39ded64f13b9a03047860403415aa2f1250d804d Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Fri, 10 Jan 2025 07:20:20 -0500
Subject: [PATCH 25/39] Linting

---
 internal/kv/interfaces.go  |  3 ++-
 internal/kv/s3.go          | 25 ++++++++++++++++++-------
 internal/kv/s3json.go      | 24 ++++++++++++++++++++----
 internal/kv/s3json_test.go | 13 +++++++++----
 internal/kv/util.go        | 29 ++++++++++++++++++++++-----
 5 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/internal/kv/interfaces.go b/internal/kv/interfaces.go
index bdf107e..b91297e 100644
--- a/internal/kv/interfaces.go
+++ b/internal/kv/interfaces.go
@@ -12,7 +12,7 @@ type JSON map[string]string
 type Object interface {
 	GetKey() string
 	GetJson() JSON
-	GetValue(string) string
+	GetValue(key string) string
 	GetSize() int64
 	GetMimeType() string
 }
@@ -44,6 +44,7 @@ func NewObject(key string, value JSON) *Obj {
 	size := int64(len(b))

 	mime := ""
+
 	if good, ok := value["content-type"]; !ok {
 		mime = "octet/binary"
 	} else {
diff --git a/internal/kv/s3.go b/internal/kv/s3.go
index 6471aea..b7f55d1 100644
--- a/internal/kv/s3.go
+++ b/internal/kv/s3.go
@@ -2,7 +2,6 @@ package kv

 import (
 	"context"
-	"fmt"
 	"io"
 	"log"
 	"os"
@@ -30,6 +29,7 @@ type S3 struct {
 // Lets us copy files to/from the bucket.
 func NewS3(bucket_name string) *S3 {
 	s3 := newS3FromBucketName(bucket_name)
+
 	return &s3
 }
@@ -38,6 +38,7 @@ func (s3 *S3) FileToS3(key *util.Key, local_filename string, mime_type string) e
 	if err != nil {
 		log.Fatal("FileToS3 cannot open file ", local_filename)
 	}
+
 	fi, err := reader.Stat()
 	if err != nil {
 		log.Println("KV could not stat file")
@@ -52,6 +53,7 @@ func (s3 *S3) FileToS3Path(key string, local_filename string, mime_type string)
 	if err != nil {
 		log.Fatal("FileToS3Path cannot open file ", local_filename)
 	}
+
 	fi, err := reader.Stat()
 	if err != nil {
 		log.Println("KV could not stat file")
@@ -63,42 +65,45 @@ func (s3 *S3) FileToS3Path(key string, local_filename string, mime_type string)

 func (s3 *S3) S3ToFile(key *util.Key, local_filename string) error {
 	ctx := context.Background()
+
 	err := s3.MinioClient.FGetObject(
 		ctx,
 		s3.Bucket.CredentialString("bucket"),
 		key.Render(),
 		local_filename,
 		minio.GetObjectOptions{})
-
 	if err != nil {
 		zap.L().Error("could not FGetObject",
 			zap.String("bucket", s3.Bucket.Name),
 			zap.String("key", key.Render()),
 			zap.String("local_filename", local_filename),
 		)
+
 		return err
 	}
+
 	return nil
 }

 func (s3 *S3) S3PathToFile(path string, local_filename string) error {
 	ctx := context.Background()
+
 	err := s3.MinioClient.FGetObject(
 		ctx,
 		s3.Bucket.CredentialString("bucket"),
 		path,
 		local_filename,
 		minio.GetObjectOptions{})
-
 	if err != nil {
-		fmt.Println(err)
+		zap.L().Error(err.Error())
+
 		return err
 	}
+
 	return nil
 }

 func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) {
-	// The object has a channel interface that we have to empty.
 	ctx := context.Background()

 	object, err := s3.MinioClient.GetObject(
@@ -121,6 +126,7 @@ func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) {
 			zap.String("bucket_name", s3.Bucket.CredentialString("bucket")),
 			zap.String("key", key.Render()),
 			zap.String("error", err.Error()))
+
 		return nil, err
 	}
@@ -129,12 +135,12 @@ func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) {
 	}

 	raw, err := io.ReadAll(object)
-
 	if err != nil {
 		zap.L().Error("could not read object bytes",
 			zap.String("bucket_name", s3.Bucket.CredentialString("bucket")),
 			zap.String("key", key.Render()),
 			zap.String("error", err.Error()))
+
 		return nil, err
 	}
@@ -142,13 +148,16 @@ func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) {
 	s3json.raw = raw
 	s3json.Key = key
 	current_mime_type := s3json.GetString("content-type")
+
 	updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(current_mime_type))
 	if err != nil {
 		zap.L().Error("could not update raw S3JSON")
 	} else {
 		s3json.raw = updated
 	}
+
 	s3json.empty = false
+
 	return s3json, nil
 }
@@ -169,11 +178,13 @@ func (s3 *S3) List(prefix string) ([]*ObjInfo, error) {

 	for object := range objectCh {
 		if object.Err != nil {
-			fmt.Println(object.Err)
+			zap.L().Error(object.Err.Error())
+
 			return nil, object.Err
 		}

 		objects = append(objects, NewObjInfo(object.Key, object.Size))
 	}
+
 	return objects, nil
 }
diff --git a/internal/kv/s3json.go b/internal/kv/s3json.go
index 6276f9a..e9a1499 100644
--- a/internal/kv/s3json.go
+++ b/internal/kv/s3json.go
@@ -1,5 +1,7 @@
 // kv provides an interface to key/value work in S3
 // It is specialized to the `jemison` architecture.
+//
+//nolint:godox,godot
 package kv

 import (
@@ -10,12 +12,11 @@ import (
 	"io"
 	"net/url"

+	"github.com/GSA-TTS/jemison/internal/util"
 	minio "github.com/minio/minio-go/v7"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 	"go.uber.org/zap"
-
-	"github.com/GSA-TTS/jemison/internal/util"
 )

 var DEBUG_S3JSON = false
@@ -34,7 +35,6 @@ var DEBUG_S3JSON = false
 // Save() does an open and a close
 // Then, every object is self-contained. Slower, but self-contained.
 // The sync... is hell waiting to happen in terms of debugging.
-//var buckets sync.Map

 // S3JSON structs are JSON documents stored in S3.
// This is because `jemison` shuttles JSON documents in-and-out of S3, and @@ -49,6 +49,7 @@ type S3JSON struct { func NewS3JSON(bucket_name string) *S3JSON { s3 := newS3FromBucketName(bucket_name) + return &S3JSON{ Key: &util.Key{}, raw: nil, @@ -64,6 +65,7 @@ func NewFromBytes(bucket_name string, scheme util.Scheme, host string, path stri s3 := newS3FromBucketName(bucket_name) key := util.CreateS3Key(scheme, host, path, util.JSON) w_key, _ := sjson.SetBytes(m, "_key", key.Render()) + return &S3JSON{ Key: key, raw: w_key, @@ -77,7 +79,9 @@ func NewFromMap(bucket_name string, scheme util.Scheme, host string, path string s3 := newS3FromBucketName(bucket_name) key := util.CreateS3Key(scheme, host, path, util.JSON) m["_key"] = key.Render() + b, _ := json.Marshal(m) + return &S3JSON{ Key: key, raw: b, @@ -91,6 +95,7 @@ func NewFromMap(bucket_name string, scheme util.Scheme, host string, path string func NewEmptyS3JSON(bucket_name string, scheme util.Scheme, host string, path string) *S3JSON { s3 := newS3FromBucketName(bucket_name) key := util.CreateS3Key(scheme, host, path, util.JSON) + return &S3JSON{ Key: key, raw: nil, @@ -123,14 +128,17 @@ func (s3json *S3JSON) Save() error { r := bytes.NewReader(s3json.raw) size := int64(len(s3json.raw)) + err := store(&s3json.S3, s3json.Key.Render(), size, r, util.JSON.String()) if err != nil { zap.L().Fatal("could not store S3JSON", zap.String("bucket_name", s3json.S3.Bucket.Name), zap.String("key", s3json.Key.Render()), zap.String("err", err.Error())) + return err } + return nil } @@ -164,6 +172,7 @@ func (s3json *S3JSON) Load() error { zap.String("bucket_name", s3json.S3.Bucket.CredentialString("bucket")), zap.String("key", key), zap.String("error", err.Error())) + return err } @@ -172,24 +181,27 @@ func (s3json *S3JSON) Load() error { } raw, err := io.ReadAll(object) - if err != nil { zap.L().Error("could not read object bytes", zap.String("bucket_name", s3json.S3.Bucket.CredentialString("bucket")), zap.String("key", key), zap.String("error", err.Error())) + return err } s3json.raw = raw current_mime_type := s3json.GetString("content-type") + updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(current_mime_type)) if err != nil { zap.L().Error("could not update s3json.raw") } else { s3json.raw = updated } + s3json.empty = false + return nil } @@ -199,16 +211,19 @@ func (s3json *S3JSON) GetJSON() []byte { func (s3json *S3JSON) GetString(gjson_path string) string { r := gjson.GetBytes(s3json.raw, gjson_path) + return r.String() } func (s3json *S3JSON) GetInt64(gjson_path string) int64 { r := gjson.GetBytes(s3json.raw, gjson_path) + return int64(r.Int()) } func (s3json *S3JSON) GetBool(gjson_path string) bool { r := gjson.GetBytes(s3json.raw, gjson_path) + return r.Bool() } @@ -219,6 +234,7 @@ func (s3json *S3JSON) Set(sjson_path string, value string) { zap.String("sjson_path", sjson_path), zap.String("value", value)) } + s3json.raw = b } diff --git a/internal/kv/s3json_test.go b/internal/kv/s3json_test.go index 65c24fb..897b648 100644 --- a/internal/kv/s3json_test.go +++ b/internal/kv/s3json_test.go @@ -1,3 +1,4 @@ +//nolint:testpackage package kv import ( @@ -17,8 +18,8 @@ import ( func setup( /* t *testing.T */ ) func(t *testing.T) { os.Setenv("ENV", "LOCALHOST") env.InitGlobalEnv("testing_env") // we need to pass something + return func(t *testing.T) { - // t.Log("teardown test case") } } @@ -31,39 +32,43 @@ func TestKv(t *testing.T) { func TestEmpty(t *testing.T) { setup() + s3json := NewEmptyS3JSON("fetch", util.HTTPS, 
"search.gov", "/") assert.Equal(t, "fetch", s3json.S3.Bucket.Name) } -//func NewFromBytes(bucket_name string, scheme util.Scheme, host string, path string, m []byte) *S3JSON { - func TestNewFromBytes(t *testing.T) { setup() + s3json := NewFromBytes("fetch", util.HTTPS, "search.gov", "/", []byte(`{"a": 3, "b": 5}`)) assert.Equal(t, "fetch", s3json.S3.Bucket.Name) } func TestGetFromBytes(t *testing.T) { setup() + s3json := NewFromBytes("fetch", util.HTTPS, "search.gov", "/", []byte(`{"a": 3, "b": 5}`)) assert.Equal(t, int64(3), s3json.GetInt64("a")) } func TestSave(t *testing.T) { setup() + s3json := NewFromBytes("fetch", util.HTTPS, "search.gov", "/", []byte(`{"a": 3, "b": 5}`)) - //nolint:all s3json.Save() assert.Equal(t, int64(3), s3json.GetInt64("a")) } func TestLoad(t *testing.T) { setup() + s3json := NewEmptyS3JSON("fetch", util.HTTPS, "search.gov", "/") + err := s3json.Load() if err != nil { zap.L().Error("TestLoad", zap.String("error", err.Error())) } + zap.L().Info("TestLoad", zap.ByteString("raw", s3json.raw)) assert.Equal(t, int64(3), s3json.GetInt64("a")) assert.Equal(t, int64(5), s3json.GetInt64("b")) diff --git a/internal/kv/util.go b/internal/kv/util.go index 9d9a20f..5c3ffec 100644 --- a/internal/kv/util.go +++ b/internal/kv/util.go @@ -21,20 +21,26 @@ var s3cache sync.Map // NewS3FromBucketName creates an S3 object containing bucket information // from VCAP and a minio client ready to talk to the bucket. S3JSON objects // carry the information so they can load/save. +// +//nolint:cyclop,funlen func newS3FromBucketName(bucket_name string) S3 { if !env.IsValidBucketName(bucket_name) { log.Fatal("KV INVALID BUCKET NAME ", bucket_name) } if v, ok := s3cache.Load(bucket_name); ok { - return v.(S3) + cast, ok := v.(S3) + if !ok { + zap.L().Error("could not cast to s3 struct") + } + + return cast } s3 := S3{} // Grab a reference to our bucket from the config. b, err := env.Env.GetObjectStore(bucket_name) - if err != nil { zap.L().Error("could not get bucket from config", zap.String("bucket_name", bucket_name)) os.Exit(1) @@ -67,6 +73,7 @@ func newS3FromBucketName(bucket_name string) S3 { if err != nil { log.Fatalln(err) } + s3.MinioClient = minioClient ctx := context.Background() @@ -86,6 +93,7 @@ func newS3FromBucketName(bucket_name string) S3 { // when we find a bucket that already exists. 
// buckets.Store(bucket_name, s3)
 s3cache.Store(bucket_name, s3)
+
 return s3
 }
 
@@ -105,6 +113,7 @@ func newS3FromBucketName(bucket_name string) S3 {
 }
 // Skip container creation in CF
 s3cache.Store(bucket_name, s3)
+
 return s3
 }
 
@@ -113,10 +122,14 @@ func containsAll(target string, pieces []string) bool {
 for _, s := range pieces {
 allExist = allExist && strings.Contains(target, s)
 }
+
 return allExist
 }
 
-// store saves things to S3
+const BACKOFF_MS = 50
+
+const BACKOFF_OFFSET = 25
+
 func store(s3 *S3, destination_key string, size int64, reader io.Reader, mime_type string) error {
 trying := true
 backoff := 50
@@ -145,17 +158,23 @@ func store(s3 *S3, destination_key string, size int64, reader io.Reader, mime_ty
 // Resource requested is unwritable, please reduce your request rate
 if containsAll(err.Error(), []string{"reduce", "rate"}) || containsAll(err.Error(), []string{"not", "store"}) {
 zap.L().Warn("reducing request rate")
- sleepyTime := time.Duration((rand.IntN(50) + backoff) * int(time.Millisecond))
- backoff += rand.IntN(50) + 25
+ //nolint:gosec
+ sleepyTime := time.Duration((rand.IntN(BACKOFF_MS) + backoff) * int(time.Millisecond))
+
+ backoff += rand.IntN(BACKOFF_MS) + BACKOFF_OFFSET
+
 time.Sleep(sleepyTime)
+
 continue
 } else {
 zap.L().Error("s3 storage error", zap.String("err", err.Error()))
+
 return err
 }
 } else {
 trying = false
 }
 }
+
 return nil
 }
 
From 9f2e999e2a0a2879878ef8abf3ff456d9300953e Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Fri, 10 Jan 2025 07:29:40 -0500
Subject: [PATCH 26/39] Linting

---
 internal/common/backoff.go | 23 +++++++++++++++++++----
 internal/common/common.go | 6 ++++--
 internal/common/domain64.go | 3 +++
 internal/common/types.go | 2 +-
 internal/kv/s3json_test.go | 1 +
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/internal/common/backoff.go b/internal/common/backoff.go
index c0c3675..8cc8fd5 100644
--- a/internal/common/backoff.go
+++ b/internal/common/backoff.go
@@ -8,18 +8,33 @@ import (
 "go.uber.org/zap"
 )
 
+const EXPBACK = 1.03
+
 func BackoffLoop(host string, politeSleep int64, lastHitMap *sync.Map, lastBackoffMap *sync.Map) {
 for {
 // Look at the timing map.
- lastHitTime, ok := lastHitMap.Load(host)
+ lastHitTime, _ := lastHitMap.Load(host)
 // If we're in the map, and we're within 2s, we should keep checking after a backoff
 politeDuration := time.Duration(politeSleep) * time.Second
- if ok && (time.Since(lastHitTime.(time.Time)) < politeDuration) {
+ lht, ok := lastHitTime.(time.Time)
+ if !ok {
+ zap.L().Error("could not cast time.Time")
+ }
+
+ if ok && (time.Since(lht) < politeDuration) {
 // There will be a last backoff time.
 last, _ := lastBackoffMap.Load(host)
- newBackoffTime := float64(politeSleep)/10*rand.Float64() + float64(last.(int64))*1.03
+
+ lv, ok := last.(int64)
+ if !ok {
+ zap.L().Error("could not cast int64")
+ }
+
+ //nolint:gosec
+ newBackoffTime := float64(politeSleep)/10*rand.Float64() + float64(lv)*EXPBACK
 time.Sleep(time.Duration(newBackoffTime) * time.Second)
+
 continue
 } else {
 // We're not in the map, or it is more than milliseconds!
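
The jittered growth above is the heart of the polite backoff: each retry waits roughly EXPBACK (3%) longer than the last, plus up to politeSleep/10 seconds of random jitter so that workers hitting the same host drift apart. A minimal sketch of that curve, not part of the patch, assuming both values are tracked in seconds:

package main

import (
	"fmt"
	"math/rand/v2"
)

// nextBackoff mirrors the expression in BackoffLoop above.
func nextBackoff(politeSleep, last float64) float64 {
	return politeSleep/10*rand.Float64() + last*1.03
}

func main() {
	wait := 2.0 // starting from a 2s polite sleep
	for i := 1; i <= 3; i++ {
		wait = nextBackoff(2.0, wait)
		fmt.Printf("retry %d: wait %.2fs\n", i, wait)
	}
}
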
@@ -29,8 +44,8 @@ func BackoffLoop(host string, politeSleep int64, lastHitMap *sync.Map, lastBacko zap.String("host", host)) lastBackoffMap.Store(host, politeSleep) lastHitMap.Store(host, time.Now()) + break } } - } diff --git a/internal/common/common.go b/internal/common/common.go index 8bb8c51..c55a165 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -21,11 +21,13 @@ func GetPool(database_url string) (context.Context, *pgxpool.Pool) { ) os.Exit(1) } + return ctx, pool } func CommonQueueInit() (context.Context, *pgxpool.Pool, *river.Workers) { var err error + database_url, err := env.Env.GetDatabaseUrl(env.QueueDatabase) if err != nil { zap.L().Error("unable to get connection string; exiting", @@ -38,17 +40,17 @@ func CommonQueueInit() (context.Context, *pgxpool.Pool, *river.Workers) { ctx, pool := GetPool(database_url) // Create a pool of workers workers := river.NewWorkers() - return ctx, pool, workers + return ctx, pool, workers } func CtxTx(pool *pgxpool.Pool) (context.Context, pgx.Tx) { ctx := context.Background() + tx, err := pool.Begin(ctx) if err != nil { zap.L().Panic("cannot init tx from pool") } - //defer tx.Rollback(ctx) return ctx, tx } diff --git a/internal/common/domain64.go b/internal/common/domain64.go index c668ec4..bda639c 100644 --- a/internal/common/domain64.go +++ b/internal/common/domain64.go @@ -25,10 +25,12 @@ type Domain64 struct { func NewTLD64s(bytes []byte) (TLD64s, error) { var tld TLD64s + err := json.Unmarshal(bytes, &tld) if err != nil { return nil, fmt.Errorf("could not parse TLD64 JSON") } + return tld, nil } @@ -38,6 +40,7 @@ func D64HexToDec(h string) int64 { zap.L().Error("could not convert Domain64", zap.String("Domain64", h)) } + return int64(value) } diff --git a/internal/common/types.go b/internal/common/types.go index a68160c..6b4407b 100644 --- a/internal/common/types.go +++ b/internal/common/types.go @@ -85,7 +85,7 @@ func (WalkArgs) Kind() string { type HttpResponse func(w http.ResponseWriter, r *http.Request) -// VALIDATOR TYPES +// VALIDATOR TYPES. var ValidateFetchQueue = "validate_fetch" type ValidateFetchArgs struct { diff --git a/internal/kv/s3json_test.go b/internal/kv/s3json_test.go index 897b648..756e4e2 100644 --- a/internal/kv/s3json_test.go +++ b/internal/kv/s3json_test.go @@ -55,6 +55,7 @@ func TestSave(t *testing.T) { setup() s3json := NewFromBytes("fetch", util.HTTPS, "search.gov", "/", []byte(`{"a": 3, "b": 5}`)) + //nolint:errcheck s3json.Save() assert.Equal(t, int64(3), s3json.GetInt64("a")) } From ff05a53296d72aff8becaa9cc6ab11391596e64e Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 08:15:59 -0500 Subject: [PATCH 27/39] Linting. --- config/constants.go | 19 ++++++-- config/domain64.go | 29 +++++++++--- config/domain64_test.go | 1 + config/embed.go | 68 ++++++++++++++--------------- internal/filtering/filter.go | 6 +++ internal/queueing/generic_insert.go | 10 ++++- internal/queueing/periodic_clear.go | 8 +++- internal/queueing/river.go | 7 ++- 8 files changed, 99 insertions(+), 49 deletions(-) diff --git a/config/constants.go b/config/constants.go index 16909a1..a4cdccb 100644 --- a/config/constants.go +++ b/config/constants.go @@ -11,10 +11,12 @@ import ( //go:embed constants.json var ConstFS embed.FS -// Load the bytes into RAM, and leave them there. -// Assume over the live of a service we'll hit -// this file a whole bunch of times. And, it never -// changes during a single deploy, so... :shrug: +/* +Load the bytes into RAM, and leave them there. 
+Assume over the life of a service we'll hit
+this file a whole bunch of times. And, it never
+changes during a single deploy, so... :shrug:.
+*/
 var cachedConstants []byte
 
 func primeConstants() {
@@ -24,31 +26,40 @@ func primeConstants() {
 if err != nil {
 zap.L().Fatal("could not read constants from embedded FS")
 }
+
 cachedConstants = bytes
 }
 }
 
 func GetScheme(scheme string) int {
 primeConstants()
+
 v := gjson.GetBytes(cachedConstants, "SchemeToConst."+scheme).Int()
+
 return int(v)
 }
 
 func GetContentType(ct string) int {
 primeConstants()
+
 v := gjson.GetBytes(cachedConstants, "ContentTypeToConst."+ct).Int()
+
 return int(v)
 }
 
 func GetTLD(tld string) int {
 primeConstants()
+
 v := gjson.GetBytes(cachedConstants, "TldToConst."+tld).Int()
+
 return int(v)
 }
 
 func IntToTld(i int) string {
 primeConstants()
+
 search_string := "ConstToTld." + fmt.Sprintf("%x", i)
 v := gjson.GetBytes(cachedConstants, search_string).String()
+
 return v
 }
diff --git a/config/domain64.go b/config/domain64.go
index fa985ab..f5e21f2 100644
--- a/config/domain64.go
+++ b/config/domain64.go
@@ -26,10 +26,12 @@ const (
 //go:embed domain64/domain64.json
 var Domain64FS embed.FS
 
-// Load the bytes into RAM, and leave them there.
-// Assume over the live of a service we'll hit
-// this file a whole bunch of times. And, it never
-// changes during a single deploy, so... :shrug:
+/*
+Load the bytes into RAM, and leave them there.
+Assume over the life of a service we'll hit
+this file a whole bunch of times. And, it never
+changes during a single deploy, so... :shrug:.
+*/
 var cached_file []byte
 
 func primeCache() {
@@ -45,29 +47,36 @@ func tldAndEscaped(fqdn string) (string, string, error) {
 if len(pieces) < 2 {
 return "", "", fmt.Errorf("fqdn is too short: %s", fqdn)
 }
+
 tld := pieces[len(pieces)-1]
 // Escape the FQDN dots so it can be used with GJSON
 fqdn_as_json_key := strings.Replace(fqdn, ".", `\.`, -1)
+
 return tld, fqdn_as_json_key, nil
 }
 
 func FQDNToDomain64(fqdn string) (int64, error) {
 primeCache()
+
 tld, escaped, err := tldAndEscaped(fqdn)
 if err != nil {
 return 0, err
 }
+
 hex := gjson.GetBytes(cached_file, tld+".FQDNToDomain64."+escaped).String()
+
 value, err := strconv.ParseInt(hex, 16, 64)
 if err != nil {
 //nolint:wrapcheck
 return 0, err
 }
+
 return int64(value), nil
 }
 
 func Domain64ToFQDN(domain64 int64) (string, error) {
 primeCache()
+
 h := fmt.Sprintf("%016X", domain64)
 v, _ := strconv.ParseInt(h[0:2], 16, 32)
 tld := IntToTld(int(v))
@@ -80,15 +89,19 @@ func Domain64ToFQDN(domain64 int64) (string, error) {
 
 func RDomainToDomain64(rdomain string) string {
 primeCache()
+
 tld := strings.Split(rdomain, ".")[0]
 hex := gjson.GetBytes(cached_file, tld+".RDomainToDomain64."+strings.Replace(rdomain, ".", `\.`, -1)).String()
+
 return hex
 }
 
 func GetAllFQDNToDomain64() map[string]int64 {
 primeCache()
+
 tlds := gjson.GetBytes(cached_file, "TLDs").Array()
 all := make(map[string]int64)
+
 for _, tld := range tlds {
 m := gjson.GetBytes(cached_file, tld.String()+".FQDNToDomain64").Map()
 for fq, d64 := range m {
@@ -97,9 +110,11 @@ func GetAllFQDNToDomain64() map[string]int64 {
 zap.L().Error("could not get decimal value for Domain64",
 zap.String("domain64", d64.String()),
 zap.String("fqdn", fq))
 }
+
 all[fq] = dec
 }
 }
+
 return all
 }
 
@@ -109,6 +124,7 @@ func HexToDec64(hex string) (int64, error) {
 //nolint:wrapcheck
 return 0, err
 }
+
 return value, nil
 }
 
@@ -119,6 +135,7 @@ func Dec64ToHex(dec int64) string {
 
 func GetSchedule(fqdn string) Schedule {
 primeCache()
+
 tld, escaped, err := tldAndEscaped(fqdn)
 hex := gjson.GetBytes(cached_file,
tld+".FQDNToDomain64."+escaped).String()
 schedule := gjson.GetBytes(cached_file, tld+".Schedule."+hex).String()
@@ -141,8 +158,8 @@ func GetSchedule(fqdn string) Schedule {
 return BiAnnually
 case "Annually":
 return Annually
- default:
- return Default
 }
 }
+
+ return Default
 }
diff --git a/config/domain64_test.go b/config/domain64_test.go
index 6b1dbff..cd2e382 100644
--- a/config/domain64_test.go
+++ b/config/domain64_test.go
@@ -1,3 +1,4 @@
+//nolint:all
 package config
 
 import (
diff --git a/config/embed.go b/config/embed.go
index 69f05b6..b7e0213 100644
--- a/config/embed.go
+++ b/config/embed.go
@@ -17,10 +17,12 @@ func ReadConfigJsonnet(sonnetFilename string) string {
 bytes, _ := ConfigFs.ReadFile(sonnetFilename)
 
 vm := jsonnet.MakeVM()
+
 json, err := vm.EvaluateAnonymousSnippet(sonnetFilename, string(bytes))
 if err != nil {
 zap.L().Fatal(err.Error())
 }
+
 return json
 }
 
@@ -29,6 +31,7 @@ func ReadJsonConfig(jsonFilename string) string {
 if err != nil {
 zap.L().Fatal(err.Error())
 }
+
 return string(json_bytes)
 }
 
@@ -37,11 +40,13 @@ func GetYamlFileReader(yamlFilename string) *bytes.Reader {
 if err != nil {
 zap.L().Fatal(err.Error())
 }
+
 return bytes.NewReader(yaml_bytes)
 }
 
 func GetListOfHosts(allowed_hosts string) []string {
 zap.L().Debug("reading in hosts", zap.String("allowed_hosts", allowed_hosts))
+
 cfg := ReadJsonConfig("allowed_hosts.yaml")
 
 // The variable `allowed_hosts` will be the key into the doc that has
@@ -52,25 +57,31 @@
 set := make(map[string]bool)
 all := GetAllFQDNToDomain64()
+
 for _, pair := range ranges {
 low := (pair.Array())[0].Int()
 high := (pair.Array())[1].Int()
+
 zap.L().Info("checking range", zap.Int64("low", low), zap.Int64("high", high))
+
 for fqdn, d64 := range all {
 if (d64 >= low) && (d64 <= high) {
 set[fqdn] = true
 }
 }
 }
+
 for fqdn := range set {
 hosts = append(hosts, fqdn)
 }
+
 return hosts
 }
 
 func GetHostBackend(host, schedule string) string {
 cfg := ReadJsonConfig(schedule)
 backend := "postgres"
+
 for _, section := range gjson.Parse(cfg).Get("@keys").Array() {
 for _, site := range gjson.Get(cfg, section.String()).Array() {
 if host == site.Get("host").String() {
@@ -81,51 +92,40 @@ func GetHostBackend(host, schedule string) string {
 }
 }
 }
+
 return backend
 }
 
-func SectionToTimestamp(section string, start_time time.Time) time.Time {
+const HOURS_PER_DAY = 24
+
+const DAYS_PER_WEEK = 7
+
+const DAYS_PER_BIWEEK = 14
+
+const DAYS_PER_MONTH = 30
+
+const DAYS_PER_QUARTER = 3 * 30
+
+const DAYS_PER_BIANNUM = 6 * 30
+
+const DAYS_PER_ANNUM = 12 * 30
+
+func SectionToTimestamp(section string, startTime time.Time) time.Time {
 switch section {
 case "daily":
- return start_time.Add(24 * time.Hour)
+ return startTime.Add(HOURS_PER_DAY * time.Hour)
 case "weekly":
- return start_time.Add(7 * 24 * time.Hour)
+ return startTime.Add(DAYS_PER_WEEK * HOURS_PER_DAY * time.Hour)
 case "bi-weekly":
- return start_time.Add(14 * 24 * time.Hour)
+ return startTime.Add(DAYS_PER_BIWEEK * HOURS_PER_DAY * time.Hour)
 case "monthly":
- return start_time.Add(30 * 24 * time.Hour)
+ return startTime.Add(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour)
 case "quarterly":
- return start_time.Add(3 * 30 * 24 * time.Hour)
+ return startTime.Add(DAYS_PER_QUARTER * HOURS_PER_DAY * time.Hour)
 case "bi-annually":
- return start_time.Add(6 * 30 * 24 * time.Hour)
+ return startTime.Add(DAYS_PER_BIANNUM * HOURS_PER_DAY * time.Hour)
 default:
 // We will default to `monthly` to be safe
- return start_time.Add(time.Duration(30*24) * time.Hour)
+ return
startTime.Add(time.Duration(DAYS_PER_MONTH*HOURS_PER_DAY) * time.Hour) } } - -// func GetScheduleFromHost(host string, schedule string) string { -// // This cannot come from the Env, because that would be a circular import. -// // So, this is a big FIXME. -// cfg := ReadJsonConfig(schedule) -// hostSections := make(map[string]string, 0) -// for _, section := range gjson.Parse(cfg).Get("@keys").Array() { -// for _, site := range gjson.Get(cfg, section.String()).Array() { -// hostSections[site.Get("host").String()] = section.String() -// } -// } -// return hostSections[host] -// } - -// func HostToPgTimestamp(host string, schedule string, start_time time.Time) pgtype.Timestamp { -// sched := GetScheduleFromHost(host, schedule) -// return SectionToPgTimestamp(sched, start_time) -// } - -// func SectionToPgTimestamp(section string, start_time time.Time) pgtype.Timestamp { -// return pgtype.Timestamp{ -// Time: SectionToTimestamp(section, start_time), -// InfinityModifier: 0, -// Valid: true, -// } -// } diff --git a/internal/filtering/filter.go b/internal/filtering/filter.go index 76f8c0d..a34e3e0 100644 --- a/internal/filtering/filter.go +++ b/internal/filtering/filter.go @@ -18,12 +18,14 @@ func GetRules() []Rule { rules := make([]Rule, 0) rules = append(rules, GeneralRules()...) rules = append(rules, NasaRules()...) + return rules } func IsRejectRuleset(u *url.URL, rules []Rule) error { failed := false failedMsg := "" + var e error for _, r := range rules { @@ -31,15 +33,18 @@ func IsRejectRuleset(u *url.URL, rules []Rule) error { if err != nil { return fmt.Errorf("should not get here: %s", err.Error()) } + if apply { // log.Println("applying", r.Msg) err := r.Reject(u) if err != nil { zap.L().Debug("reject based on rule", zap.String("msg", r.Msg)) + failed = true failedMsg = r.Msg e = err + break } } @@ -54,5 +59,6 @@ func IsRejectRuleset(u *url.URL, rules []Rule) error { func IsReject(u *url.URL) error { rules := GetRules() + return IsRejectRuleset(u, rules) } diff --git a/internal/queueing/generic_insert.go b/internal/queueing/generic_insert.go index aa2830a..c38ec8a 100644 --- a/internal/queueing/generic_insert.go +++ b/internal/queueing/generic_insert.go @@ -27,6 +27,7 @@ func commonCommit(qshp QSHP, ctx context.Context, tx pgx.Tx) { if err != nil { zap.L().Error("cannot roll back commit") } + zap.L().Fatal("cannot commit insert tx", zap.String("host", qshp.Host), zap.String("path", qshp.Path), @@ -34,6 +35,7 @@ func commonCommit(qshp QSHP, ctx context.Context, tx pgx.Tx) { } } +//nolint:cyclop,funlen func Enqueue(ch_qshp <-chan QSHP) { // Can we leave one connection open for the entire life of a // service? Maybe. Maybe not. 
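
For orientation, other services feed this loop through a channel rather than opening their own queue connections; a hedged sketch of the producer side follows (any field name beyond Scheme, Host, and Path is an assumption here):

package main

import "github.com/GSA-TTS/jemison/internal/queueing"

func main() {
	ch := make(chan queueing.QSHP)
	go queueing.Enqueue(ch) // long-lived consumer; owns the DB connection

	ch <- queueing.QSHP{
		Queue:  "fetch", // assumed name for the queue selector field
		Scheme: "https",
		Host:   "example.gov",
		Path:   "/",
	}
}
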
@@ -58,7 +60,6 @@ func Enqueue(ch_qshp <-chan QSHP) { } switch queue_to_match { - case "entree": _, err := client.InsertTx(ctx, tx, common.EntreeArgs{ Scheme: qshp.Scheme, @@ -70,10 +71,10 @@ func Enqueue(ch_qshp <-chan QSHP) { if err != nil { zap.L().Error("cannot insert into queue entree") } + commonCommit(qshp, ctx, tx) case "extract": - _, err := client.InsertTx(ctx, tx, common.ExtractArgs{ Scheme: qshp.Scheme, Host: qshp.Host, @@ -94,6 +95,7 @@ func Enqueue(ch_qshp <-chan QSHP) { if err != nil { zap.L().Error("cannot insert into queue fetch") } + commonCommit(qshp, ctx, tx) case "pack": @@ -105,6 +107,7 @@ func Enqueue(ch_qshp <-chan QSHP) { if err != nil { zap.L().Error("cannot insert into queue pack") } + commonCommit(qshp, ctx, tx) case "serve": @@ -114,6 +117,7 @@ func Enqueue(ch_qshp <-chan QSHP) { if err != nil { zap.L().Error("cannot insert into queue serve") } + commonCommit(qshp, ctx, tx) case "walk": @@ -121,6 +125,7 @@ func Enqueue(ch_qshp <-chan QSHP) { zap.L().Error("found non-walk job coming to the walk queue", zap.String("host", qshp.Host), zap.String("path", qshp.Path)) } + _, err := client.InsertTx(ctx, tx, common.WalkArgs{ Scheme: qshp.Scheme, Host: qshp.Host, @@ -129,6 +134,7 @@ func Enqueue(ch_qshp <-chan QSHP) { if err != nil { zap.L().Error("cannot insert into queue walk") } + commonCommit(qshp, ctx, tx) default: diff --git a/internal/queueing/periodic_clear.go b/internal/queueing/periodic_clear.go index 987e431..4d8ec85 100644 --- a/internal/queueing/periodic_clear.go +++ b/internal/queueing/periodic_clear.go @@ -8,15 +8,21 @@ import ( "go.uber.org/zap" ) +const PERIODIC_CLEANUP_MINUTES = 10 + func ClearCompletedPeriodically() { _, pool, _ := common.CommonQueueInit() defer pool.Close() - ticker := time.NewTicker(3 * time.Minute) + ticker := time.NewTicker(PERIODIC_CLEANUP_MINUTES * time.Minute) + for { <-ticker.C + zap.L().Warn("clearing completed queue") + ctx := context.Background() + _, err := pool.Exec(ctx, "DELETE FROM river_job WHERE state='completed'") if err != nil { zap.L().Error("failed to periodically delete jobs") diff --git a/internal/queueing/river.go b/internal/queueing/river.go index 1d4f746..a125dba 100644 --- a/internal/queueing/river.go +++ b/internal/queueing/river.go @@ -12,7 +12,6 @@ import ( ) func InitializeRiverQueues() { - // Set up a pool connection_string, err := env.Env.GetDatabaseUrl(env.QueueDatabase) if err != nil { @@ -21,10 +20,12 @@ func InitializeRiverQueues() { } ctx := context.Background() + pool, err := pgxpool.New(ctx, connection_string) if err != nil { zap.L().Fatal("cannot create database pool for migrations") } + defer pool.Close() // Run the migrations, always. @@ -32,6 +33,7 @@ func InitializeRiverQueues() { if err != nil { zap.L().Info("could not create a river migrator") } + _, err = migrator.Migrate(ctx, rivermigrate.DirectionUp, &rivermigrate.MigrateOpts{}) if err != nil { zap.L().Info("could not run the river migrator") @@ -42,11 +44,11 @@ func RunRiverMigrator() { ctx := context.Background() // Set up a pool connection_string, err := env.Env.GetDatabaseUrl(env.QueueDatabase) - if err != nil { log.Println("RIVER cannot find connection string for", env.QueueDatabase) log.Fatal(err) } + pool, err := pgxpool.New(ctx, connection_string) if err != nil { zap.L().Fatal("could not get pool for river migrator") @@ -59,6 +61,7 @@ func RunRiverMigrator() { zap.L().Error("river could not create river migrator. 
exiting.") zap.L().Fatal(err.Error()) } + _, err = migrator.Migrate(ctx, rivermigrate.DirectionUp, &rivermigrate.MigrateOpts{}) if err != nil { zap.L().Error("river could not run river migrations. exiting.") From fe9f0b1353bdbb2aae6aa385bf1f021ad43222ca Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 08:37:53 -0500 Subject: [PATCH 28/39] Linting. --- cmd/entree/main.go | 2 +- cmd/entree/work.go | 2 +- cmd/fetch/work.go | 3 +- cmd/fetch/work_support.go | 2 +- cmd/migrate/migrate.go | 11 ++-- cmd/pack/html.go | 6 +-- cmd/pack/main.go | 5 +- cmd/pack/pdf.go | 3 -- cmd/pack/perhostlock.go | 1 + cmd/pack/queues.go | 3 +- cmd/pack/work.go | 2 +- cmd/serve/handler_search.go | 10 +++- cmd/serve/main.go | 12 ++++- cmd/serve/queries.go | 3 ++ cmd/serve/queues.go | 6 +-- cmd/validate/queues.go | 6 ++- cmd/walk/main.go | 11 ++-- cmd/walk/queues.go | 6 +-- cmd/walk/work.go | 33 +++++++++--- config/constants_test.go | 1 + config/domain64.go | 4 +- internal/env/env.go | 71 ++++++++++++++++++++------ internal/env/zap.go | 2 + internal/filtering/filter_test.go | 2 +- internal/filtering/general.go | 24 +++++++-- internal/filtering/nasa.go | 3 +- internal/util/array_util.go | 1 + internal/util/remove_stopwords.go | 6 ++- internal/util/string_utilities.go | 12 ++++- internal/util/string_utilities_test.go | 1 + pkg/vcap/vcap.go | 9 +++- pkg/vcap/vcap_test.go | 4 ++ 32 files changed, 198 insertions(+), 69 deletions(-) diff --git a/cmd/entree/main.go b/cmd/entree/main.go index 410c924..7d7d3fb 100644 --- a/cmd/entree/main.go +++ b/cmd/entree/main.go @@ -40,6 +40,7 @@ func main() { zap.L().Error("could not get Domain64 for FQDN", zap.String("fqdn", fqdn)) } else { zap.L().Debug("inserting fqdn/d64 to hosts", zap.String("fqdn", fqdn), zap.Int64("d64", d64)) + _, err := JDB.WorkDBQueries.UpsertUniqueHost(context.Background(), work_db.UpsertUniqueHostParams{ Domain64: pgtype.Int8{ @@ -52,7 +53,6 @@ func main() { Time: time.Now().Add(30 * 24 * time.Hour), }, }) - if err != nil { zap.L().Error("error upserting domain64 value", zap.Int64("domain64", d64)) } diff --git a/cmd/entree/work.go b/cmd/entree/work.go index 12ff469..34a72b0 100644 --- a/cmd/entree/work.go +++ b/cmd/entree/work.go @@ -18,7 +18,7 @@ import ( // fullCrawl & !pass: check every timeout in the domain. // fullCrawl & pass: re-crawl the whole domain now. // !fullCrawl & !pass: check -// !fullCrawl & pass: fetch the page now +// !fullCrawl & pass: fetch the page now. */ func (w *EntreeWorker) Work(ctx context.Context, job *river.Job[common.EntreeArgs]) error { var kind string diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go index 7fa3db9..7ce4a28 100644 --- a/cmd/fetch/work.go +++ b/cmd/fetch/work.go @@ -1,3 +1,4 @@ +//nolint:godox package main import ( @@ -160,8 +161,8 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // will proceed. zap.L().Debug("fetching page content", zap.String("url", host_and_path(job))) - page_json, err := fetch_page_content(job) + page_json, err := fetch_page_content(job) if err != nil { // The queueing system retries should save us here; bail if we // can't get the content now. 
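
The "retries should save us" comment leans on River's contract: a Work method that returns a non-nil error is handed back to the queue and retried later, with job.Attempt counting the tries. An illustrative worker, not from this tree (ExampleArgs and doWork are stand-ins):

package main

import (
	"context"

	"github.com/riverqueue/river"
)

type ExampleArgs struct{}

func (ExampleArgs) Kind() string { return "example" }

type ExampleWorker struct {
	river.WorkerDefaults[ExampleArgs]
}

func (w *ExampleWorker) Work(ctx context.Context, job *river.Job[ExampleArgs]) error {
	if job.Attempt > 2 {
		return nil // drop zombies quietly, as the walk worker does
	}

	return doWork(ctx) // a non-nil error asks River to schedule a retry
}

func doWork(_ context.Context) error { return nil } // stand-in
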
diff --git a/cmd/fetch/work_support.go b/cmd/fetch/work_support.go index 7cefa9e..ceb7c2b 100644 --- a/cmd/fetch/work_support.go +++ b/cmd/fetch/work_support.go @@ -204,8 +204,8 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) ( // Stream that file over to S3 s3 := kv.NewS3(ThisServiceName) - err = s3.FileToS3(key, tempFilename, util.GetMimeType(contentType)) + err = s3.FileToS3(key, tempFilename, util.GetMimeType(contentType)) if err != nil { zap.L().Error("could not send file to S3", zap.String("key", key.Render()), diff --git a/cmd/migrate/migrate.go b/cmd/migrate/migrate.go index 320fda3..5c73c23 100644 --- a/cmd/migrate/migrate.go +++ b/cmd/migrate/migrate.go @@ -6,16 +6,15 @@ import ( "log" "net/url" - _ "github.com/amacneil/dbmate/v2/pkg/driver/postgres" - "github.com/GSA-TTS/jemison/internal/env" "github.com/amacneil/dbmate/v2/pkg/dbmate" + _ "github.com/amacneil/dbmate/v2/pkg/driver/postgres" "go.uber.org/zap" ) // Carry our migrations with us as part of the build. // This eliminates wondering where they are when we deploy. -// + //go:embed work_db/db/migrations/*.sql var workFS embed.FS @@ -27,9 +26,8 @@ type location struct { MigrationsDir string } -// Assumes config has been read +// Assumes config has been read. func MigrateDB(dbUri string, loc location) { - db1_url, err := env.Env.GetDatabaseUrl(dbUri) if err != nil { zap.L().Fatal("could not get url for", @@ -42,15 +40,18 @@ func MigrateDB(dbUri string, loc location) { db.MigrationsDir = []string{loc.MigrationsDir} log.Println("Migrations:") + migrations, err := db.FindMigrations() if err != nil { panic(err) } + for _, m := range migrations { fmt.Println(m.Version, m.FilePath) } log.Println("\nApplying...") + err = db.CreateAndMigrate() if err != nil { panic(err) diff --git a/cmd/pack/html.go b/cmd/pack/html.go index 7e54b59..42bb2ce 100644 --- a/cmd/pack/html.go +++ b/cmd/pack/html.go @@ -28,7 +28,6 @@ func packHtml(s3json *kv.S3JSON) { Tag: "path", Content: s3json.GetString("path"), }) - if err != nil { zap.L().Error("could not insert path when packing", zap.String("_key", s3json.GetString("_key")), @@ -36,6 +35,7 @@ func packHtml(s3json *kv.S3JSON) { zap.String("path", s3json.GetString("path")), ) } + zap.L().Debug("packed path") /////////////////////// @@ -48,7 +48,6 @@ func packHtml(s3json *kv.S3JSON) { Tag: "title", Content: s3json.GetString("title"), }) - if err != nil { zap.L().Error("could not insert title when packing", zap.String("_key", s3json.GetString("_key")), @@ -56,6 +55,7 @@ func packHtml(s3json *kv.S3JSON) { zap.String("path", s3json.GetString("title")), ) } + zap.L().Debug("packed title") /////////////////////// @@ -68,7 +68,6 @@ func packHtml(s3json *kv.S3JSON) { Tag: "body", Content: s3json.GetString("body"), }) - if err != nil { zap.L().Error("could not insert title when packing", zap.String("_key", s3json.GetString("_key")), @@ -76,5 +75,6 @@ func packHtml(s3json *kv.S3JSON) { zap.String("path", s3json.GetString("body")), ) } + zap.L().Debug("packed body") } diff --git a/cmd/pack/main.go b/cmd/pack/main.go index 8adf022..00c307f 100644 --- a/cmd/pack/main.go +++ b/cmd/pack/main.go @@ -13,15 +13,17 @@ import ( var ThisServiceName = "pack" -// var ChFinalize = make(chan string) var ChQSHP = make(chan queueing.QSHP) + var PHL *PerHostLock = nil + var JDB *postgres.JemisonDB func main() { env.InitGlobalEnv(ThisServiceName) InitializeQueues() + engine := common.InitializeAPI() log.Println("environment initialized") @@ -30,7 +32,6 @@ func main() { JDB = postgres.NewJemisonDB() - //go 
FinalizeTimer(ChFinalize) go queueing.Enqueue(ChQSHP) go queueing.ClearCompletedPeriodically() diff --git a/cmd/pack/pdf.go b/cmd/pack/pdf.go index 9138ba2..4647134 100644 --- a/cmd/pack/pdf.go +++ b/cmd/pack/pdf.go @@ -10,7 +10,6 @@ import ( ) func packPdf(s3json *kv.S3JSON) { - // We have more fields than before. d64, err := config.FQDNToDomain64(s3json.Key.Host) if err != nil { @@ -27,7 +26,6 @@ func packPdf(s3json *kv.S3JSON) { Tag: "path", Content: s3json.GetString("path"), }) - if err != nil { zap.L().Error("could not insert path when packing", zap.String("_key", s3json.GetString("_key")), @@ -46,7 +44,6 @@ func packPdf(s3json *kv.S3JSON) { Tag: "title", Content: s3json.GetString("title") + " (PDF page " + s3json.GetString("pdf_page_number") + ")", }) - if err != nil { zap.L().Error("could not insert title when packing", zap.String("_key", s3json.GetString("_key")), diff --git a/cmd/pack/perhostlock.go b/cmd/pack/perhostlock.go index 9db0272..ca6357f 100644 --- a/cmd/pack/perhostlock.go +++ b/cmd/pack/perhostlock.go @@ -9,6 +9,7 @@ const mutexLocked = 1 func MutexLocked(m *sync.Mutex) bool { state := reflect.ValueOf(m).Elem().FieldByName("state") + return state.Int()&mutexLocked == mutexLocked } diff --git a/cmd/pack/queues.go b/cmd/pack/queues.go index a97b305..1292582 100644 --- a/cmd/pack/queues.go +++ b/cmd/pack/queues.go @@ -14,6 +14,7 @@ import ( ) var packPool *pgxpool.Pool + var packClient *river.Client[pgx.Tx] type PackWorker struct { @@ -21,7 +22,6 @@ type PackWorker struct { } func InitializeQueues() { - ctx, pP, workers := common.CommonQueueInit() packPool = pP @@ -43,7 +43,6 @@ func InitializeQueues() { }, Workers: workers, }) - if err != nil { zap.L().Error("could not establish worker pool") log.Println(err) diff --git a/cmd/pack/work.go b/cmd/pack/work.go index e63d25d..9b75fee 100644 --- a/cmd/pack/work.go +++ b/cmd/pack/work.go @@ -11,7 +11,6 @@ import ( ) func (w *PackWorker) Work(ctx context.Context, job *river.Job[common.PackArgs]) error { - // It comes in with the GuestbookId. That's all we need (plus the S3 object). s3 := kv.NewS3("extract") key := util.CreateS3Key(util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path, util.JSON) @@ -20,6 +19,7 @@ func (w *PackWorker) Work(ctx context.Context, job *river.Job[common.PackArgs]) if err != nil { zap.L().Error("could not fetch object for packing", zap.String("key", s3json.Key.Render())) + return nil } diff --git a/cmd/serve/handler_search.go b/cmd/serve/handler_search.go index 457791c..b75da13 100644 --- a/cmd/serve/handler_search.go +++ b/cmd/serve/handler_search.go @@ -1,3 +1,4 @@ +//nolint:godox package main import ( @@ -38,12 +39,14 @@ type SearchResult struct { func to64(s string) int64 { v, _ := strconv.Atoi(s) + return int64(v) } // Would just be * with SQLite. 
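+// In Postgres tsquery syntax, a trailing ":*" requests prefix matching,
+// so a stemmed term rendered as `run:*` also matches lexemes like "runner".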
var _stemmed = ":*" +//nolint:funlen func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { start := time.Now() @@ -61,9 +64,11 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { et = strings.TrimSpace(et) stemmed, err := snowball.Stem(et, "english", true) zap.L().Debug("stemmed result", zap.String("et", et), zap.String("stemmed", stemmed)) + if err != nil { zap.L().Debug("stemming error", zap.String("err", err.Error())) } + query.AddToQuery(Or(et, stemmed+_stemmed)) } @@ -84,6 +89,7 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { duration := time.Since(start) cleaned := make([]SearchResult, 0) + for _, r := range res { // FIXME: the database structure is forcing us into an N+1 queries // situation... Not good. @@ -120,8 +126,8 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { FQDN: fqdn, }) } - return cleaned, duration, err + return cleaned, duration, err } func SearchHandler(c *gin.Context) { @@ -140,6 +146,7 @@ func SearchHandler(c *gin.Context) { "elapsed": duration, "results": nil, }) + return } else { c.IndentedJSON(http.StatusOK, gin.H{ @@ -147,6 +154,7 @@ func SearchHandler(c *gin.Context) { "elapsed": duration, "results": rows, }) + return } } diff --git a/cmd/serve/main.go b/cmd/serve/main.go index 23ccdd6..a9fb3c8 100644 --- a/cmd/serve/main.go +++ b/cmd/serve/main.go @@ -19,9 +19,12 @@ import ( "go.uber.org/zap" ) -var Databases sync.Map //map[string]*sql.DB +var Databases sync.Map + var ChQSHP = make(chan queueing.QSHP) + var ThisServiceName = "serve" + var JDB *postgres.JemisonDB func addMetadata(m map[string]any) map[string]any { @@ -32,8 +35,10 @@ func addMetadata(m map[string]any) map[string]any { }) if err != nil { zap.L().Error(err.Error()) + pathCount = 0 } + m["pageCount"] = pathCount bodyCount, err := JDB.SearchDBQueries.BodiesInDomain64Range(context.Background(), @@ -43,16 +48,19 @@ func addMetadata(m map[string]any) map[string]any { }) if err != nil { zap.L().Error(err.Error()) + bodyCount = 0 } + m["bodyCount"] = bodyCount return m } +//nolint:funlen func main() { env.InitGlobalEnv(ThisServiceName) - //s3 := kv.NewS3(ThisServiceName) + InitializeQueues() go queueing.Enqueue(ChQSHP) diff --git a/cmd/serve/queries.go b/cmd/serve/queries.go index fa84c11..8565fec 100644 --- a/cmd/serve/queries.go +++ b/cmd/serve/queries.go @@ -1,6 +1,7 @@ package main var _or = " | " + var _and = " & " type Q interface { @@ -48,6 +49,7 @@ type Query struct { func NewQuery() *Query { q := Query{} q.Queries = make([]Q, 0) + return &q } @@ -64,5 +66,6 @@ func (q *Query) ToString() string { qs += _and } } + return qs } diff --git a/cmd/serve/queues.go b/cmd/serve/queues.go index 6897102..d5f8df8 100644 --- a/cmd/serve/queues.go +++ b/cmd/serve/queues.go @@ -17,8 +17,9 @@ import ( // GLOBAL TO THE APP // One pool of connections for River. -// The work client, doing the work of `fetch` +// The work client, doing the work of `fetch`. var servePool *pgxpool.Pool + var serveClient *river.Client[pgx.Tx] type ServeWorker struct { @@ -49,7 +50,6 @@ func InitializeQueues() { }, Workers: workers, }) - if err != nil { zap.L().Error("could not establish worker pool") log.Println(err) @@ -59,6 +59,6 @@ func InitializeQueues() { // Start the work clients if err := serveClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. 
exiting.") - os.Exit(42) + os.Exit(1) } } diff --git a/cmd/validate/queues.go b/cmd/validate/queues.go index 22decea..827f426 100644 --- a/cmd/validate/queues.go +++ b/cmd/validate/queues.go @@ -19,8 +19,10 @@ type FetchWorker struct { river.WorkerDefaults[common.FetchArgs] } +//nolint:lll func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name string, workerStruct T) *river.Client[pgx.Tx] { queueing.InitializeRiverQueues() + ctx, pool, workers := common.CommonQueueInit() // Essentially adds a worker "type" to the work engine. @@ -42,7 +44,6 @@ func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name s }, Workers: workers, }) - if err != nil { zap.L().Error("could not establish worker pool", zap.String("service_name", service_name), @@ -56,7 +57,7 @@ func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name s if err := theClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. exiting.", zap.String("queue_name", queue_name)) - os.Exit(42) + os.Exit(1) } return theClient @@ -68,6 +69,7 @@ type ValidateFetchWorker struct { func (w ValidateFetchWorker) Work(ctx context.Context, job *river.Job[common.ValidateFetchArgs]) error { zap.L().Info("VALIDATE IS RUNNING AND DOING NOTHING") + return nil } diff --git a/cmd/walk/main.go b/cmd/walk/main.go index 5e29b2d..d88565a 100644 --- a/cmd/walk/main.go +++ b/cmd/walk/main.go @@ -5,26 +5,29 @@ import ( "net/http" "time" - expirable "github.com/go-pkgz/expirable-cache/v3" - "github.com/patrickmn/go-cache" - "github.com/GSA-TTS/jemison/internal/common" "github.com/GSA-TTS/jemison/internal/env" "github.com/GSA-TTS/jemison/internal/queueing" + expirable "github.com/go-pkgz/expirable-cache/v3" + "github.com/patrickmn/go-cache" "go.uber.org/zap" ) var expirable_cache expirable.Cache[string, int] + var RecentlyVisitedCache *cache.Cache + var ChQSHP = make(chan queueing.QSHP) + var ThisServiceName = "walk" func main() { - env.InitGlobalEnv(ThisServiceName) + InitializeQueues() log.Println("environment initialized") + service, _ := env.Env.GetUserService("walk") engine := common.InitializeAPI() diff --git a/cmd/walk/queues.go b/cmd/walk/queues.go index 799bf49..2457666 100644 --- a/cmd/walk/queues.go +++ b/cmd/walk/queues.go @@ -14,8 +14,9 @@ import ( "go.uber.org/zap" ) -// The work client, doing the work of `fetch` +// The work client, doing the work of `fetch`. var dbPool *pgxpool.Pool + var walkClient *river.Client[pgx.Tx] type WalkWorker struct { @@ -46,7 +47,6 @@ func InitializeQueues() { }, Workers: workers, }) - if err != nil { zap.L().Error("could not establish worker pool") log.Println(err) @@ -56,6 +56,6 @@ func InitializeQueues() { // Start the work clients if err := walkClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. 
exiting.") - os.Exit(42) + os.Exit(1) } } diff --git a/cmd/walk/work.go b/cmd/walk/work.go index 8daeeea..5f6a661 100644 --- a/cmd/walk/work.go +++ b/cmd/walk/work.go @@ -1,3 +1,4 @@ +//nolint:godox,godot package main import ( @@ -21,6 +22,8 @@ import ( // ////////////////////////////////////// // go_for_a_walk +// +//nolint:wsl func go_for_a_walk(s3json *kv.S3JSON) { cleaned_mime_type := util.CleanMimeType(s3json.GetString("content-type")) switch cleaned_mime_type { @@ -33,8 +36,9 @@ func go_for_a_walk(s3json *kv.S3JSON) { // ////////////////////////////////////// // extract_links +// +//nolint:cyclop,funlen func extract_links(s3json *kv.S3JSON) []*url.URL { - // Return a unique set link_set := make(map[string]bool) // Remove all trailing slashes. @@ -45,24 +49,29 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { tempFilename := uuid.NewString() s3 := kv.NewS3("fetch") + err := s3.S3PathToFile(raw, tempFilename) if err != nil { zap.L().Error("could not copy s3 to local file", zap.String("tempFilename", tempFilename), zap.String("raw", raw)) + return links } tFile, err := os.Open(tempFilename) if err != nil { zap.L().Error("cannot open temporary file", zap.String("filename", tempFilename)) + return links } + defer func() { tFile.Close(); os.Remove(tempFilename) }() fi, err := os.Stat(tempFilename) if err != nil { zap.L().Error(err.Error()) + return links } // get the size @@ -70,9 +79,9 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { zap.L().Debug("tempFilename size", zap.Int64("size", size)) doc, err := goquery.NewDocumentFromReader(tFile) - if err != nil { zap.L().Error("WALK cannot convert to document", zap.String("key", s3json.Key.Render())) + return links } @@ -82,8 +91,7 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { doc.Find("a[href]").Each(func(ndx int, sel *goquery.Selection) { link, exists := sel.Attr("href") - // zap.L().Debug("found link", zap.String("link", link), zap.Bool("exists", exists)) - + //nolint:nestif if exists { link_to_crawl, err := is_crawlable(s3json, link) if err != nil { @@ -98,6 +106,7 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { if strings.HasPrefix(link_to_crawl, "http") { zap.L().Debug("link to crawl", zap.String("url", link_to_crawl)) expirable_cache.Set(link_to_crawl, 0, 0) + link_set[link_to_crawl] = true } } @@ -107,6 +116,7 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { for link := range link_set { link = trimSuffix(link, "/") + u, err := url.Parse(link) if err != nil { zap.L().Warn("WALK ExtractLinks did a bad with", @@ -116,7 +126,6 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { } } - //log.Println("EXTRACTED", links) return links } @@ -126,6 +135,7 @@ func walk_html(s3json *kv.S3JSON) { links := extract_links(s3json) zap.L().Debug("walk considering links", zap.Int("count", len(links))) + for _, link := range links { // The links come back canonicalized against the host. So, // all the fields should be present. @@ -145,6 +155,8 @@ func walk_html(s3json *kv.S3JSON) { // A set of functions applied that, one at a time, decide if a link should // be crawled. 
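+// (A link host needs at least two dot-separated pieces, e.g. "nasa.gov",
+// before the checks below can peel a registrable domain off of it.)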
+const TOO_FEW_PIECES_IN_HOST = 2 + func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { base := url.URL{ Scheme: s3json.GetString("scheme"), @@ -171,6 +183,7 @@ func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { zap.String("link", link), zap.String("base", base.String()), zap.String("resolved", resolved.String())) + return resolved.String(), nil } @@ -181,11 +194,12 @@ func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { zap.String("link", link), zap.String("base", base.String()), ) + return base.String(), nil } pieces := strings.Split(base.Host, ".") - if len(pieces) < 2 { + if len(pieces) < TOO_FEW_PIECES_IN_HOST { return "", errors.New("crawler: link host has too few pieces") } else { tld := pieces[len(pieces)-2] + "." + pieces[len(pieces)-1] @@ -202,16 +216,20 @@ func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { func trimSuffix(s, suffix string) string { if strings.HasSuffix(s, suffix) { s = s[:len(s)-len(suffix)] + return s } else { return s } } +const MAX_FAILED_ATTEMPTS = 2 + func (w *WalkWorker) Work(ctx context.Context, job *river.Job[common.WalkArgs]) error { - if job.Attempt > 2 { + if job.Attempt > MAX_FAILED_ATTEMPTS { zap.L().Warn("walking zombie; dropping", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) + return nil } @@ -225,6 +243,7 @@ func (w *WalkWorker) Work(ctx context.Context, job *river.Job[common.WalkArgs]) util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path) + err := s3json.Load() if err != nil { // Don't do anything if we can't load the S3. diff --git a/config/constants_test.go b/config/constants_test.go index 3c31e2c..de6856f 100644 --- a/config/constants_test.go +++ b/config/constants_test.go @@ -1,3 +1,4 @@ +//nolint:testpackage package config import ( diff --git a/config/domain64.go b/config/domain64.go index f5e21f2..05eb3f2 100644 --- a/config/domain64.go +++ b/config/domain64.go @@ -42,9 +42,11 @@ func primeCache() { } } +const MIN_LEN_OF_FQDN = 2 + func tldAndEscaped(fqdn string) (string, string, error) { pieces := strings.Split(fqdn, ".") - if len(pieces) < 2 { + if len(pieces) < MIN_LEN_OF_FQDN { return "", "", fmt.Errorf("fqdn is too short: %s", fqdn) } diff --git a/internal/env/env.go b/internal/env/env.go index 4cdc199..1ac37ff 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -1,3 +1,4 @@ +//nolint:godox package env import ( @@ -13,13 +14,17 @@ import ( ) var Env *env + var DEBUG_ENV = false // Constants for the attached services // These reach into the VCAP_SERVICES and are // defined in the Terraform. + const QueueDatabase = "jemison-queues-db" + const JemisonWorkDatabase = "jemison-work-db" + const SearchDatabase = "jemison-search-db" var validBucketNames = []string{ @@ -34,12 +39,13 @@ func IsValidBucketName(name string) bool { return true } } + return false } type Credentials interface { - CredentialString(string) string - CredentialInt(string) int64 + CredentialString(cred string) string + CredentialInt(cred string) int64 } type Service struct { @@ -48,29 +54,43 @@ type Service struct { Parameters map[string]interface{} `mapstructure:"parameters"` } -// FIXME: This should be string, err +// FIXME: This should be string, err. 
func (s *Service) CredentialString(key string) string { if v, ok := s.Credentials[key]; ok { - return v.(string) + cast, ok := v.(string) + if !ok { + zap.L().Error("could not cast to string") + } + + return cast } else { zap.L().Error("cannot find credential for key", zap.String("key", key)) + return fmt.Sprintf("NOVAL:%s", v) } } func (s *Service) CredentialInt(key string) int64 { if v, ok := s.Credentials[key]; ok { - return int64(v.(int)) + cast, ok := v.(int) + if !ok { + zap.L().Error("could not cast to int") + } + + return int64(cast) } else { zap.L().Error("cannot find credential for key", zap.String("key", key)) + return -1 } } type Database = Service + type Bucket = Service + type env struct { AppEnv string `mapstructure:"APPENV"` Home string `mapstructure:"HOME"` @@ -92,9 +112,12 @@ type container_env struct { } var container_envs = []string{"DOCKER", "GH_ACTIONS"} + var cf_envs = []string{"SANDBOX", "PREVIEW", "DEV", "STAGING", "PROD"} + var test_envs = []string{"LOCALHOST"} +//nolint:cyclop,funlen func InitGlobalEnv(this_service string) { Env = &env{} configName := "NO_CONFIG_NAME_SET" @@ -137,22 +160,16 @@ func InitGlobalEnv(this_service string) { zap.L().Fatal("ENV could not bind env", zap.String("err", err.Error())) } - //err := viper.ReadInConfig() err = viper.ReadConfig(config.GetYamlFileReader(configName + ".yaml")) if err != nil { log.Fatal("ENV cannot load in the config file ", viper.ConfigFileUsed()) } err = viper.Unmarshal(&Env) - if err != nil { log.Fatal("ENV can't find config files: ", err) } - if err != nil { - log.Fatal("ENV environment can't be loaded: ", err) - } - // CF puts VCAP_* in a string containing JSON. // This means we don't have 1:1 locally *yet*, but // if we unpack things right, we end up with one struct @@ -201,7 +218,6 @@ func InitGlobalEnv(this_service string) { Env.AllowedHosts = s.GetParamString("allowed_hosts") log.Println("Setting Schedule: ", Env.AllowedHosts) - } // https://stackoverflow.com/questions/3582552/what-is-the-format-for-the-postgresql-connection-string-url @@ -212,6 +228,8 @@ func (e *env) GetDatabaseUrl(name string) (string, error) { params := "" if IsContainerEnv() || IsLocalTestEnv() { params = "?sslmode=disable" + + //nolint:nosprintfhostport return fmt.Sprintf("postgresql://%s@%s:%d/%s%s", db.CredentialString("username"), db.CredentialString("host"), @@ -224,9 +242,9 @@ func (e *env) GetDatabaseUrl(name string) (string, error) { if IsCloudEnv() { return db.CredentialString("uri"), nil } - } } + return "", fmt.Errorf("ENV no db found with name %s", name) } @@ -244,6 +262,7 @@ func (e *env) GetObjectStore(name string) (Bucket, error) { return b, nil } } + return Bucket{}, fmt.Errorf("ENV no bucket with name %s", name) } @@ -253,6 +272,7 @@ func (e *env) GetUserService(name string) (Service, error) { return s, nil } } + return Service{}, fmt.Errorf("ENV no service with name %s", name) } @@ -272,21 +292,31 @@ func (s *Service) GetParamInt64(key string) int64 { for _, global_s := range Env.UserServices { if s.Name == global_s.Name { if global_param_val, ok := global_s.Parameters[key]; ok { - return int64(global_param_val.(int)) + cast, ok := global_param_val.(int) + if !ok { + zap.L().Error("could not cast int") + } + + return int64(cast) } else { log.Fatalf("ENV no int64 param found for %s", key) } } } + return -1 } func (s *Service) GetParamString(key string) string { - for _, global_s := range Env.UserServices { if s.Name == global_s.Name { if global_param_val, ok := global_s.Parameters[key]; ok { - return 
global_param_val.(string) + cast, ok := global_param_val.(string) + if !ok { + zap.L().Error("could not cast string") + } + + return cast } else { log.Fatalf("ENV no string param found for %s", key) } @@ -300,12 +330,18 @@ func (s *Service) GetParamBool(key string) bool { for _, global_s := range Env.UserServices { if s.Name == global_s.Name { if global_param_val, ok := global_s.Parameters[key]; ok { - return global_param_val.(bool) + cast, ok := global_param_val.(bool) + if !ok { + zap.L().Error("could not cast bool") + } + + return cast } else { log.Fatalf("ENV no bool param found for %s", key) } } } + return false } @@ -314,5 +350,6 @@ func (s *Service) AsJson() string { if err != nil { fmt.Println(err) } + return string(b) } diff --git a/internal/env/zap.go b/internal/env/zap.go index 64ebd88..eb98807 100644 --- a/internal/env/zap.go +++ b/internal/env/zap.go @@ -39,6 +39,7 @@ func createLogger(this_service string) *zap.Logger { level := s.GetParamString("debug_level") var zap_level zapcore.Level + switch level { case "debug": zap_level = zap.DebugLevel @@ -75,6 +76,7 @@ func createLogger(this_service string) *zap.Logger { if err != nil { log.Fatal("cannot build zap logger from config") } + return zap.Must(logger, nil) } diff --git a/internal/filtering/filter_test.go b/internal/filtering/filter_test.go index 99d41d0..43dd578 100644 --- a/internal/filtering/filter_test.go +++ b/internal/filtering/filter_test.go @@ -1,3 +1,4 @@ +//nolint:testpackage,lll package filter import ( @@ -52,6 +53,5 @@ func TestAll(t *testing.T) { } else { assert.Equal(t, nil, tt.fun(u)) } - } } diff --git a/internal/filtering/general.go b/internal/filtering/general.go index 078b28c..37f9916 100644 --- a/internal/filtering/general.go +++ b/internal/filtering/general.go @@ -34,11 +34,20 @@ var skippable_extensions = []string{ "xlsx", } +const IS_TOO_SHORT_MIN = 5 + +const EXCEEDS_LENGTH_MAX = 200 + +const TOO_MANY_REPEATS_LEN = 8 + +const TOO_MANY_REPEATS_COUNT = 50 + func exceedsLength(length int) func(*url.URL) error { return func(u *url.URL) error { if len(u.String()) > length { return fmt.Errorf("exceeds length [%d]: %s", length, u.String()) } + return nil } } @@ -48,6 +57,7 @@ func hasSlashHttp(u *url.URL) error { if m { return fmt.Errorf("http in middle of url: %s", u.Path) } + return nil } @@ -56,6 +66,7 @@ func insecureGov(u *url.URL) error { if m { return fmt.Errorf("insecure URL: %s", u.String()) } + return nil } @@ -64,6 +75,7 @@ func isTooShort(length int) func(*url.URL) error { if len(u.String()) < length { return fmt.Errorf("too short [%d]: %s", length, u.String()) } + return nil } } @@ -74,6 +86,7 @@ func hasSkippablePrefixRelative(u *url.URL) error { return fmt.Errorf("skippable prefix [%s]: %s", sp, u.Path) } } + return nil } @@ -83,15 +96,18 @@ func hasSkippableExtension(u *url.URL) error { return fmt.Errorf("skippable extension [%s]: %s", ext, u.Path) } } + return nil } func hasTooManyRepeats(repeatLength int, threshold int) func(*url.URL) error { return func(u *url.URL) error { s := u.String() + end := len(s) - repeatLength chunks := make(map[string]bool) repeats := make(map[string]int) + for ndx := 0; ndx < end; ndx++ { piece := s[ndx : ndx+repeatLength] if _, ok := chunks[piece]; ok { @@ -110,6 +126,7 @@ func hasTooManyRepeats(repeatLength int, threshold int) func(*url.URL) error { if total >= threshold { return fmt.Errorf("too many repeats [%d over %d]: %s", total, threshold, u.String()) } + return nil } } @@ -122,6 +139,7 @@ func endsWithWrongSlash(u *url.URL) error { return 
fmt.Errorf("ends with backslash: %s", u.String()) } } + return nil } @@ -139,13 +157,13 @@ func GeneralRules() []Rule { rules = append(rules, Rule{ Match: all, Msg: "max isTooShort 5", - Reject: isTooShort(5), + Reject: isTooShort(IS_TOO_SHORT_MIN), }) rules = append(rules, Rule{ Match: all, Msg: "exceedsLength 200", - Reject: exceedsLength(200), + Reject: exceedsLength(EXCEEDS_LENGTH_MAX), }) rules = append(rules, Rule{ @@ -181,7 +199,7 @@ func GeneralRules() []Rule { rules = append(rules, Rule{ Match: all, Msg: "hasTooManyRepeats", - Reject: hasTooManyRepeats(8, 50), + Reject: hasTooManyRepeats(TOO_MANY_REPEATS_LEN, TOO_MANY_REPEATS_COUNT), }) return rules diff --git a/internal/filtering/nasa.go b/internal/filtering/nasa.go index f35b019..7276af3 100644 --- a/internal/filtering/nasa.go +++ b/internal/filtering/nasa.go @@ -6,7 +6,7 @@ import ( "regexp" ) -// We compare against the host, so leave off the scheme +// We compare against the host, so leave off the scheme. var nasa string = `.*nasa.gov` func hasRightHere(u *url.URL) error { @@ -14,6 +14,7 @@ func hasRightHere(u *url.URL) error { if match { return fmt.Errorf("repeating `right here`: %s", u.String()) } + return nil } diff --git a/internal/util/array_util.go b/internal/util/array_util.go index 67143e7..db59f41 100644 --- a/internal/util/array_util.go +++ b/internal/util/array_util.go @@ -5,5 +5,6 @@ func Map[T, V any](ts []T, fn func(T) V) []V { for i, t := range ts { result[i] = fn(t) } + return result } diff --git a/internal/util/remove_stopwords.go b/internal/util/remove_stopwords.go index 780c940..76183f1 100644 --- a/internal/util/remove_stopwords.go +++ b/internal/util/remove_stopwords.go @@ -4,21 +4,23 @@ import ( _ "embed" "regexp" "slices" - "strings" ) //go:embed stopwords.txt var stopwords string + var each_stopword []string var ws_re = regexp.MustCompile(`\s+`) + var punc_re = regexp.MustCompile(`[-_\.!\?,]`) func removeStopwords(content string) string { content = ws_re.ReplaceAllString(content, " ") each := strings.Split(content, " ") new_content := make([]string, 0) + for _, e := range each { e = punc_re.ReplaceAllString(e, " ") @@ -26,6 +28,7 @@ func removeStopwords(content string) string { new_content = append(new_content, e) } } + return ws_re.ReplaceAllString(strings.Join(new_content, " "), " ") } @@ -34,6 +37,7 @@ func RemoveStopwords(content string) string { return removeStopwords(content) } else { each_stopword = strings.Split(stopwords, "\n") + return removeStopwords(content) } } diff --git a/internal/util/string_utilities.go b/internal/util/string_utilities.go index ec9d470..8d4b36c 100644 --- a/internal/util/string_utilities.go +++ b/internal/util/string_utilities.go @@ -30,6 +30,7 @@ func (mt MimeType) EnumIndex() int { func AtoZOnly(s string) string { var result strings.Builder + for i := 0; i < len(s); i++ { b := s[i] if ('a' <= b && b <= 'z') || @@ -38,6 +39,7 @@ func AtoZOnly(s string) string { result.WriteByte(b) } } + return result.String() } @@ -54,6 +56,7 @@ func CleanMimeType(mime string) string { return m } } + // The unknown mime type return "application/octet-stream" } @@ -77,6 +80,7 @@ func GetMimeType(path string) string { return mime_type } } + return m["json"] } @@ -86,18 +90,22 @@ func IsSearchableMimeType(mime string) bool { return true } } + return false } func CollapseWhitespace(s string) string { - var re = regexp.MustCompile(`\s\s+`) + re := regexp.MustCompile(`\s\s+`) + s = strings.TrimSpace(s) + return re.ReplaceAllString(s, " ") } func TrimSuffix(s, suffix string) string { if 
strings.HasSuffix(s, suffix) {
 s = s[:len(s)-len(suffix)]
+
 return s
 } else {
 return s
@@ -110,9 +118,11 @@ func CanonicalizeURL(s string) (string, error) {
 //nolint:wrapcheck
 return "", err
 }
+
 u.Host = strings.ToLower(u.Host)
 if len(u.Path) > 1 {
 u.Path = strings.TrimSuffix(u.Path, "/")
 }
+
 return u.Host + u.Path, nil
 }
diff --git a/internal/util/string_utilities_test.go b/internal/util/string_utilities_test.go
index 8156989..0cc58aa 100644
--- a/internal/util/string_utilities_test.go
+++ b/internal/util/string_utilities_test.go
@@ -1,3 +1,4 @@
+//nolint:testpackage
 package util
 
 import (
diff --git a/pkg/vcap/vcap.go b/pkg/vcap/vcap.go
index 06137ba..b580244 100644
--- a/pkg/vcap/vcap.go
+++ b/pkg/vcap/vcap.go
@@ -1,3 +1,4 @@
+//nolint:godox
 package vcap
 
 import (
@@ -21,6 +22,7 @@ func VcapServicesFromEnv(env_var string) VcapServices {
 vcs.EnvStringToJson(env_var)
 vcs.ParseBuckets()
 vcs.ParseDatabases()
+
 return vcs
 }
 
@@ -52,10 +54,11 @@ func (vcs *VcapServices) ParseBuckets() {
 AccessKeyID: b.Get("credentials.access_key_id").String(),
 SecretAccessKey: b.Get("credentials.secret_access_key").String(),
 // FIXME: Check the endpoint shape, and set it
- //URI: b.Get("credentials.uri").String(),
- //Endpoint: b.Get("credentials.endpoint").String(),
+ // URI: b.Get("credentials.uri").String(),
+ // Endpoint: b.Get("credentials.endpoint").String(),
 })
 }
+
 vcs.Buckets = buckets
 }
 
@@ -85,6 +88,7 @@ func (vcs *VcapServices) ParseDatabases() {
 Endpoint: db.Get("credentials.endpoint").String(),
 })
 }
+
 vcs.Databases = databases
 }
 
@@ -94,6 +98,7 @@ func (vcs *VcapServices) GetBucketByName(bucket_name string) *Bucket {
 return &b
 }
 }
+
 return nil
 }
 
diff --git a/pkg/vcap/vcap_test.go b/pkg/vcap/vcap_test.go
index 38cdb0d..4c5648c 100644
--- a/pkg/vcap/vcap_test.go
+++ b/pkg/vcap/vcap_test.go
@@ -1,3 +1,4 @@
+//nolint:testpackage
 package vcap
 
 import (
@@ -113,7 +114,9 @@ var test_vcap = `{
 
 func TestReadEnv(t *testing.T) {
 os.Setenv("VCAP_SERVICES", test_vcap)
+
 vcs := VcapServicesFromEnv("VCAP_SERVICES")
+
 // Expected, actual
 assert.Equal(t, 1, len(vcs.Buckets))
 assert.Equal(t, 2, len(vcs.Databases))
@@ -121,6 +124,7 @@ func TestReadEnv(t *testing.T) {
 
 func TestDatbases(t *testing.T) {
 os.Setenv("VCAP_SERVICES", test_vcap)
+
 vcs := VcapServicesFromEnv("VCAP_SERVICES")
 
 assert.Equal(t, "fac-db", vcs.Databases[0].ServiceName)
 
From 70d6b9d8af7bc918fd243c8ea1eb3c7ba3f1bb3d Mon Sep 17 00:00:00 2001
From: Matt Jadud
Date: Sat, 11 Jan 2025 10:51:33 -0500
Subject: [PATCH 29/39] Linting

This gets us to linter zero.
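
Most of the mechanical edits in this series are //nolint directives, which golangci-lint scopes by placement. An illustrative snippet, not from the tree:

//nolint:testpackage // before the package clause: suppressed file-wide
package kv

//nolint:cyclop,funlen // on its own line: covers the declaration below
func busy() {
	n := 50 //nolint:mnd // at the end of a line: covers that line only
	_ = n
}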
--- .golangci.yml | 29 +++++++- cmd/admin/main.go | 9 +-- cmd/entree/accept_logic.go | 34 ++++----- cmd/entree/entree_test.go | 8 +-- cmd/entree/ledger.go | 2 +- cmd/entree/main.go | 1 + cmd/entree/work.go | 2 +- cmd/extract/extract_test.go | 89 ----------------------- cmd/extract/html.go | 46 ++++++------ cmd/extract/main.go | 1 + cmd/extract/pdf.go | 105 ++++++++++++++-------------- cmd/extract/queues.go | 4 +- cmd/extract/work.go | 13 ++-- cmd/fetch/api.go | 10 +-- cmd/fetch/host_gateway.go | 25 +++---- cmd/fetch/main.go | 13 ++-- cmd/fetch/queues.go | 22 +++--- cmd/fetch/work.go | 72 ++++++++++++------- cmd/fetch/work_support.go | 26 +++---- cmd/migrate/migrate.go | 8 +-- cmd/pack/html.go | 2 +- cmd/pack/main.go | 3 +- cmd/pack/queues.go | 4 +- cmd/pack/work.go | 4 +- cmd/serve/handler_search.go | 30 ++++---- cmd/serve/main.go | 85 +++++++++++----------- cmd/serve/queries.go | 20 +++--- cmd/serve/query_test.go | 1 + cmd/serve/work.go | 1 + cmd/validate/main.go | 1 + cmd/validate/queues.go | 17 ++--- cmd/walk/main.go | 5 +- cmd/walk/queues.go | 4 +- cmd/walk/work.go | 71 +++++++++---------- config/constants.go | 4 +- config/constants_test.go | 2 +- config/domain64.go | 64 ++++++++--------- config/embed.go | 48 ++++++------- internal/common/api.go | 22 +++--- internal/common/backoff.go | 20 +++--- internal/common/common.go | 11 +-- internal/common/domain64.go | 1 + internal/common/types.go | 6 +- internal/env/env.go | 87 +++++++++++------------ internal/env/gin.go | 4 +- internal/env/zap.go | 22 +++--- internal/filtering/filter_test.go | 4 +- internal/filtering/general.go | 28 ++++---- internal/filtering/nasa.go | 2 +- internal/kv/interfaces.go | 5 +- internal/kv/s3.go | 42 ++++++----- internal/kv/s3json.go | 82 ++++++++-------------- internal/kv/s3json_test.go | 4 +- internal/kv/util.go | 58 +++++++-------- internal/postgres/postgres.go | 82 +++++++++++----------- internal/queueing/generic_insert.go | 13 ++-- internal/queueing/periodic_clear.go | 4 +- internal/queueing/river.go | 8 +-- internal/util/memuse.go | 4 +- internal/util/remove_stopwords.go | 28 ++++---- internal/util/string_utilities.go | 14 ++-- 61 files changed, 697 insertions(+), 739 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 47d0e3e..799901c 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,10 +1,35 @@ linters: enable-all: true + # Some things here we may want to leave disabled. + # Some we may want to enable, and then pay down as debt. + # Anything included here represented a more invasive set of changes to + # satisfy the linter than was desired during initial cleanup. disable: - - exportloopref + - canonicalheader + - contextcheck - depguard - - tagalign + - err113 + - errchkjson - exhaustruct + - exportloopref + - forbidigo + - gochecknoglobals + - gocritic + - intrange + - ireturn + - musttag + - nilerr + - noctx + - paralleltest + - perfsprint + - tagalign + - tagliatelle + - tenv + - testifylint + - unconvert + - unparam + - varnamelen + - wastedassign depguard: # Rules to apply. 
# diff --git a/cmd/admin/main.go b/cmd/admin/main.go index 452286d..5132eb7 100644 --- a/cmd/admin/main.go +++ b/cmd/admin/main.go @@ -20,7 +20,7 @@ type FetchRequestInput struct { Scheme string `json:"scheme" maxLength:"10" doc:"Resource scheme"` Host string `json:"host" maxLength:"500" doc:"Host of resource"` Path string `json:"path" maxLength:"1500" doc:"Path to resource"` - ApiKey string `json:"api-key"` + APIKey string `json:"api-key"` } // https://dev.to/kashifsoofi/rest-api-with-go-chi-and-inmemory-store-43ag @@ -30,7 +30,7 @@ func FetchRequestHandler(c *gin.Context) { return } - if fri.ApiKey == os.Getenv("API_KEY") || true { + if fri.APIKey == os.Getenv("API_KEY") || true { zap.L().Debug("fetch enqueue", zap.String("host", fri.Host), zap.String("path", fri.Path)) @@ -57,7 +57,7 @@ func EntreeRequestHandler(c *gin.Context) { return } - if fri.ApiKey == os.Getenv("API_KEY") || true { + if fri.APIKey == os.Getenv("API_KEY") || true { hallPassB := false fullB := false @@ -96,7 +96,7 @@ func PackRequestHandler(c *gin.Context) { return } - if fri.ApiKey == os.Getenv("API_KEY") || true { + if fri.APIKey == os.Getenv("API_KEY") || true { zap.L().Debug("pack enqueue", zap.String("host", fri.Host)) @@ -136,6 +136,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/entree/accept_logic.go b/cmd/entree/accept_logic.go index 83c49be..69b0986 100644 --- a/cmd/entree/accept_logic.go +++ b/cmd/entree/accept_logic.go @@ -12,9 +12,9 @@ import ( "go.uber.org/zap" ) -const SINGLE_PASS = "single" +const SinglePass = "single" -const FULL_PASS = "full" +const FullPass = "full" // The front line of questions involve whether or not // it is a single URL and if there is a hall pass. @@ -52,47 +52,47 @@ func NewEntreeCheck(kind, scheme, host, path string, hallPass bool) (*EntreeChec } func EvaluateEntree(ec *EntreeCheck) { - it_shall_pass := false + itShallPass := false if IsSingleWithPass(ec) { zap.L().Debug("is-single-with-pass", zap.String("host", ec.Host), zap.String("path", ec.Path)) - it_shall_pass = true + itShallPass = true } else if IsSingleNoPass(ec) { zap.L().Debug("is-single-no-pass", zap.String("host", ec.Host), zap.String("path", ec.Path)) - it_shall_pass = true + itShallPass = true } else if IsFullWithPass(ec) { zap.L().Debug("is-full-with-pass", zap.String("host", ec.Host), zap.String("path", ec.Path)) SetHostNextFetchToYesterday(ec) SetGuestbookFetchToYesterdayForHost(ec) - it_shall_pass = true + itShallPass = true } else if IsFullNoPass(ec) { zap.L().Debug("is-full-no-pass", zap.String("host", ec.Host), zap.String("path", ec.Path)) - it_shall_pass = true + itShallPass = true } else { zap.L().Debug("no entree evaluation criteria met", zap.String("host", ec.Host), zap.String("path", ec.Path)) - it_shall_pass = false + itShallPass = false } // FIXME: We set the fetch to yesterday, then set it to now (below)? // This feels wrong. Redundant. One of these is not needed? // Or... is it necessary with multiple workers? Probably. - if it_shall_pass { + if itShallPass { // We need to update the guestbook now, because we will end up re-walking // the page if we don't. This is true in each case. // Fetch will update a second time. 
scheme := JDB.GetScheme(ec.Scheme) - next_fetch := JDB.GetNextFetch(ec.Host) + nextFetch := JDB.GetNextFetch(ec.Host) _, err := JDB.WorkDBQueries.UpdateGuestbookNextFetch(context.Background(), work_db.UpdateGuestbookNextFetchParams{ @@ -100,7 +100,7 @@ func EvaluateEntree(ec *EntreeCheck) { Domain64: ec.Domain64, Path: ec.Path, NextFetch: pgtype.Timestamp{ - Time: next_fetch, + Time: nextFetch, Valid: true, InfinityModifier: 0, }, @@ -130,7 +130,7 @@ func EvaluateEntree(ec *EntreeCheck) { func IsSingleWithPass(ec *EntreeCheck) bool { // This just allows us to queue this onward to `fetch`. // Fetch will handle guestbook updates. - return ec.Kind == SINGLE_PASS && ec.HallPass + return ec.Kind == SinglePass && ec.HallPass } // A single URL with no pass is most likely a URL @@ -140,7 +140,7 @@ func IsSingleWithPass(ec *EntreeCheck) bool { // - Fetch the page // - Update last_fetch in guestbook func IsSingleNoPass(ec *EntreeCheck) bool { - return ec.Kind == SINGLE_PASS && !ec.HallPass && CheckIfIsInGuestbook(ec) + return ec.Kind == SinglePass && !ec.HallPass && CheckIfIsInGuestbook(ec) } func CheckIfIsInGuestbook(ec *EntreeCheck) bool { @@ -149,9 +149,9 @@ func CheckIfIsInGuestbook(ec *EntreeCheck) bool { // implementation is for simplicity. if isInGuestbook(ec) { return CheckIfAfterGuestbookNextFetch(ec) - } else { - return CheckIfAfterHostNextFetch(ec) } + + return CheckIfAfterHostNextFetch(ec) } // This is if we are re-running a site at a time that @@ -163,7 +163,7 @@ func CheckIfIsInGuestbook(ec *EntreeCheck) bool { // - Set last_fetch in guestbook // - Reset next_fetch in hosts table after completion func IsFullWithPass(ec *EntreeCheck) bool { - return ec.Kind == FULL_PASS && ec.HallPass + return ec.Kind == FullPass && ec.HallPass } // This is probably a nightly enqueue. @@ -171,7 +171,7 @@ func IsFullWithPass(ec *EntreeCheck) bool { // Possible side-effects: // - None. It runs on what is in the DBs. 
func IsFullNoPass(ec *EntreeCheck) bool { - return ec.Kind == FULL_PASS && !ec.HallPass && CheckIfAfterHostNextFetch(ec) + return ec.Kind == FullPass && !ec.HallPass && CheckIfAfterHostNextFetch(ec) } // Support functions diff --git a/cmd/entree/entree_test.go b/cmd/entree/entree_test.go index c2b3d8a..b90bb16 100644 --- a/cmd/entree/entree_test.go +++ b/cmd/entree/entree_test.go @@ -221,11 +221,11 @@ func TestSetGuestbookFetchToYesterdayForHost2(t *testing.T) { func GetQueuesDb() (*work_db.Queries, context.Context, *pgx.Conn) { ctx := context.Background() - db_string, err := env.Env.GetDatabaseUrl(env.QueueDatabase) + dbString, err := env.Env.GetDatabaseURL(env.QueueDatabase) if err != nil { zap.L().Fatal("could not get db URL for queues-db") } - conn, err := pgx.Connect(ctx, db_string) + conn, err := pgx.Connect(ctx, dbString) if err != nil { zap.L().Fatal("could not connect to queues-db") } @@ -235,11 +235,11 @@ func GetQueuesDb() (*work_db.Queries, context.Context, *pgx.Conn) { func GetWorkDB() (*work_db.Queries, context.Context, *pgx.Conn) { ctx := context.Background() - db_string, err := env.Env.GetDatabaseUrl(env.JemisonWorkDatabase) + dbString, err := env.Env.GetDatabaseURL(env.JemisonWorkDatabase) if err != nil { zap.L().Fatal("could not get db URL for work-db") } - conn, err := pgx.Connect(ctx, db_string) + conn, err := pgx.Connect(ctx, dbString) if err != nil { zap.L().Fatal("could not connect to work-db") } diff --git a/cmd/entree/ledger.go b/cmd/entree/ledger.go index 29214d1..8a74a5f 100644 --- a/cmd/entree/ledger.go +++ b/cmd/entree/ledger.go @@ -31,4 +31,4 @@ func (l *SafeLedger) Remove(entry string) { delete(l.Ledger, entry) } -var HallPassLedger *SafeLedger = NewSafeLedger() +var HallPassLedger = NewSafeLedger() diff --git a/cmd/entree/main.go b/cmd/entree/main.go index 7d7d3fb..b259ce1 100644 --- a/cmd/entree/main.go +++ b/cmd/entree/main.go @@ -70,6 +70,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/entree/work.go b/cmd/entree/work.go index 34a72b0..a2c5f4b 100644 --- a/cmd/entree/work.go +++ b/cmd/entree/work.go @@ -20,7 +20,7 @@ import ( // !fullCrawl & !pass: check // !fullCrawl & pass: fetch the page now. 
*/ -func (w *EntreeWorker) Work(ctx context.Context, job *river.Job[common.EntreeArgs]) error { +func (w *EntreeWorker) Work(_ context.Context, job *river.Job[common.EntreeArgs]) error { var kind string if job.Args.FullCrawl { kind = "full" diff --git a/cmd/extract/extract_test.go b/cmd/extract/extract_test.go index a3483a1..05a428d 100644 --- a/cmd/extract/extract_test.go +++ b/cmd/extract/extract_test.go @@ -11,95 +11,6 @@ import ( "github.com/stretchr/testify/assert" ) -// func setup(t *testing.T, sqlite_file string) *sqlite.PackTable { -// os.Remove(sqlite_file) -// pt, err := sqlite.CreatePackTable(sqlite_file) -// if err != nil { -// t.Error(err) -// } -// return pt -// } - -// // func open(t *testing.T, sqlite_file string) *sqlite.PackTable { -// // pt, err := sqlite.OpenPackTable(sqlite_file) -// // if err != nil { -// // t.Error(err) -// // } -// // return pt -// // } - -// func TestCreateTable(t *testing.T) { -// setup(t, "a.db") -// } - -// func _getLevel(h string) int64 { -// s, _ := strconv.Atoi(h[1:]) -// return int64(s) -// } - -// func Map[T, U any](seq iter.Seq[T], f func(T) U) iter.Seq[U] { -// return func(yield func(U) bool) { -// for a := range seq { -// if !yield(f(a)) { -// return -// } -// } -// } -// } - -// func TestExtractHeaders(t *testing.T) { -// pt := setup(t, "headers.db") -// path_id, err := pt.Queries.InsertPath(context.Background(), "/constitution") -// if err != nil { -// t.Error(err) -// } - -// _, err = pt.Queries.InsertTitle(context.Background(), schemas.InsertTitleParams{ -// PathID: path_id, -// Title: "The Constitution", -// }) -// if err != nil { -// t.Error(err) -// } - -// fp, err := os.Open("test-files/constitution-02.html") -// if err != nil { -// t.Error(err) -// } -// doc, err := goquery.NewDocumentFromReader(fp) -// if err != nil { -// zap.L().Fatal("cannot create new doc from raw file") -// } - -// H := _getHeaders(doc) - -// for tag, headers := range H { -// lvl := _getLevel(tag) -// for _, h := range headers { -// id, err := pt.Queries.InsertHeader(context.Background(), schemas.InsertHeaderParams{ -// PathID: path_id, -// Level: lvl, -// Header: h, -// }) -// if err != nil { -// t.Error("insert error", err) -// } -// assert.Greater(t, id, int64(0)) -// } -// } - -// search_params := schemas.NewSearch("north") -// res, _ := pt.Queries.Search(context.Background(), search_params) -// found := false -// for _, r := range res { -// if strings.Contains(r.Snippet, "north") && strings.Contains(r.Snippet, "carolina") { -// found = true -// } -// } - -// assert.True(t, found) -// } - func TestGetTitle(t *testing.T) { html := ` diff --git a/cmd/extract/html.go b/cmd/extract/html.go index 5493532..062be44 100644 --- a/cmd/extract/html.go +++ b/cmd/extract/html.go @@ -21,7 +21,7 @@ import ( // https://alexgarcia.xyz/sqlite-vec/go.html // https://www.zenrows.com/blog/goquery -func scrape_sel(sel *goquery.Selection) string { +func scrapeSel(sel *goquery.Selection) string { txt := sel.Text() repl := strings.ToLower(txt) @@ -33,9 +33,9 @@ func _getTitle(doc *goquery.Document) string { // It turns out there are title tags elsewhere in the doc. 
title := "" - doc.Find("title").Each(func(ndx int, sel *goquery.Selection) { + doc.Find("title").Each(func(_ int, sel *goquery.Selection) { if title == "" { - title = scrape_sel(sel) + title = scrapeSel(sel) } }) @@ -58,8 +58,8 @@ func _getHeaders(doc *goquery.Document) map[string][]string { } { accum := make([]string, 0) - doc.Find(tag).Each(func(ndx int, sel *goquery.Selection) { - accum = append(accum, util.CollapseWhitespace(scrape_sel(sel))) + doc.Find(tag).Each(func(_ int, sel *goquery.Selection) { + accum = append(accum, util.CollapseWhitespace(scrapeSel(sel))) }) headers[tag] = accum @@ -92,9 +92,9 @@ func _getBodyContent(doc *goquery.Document) string { "i", } { // zap.L().Debug("looking for", zap.String("elem", elem)) - doc.Find(elem).Each(func(ndx int, sel *goquery.Selection) { + doc.Find(elem).Each(func(_ int, sel *goquery.Selection) { // zap.L().Debug("element", zap.String("sel", scrape_sel(sel))) - content += scrape_sel(sel) + content += scrapeSel(sel) }) } @@ -103,26 +103,26 @@ func _getBodyContent(doc *goquery.Document) string { } // ////////////////// -// extractHtml loads the following keys into the JSON +// extractHTML loads the following keys into the JSON // // * title: string // * headers: []string (as JSON) // * body : string // //nolint:funlen -func extractHtml(obj *kv.S3JSON) { +func extractHTML(obj *kv.S3JSON) { rawFilename := uuid.NewString() // The file is not in this service... it's in the `fetch` bucket.` s3 := kv.NewS3("fetch") - raw_key := obj.Key.Copy() - raw_key.Extension = util.Raw - zap.L().Debug("looking up raw key", zap.String("raw_key", raw_key.Render())) + rawKey := obj.Key.Copy() + rawKey.Extension = util.Raw + zap.L().Debug("looking up raw key", zap.String("raw_key", rawKey.Render())) - err := s3.S3ToFile(raw_key, rawFilename) + err := s3.S3ToFile(rawKey, rawFilename) if err != nil { zap.L().Error("could not create tempfile from s3", - zap.String("raw_key", raw_key.Render()), + zap.String("raw_key", rawKey.Render()), zap.String("rawfile", rawFilename)) } @@ -142,7 +142,7 @@ func extractHtml(obj *kv.S3JSON) { if err != nil { zap.L().Error("cannot create new doc from raw file", zap.String("rawFilename", rawFilename), - zap.String("rawKey", raw_key.Render())) + zap.String("rawKey", rawKey.Render())) return } @@ -156,10 +156,10 @@ func extractHtml(obj *kv.S3JSON) { zap.Int("content length", len(content))) // Store everything - copied_key := obj.Key.Copy() - copied_key.Extension = util.JSON + copiedKey := obj.Key.Copy() + copiedKey.Extension = util.JSON // This is because we were holding an object from the "fetch" bucket. 
- new_obj := kv.NewFromBytes( + newObj := kv.NewFromBytes( ThisServiceName, obj.Key.Scheme, obj.Key.Host, @@ -167,7 +167,7 @@ func extractHtml(obj *kv.S3JSON) { obj.GetJSON()) // Load up the object - new_obj.Set("title", title) + newObj.Set("title", title) // Marshal headers to JSON jsonString, err := json.Marshal(headers) if err != nil { @@ -176,12 +176,12 @@ func extractHtml(obj *kv.S3JSON) { return } - new_obj.Set("headers", string(jsonString)) - new_obj.Set("body", content) + newObj.Set("headers", string(jsonString)) + newObj.Set("body", content) - err = new_obj.Save() + err = newObj.Save() if err != nil { - zap.L().Error("could not save object", zap.String("key", new_obj.Key.Render())) + zap.L().Error("could not save object", zap.String("key", newObj.Key.Render())) } // Enqueue next steps diff --git a/cmd/extract/main.go b/cmd/extract/main.go index 34df22c..cf05d9a 100644 --- a/cmd/extract/main.go +++ b/cmd/extract/main.go @@ -26,6 +26,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, routers) if err != nil { zap.Error(err) diff --git a/cmd/extract/pdf.go b/cmd/extract/pdf.go index ac6f126..e0002ee 100644 --- a/cmd/extract/pdf.go +++ b/cmd/extract/pdf.go @@ -18,13 +18,13 @@ import ( //nolint:funlen func extractPdf(obj *kv.S3JSON) { tempFilename := uuid.NewString() - raw_copy := obj.Key.Copy() - raw_copy.Extension = util.Raw + rawCopy := obj.Key.Copy() + rawCopy.Extension = util.Raw - err := obj.S3.S3ToFile(raw_copy, tempFilename) + err := obj.S3.S3ToFile(rawCopy, tempFilename) if err != nil { zap.L().Error("could not copy s3 object to file", - zap.String("raw_copy", raw_copy.Render()), + zap.String("raw_copy", rawCopy.Render()), zap.String("tempFilename", tempFilename)) } @@ -44,7 +44,7 @@ func extractPdf(obj *kv.S3JSON) { size := fi.Size() zap.L().Debug("tempFilename size", zap.Int64("size", size)) - if size > MAX_FILESIZE { + if size > MaxFilesize { // Give up on big files. // FIXME: we need to clean up the bucket, too, and delete PDFs there zap.L().Debug("file too large, not processing") @@ -53,61 +53,60 @@ func extractPdf(obj *kv.S3JSON) { } doc, err := poppler.Open(tempFilename) - if err != nil { zap.L().Warn("poppler failed to open pdf", zap.String("raw_filename", tempFilename), zap.String("key", obj.Key.Render())) return - } else { - // Pull the metadata out, and include in every object. 
- info := doc.Info() - - for page_no := 0; page_no < doc.GetNPages(); page_no++ { - page_number_anchor := fmt.Sprintf("#page=%d", page_no+1) - copied_key := obj.Key.Copy() - copied_key.Path = copied_key.Path + page_number_anchor - copied_key.Extension = util.JSON - - page := doc.GetPage(page_no) - // obj.Set("content", util.RemoveStopwords(page.Text())) - obj.Set("content", page.Text()) - obj.Set("path", copied_key.Path) - obj.Set("pdf_page_number", fmt.Sprintf("%d", page_no+1)) - obj.Set("title", info.Title) - obj.Set("creation-date", strconv.Itoa(info.CreationDate)) - obj.Set("modification-date", strconv.Itoa(info.ModificationDate)) - obj.Set("pdf-version", info.PdfVersion) - obj.Set("pages", strconv.Itoa(info.Pages)) - - new_obj := kv.NewFromBytes( - ThisServiceName, - obj.Key.Scheme, - obj.Key.Host, - obj.Key.Path, - obj.GetJSON(), - ) - - err = new_obj.Save() - if err != nil { - zap.L().Error("could not save object to s3", - zap.String("key", new_obj.Key.Render())) - } - - page.Close() - - // Enqueue next steps - ChQSHP <- queueing.QSHP{ - Queue: "pack", - Scheme: obj.Key.Scheme.String(), - Host: obj.Key.Host, - Path: obj.Key.Path, - } - // https://weaviate.io/blog/gomemlimit-a-game-changer-for-high-memory-applications - // https://stackoverflow.com/questions/38972003/how-to-stop-the-golang-gc-and-trigger-it-manually - runtime.GC() + } + + // Pull the metadata out, and include in every object. + info := doc.Info() + + for pageNumber := 0; pageNumber < doc.GetNPages(); pageNumber++ { + pageNumberAnchor := fmt.Sprintf("#page=%d", pageNumber+1) + copiedKey := obj.Key.Copy() + copiedKey.Path = copiedKey.Path + pageNumberAnchor + copiedKey.Extension = util.JSON + + page := doc.GetPage(pageNumber) + // obj.Set("content", util.RemoveStopwords(page.Text())) + obj.Set("content", page.Text()) + obj.Set("path", copiedKey.Path) + obj.Set("pdf_page_number", fmt.Sprintf("%d", pageNumber+1)) + obj.Set("title", info.Title) + obj.Set("creation-date", strconv.Itoa(info.CreationDate)) + obj.Set("modification-date", strconv.Itoa(info.ModificationDate)) + obj.Set("pdf-version", info.PdfVersion) + obj.Set("pages", strconv.Itoa(info.Pages)) + + newObj := kv.NewFromBytes( + ThisServiceName, + obj.Key.Scheme, + obj.Key.Host, + obj.Key.Path, + obj.GetJSON(), + ) + + err = newObj.Save() + if err != nil { + zap.L().Error("could not save object to s3", + zap.String("key", newObj.Key.Render())) + } + + page.Close() + + // Enqueue next steps + ChQSHP <- queueing.QSHP{ + Queue: "pack", + Scheme: obj.Key.Scheme.String(), + Host: obj.Key.Host, + Path: obj.Key.Path, } + // https://weaviate.io/blog/gomemlimit-a-game-changer-for-high-memory-applications + // https://stackoverflow.com/questions/38972003/how-to-stop-the-golang-gc-and-trigger-it-manually + runtime.GC() } doc.Close() diff --git a/cmd/extract/queues.go b/cmd/extract/queues.go index 0490876..4b7090a 100644 --- a/cmd/extract/queues.go +++ b/cmd/extract/queues.go @@ -38,7 +38,7 @@ func InitializeQueues() { river.AddWorker(workers, &ExtractWorker{}) // Grab the number of workers from the config. 
- extract_service, err := env.Env.GetUserService("extract") + extractService, err := env.Env.GetUserService("extract") if err != nil { zap.L().Error("could not fetch service config") log.Println(err) @@ -48,7 +48,7 @@ func InitializeQueues() { // Work client extractClient, err = river.NewClient(riverpgxv5.New(extractPool), &river.Config{ Queues: map[string]river.QueueConfig{ - "extract": {MaxWorkers: int(extract_service.GetParamInt64("workers"))}, + "extract": {MaxWorkers: int(extractService.GetParamInt64("workers"))}, }, Workers: workers, }) diff --git a/cmd/extract/work.go b/cmd/extract/work.go index 558a3e6..00f381d 100644 --- a/cmd/extract/work.go +++ b/cmd/extract/work.go @@ -12,22 +12,23 @@ import ( "go.uber.org/zap" ) -const MAX_FILESIZE = 5000000 +// FIXME: THIS MUST BECOME A SERVICE PARAMETER. +const MaxFilesize = 5000000 // FIXME: This is checking the size of the JSON, // not the size of the .raw file. func isTooLarge(obj *kv.S3JSON) bool { - return obj.Size() > MAX_FILESIZE + return obj.Size() > MaxFilesize } func extract(obj *kv.S3JSON) { - mime_type := obj.GetString("content-type") + mimeType := obj.GetString("content-type") s, _ := env.Env.GetUserService(ThisServiceName) - switch mime_type { + switch mimeType { case "text/html": if s.GetParamBool("extract_html") { - extractHtml(obj) + extractHTML(obj) } case "application/pdf": if s.GetParamBool("extract_pdf") { @@ -42,7 +43,7 @@ func extract(obj *kv.S3JSON) { } } -func (w *ExtractWorker) Work(ctx context.Context, job *river.Job[common.ExtractArgs]) error { +func (w *ExtractWorker) Work(_ context.Context, job *river.Job[common.ExtractArgs]) error { zap.L().Info("extracting", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) diff --git a/cmd/fetch/api.go b/cmd/fetch/api.go index 7f76dd2..e43e80d 100644 --- a/cmd/fetch/api.go +++ b/cmd/fetch/api.go @@ -9,13 +9,13 @@ import ( "go.uber.org/zap" ) -var FETCH_API_VERSION = "1.0.0" +var FetchAPIVersion = "1.0.0" type FetchRequestInput struct { Scheme string `json:"scheme" maxLength:"10" doc:"Resource scheme"` Host string `json:"host" maxLength:"500" doc:"Host of resource"` Path string `json:"path" maxLength:"1500" doc:"Path to resource"` - ApiKey string `json:"api-key"` + APIKey string `json:"api-key"` } // https://dev.to/kashifsoofi/rest-api-with-go-chi-and-inmemory-store-43ag @@ -25,7 +25,7 @@ func FetchRequestHandler(c *gin.Context) { return } - if fri.ApiKey == os.Getenv("API_KEY") { + if fri.APIKey == os.Getenv("API_KEY") { zap.L().Debug("api enqueue", zap.String("host", fri.Host), zap.String("path", fri.Path)) // if fetchClient == nil { @@ -52,11 +52,11 @@ func FetchRequestHandler(c *gin.Context) { } } -func SitemapRequestHandler(c *gin.Context) { +func SitemapRequestHandler(_ *gin.Context) { // pass } -func ExtendApi(r *gin.Engine) { +func ExtendAPI(r *gin.Engine) { r.PUT("/fetch", FetchRequestHandler) r.PUT("/sitemap", SitemapRequestHandler) } diff --git a/cmd/fetch/host_gateway.go b/cmd/fetch/host_gateway.go index 376d66f..c9a2e2e 100644 --- a/cmd/fetch/host_gateway.go +++ b/cmd/fetch/host_gateway.go @@ -48,15 +48,16 @@ func (hsm *HostGateway) GoodToGo(host string) bool { // If we were good to go, we updated the map, and should let things continue. // Otherwise, return false and this will be requeued. return isGoodToGo - } else /* not OK */ { - // We have not seen this host before - // Therefore, add them to the map, and they're good to go. 
- zap.L().Debug("gateway: host never seen before") + } - hsm.last[host] = time.Now() + /* not OK */ + // We have not seen this host before + // Therefore, add them to the map, and they're good to go. + zap.L().Debug("gateway: host never seen before") - return true - } + hsm.last[host] = time.Now() + + return true } func (hsm *HostGateway) HostExists(host string) bool { @@ -85,12 +86,12 @@ func (hsm *HostGateway) TimeRemaining(host string) time.Duration { if until > 0 { return until - } else { - return time.Duration(0 * time.Millisecond) } - } else { - // If someone asks for a host that is not in the map, we'll tell them - // there are 0 milliseconds until the host is ready. + return time.Duration(0 * time.Millisecond) } + + // If someone asks for a host that is not in the map, we'll tell them + // there are 0 milliseconds until the host is ready. + return time.Duration(0 * time.Millisecond) } diff --git a/cmd/fetch/main.go b/cmd/fetch/main.go index b5b2d28..3602893 100644 --- a/cmd/fetch/main.go +++ b/cmd/fetch/main.go @@ -30,9 +30,9 @@ var Workers *river.Workers var MaxFilesize int64 -const BYTES_PER_KB = 1024 +const BytesPerKb = 1024 -const KB_PER_MB = 1024 +const KbPerMb = 1024 func main() { env.InitGlobalEnv(ThisServiceName) @@ -41,7 +41,7 @@ func main() { JDB = postgres.NewJemisonDB() engine := common.InitializeAPI() - ExtendApi(engine) + ExtendAPI(engine) retryableClient := retryablehttp.NewClient() retryableClient.RetryMax = 10 @@ -55,10 +55,10 @@ func main() { // Pre-compute/lookup the sleep duration for backoff PoliteSleep = service.GetParamInt64("polite_sleep") // 1024KB * 1024B => MB - MaxFilesize = service.GetParamInt64("max_filesize_mb") * BYTES_PER_KB * KB_PER_MB + MaxFilesize = service.GetParamInt64("max_filesize_mb") * BytesPerKb * KbPerMb - logger_level := service.GetParamString("debug_level") - if logger_level != "debug" { + loggerLevel := service.GetParamString("debug_level") + if loggerLevel != "debug" { retryableClient.Logger = nil } @@ -72,6 +72,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/fetch/queues.go b/cmd/fetch/queues.go index 99e130d..517cc34 100644 --- a/cmd/fetch/queues.go +++ b/cmd/fetch/queues.go @@ -22,11 +22,11 @@ import ( // GLOBAL TO THE APP // One pool of connections for River. -const ROUNDROBIN = "round_robin" +const RoundRobin = "round_robin" -const OPD = "one_per_domain" +const OnePerDomain = "one_per_domain" -const SIMPLE = "simple" +const Simple = "simple" // The work client, doing the work of `fetch`. 
var FetchPool *pgxpool.Pool @@ -59,7 +59,7 @@ func oneQueuePerHost(workers *river.Workers, workerCount int64) { FetchQueues = make(map[string]river.QueueConfig) for _, host := range config.GetListOfHosts(env.Env.AllowedHosts) { - asciiHost := stripHostToAscii(host) + asciiHost := stripHostToASCII(host) asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) zap.L().Info("setting up queue", zap.String("queue_name", asciiQueueName)) @@ -197,22 +197,22 @@ func InitializeQueues() { queueModel := fetchService.GetParamString("queue_model") switch queueModel { - case ROUNDROBIN: - QueueingModel = ROUNDROBIN + case RoundRobin: + QueueingModel = RoundRobin roundRobinQueues(workers, workerCount) - case OPD: - QueueingModel = OPD + case OnePerDomain: + QueueingModel = OnePerDomain oneQueuePerHost(workers, workerCount) - case SIMPLE: - QueueingModel = SIMPLE + case Simple: + QueueingModel = Simple simpleQueue(workers, workerCount) default: zap.L().Warn("falling through to default simple queueing model") - QueueingModel = SIMPLE + QueueingModel = Simple simpleQueue(workers, workerCount) } diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go index 7ce4a28..658e999 100644 --- a/cmd/fetch/work.go +++ b/cmd/fetch/work.go @@ -31,15 +31,21 @@ var LastBackoffMap sync.Map var fetchCount atomic.Int64 -const SECONDS_PER_MINUTE = 60 +const SecondsPerMinute = 60 + +const MinInt32 = -2147483648 + +const MaxInt32 = 2147483647 func InfoFetchCount() { // Probably should be a config value. - ticker := time.NewTicker(SECONDS_PER_MINUTE * time.Second) + ticker := time.NewTicker(SecondsPerMinute * time.Second) recent := []int64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - last := int64(0) ndx := 0 + // Defaults to 0 value + var last int64 + for { // Wait for the ticker <-ticker.C @@ -50,7 +56,8 @@ func InfoFetchCount() { recent[ndx] = diff if last != 0 { - var total int64 = 0 + var total int64 + for _, num := range recent { total += num } @@ -65,17 +72,17 @@ func InfoFetchCount() { } } -func stripHostToAscii(host string) string { +func stripHostToASCII(host string) string { reg, _ := regexp.Compile("[^a-z]") result := reg.ReplaceAllString(strings.ToLower(host), "") return result } -const THREE_SECONDS = 3 +const ThreeSeconds = 3 -//nolint:cyclop,funlen -func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs]) error { +//nolint:cyclop,funlen,maintidx +func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs]) error { u := url.URL{ Scheme: job.Args.Scheme, Host: job.Args.Host, @@ -100,8 +107,8 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // If it is "simple" or "round_robin", we do nothing. // If it is "one_per_domain", we need to do something fancy. 
- if QueueingModel == OPD { - asciiHost := stripHostToAscii(job.Args.Host) + if QueueingModel == OnePerDomain { + asciiHost := stripHostToASCII(job.Args.Host) asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) ChQSHP <- queueing.QSHP{ Queue: asciiQueueName, @@ -130,8 +137,8 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] zap.String("host", job.Args.Host)) Gateway.GoodToGo(job.Args.Host) - if QueueingModel == OPD { - asciiHost := stripHostToAscii(job.Args.Host) + if QueueingModel == OnePerDomain { + asciiHost := stripHostToASCII(job.Args.Host) asciiQueueName := fmt.Sprintf("fetch-%s", asciiHost) ChQSHP <- queueing.QSHP{ Queue: asciiQueueName, @@ -160,9 +167,9 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // Now, A1 will come around on the queue again, and if it is time, it // will proceed. - zap.L().Debug("fetching page content", zap.String("url", host_and_path(job))) + zap.L().Debug("fetching page content", zap.String("url", hostAndPath(job))) - page_json, err := fetch_page_content(job) + pageJSON, err := fetchPageContent(job) if err != nil { // The queueing system retries should save us here; bail if we // can't get the content now. @@ -185,7 +192,7 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] // Update the guestbook lastModified := time.Now() - if v, ok := page_json["last-modified"]; ok { + if v, ok := pageJSON["last-modified"]; ok { t, err := time.Parse(time.RFC1123, v) if err != nil { zap.L().Warn("could not convert last-modified") @@ -196,16 +203,25 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] } } - cl, err := strconv.Atoi(page_json["content-length"]) + cl, err := strconv.Atoi(pageJSON["content-length"]) if err != nil { zap.L().Warn("could not convert length to int", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) } + // Make sure we stay within int32 + if cl > MaxInt32 { + cl = MaxInt32 + } + + if cl < MinInt32 { + cl = MinInt32 + } + scheme := JDB.GetScheme("https") - contentType := JDB.GetContentType(page_json["content-type"]) + contentType := JDB.GetContentType(pageJSON["content-type"]) if err != nil { zap.L().Error("could not fetch page scheme") @@ -219,16 +235,18 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] zap.String("fqdn", job.Args.Host)) } - next_fetch := JDB.GetNextFetch(job.Args.Host) + nextFetch := JDB.GetNextFetch(job.Args.Host) - guestbook_id, err := JDB.WorkDBQueries.UpdateGuestbookFetch( + guestbookID, err := JDB.WorkDBQueries.UpdateGuestbookFetch( context.Background(), work_db.UpdateGuestbookFetchParams{ - Scheme: scheme, - Domain64: d64, - Path: job.Args.Path, + Scheme: scheme, + Domain64: d64, + Path: job.Args.Path, + //nolint:gosec ContentLength: int32(cl), - ContentType: int32(contentType), + //nolint:gosec + ContentType: int32(contentType), LastModified: pgtype.Timestamp{ Valid: true, InfinityModifier: 0, @@ -237,12 +255,12 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] LastFetched: pgtype.Timestamp{ Valid: true, InfinityModifier: 0, - Time: JDB.InThePast(THREE_SECONDS * time.Second), + Time: JDB.InThePast(ThreeSeconds * time.Second), }, NextFetch: pgtype.Timestamp{ Valid: true, InfinityModifier: 0, - Time: next_fetch, + Time: nextFetch, }, }) if err != nil { @@ -252,13 +270,13 @@ func (w *FetchWorker) Work(ctx context.Context, job *river.Job[common.FetchArgs] } // Save the metadata about this page to S3 - 
page_json["guestbook_id"] = fmt.Sprintf("%d", guestbook_id) + pageJSON["guestbook_id"] = fmt.Sprintf("%d", guestbookID) cloudmap := kv.NewFromMap( ThisServiceName, util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path, - page_json, + pageJSON, ) err = cloudmap.Save() // We get an error if we can't write to S3 diff --git a/cmd/fetch/work_support.go b/cmd/fetch/work_support.go index ceb7c2b..731769a 100644 --- a/cmd/fetch/work_support.go +++ b/cmd/fetch/work_support.go @@ -1,8 +1,10 @@ +//nolint:gosec package main import ( "bufio" "crypto/sha1" + "errors" "fmt" "io" "net/url" @@ -23,7 +25,7 @@ import ( // But, it is not likely something we want to change. const CHUNKSIZE = 4 * 1024 -func host_and_path(job *river.Job[common.FetchArgs]) string { +func hostAndPath(job *river.Job[common.FetchArgs]) string { var u url.URL u.Scheme = job.Args.Scheme u.Host = job.Args.Host @@ -52,7 +54,7 @@ func chunkwiseSHA1(filename string) []byte { bytesRead += n if err != nil { - if err != io.EOF { + if !errors.Is(err, io.EOF) { zap.L().Error("chunk error reading") } @@ -72,7 +74,7 @@ func chunkwiseSHA1(filename string) []byte { return h.Sum(nil) } -func getUrlToFile(u url.URL) (string, int64, []byte, error) { +func getURLToFile(u url.URL) (string, int64, []byte, error) { getResponse, err := RetryClient.Get(u.String()) if err != nil { zap.L().Error("cannot GET content", @@ -120,10 +122,10 @@ func getUrlToFile(u url.URL) (string, int64, []byte, error) { return temporaryFilename, bytesRead, theSHA, nil } -const TOO_SHORT = 20 +const TooShort = 20 //nolint:cyclop,funlen -func fetch_page_content(job *river.Job[common.FetchArgs]) ( +func fetchPageContent(job *river.Job[common.FetchArgs]) ( map[string]string, error, ) { @@ -139,6 +141,8 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) ( return nil, err } + defer headResp.Body.Close() + // Get a clean mime type right away contentType := util.CleanMimeType(headResp.Header.Get("content-type")) log.Debug("checking HEAD MIME type", zap.String("content-type", contentType)) @@ -150,12 +154,10 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) ( } // Make sure we don't fetch things that are too big. - size_string := headResp.Header.Get("content-length") + sizeString := headResp.Header.Get("content-length") - size, err := strconv.Atoi(size_string) - if err != nil { - // Could not extract a size header... - } else { + size, err := strconv.Atoi(sizeString) + if err == nil { if int64(size) > MaxFilesize { return nil, fmt.Errorf( common.FileTooLargeToFetch.String()+ @@ -164,7 +166,7 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) ( } // Write the raw content to a file. - tempFilename, bytesRead, theSHA, err := getUrlToFile(u) + tempFilename, bytesRead, theSHA, err := getURLToFile(u) if err != nil { return nil, err } @@ -187,7 +189,7 @@ func fetch_page_content(job *river.Job[common.FetchArgs]) ( } // Don't bother in case it came in at zero length - if bytesRead < TOO_SHORT { + if bytesRead < TooShort { return nil, fmt.Errorf( common.FileTooSmallToProcess.String()+ " file is too small: %d %s%s", bytesRead, job.Args.Host, job.Args.Path) diff --git a/cmd/migrate/migrate.go b/cmd/migrate/migrate.go index 5c73c23..acf93ed 100644 --- a/cmd/migrate/migrate.go +++ b/cmd/migrate/migrate.go @@ -27,14 +27,14 @@ type location struct { } // Assumes config has been read. 
-func MigrateDB(dbUri string, loc location) { - db1_url, err := env.Env.GetDatabaseUrl(dbUri) +func MigrateDB(dbURI string, loc location) { + db1URL, err := env.Env.GetDatabaseURL(dbURI) if err != nil { zap.L().Fatal("could not get url for", - zap.String("URI", dbUri)) + zap.String("URI", dbURI)) } - u, _ := url.Parse(db1_url) + u, _ := url.Parse(db1URL) db := dbmate.New(u) db.FS = loc.FS db.MigrationsDir = []string{loc.MigrationsDir} diff --git a/cmd/pack/html.go b/cmd/pack/html.go index 42bb2ce..9978d2b 100644 --- a/cmd/pack/html.go +++ b/cmd/pack/html.go @@ -9,7 +9,7 @@ import ( "go.uber.org/zap" ) -func packHtml(s3json *kv.S3JSON) { +func packHTML(s3json *kv.S3JSON) { // We have more fields than before. d64, err := config.FQDNToDomain64(s3json.Key.Host) if err != nil { diff --git a/cmd/pack/main.go b/cmd/pack/main.go index 00c307f..18721c0 100644 --- a/cmd/pack/main.go +++ b/cmd/pack/main.go @@ -15,7 +15,7 @@ var ThisServiceName = "pack" var ChQSHP = make(chan queueing.QSHP) -var PHL *PerHostLock = nil +var PHL *PerHostLock var JDB *postgres.JemisonDB @@ -36,6 +36,7 @@ func main() { go queueing.ClearCompletedPeriodically() // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/pack/queues.go b/cmd/pack/queues.go index 1292582..8f3b1ca 100644 --- a/cmd/pack/queues.go +++ b/cmd/pack/queues.go @@ -29,7 +29,7 @@ func InitializeQueues() { river.AddWorker(workers, &PackWorker{}) // Grab the number of workers from the config. - fetch_service, err := env.Env.GetUserService("fetch") + fetchService, err := env.Env.GetUserService("fetch") if err != nil { zap.L().Error("could not fetch service config") log.Println(err) @@ -39,7 +39,7 @@ func InitializeQueues() { // Work client packClient, err = river.NewClient(riverpgxv5.New(packPool), &river.Config{ Queues: map[string]river.QueueConfig{ - "pack": {MaxWorkers: int(fetch_service.GetParamInt64("workers"))}, + "pack": {MaxWorkers: int(fetchService.GetParamInt64("workers"))}, }, Workers: workers, }) diff --git a/cmd/pack/work.go b/cmd/pack/work.go index 9b75fee..36d28b0 100644 --- a/cmd/pack/work.go +++ b/cmd/pack/work.go @@ -10,7 +10,7 @@ import ( "go.uber.org/zap" ) -func (w *PackWorker) Work(ctx context.Context, job *river.Job[common.PackArgs]) error { +func (w *PackWorker) Work(_ context.Context, job *river.Job[common.PackArgs]) error { // It comes in with the GuestbookId. That's all we need (plus the S3 object). 
s3 := kv.NewS3("extract") key := util.CreateS3Key(util.ToScheme(job.Args.Scheme), job.Args.Host, job.Args.Path, util.JSON) @@ -26,7 +26,7 @@ func (w *PackWorker) Work(ctx context.Context, job *river.Job[common.PackArgs]) contentType := s3json.GetString("content-type") switch contentType { case "text/html": - packHtml(s3json) + packHTML(s3json) case "application/pdf": packPdf(s3json) } diff --git a/cmd/serve/handler_search.go b/cmd/serve/handler_search.go index b75da13..5604ecd 100644 --- a/cmd/serve/handler_search.go +++ b/cmd/serve/handler_search.go @@ -55,12 +55,12 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { zap.Int64("end", to64(sri.Domain64End))) // Don't only use the stemmed words - existing_terms := strings.Split(sri.Terms, " ") - zap.L().Debug("EXISTING TERMS", zap.Strings("terms", existing_terms)) + existingTerms := strings.Split(sri.Terms, " ") + zap.L().Debug("EXISTING TERMS", zap.Strings("terms", existingTerms)) query := NewQuery() - for _, et := range existing_terms { + for _, et := range existingTerms { et = strings.TrimSpace(et) stemmed, err := snowball.Stem(et, "english", true) zap.L().Debug("stemmed result", zap.String("et", et), zap.String("stemmed", stemmed)) @@ -72,16 +72,16 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { query.AddToQuery(Or(et, stemmed+_stemmed)) } - improved_terms_string := query.ToString() + improvedTermsString := query.ToString() zap.L().Debug("search string", zap.String("original", sri.Terms), zap.String("Q", fmt.Sprintln(query)), - zap.String("improved", improved_terms_string)) + zap.String("improved", improvedTermsString)) res, err := JDB.SearchDBQueries.SearchContent(context.Background(), search_db.SearchContentParams{ - Query: improved_terms_string, + Query: improvedTermsString, D64Start: to64(sri.Domain64Start), D64End: to64(sri.Domain64End), }) @@ -118,7 +118,7 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { } cleaned = append(cleaned, SearchResult{ - Terms: improved_terms_string, + Terms: improvedTermsString, PageTitle: title, PathString: path, Snippet: string(r.Snippet), @@ -127,6 +127,7 @@ func runQuery(sri SearchRequestInput) ([]SearchResult, time.Duration, error) { }) } + //nolint:wrapcheck return cleaned, duration, err } @@ -138,7 +139,6 @@ func SearchHandler(c *gin.Context) { } rows, duration, err := runQuery(sri) - if err != nil { c.IndentedJSON(http.StatusOK, gin.H{ "result": "err", @@ -147,14 +147,12 @@ func SearchHandler(c *gin.Context) { "results": nil, }) - return - } else { - c.IndentedJSON(http.StatusOK, gin.H{ - "result": "ok", - "elapsed": duration, - "results": rows, - }) - return } + + c.IndentedJSON(http.StatusOK, gin.H{ + "result": "ok", + "elapsed": duration, + "results": rows, + }) } diff --git a/cmd/serve/main.go b/cmd/serve/main.go index a9fb3c8..9460854 100644 --- a/cmd/serve/main.go +++ b/cmd/serve/main.go @@ -66,20 +66,20 @@ func main() { go queueing.Enqueue(ChQSHP) s, _ := env.Env.GetUserService(ThisServiceName) - template_files_path := s.GetParamString("template_files_path") - static_files_path := s.GetParamString("static_files_path") + templateFilesPath := s.GetParamString("template_files_path") + staticFilesPath := s.GetParamString("static_files_path") - external_host := s.GetParamString("external_host") - external_port := s.GetParamInt64("external_port") + externalHost := s.GetParamString("external_host") + externalPort := s.GetParamInt64("external_port") JDB = postgres.NewJemisonDB() 
log.Println("environment initialized") zap.L().Info("serve environment", - zap.String("template_files_path", template_files_path), - zap.String("external_host", external_host), - zap.Int64("external_port", external_port), + zap.String("template_files_path", templateFilesPath), + zap.String("external_host", externalHost), + zap.Int64("external_port", externalPort), ) ///////////////////// @@ -91,12 +91,12 @@ func main() { // engine.GET("/search", func(c *gin.Context) { // c.Redirect(http.StatusMovedPermanently, "/search/"+start) // }) - engine.StaticFS("/static", gin.Dir(static_files_path, true)) + engine.StaticFS("/static", gin.Dir(staticFilesPath, true)) // engine.GET("/search/:host", ServeHost) - engine.LoadHTMLGlob(template_files_path + "/*") + engine.LoadHTMLGlob(templateFilesPath + "/*") - base_params := gin.H{ + baseParams := gin.H{ "scheme": "http", "search_host": "localhost", "search_port": "10000", @@ -104,17 +104,17 @@ func main() { engine.GET("/:tld", func(c *gin.Context) { tld := config.GetTLD(c.Param("tld")) - d64_start, _ := strconv.ParseInt(fmt.Sprintf("%02x00000000000000", tld), 16, 64) - d64_end, _ := strconv.ParseInt(fmt.Sprintf("%02xFFFFFFFFFFFF00", tld), 16, 64) - base_params["tld"] = c.Param("tld") - delete(base_params, "domain") - delete(base_params, "subdomain") - base_params["fqdn"] = c.Param("tld") - base_params["d64_start"] = d64_start - base_params["d64_end"] = d64_end - base_params = addMetadata(base_params) - - c.HTML(http.StatusOK, "index.tmpl", base_params) + d64Start, _ := strconv.ParseInt(fmt.Sprintf("%02x00000000000000", tld), 16, 64) + d64End, _ := strconv.ParseInt(fmt.Sprintf("%02xFFFFFFFFFFFF00", tld), 16, 64) + baseParams["tld"] = c.Param("tld") + delete(baseParams, "domain") + delete(baseParams, "subdomain") + baseParams["fqdn"] = c.Param("tld") + baseParams["d64_start"] = d64Start + baseParams["d64_end"] = d64End + baseParams = addMetadata(baseParams) + + c.HTML(http.StatusOK, "index.tmpl", baseParams) }) engine.GET("/:tld/:domain", func(c *gin.Context) { @@ -123,18 +123,18 @@ func main() { start := config.RDomainToDomain64(fmt.Sprintf("%s.%s", tld, domain)) zap.L().Debug("rdomain", zap.String("start", start)) - d64_start, _ := strconv.ParseInt(fmt.Sprintf("%s00000000", start), 16, 64) - d64_end, _ := strconv.ParseInt(fmt.Sprintf("%sFFFFFF00", start), 16, 64) + d64Start, _ := strconv.ParseInt(fmt.Sprintf("%s00000000", start), 16, 64) + d64End, _ := strconv.ParseInt(fmt.Sprintf("%sFFFFFF00", start), 16, 64) - base_params["tld"] = tld - base_params["domain"] = domain - delete(base_params, "subdomain") - base_params["fqdn"] = fmt.Sprintf("%s.%s", domain, tld) - base_params["d64_start"] = d64_start - base_params["d64_end"] = d64_end - base_params = addMetadata(base_params) + baseParams["tld"] = tld + baseParams["domain"] = domain + delete(baseParams, "subdomain") + baseParams["fqdn"] = fmt.Sprintf("%s.%s", domain, tld) + baseParams["d64_start"] = d64Start + baseParams["d64_end"] = d64End + baseParams = addMetadata(baseParams) - c.HTML(http.StatusOK, "index.tmpl", base_params) + c.HTML(http.StatusOK, "index.tmpl", baseParams) }) engine.GET("/:tld/:domain/:subdomain", func(c *gin.Context) { @@ -143,17 +143,17 @@ func main() { subdomain := c.Param("subdomain") fqdn := fmt.Sprintf("%s.%s.%s", subdomain, domain, tld) start, _ := config.FQDNToDomain64(fqdn) - d64_start := start - d64_end := start + 1 - - base_params["tld"] = tld - base_params["domain"] = domain - base_params["subdomain"] = subdomain - base_params["fqdn"] = fqdn - base_params["d64_start"] = 
d64_start - base_params["d64_end"] = d64_end - base_params = addMetadata(base_params) - c.HTML(http.StatusOK, "index.tmpl", base_params) + d64Start := start + d64End := start + 1 + + baseParams["tld"] = tld + baseParams["domain"] = domain + baseParams["subdomain"] = subdomain + baseParams["fqdn"] = fqdn + baseParams["d64_start"] = d64Start + baseParams["d64_end"] = d64End + baseParams = addMetadata(baseParams) + c.HTML(http.StatusOK, "index.tmpl", baseParams) }) v1 := engine.Group("/api") @@ -165,6 +165,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. + //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/serve/queries.go b/cmd/serve/queries.go index 8565fec..248465f 100644 --- a/cmd/serve/queries.go +++ b/cmd/serve/queries.go @@ -17,29 +17,29 @@ func (qs Qs) String() string { } type OrQ struct { - Lhs Q - Rhs Q + LHS Q + RHS Q } func (orq OrQ) String() string { - return "(" + orq.Lhs.String() + _or + orq.Rhs.String() + ")" + return "(" + orq.LHS.String() + _or + orq.RHS.String() + ")" } func Or(a, b string) Q { - return OrQ{Lhs: Qs{Str: a}, Rhs: Qs{Str: b}} + return OrQ{LHS: Qs{Str: a}, RHS: Qs{Str: b}} } type AndQ struct { - Lhs Q - Rhs Q + LHS Q + RHS Q } func And(a, b string) Q { - return AndQ{Lhs: Qs{Str: a}, Rhs: Qs{Str: b}} + return AndQ{LHS: Qs{Str: a}, RHS: Qs{Str: b}} } func (andq AndQ) String() string { - return andq.Lhs.String() + _and + andq.Rhs.String() + return andq.LHS.String() + _and + andq.RHS.String() } type Query struct { @@ -53,8 +53,8 @@ func NewQuery() *Query { return &q } -func (q *Query) AddToQuery(new_q Q) { - q.Queries = append(q.Queries, new_q) +func (q *Query) AddToQuery(newQ Q) { + q.Queries = append(q.Queries, newQ) } func (q *Query) ToString() string { diff --git a/cmd/serve/query_test.go b/cmd/serve/query_test.go index 860b84f..03749c7 100644 --- a/cmd/serve/query_test.go +++ b/cmd/serve/query_test.go @@ -1,3 +1,4 @@ +//nolint:paralleltest package main import ( diff --git a/cmd/serve/work.go b/cmd/serve/work.go index 1610392..079db14 100644 --- a/cmd/serve/work.go +++ b/cmd/serve/work.go @@ -7,6 +7,7 @@ import ( "github.com/riverqueue/river" ) +//nolint:revive func (w *ServeWorker) Work(ctx context.Context, job *river.Job[common.ServeArgs]) error { return nil } diff --git a/cmd/validate/main.go b/cmd/validate/main.go index 70b9e2c..43d2872 100644 --- a/cmd/validate/main.go +++ b/cmd/validate/main.go @@ -28,6 +28,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. 
+ //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.Error(err) diff --git a/cmd/validate/queues.go b/cmd/validate/queues.go index 827f426..82b61a1 100644 --- a/cmd/validate/queues.go +++ b/cmd/validate/queues.go @@ -20,7 +20,7 @@ type FetchWorker struct { } //nolint:lll -func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name string, workerStruct T) *river.Client[pgx.Tx] { +func initX[T river.Worker[U], U river.JobArgs](serviceName string, queueName string, workerStruct T) *river.Client[pgx.Tx] { queueing.InitializeRiverQueues() ctx, pool, workers := common.CommonQueueInit() @@ -29,10 +29,10 @@ func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name s river.AddWorker(workers, workerStruct) // Grab the number of workers from the config. - theService, err := env.Env.GetUserService(service_name) + theService, err := env.Env.GetUserService(serviceName) if err != nil { zap.L().Error("could not fetch service config", - zap.String("service_name", service_name)) + zap.String("service_name", serviceName)) log.Println(err) os.Exit(1) } @@ -40,14 +40,14 @@ func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name s // Work client theClient, err := river.NewClient(riverpgxv5.New(pool), &river.Config{ Queues: map[string]river.QueueConfig{ - queue_name: {MaxWorkers: int(theService.GetParamInt64("workers"))}, + queueName: {MaxWorkers: int(theService.GetParamInt64("workers"))}, }, Workers: workers, }) if err != nil { zap.L().Error("could not establish worker pool", - zap.String("service_name", service_name), - zap.String("queue_name", queue_name), + zap.String("service_name", serviceName), + zap.String("queue_name", queueName), zap.String("error", fmt.Sprintln(err))) log.Println(err) os.Exit(1) @@ -56,7 +56,7 @@ func initX[T river.Worker[U], U river.JobArgs](service_name string, queue_name s // Start the work clients if err := theClient.Start(ctx); err != nil { zap.L().Error("workers are not the means of production. exiting.", - zap.String("queue_name", queue_name)) + zap.String("queue_name", queueName)) os.Exit(1) } @@ -67,7 +67,8 @@ type ValidateFetchWorker struct { river.WorkerDefaults[common.ValidateFetchArgs] } -func (w ValidateFetchWorker) Work(ctx context.Context, job *river.Job[common.ValidateFetchArgs]) error { +//nolint:revive +func (w ValidateFetchWorker) Work(_ context.Context, job *river.Job[common.ValidateFetchArgs]) error { zap.L().Info("VALIDATE IS RUNNING AND DOING NOTHING") return nil diff --git a/cmd/walk/main.go b/cmd/walk/main.go index d88565a..203eb83 100644 --- a/cmd/walk/main.go +++ b/cmd/walk/main.go @@ -13,7 +13,7 @@ import ( "go.uber.org/zap" ) -var expirable_cache expirable.Cache[string, int] +var expirableCache expirable.Cache[string, int] var RecentlyVisitedCache *cache.Cache @@ -33,7 +33,7 @@ func main() { engine := common.InitializeAPI() ttl := service.GetParamInt64("cache-ttl") - expirable_cache = expirable.NewCache[string, int]().WithTTL(time.Duration(ttl) * time.Second) + expirableCache = expirable.NewCache[string, int]().WithTTL(time.Duration(ttl) * time.Second) RecentlyVisitedCache = cache.New( time.Duration(service.GetParamInt64("polite_cache_default_expiration"))*time.Second, @@ -46,6 +46,7 @@ func main() { zap.L().Info("listening to the music of the spheres", zap.String("port", env.Env.Port)) // Local and Cloud should both get this from the environment. 
+ //nolint:gosec err := http.ListenAndServe(":"+env.Env.Port, engine) if err != nil { zap.L().Fatal("failed to start http server in serve") diff --git a/cmd/walk/queues.go b/cmd/walk/queues.go index 2457666..3051936 100644 --- a/cmd/walk/queues.go +++ b/cmd/walk/queues.go @@ -33,7 +33,7 @@ func InitializeQueues() { river.AddWorker(workers, &WalkWorker{}) // Grab the number of workers from the config. - walk_service, err := env.Env.GetUserService("walk") + walkService, err := env.Env.GetUserService("walk") if err != nil { zap.L().Error("could not fetch service config") log.Println(err) @@ -43,7 +43,7 @@ func InitializeQueues() { // Work client walkClient, err = river.NewClient(riverpgxv5.New(dbPool), &river.Config{ Queues: map[string]river.QueueConfig{ - "walk": {MaxWorkers: int(walk_service.GetParamInt64("workers"))}, + "walk": {MaxWorkers: int(walkService.GetParamInt64("workers"))}, }, Workers: workers, }) diff --git a/cmd/walk/work.go b/cmd/walk/work.go index 5f6a661..a3d812d 100644 --- a/cmd/walk/work.go +++ b/cmd/walk/work.go @@ -21,26 +21,26 @@ import ( ) // ////////////////////////////////////// -// go_for_a_walk +// goForAWalk // //nolint:wsl -func go_for_a_walk(s3json *kv.S3JSON) { - cleaned_mime_type := util.CleanMimeType(s3json.GetString("content-type")) - switch cleaned_mime_type { +func goForAWalk(s3json *kv.S3JSON) { + cleanedMIMEType := util.CleanMimeType(s3json.GetString("content-type")) + switch cleanedMIMEType { case "text/html": - walk_html(s3json) + walkHTML(s3json) case "application/pdf": // log.Println("PDFs do not walk") } } // ////////////////////////////////////// -// extract_links +// extractLinks // //nolint:cyclop,funlen -func extract_links(s3json *kv.S3JSON) []*url.URL { +func extractLinks(s3json *kv.S3JSON) []*url.URL { // Return a unique set - link_set := make(map[string]bool) + linkSet := make(map[string]bool) // Remove all trailing slashes. links := make([]*url.URL, 0) @@ -89,32 +89,33 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { // have no links. // zap.L().Debug("doc", zap.String("all", fmt.Sprintln(doc.Text()))) - doc.Find("a[href]").Each(func(ndx int, sel *goquery.Selection) { + doc.Find("a[href]").Each(func(_ int, sel *goquery.Selection) { link, exists := sel.Attr("href") //nolint:nestif if exists { - link_to_crawl, err := is_crawlable(s3json, link) + linkToCrawl, err := isCrawlable(s3json, link) if err != nil { zap.L().Debug("error checking crawlability", zap.String("url", link), zap.String("error", err.Error())) } else { - if _, ok := expirable_cache.Get(link_to_crawl); ok { + //nolint:revive + if _, ok := expirableCache.Get(linkToCrawl); ok { // PASS ON LOGGING IF IT IS A CACHE HIT } else { // CRAWL BOTH HTTPS AND HTTP? 
- if strings.HasPrefix(link_to_crawl, "http") { - zap.L().Debug("link to crawl", zap.String("url", link_to_crawl)) - expirable_cache.Set(link_to_crawl, 0, 0) + if strings.HasPrefix(linkToCrawl, "http") { + zap.L().Debug("link to crawl", zap.String("url", linkToCrawl)) + expirableCache.Set(linkToCrawl, 0, 0) - link_set[link_to_crawl] = true + linkSet[linkToCrawl] = true } } } } }) - for link := range link_set { + for link := range linkSet { link = trimSuffix(link, "/") u, err := url.Parse(link) @@ -130,9 +131,9 @@ func extract_links(s3json *kv.S3JSON) []*url.URL { } // ////////////////////////////////////// -// walk_html -func walk_html(s3json *kv.S3JSON) { - links := extract_links(s3json) +// walkHTML +func walkHTML(s3json *kv.S3JSON) { + links := extractLinks(s3json) zap.L().Debug("walk considering links", zap.Int("count", len(links))) @@ -155,9 +156,9 @@ func walk_html(s3json *kv.S3JSON) { // A set of functions applied that, one at a time, decide if a link should // be crawled. -const TOO_FEW_PIECES_IN_HOST = 2 +const TooFewPiecesInHost = 2 -func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { +func isCrawlable(s3json *kv.S3JSON, link string) (string, error) { base := url.URL{ Scheme: s3json.GetString("scheme"), Host: s3json.GetString("host"), @@ -199,15 +200,15 @@ func is_crawlable(s3json *kv.S3JSON, link string) (string, error) { } pieces := strings.Split(base.Host, ".") - if len(pieces) < TOO_FEW_PIECES_IN_HOST { + if len(pieces) < TooFewPiecesInHost { return "", errors.New("crawler: link host has too few pieces") - } else { - tld := pieces[len(pieces)-2] + "." + pieces[len(pieces)-1] + } - // Does the link contain our TLD? - if !strings.Contains(lu.Host, tld) { - return "", errors.New("crawler: link does not contain the TLD") - } + tld := pieces[len(pieces)-2] + "." + pieces[len(pieces)-1] + + // Does the link contain our TLD? + if !strings.Contains(lu.Host, tld) { + return "", errors.New("crawler: link does not contain the TLD") } return "", fmt.Errorf("could not decide: %s", link) @@ -217,16 +218,16 @@ func trimSuffix(s, suffix string) string { if strings.HasSuffix(s, suffix) { s = s[:len(s)-len(suffix)] - return s - } else { return s } + + return s } -const MAX_FAILED_ATTEMPTS = 2 +const MaxFailedAttempts = 2 -func (w *WalkWorker) Work(ctx context.Context, job *river.Job[common.WalkArgs]) error { - if job.Attempt > MAX_FAILED_ATTEMPTS { +func (w *WalkWorker) Work(_ context.Context, job *river.Job[common.WalkArgs]) error { + if job.Attempt > MaxFailedAttempts { zap.L().Warn("walking zombie; dropping", zap.String("host", job.Args.Host), zap.String("path", job.Args.Path)) @@ -254,10 +255,10 @@ func (w *WalkWorker) Work(ctx context.Context, job *river.Job[common.WalkArgs]) // If we're here, we already fetched the content. // So, add ourselves to the cache. Don't re-crawl ourselves // FIXME: figure out if the scheme ends up in the JSON - expirable_cache.Set(s3json.Key.Render(), 0, 0) + expirableCache.Set(s3json.Key.Render(), 0, 0) zap.L().Debug("starting to work walk on", zap.String("url", s3json.URL().String())) - go_for_a_walk(s3json) + goForAWalk(s3json) zap.L().Debug("walk done", zap.String("key", s3json.Key.Render())) diff --git a/config/constants.go b/config/constants.go index a4cdccb..fb0088e 100644 --- a/config/constants.go +++ b/config/constants.go @@ -58,8 +58,8 @@ func GetTLD(tld string) int { func IntToTld(i int) string { primeConstants() - search_string := "ConstToTld." 
+ fmt.Sprintf("%x", i)
-	v := gjson.GetBytes(cachedConstants, search_string).String()
+	searchString := "ConstToTld." + fmt.Sprintf("%x", i)
+	v := gjson.GetBytes(cachedConstants, searchString).String()
 
 	return v
 }
diff --git a/config/constants_test.go b/config/constants_test.go
index de6856f..29dfa20 100644
--- a/config/constants_test.go
+++ b/config/constants_test.go
@@ -1,4 +1,4 @@
-//nolint:testpackage
+//nolint:testpackage,paralleltest
 package config
 
 import (
diff --git a/config/domain64.go b/config/domain64.go
index 05eb3f2..92a5644 100644
--- a/config/domain64.go
+++ b/config/domain64.go
@@ -32,29 +32,29 @@ Assume over the life of a service we'll hit this file a whole
 bunch of times. And, it never changes during a single deploy, so... :shrug:.
 */
-var cached_file []byte
+var cachedFile []byte
 
 func primeCache() {
 	// Cache this
-	if cached_file == nil {
+	if cachedFile == nil {
 		bytes, _ := Domain64FS.ReadFile("domain64/domain64.json")
-		cached_file = bytes
+		cachedFile = bytes
 	}
 }
 
-const MIN_LEN_OF_FQDN = 2
+const MinLenOfFQDN = 2
 
 func tldAndEscaped(fqdn string) (string, string, error) {
 	pieces := strings.Split(fqdn, ".")
-	if len(pieces) < MIN_LEN_OF_FQDN {
+	if len(pieces) < MinLenOfFQDN {
 		return "", "", fmt.Errorf("fqdn is too short: %s", fqdn)
 	}
 
 	tld := pieces[len(pieces)-1]
 	// Escape the FQDN dots so it can be used with GJSON
-	fqdn_as_json_key := strings.Replace(fqdn, ".", `\.`, -1)
+	fqdnAsJSONKey := strings.Replace(fqdn, ".", `\.`, -1)
 
-	return tld, fqdn_as_json_key, nil
+	return tld, fqdnAsJSONKey, nil
 }
 
 func FQDNToDomain64(fqdn string) (int64, error) {
@@ -65,7 +65,7 @@ func FQDNToDomain64(fqdn string) (int64, error) {
 		return 0, err
 	}
 
-	hex := gjson.GetBytes(cached_file, tld+".FQDNToDomain64."+escaped).String()
+	hex := gjson.GetBytes(cachedFile, tld+".FQDNToDomain64."+escaped).String()
 
 	value, err := strconv.ParseInt(hex, 16, 64)
 	if err != nil {
@@ -73,7 +73,7 @@ func FQDNToDomain64(fqdn string) (int64, error) {
 		return 0, err
 	}
 
-	return int64(value), nil
+	return value, nil
 }
 
 func Domain64ToFQDN(domain64 int64) (string, error) {
@@ -82,7 +82,7 @@ func Domain64ToFQDN(domain64 int64) (string, error) {
 	h := fmt.Sprintf("%016X", domain64)
 	v, _ := strconv.ParseInt(h[0:2], 16, 32)
 	tld := IntToTld(int(v))
-	fqdn := gjson.GetBytes(cached_file, tld+".Domain64ToFQDN."+h).String()
+	fqdn := gjson.GetBytes(cachedFile, tld+".Domain64ToFQDN."+h).String()
 	// zap.L().Debug("d64tofqdn",
 	// 	zap.String("h", h), zap.Int64("v", v), zap.String("tld", tld), zap.String("fqdn", fqdn))
 	// log.Println("h", h, "v", v, "tld", tld, "fqdn", fqdn)
@@ -93,7 +93,7 @@ func RDomainToDomain64(rdomain string) string {
 	primeCache()
 
 	tld := strings.Split(rdomain, ".")[0]
-	hex := gjson.GetBytes(cached_file, tld+".RDomainToDomain64."+strings.Replace(rdomain, ".", `\.`, -1)).String()
+	hex := gjson.GetBytes(cachedFile, tld+".RDomainToDomain64."+strings.Replace(rdomain, ".", `\.`, -1)).String()
 
 	return hex
 }
@@ -101,11 +101,11 @@ func GetAllFQDNToDomain64() map[string]int64 {
 	primeCache()
 
-	tlds := gjson.GetBytes(cached_file, "TLDs").Array()
+	tlds := gjson.GetBytes(cachedFile, "TLDs").Array()
 	all := make(map[string]int64)
 
 	for _, tld := range tlds {
-		m := gjson.GetBytes(cached_file, tld.String()+".FQDNToDomain64").Map()
+		m := gjson.GetBytes(cachedFile, tld.String()+".FQDNToDomain64").Map()
 		for fq, d64 := range m {
 			dec, err := HexToDec64(d64.String())
 			if err != nil {
@@ -139,28 +139,28 @@ func GetSchedule(fqdn string) Schedule {
 	primeCache()
 
 	tld, escaped, err :=
tldAndEscaped(fqdn) - hex := gjson.GetBytes(cached_file, tld+".FQDNToDomain64."+escaped).String() - schedule := gjson.GetBytes(cached_file, tld+".Schedule."+hex).String() + hex := gjson.GetBytes(cachedFile, tld+".FQDNToDomain64."+escaped).String() + schedule := gjson.GetBytes(cachedFile, tld+".Schedule."+hex).String() if err != nil { return Default - } else { - switch schedule { - case "daily": - return Daily - case "weekly": - return Weekly - case "biweekly": - return BiWeekly - case "monthly": - return Monthly - case "Quarterly": - return Quarterly - case "BiAnnually": - return BiAnnually - case "Annually": - return Annually - } + } + + switch schedule { + case "daily": + return Daily + case "weekly": + return Weekly + case "biweekly": + return BiWeekly + case "monthly": + return Monthly + case "Quarterly": + return Quarterly + case "BiAnnually": + return BiAnnually + case "Annually": + return Annually } return Default diff --git a/config/embed.go b/config/embed.go index b7e0213..d524aed 100644 --- a/config/embed.go +++ b/config/embed.go @@ -26,33 +26,33 @@ func ReadConfigJsonnet(sonnetFilename string) string { return json } -func ReadJsonConfig(jsonFilename string) string { - json_bytes, err := ConfigFs.ReadFile(jsonFilename) +func ReadJSONConfig(jsonFilename string) string { + jsonBytes, err := ConfigFs.ReadFile(jsonFilename) if err != nil { zap.L().Fatal(err.Error()) } - return string(json_bytes) + return string(jsonBytes) } func GetYamlFileReader(yamlFilename string) *bytes.Reader { - yaml_bytes, err := ConfigFs.ReadFile(yamlFilename) + yamlBytes, err := ConfigFs.ReadFile(yamlFilename) if err != nil { zap.L().Fatal(err.Error()) } - return bytes.NewReader(yaml_bytes) + return bytes.NewReader(yamlBytes) } -func GetListOfHosts(allowed_hosts string) []string { - zap.L().Debug("reading in hosts", zap.String("allowed_hosts", allowed_hosts)) +func GetListOfHosts(allowedHosts string) []string { + zap.L().Debug("reading in hosts", zap.String("allowed_hosts", allowedHosts)) - cfg := ReadJsonConfig("allowed_hosts.yaml") + cfg := ReadJSONConfig("allowed_hosts.yaml") // The variable `allowed_hosts` will be the key into the doc that has // a list of pairs. Each pair is a range of values, which tells us how // to filter the FQDN/D64 values. 
-	ranges := gjson.Get(cfg, allowed_hosts).Array()
+	ranges := gjson.Get(cfg, allowedHosts).Array()
 	hosts := make([]string, 0)
 	set := make(map[string]bool)
 
@@ -79,7 +79,7 @@
 }
 
 func GetHostBackend(host, schedule string) string {
-	cfg := ReadJsonConfig(schedule)
+	cfg := ReadJSONConfig(schedule)
 	backend := "postgres"
 
 	for _, section := range gjson.Parse(cfg).Get("@keys").Array() {
@@ -96,36 +96,36 @@
 	return backend
 }
 
-const HOURS_PER_DAY = 24
+const HoursPerDay = 24
 
-const DAYS_PER_WEEK = 7
+const DaysPerWeek = 7
 
-const DAYS_PER_BIWEEK = 14
+const DaysPerBiWeek = 14
 
-const DAYS_PER_MONTH = 30
+const DaysPerMonth = 30
 
-const DAYS_PER_QUARTER = 3 * 30
+const DaysPerQuarter = 3 * 30
 
-const DAYS_PER_BIANNUM = 6 * 30
+const DaysPerBiAnnum = 6 * 30
 
-const DAYS_PER_ANNUM = 12 * 30
+const DaysPerAnnum = 12 * 30
 
 func SectionToTimestamp(section string, startTime time.Time) time.Time {
 	switch section {
 	case "daily":
-		return startTime.Add(HOURS_PER_DAY * time.Hour)
+		return startTime.Add(HoursPerDay * time.Hour)
 	case "weekly":
-		return startTime.Add(DAYS_PER_WEEK * HOURS_PER_DAY * time.Hour)
+		return startTime.Add(DaysPerWeek * HoursPerDay * time.Hour)
 	case "bi-weekly":
-		return startTime.Add(DAYS_PER_BIWEEK * HOURS_PER_DAY * time.Hour)
+		return startTime.Add(DaysPerBiWeek * HoursPerDay * time.Hour)
 	case "monthly":
-		return startTime.Add(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour)
+		return startTime.Add(DaysPerMonth * HoursPerDay * time.Hour)
 	case "quarterly":
-		return startTime.Add(DAYS_PER_QUARTER * HOURS_PER_DAY * time.Hour)
+		return startTime.Add(DaysPerQuarter * HoursPerDay * time.Hour)
 	case "bi-annually":
-		return startTime.Add(DAYS_PER_BIANNUM * HOURS_PER_DAY * time.Hour)
+		return startTime.Add(DaysPerBiAnnum * HoursPerDay * time.Hour)
 	default:
 		// We will default to `monthly` to be safe
-		return startTime.Add(time.Duration(DAYS_PER_MONTH*HOURS_PER_DAY) * time.Hour)
+		return startTime.Add(time.Duration(DaysPerMonth*HoursPerDay) * time.Hour)
 	}
 }
diff --git a/internal/common/api.go b/internal/common/api.go
index 16e9619..004f12a 100644
--- a/internal/common/api.go
+++ b/internal/common/api.go
@@ -45,7 +45,7 @@ type AllStats struct {
 	services sync.Map
 }
 
-var all_the_stats *AllStats
+var allTheStats *AllStats
 
 type HandlerFunType = func(ctx context.Context, input *StatsInput) (*StatsResponseBody, error)
 
@@ -53,9 +53,9 @@ type StatsResponseBody struct {
 	Body *StatsResponse
 }
 
-func StatsHandler(stats_base string) func(c *gin.Context) {
+func StatsHandler(statsBase string) func(c *gin.Context) {
 	return func(c *gin.Context) {
-		b := NewBaseStats(stats_base)
+		b := NewBaseStats(statsBase)
 		c.IndentedJSON(http.StatusOK, gin.H{
 			"stats":    b.GetAll(),
 			"response": "ok",
@@ -64,15 +64,15 @@
 }
 
 func NewBaseStats(service string) *BaseStats {
-	if all_the_stats == nil {
-		all_the_stats = &AllStats{}
+	if allTheStats == nil {
+		allTheStats = &AllStats{}
 	}
 
-	if _, ok := all_the_stats.services.Load(service); !ok {
-		all_the_stats.services.Store(service, &BaseStats{})
+	if _, ok := allTheStats.services.Load(service); !ok {
+		allTheStats.services.Store(service, &BaseStats{})
 	}
 
-	v, _ := all_the_stats.services.Load(service)
+	v, _ := allTheStats.services.Load(service)
 
 	bs, ok := v.(*BaseStats)
 	if !ok {
@@ -104,7 +104,7 @@ func (e *BaseStats) Get(key string) int64 {
 }
 
 func (e *BaseStats) GetAll() map[string]int64 {
-	a_copy := make(map[string]int64, 0)
+	aCopy := make(map[string]int64, 0)
 
 	e.stats.Range(func(key any, v any) bool {
 		val, ok := v.(int64)
@@ -117,12 +117,12 @@
 			zap.L().Error("could not cast string")
 		}
 
-		a_copy[k] = val
+		aCopy[k] = val
 
 		return true
 	})
 
-	return a_copy
+	return aCopy
 }
 
 func (e *BaseStats) Increment(key string) {
diff --git a/internal/common/backoff.go b/internal/common/backoff.go
index 8cc8fd5..2b1eba7 100644
--- a/internal/common/backoff.go
+++ b/internal/common/backoff.go
@@ -36,16 +36,16 @@ func BackoffLoop(host string, politeSleep int64, lastHitMap *sync.Map, lastBacko
 			time.Sleep(time.Duration(newBackoffTime) * time.Second)
 
 			continue
-		} else {
-			// We're not in the map, or it is more than milliseconds!
-			// IT IS OUR TURN.
-			// Reset the times and get out of here.
-			zap.L().Debug("freedom: left the backoff loop",
-				zap.String("host", host))
-			lastBackoffMap.Store(host, politeSleep)
-			lastHitMap.Store(host, time.Now())
-
-			break
 		}
+
+		// We're not in the map, or it is more than milliseconds!
+		// IT IS OUR TURN.
+		// Reset the times and get out of here.
+		zap.L().Debug("freedom: left the backoff loop",
+			zap.String("host", host))
+		lastBackoffMap.Store(host, politeSleep)
+		lastHitMap.Store(host, time.Now())
+
+		break
 	}
 }
diff --git a/internal/common/common.go b/internal/common/common.go
index c55a165..297ef42 100644
--- a/internal/common/common.go
+++ b/internal/common/common.go
@@ -11,13 +11,13 @@ import (
 	"go.uber.org/zap"
 )
 
-func GetPool(database_url string) (context.Context, *pgxpool.Pool) {
+func GetPool(databaseURL string) (context.Context, *pgxpool.Pool) {
 	ctx := context.Background()
 
-	pool, err := pgxpool.New(ctx, database_url)
+	pool, err := pgxpool.New(ctx, databaseURL)
 	if err != nil {
 		zap.L().Error("could not establish database pool; exiting",
-			zap.String("database_url", database_url),
+			zap.String("database_url", databaseURL),
 		)
 		os.Exit(1)
 	}
@@ -25,10 +25,11 @@
 	return ctx, pool
 }
 
+//nolint:revive
 func CommonQueueInit() (context.Context, *pgxpool.Pool, *river.Workers) {
 	var err error
 
-	database_url, err := env.Env.GetDatabaseUrl(env.QueueDatabase)
+	databaseURL, err := env.Env.GetDatabaseURL(env.QueueDatabase)
 	if err != nil {
 		zap.L().Error("unable to get connection string; exiting",
 			zap.String("database", env.QueueDatabase),
@@ -37,7 +38,7 @@
 	}
 
 	// Establish the database
-	ctx, pool := GetPool(database_url)
+	ctx, pool := GetPool(databaseURL)
 
 	// Create a pool of workers
 	workers := river.NewWorkers()
diff --git a/internal/common/domain64.go b/internal/common/domain64.go
index bda639c..526a00d 100644
--- a/internal/common/domain64.go
+++ b/internal/common/domain64.go
@@ -41,6 +41,7 @@ func D64HexToDec(h string) int64 {
 			zap.String("Domain64", h))
 	}
 
+	//nolint:gosec
 	return int64(value)
 }
 
diff --git a/internal/common/types.go b/internal/common/types.go
index 6b4407b..0426b0b 100644
--- a/internal/common/types.go
+++ b/internal/common/types.go
@@ -37,7 +37,7 @@ type ExtractArgs struct {
 	Scheme string `json:"scheme"`
 	Host   string `json:"host"`
 	Path   string `json:"path"`
-	GuestbookId int64 `json:"gb_id"`
+	GuestbookID int64 `json:"gb_id"`
 }
 
 func (ExtractArgs) Kind() string {
@@ -58,7 +58,7 @@ type PackArgs struct {
 	Scheme string `json:"scheme"`
 	Host   string `json:"host"`
 	Path   string `json:"path"`
-	GuestbookId int64 `json:"gb_id"`
+	GuestbookID int64 `json:"gb_id"`
 }
 
 func (PackArgs) Kind() string {
@@ -83,7 +83,7 @@ func (WalkArgs) Kind() string {
 	return "walk"
 }
 
-type HttpResponse func(w http.ResponseWriter, r *http.Request)
+type HTTPResponse func(w http.ResponseWriter, r *http.Request)
 
 // VALIDATOR TYPES.
 var ValidateFetchQueue = "validate_fetch"
diff --git a/internal/env/env.go b/internal/env/env.go
index 1ac37ff..780cf4c 100644
--- a/internal/env/env.go
+++ b/internal/env/env.go
@@ -15,7 +15,7 @@ import (
 
 var Env *env
 
-var DEBUG_ENV = false
+var DebugEnv = false
 
 // Constants for the attached services
 // These reach into the VCAP_SERVICES and are
@@ -56,6 +56,7 @@ type Service struct {
 
 // FIXME: This should be string, err.
 func (s *Service) CredentialString(key string) string {
+	//nolint:revive
 	if v, ok := s.Credentials[key]; ok {
 		cast, ok := v.(string)
 		if !ok {
@@ -79,12 +80,12 @@ func (s *Service) CredentialInt(key string) int64 {
 		}
 
 		return int64(cast)
-	} else {
-		zap.L().Error("cannot find credential for key",
-			zap.String("key", key))
-
-		return -1
 	}
+
+	zap.L().Error("cannot find credential for key",
+		zap.String("key", key))
+
+	return -1
 }
 
 type Database = Service
@@ -107,18 +108,18 @@ type env struct {
 	Databases []Database
 }
 
-type container_env struct {
+type containerEnv struct {
 	VcapServices map[string][]Service `mapstructure:"VCAP_SERVICES"`
 }
 
-var container_envs = []string{"DOCKER", "GH_ACTIONS"}
+var containerEnvs = []string{"DOCKER", "GH_ACTIONS"}
 
-var cf_envs = []string{"SANDBOX", "PREVIEW", "DEV", "STAGING", "PROD"}
+var cfEnvs = []string{"SANDBOX", "PREVIEW", "DEV", "STAGING", "PROD"}
 
-var test_envs = []string{"LOCALHOST"}
+var testEnvs = []string{"LOCALHOST"}
 
 //nolint:cyclop,funlen
-func InitGlobalEnv(this_service string) {
+func InitGlobalEnv(thisService string) {
 	Env = &env{}
 	configName := "NO_CONFIG_NAME_SET"
 
@@ -175,7 +176,7 @@ func InitGlobalEnv(this_service string) {
 	// if we unpack things right, we end up with one struct
 	// with everything in the right places.
 	if IsContainerEnv() || IsLocalTestEnv() {
-		ContainerEnv := container_env{}
+		ContainerEnv := containerEnv{}
 
 		err := viper.Unmarshal(&ContainerEnv)
 		if err != nil {
@@ -187,15 +188,15 @@ func InitGlobalEnv(this_service string) {
 	}
 
 	if IsCloudEnv() {
-		new_vcs := make(map[string][]Service, 0)
+		newVCS := make(map[string][]Service, 0)
 
-		err := json.Unmarshal([]byte(os.Getenv("VCAP_SERVICES")), &new_vcs)
+		err := json.Unmarshal([]byte(os.Getenv("VCAP_SERVICES")), &newVCS)
 		if err != nil {
 			log.Println("ENV could not unmarshal VCAP_SERVICES to new")
 			log.Fatal(err)
 		}
 
-		Env.VcapServices = new_vcs
+		Env.VcapServices = newVCS
 	}
 
 	// Configure the buckets and databases
@@ -207,13 +208,13 @@ func InitGlobalEnv(this_service string) {
 		log.Println(Env.Databases)
 	}
 
-	SetupLogging(this_service)
-	SetGinReleaseMode(this_service)
+	SetupLogging(thisService)
+	SetGinReleaseMode(thisService)
 
 	// Grab the schedule
-	s, err := Env.GetUserService(this_service)
+	s, err := Env.GetUserService(thisService)
 	if err != nil {
-		log.Println("could not get service for ", this_service)
+		log.Println("could not get service for ", thisService)
 	}
 
 	Env.AllowedHosts = s.GetParamString("allowed_hosts")
@@ -222,7 +223,7 @@
 }
 
 // https://stackoverflow.com/questions/3582552/what-is-the-format-for-the-postgresql-connection-string-url
// postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...]
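 // For example (values are illustrative, not a real credential):
 //
 //	postgresql://jemison:changeme@localhost:5432/queue_db?sslmode=disable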
-func (e *env) GetDatabaseUrl(name string) (string, error) { +func (e *env) GetDatabaseURL(name string) (string, error) { for _, db := range e.Databases { if db.Name == name { params := "" @@ -250,7 +251,7 @@ func (e *env) GetDatabaseUrl(name string) (string, error) { func (e *env) GetObjectStore(name string) (Bucket, error) { for _, b := range e.ObjectStores { - if DEBUG_ENV { + if DebugEnv { zap.L().Debug("GetObjectStore", zap.String("bucket_name", b.Name), zap.String("search_key", name), @@ -277,30 +278,30 @@ func (e *env) GetUserService(name string) (Service, error) { } func IsContainerEnv() bool { - return slices.Contains(container_envs, os.Getenv("ENV")) + return slices.Contains(containerEnvs, os.Getenv("ENV")) } func IsLocalTestEnv() bool { - return slices.Contains(test_envs, os.Getenv("ENV")) + return slices.Contains(testEnvs, os.Getenv("ENV")) } func IsCloudEnv() bool { - return slices.Contains(cf_envs, os.Getenv("ENV")) + return slices.Contains(cfEnvs, os.Getenv("ENV")) } func (s *Service) GetParamInt64(key string) int64 { - for _, global_s := range Env.UserServices { - if s.Name == global_s.Name { - if global_param_val, ok := global_s.Parameters[key]; ok { - cast, ok := global_param_val.(int) + for _, globalString := range Env.UserServices { + if s.Name == globalString.Name { + if globalParamVal, ok := globalString.Parameters[key]; ok { + cast, ok := globalParamVal.(int) if !ok { zap.L().Error("could not cast int") } return int64(cast) - } else { - log.Fatalf("ENV no int64 param found for %s", key) } + + log.Fatalf("ENV no int64 param found for %s", key) } } @@ -308,18 +309,18 @@ func (s *Service) GetParamInt64(key string) int64 { } func (s *Service) GetParamString(key string) string { - for _, global_s := range Env.UserServices { - if s.Name == global_s.Name { - if global_param_val, ok := global_s.Parameters[key]; ok { - cast, ok := global_param_val.(string) + for _, globalString := range Env.UserServices { + if s.Name == globalString.Name { + if globalParamVal, ok := globalString.Parameters[key]; ok { + cast, ok := globalParamVal.(string) if !ok { zap.L().Error("could not cast string") } return cast - } else { - log.Fatalf("ENV no string param found for %s", key) } + + log.Fatalf("ENV no string param found for %s", key) } } @@ -327,25 +328,25 @@ func (s *Service) GetParamString(key string) string { } func (s *Service) GetParamBool(key string) bool { - for _, global_s := range Env.UserServices { - if s.Name == global_s.Name { - if global_param_val, ok := global_s.Parameters[key]; ok { - cast, ok := global_param_val.(bool) + for _, globalString := range Env.UserServices { + if s.Name == globalString.Name { + if globalParamVal, ok := globalString.Parameters[key]; ok { + cast, ok := globalParamVal.(bool) if !ok { zap.L().Error("could not cast bool") } return cast - } else { - log.Fatalf("ENV no bool param found for %s", key) } + + log.Fatalf("ENV no bool param found for %s", key) } } return false } -func (s *Service) AsJson() string { +func (s *Service) AsJSON() string { b, err := json.MarshalIndent(s, "", " ") if err != nil { fmt.Println(err) diff --git a/internal/env/gin.go b/internal/env/gin.go index cc71be9..99108b6 100644 --- a/internal/env/gin.go +++ b/internal/env/gin.go @@ -7,8 +7,8 @@ import ( "go.uber.org/zap" ) -func SetGinReleaseMode(this_service string) { - s, _ := Env.GetUserService(this_service) +func SetGinReleaseMode(thisService string) { + s, _ := Env.GetUserService(thisService) level := s.GetParamString("debug_level") if level == "debug" { diff --git 
a/internal/env/zap.go b/internal/env/zap.go index eb98807..bbd15df 100644 --- a/internal/env/zap.go +++ b/internal/env/zap.go @@ -28,33 +28,33 @@ var ZapLogger *zap.Logger // l.WithFields(keysAndValues).Warn(msg) // } -func createLogger(this_service string) *zap.Logger { +func createLogger(thisService string) *zap.Logger { encoderCfg := zap.NewProductionEncoderConfig() encoderCfg.TimeKey = "timestamp" encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder // level := strings.ToLower(os.Getenv("DEBUG_LEVEL")) // WARNING: THIS MUST RUN AFTER THE ENV IS PARSED/SET UP - s, _ := Env.GetUserService(this_service) + s, _ := Env.GetUserService(thisService) level := s.GetParamString("debug_level") - var zap_level zapcore.Level + var zapLevel zapcore.Level switch level { case "debug": - zap_level = zap.DebugLevel + zapLevel = zap.DebugLevel case "info": - zap_level = zap.InfoLevel + zapLevel = zap.InfoLevel case "warn": - zap_level = zap.WarnLevel + zapLevel = zap.WarnLevel case "error": - zap_level = zap.ErrorLevel + zapLevel = zap.ErrorLevel default: - zap_level = zap.InfoLevel + zapLevel = zap.InfoLevel } config := zap.Config{ - Level: zap.NewAtomicLevelAt(zap_level), + Level: zap.NewAtomicLevelAt(zapLevel), Development: false, DisableCaller: false, DisableStacktrace: false, @@ -80,7 +80,7 @@ func createLogger(this_service string) *zap.Logger { return zap.Must(logger, nil) } -func SetupLogging(this_service string) { - ZapLogger = createLogger(this_service) +func SetupLogging(thisService string) { + ZapLogger = createLogger(thisService) zap.ReplaceGlobals(zap.Must(ZapLogger, nil)) } diff --git a/internal/filtering/filter_test.go b/internal/filtering/filter_test.go index 43dd578..dec26da 100644 --- a/internal/filtering/filter_test.go +++ b/internal/filtering/filter_test.go @@ -21,8 +21,8 @@ var tests = []struct { {"/", isTooShort(1), false}, {"https://tooLong.gov/", exceedsLength(5), true}, {"https://tooLong.gov/", exceedsLength(200), false}, - {"https://nasa.gov/", hasSlashHttp, false}, - {"https://nasa.gov/something/http://", hasSlashHttp, true}, + {"https://nasa.gov/", hasSlashHTTP, false}, + {"https://nasa.gov/something/http://", hasSlashHTTP, true}, // Spaces become %20 once encoded {"https://blog1a.nasa.gov/right here", hasRightHere, true}, {"https://blog1b.nasa.gov/right here", IsReject, true}, diff --git a/internal/filtering/general.go b/internal/filtering/general.go index 37f9916..8dfe4c9 100644 --- a/internal/filtering/general.go +++ b/internal/filtering/general.go @@ -7,9 +7,9 @@ import ( "strings" ) -var skippable_prefixes = []string{"#", "mailto"} +var skippablePrefixes = []string{"#", "mailto"} -var skippable_extensions = []string{ +var skippableExtensions = []string{ "acc", "bmp", "doc", @@ -34,13 +34,13 @@ var skippable_extensions = []string{ "xlsx", } -const IS_TOO_SHORT_MIN = 5 +const IsTooShortMin = 5 -const EXCEEDS_LENGTH_MAX = 200 +const ExceedsLengthMax = 200 -const TOO_MANY_REPEATS_LEN = 8 +const TooManyRepeatsLen = 8 -const TOO_MANY_REPEATS_COUNT = 50 +const TooManyRepeatsCount = 50 func exceedsLength(length int) func(*url.URL) error { return func(u *url.URL) error { @@ -52,7 +52,7 @@ func exceedsLength(length int) func(*url.URL) error { } } -func hasSlashHttp(u *url.URL) error { +func hasSlashHTTP(u *url.URL) error { m, _ := regexp.MatchString(`/http`, u.Path) if m { return fmt.Errorf("http in middle of url: %s", u.Path) @@ -81,7 +81,7 @@ func isTooShort(length int) func(*url.URL) error { } func hasSkippablePrefixRelative(u *url.URL) error { - for _, sp := range 
skippable_prefixes { + for _, sp := range skippablePrefixes { if strings.HasPrefix(u.String(), sp) { return fmt.Errorf("skippable prefix [%s]: %s", sp, u.Path) } @@ -91,7 +91,7 @@ func hasSkippablePrefixRelative(u *url.URL) error { } func hasSkippableExtension(u *url.URL) error { - for _, ext := range skippable_extensions { + for _, ext := range skippableExtensions { if strings.HasSuffix(u.Path, ext) { return fmt.Errorf("skippable extension [%s]: %s", ext, u.Path) } @@ -143,7 +143,7 @@ func endsWithWrongSlash(u *url.URL) error { return nil } -var all string = ".*" +var all = ".*" func GeneralRules() []Rule { rules := make([]Rule, 0) @@ -157,13 +157,13 @@ func GeneralRules() []Rule { rules = append(rules, Rule{ Match: all, Msg: "max isTooShort 5", - Reject: isTooShort(IS_TOO_SHORT_MIN), + Reject: isTooShort(IsTooShortMin), }) rules = append(rules, Rule{ Match: all, Msg: "exceedsLength 200", - Reject: exceedsLength(EXCEEDS_LENGTH_MAX), + Reject: exceedsLength(ExceedsLengthMax), }) rules = append(rules, Rule{ @@ -175,7 +175,7 @@ func GeneralRules() []Rule { rules = append(rules, Rule{ Match: all, Msg: "hasSlashHttp", - Reject: hasSlashHttp, + Reject: hasSlashHTTP, }) rules = append(rules, Rule{ @@ -199,7 +199,7 @@ func GeneralRules() []Rule { rules = append(rules, Rule{ Match: all, Msg: "hasTooManyRepeats", - Reject: hasTooManyRepeats(TOO_MANY_REPEATS_LEN, TOO_MANY_REPEATS_COUNT), + Reject: hasTooManyRepeats(TooManyRepeatsLen, TooManyRepeatsCount), }) return rules diff --git a/internal/filtering/nasa.go b/internal/filtering/nasa.go index 7276af3..49bcc30 100644 --- a/internal/filtering/nasa.go +++ b/internal/filtering/nasa.go @@ -7,7 +7,7 @@ import ( ) // We compare against the host, so leave off the scheme. -var nasa string = `.*nasa.gov` +var nasa = `.*nasa.gov` func hasRightHere(u *url.URL) error { match, _ := regexp.MatchString("right.*?here", u.String()) diff --git a/internal/kv/interfaces.go b/internal/kv/interfaces.go index b91297e..5deccfd 100644 --- a/internal/kv/interfaces.go +++ b/internal/kv/interfaces.go @@ -43,7 +43,8 @@ func NewObject(key string, value JSON) *Obj { } size := int64(len(b)) - mime := "" + + var mime string if good, ok := value["content-type"]; !ok { mime = "octet/binary" @@ -70,7 +71,7 @@ func (o Obj) GetValue(key string) string { return o.value[key] } -func (o Obj) GetJson() JSON { +func (o Obj) GetJSON() JSON { return o.value } diff --git a/internal/kv/s3.go b/internal/kv/s3.go index b7f55d1..b6024ed 100644 --- a/internal/kv/s3.go +++ b/internal/kv/s3.go @@ -13,7 +13,7 @@ import ( "go.uber.org/zap" ) -var DEBUG_S3 = false +var DebugS3 = false // S3 holds a bucket structure (containing VCAP_SERVICES information) // and an S3 client connection from the min.io libraries. @@ -27,16 +27,16 @@ type S3 struct { // NewS3 creates a new S3 object for the bucket given. // Lets us copy files to/from the bucket. 
-func NewS3(bucket_name string) *S3 { - s3 := newS3FromBucketName(bucket_name) +func NewS3(bucketName string) *S3 { + s3 := newS3FromBucketName(bucketName) return &s3 } -func (s3 *S3) FileToS3(key *util.Key, local_filename string, mime_type string) error { - reader, err := os.Open(local_filename) +func (s3 *S3) FileToS3(key *util.Key, localFilename string, mimeType string) error { + reader, err := os.Open(localFilename) if err != nil { - log.Fatal("FileToS3 cannot open file ", local_filename) + log.Fatal("FileToS3 cannot open file ", localFilename) } fi, err := reader.Stat() @@ -45,13 +45,13 @@ func (s3 *S3) FileToS3(key *util.Key, local_filename string, mime_type string) e log.Fatal(err) } - return store(s3, key.Render(), fi.Size(), reader, mime_type) + return store(s3, key.Render(), fi.Size(), reader, mimeType) } -func (s3 *S3) FileToS3Path(key string, local_filename string, mime_type string) error { - reader, err := os.Open(local_filename) +func (s3 *S3) FileToS3Path(key string, localFilename string, mimeType string) error { + reader, err := os.Open(localFilename) if err != nil { - log.Fatal("FileToS3Path cannot open file ", local_filename) + log.Fatal("FileToS3Path cannot open file ", localFilename) } fi, err := reader.Stat() @@ -60,43 +60,45 @@ func (s3 *S3) FileToS3Path(key string, local_filename string, mime_type string) log.Fatal(err) } - return store(s3, key, fi.Size(), reader, mime_type) + return store(s3, key, fi.Size(), reader, mimeType) } -func (s3 *S3) S3ToFile(key *util.Key, local_filename string) error { +func (s3 *S3) S3ToFile(key *util.Key, localFilename string) error { ctx := context.Background() err := s3.MinioClient.FGetObject( ctx, s3.Bucket.CredentialString("bucket"), key.Render(), - local_filename, + localFilename, minio.GetObjectOptions{}) if err != nil { zap.L().Error("could not FGetObject", zap.String("bucket", s3.Bucket.Name), zap.String("key", key.Render()), - zap.String("local_filename", local_filename), + zap.String("local_filename", localFilename), ) + //nolint:wrapcheck return err } return nil } -func (s3 *S3) S3PathToFile(path string, local_filename string) error { +func (s3 *S3) S3PathToFile(path string, localFilename string) error { ctx := context.Background() err := s3.MinioClient.FGetObject( ctx, s3.Bucket.CredentialString("bucket"), path, - local_filename, + localFilename, minio.GetObjectOptions{}) if err != nil { zap.Error(err) + //nolint:wrapcheck return err } @@ -127,10 +129,11 @@ func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) { zap.String("key", key.Render()), zap.String("error", err.Error())) + //nolint:wrapcheck return nil, err } - if DEBUG_S3 { + if DebugS3 { zap.L().Debug("retrieved S3 object", zap.String("key", key.Render())) } @@ -141,15 +144,16 @@ func (s3 *S3) S3PathToS3JSON(key *util.Key) (*S3JSON, error) { zap.String("key", key.Render()), zap.String("error", err.Error())) + //nolint:wrapcheck return nil, err } s3json := NewS3JSON(s3.Bucket.Name) s3json.raw = raw s3json.Key = key - current_mime_type := s3json.GetString("content-type") + currentMIMEType := s3json.GetString("content-type") - updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(current_mime_type)) + updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(currentMIMEType)) if err != nil { zap.L().Error("could not update raw S3JSON") } else { diff --git a/internal/kv/s3json.go b/internal/kv/s3json.go index e9a1499..061651b 100644 --- a/internal/kv/s3json.go +++ b/internal/kv/s3json.go @@ -19,7 +19,7 @@ import ( 
"go.uber.org/zap" ) -var DEBUG_S3JSON = false +var DebugS3JSON = false // NewFromBytes(bucket_name string, host string, path string, m []byte) *S3JSON // NewEmptyS3JSON(bucket_name string, host string, path string) *S3JSON @@ -47,8 +47,8 @@ type S3JSON struct { empty bool } -func NewS3JSON(bucket_name string) *S3JSON { - s3 := newS3FromBucketName(bucket_name) +func NewS3JSON(bucketName string) *S3JSON { + s3 := newS3FromBucketName(bucketName) return &S3JSON{ Key: &util.Key{}, @@ -61,26 +61,29 @@ func NewS3JSON(bucket_name string) *S3JSON { // NewFromBytes takes a []byte representation of a JSON document and constructs // a S3JSON document from it. // Inserts _key -func NewFromBytes(bucket_name string, scheme util.Scheme, host string, path string, m []byte) *S3JSON { - s3 := newS3FromBucketName(bucket_name) +func NewFromBytes(bucketName string, scheme util.Scheme, host string, path string, m []byte) *S3JSON { + s3 := newS3FromBucketName(bucketName) key := util.CreateS3Key(scheme, host, path, util.JSON) - w_key, _ := sjson.SetBytes(m, "_key", key.Render()) + wKey, _ := sjson.SetBytes(m, "_key", key.Render()) return &S3JSON{ Key: key, - raw: w_key, + raw: wKey, S3: s3, empty: false, } } // Inserts _key -func NewFromMap(bucket_name string, scheme util.Scheme, host string, path string, m map[string]string) *S3JSON { - s3 := newS3FromBucketName(bucket_name) +func NewFromMap(bucketName string, scheme util.Scheme, host string, path string, m map[string]string) *S3JSON { + s3 := newS3FromBucketName(bucketName) key := util.CreateS3Key(scheme, host, path, util.JSON) m["_key"] = key.Render() - b, _ := json.Marshal(m) + b, err := json.Marshal(m) + if err != nil { + zap.L().Error("could not marshall JSON") + } return &S3JSON{ Key: key, @@ -92,8 +95,8 @@ func NewFromMap(bucket_name string, scheme util.Scheme, host string, path string // Creates a new, empty S3JSON struct, setting it as `empty`. // `Load()` must be called on it before we can use it. 
-func NewEmptyS3JSON(bucket_name string, scheme util.Scheme, host string, path string) *S3JSON { - s3 := newS3FromBucketName(bucket_name) +func NewEmptyS3JSON(bucketName string, scheme util.Scheme, host string, path string) *S3JSON { + s3 := newS3FromBucketName(bucketName) key := util.CreateS3Key(scheme, host, path, util.JSON) return &S3JSON{ @@ -173,10 +176,11 @@ func (s3json *S3JSON) Load() error { zap.String("key", key), zap.String("error", err.Error())) + //nolint:wrapcheck return err } - if DEBUG_S3JSON { + if DebugS3JSON { zap.L().Debug("retrieved S3 object", zap.String("key", key)) } @@ -187,13 +191,14 @@ func (s3json *S3JSON) Load() error { zap.String("key", key), zap.String("error", err.Error())) + //nolint:wrapcheck return err } s3json.raw = raw - current_mime_type := s3json.GetString("content-type") + currentMimeType := s3json.GetString("content-type") - updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(current_mime_type)) + updated, err := sjson.SetBytes(s3json.raw, "content-type", util.CleanMimeType(currentMimeType)) if err != nil { zap.L().Error("could not update s3json.raw") } else { @@ -209,29 +214,29 @@ func (s3json *S3JSON) GetJSON() []byte { return s3json.raw } -func (s3json *S3JSON) GetString(gjson_path string) string { - r := gjson.GetBytes(s3json.raw, gjson_path) +func (s3json *S3JSON) GetString(gjsonPath string) string { + r := gjson.GetBytes(s3json.raw, gjsonPath) return r.String() } -func (s3json *S3JSON) GetInt64(gjson_path string) int64 { - r := gjson.GetBytes(s3json.raw, gjson_path) +func (s3json *S3JSON) GetInt64(gjsonPath string) int64 { + r := gjson.GetBytes(s3json.raw, gjsonPath) return int64(r.Int()) } -func (s3json *S3JSON) GetBool(gjson_path string) bool { - r := gjson.GetBytes(s3json.raw, gjson_path) +func (s3json *S3JSON) GetBool(gjsonPath string) bool { + r := gjson.GetBytes(s3json.raw, gjsonPath) return r.Bool() } -func (s3json *S3JSON) Set(sjson_path string, value string) { - b, err := sjson.SetBytes(s3json.raw, sjson_path, value) +func (s3json *S3JSON) Set(sjsonPath string, value string) { + b, err := sjson.SetBytes(s3json.raw, sjsonPath, value) if err != nil { zap.L().Error("could not set JSON path in Set()", - zap.String("sjson_path", sjson_path), + zap.String("sjson_path", sjsonPath), zap.String("value", value)) } @@ -241,32 +246,3 @@ func (s3json *S3JSON) Set(sjson_path string, value string) { func (s3json *S3JSON) Size() int64 { return int64(len(s3json.raw)) } - -// type Storage interface { -// Store(string, JSON) error -// List(string) ([]*ObjInfo, error) -// Get(string) (Object, error) -// } - -// func (s3 *S3) StoreFile(destination_key string, source_filename string) error { -// reader, err := os.Open(source_filename) -// if err != nil { -// log.Fatal("KV cannot open file", source_filename) -// } -// fi, err := reader.Stat() -// if err != nil { -// log.Println("KV could not stat file") -// log.Fatal(err) -// } - -// return store(s3, destination_key, fi.Size(), make(JSON, 0), reader) -// } - -// //////////////////////////// -// // SUPPORT - -// func mapToReader(json_map JSON) (io.Reader, int64) { -// b, _ := json.Marshal(json_map) -// r := bytes.NewReader(b) -// return r, int64(len(b)) -// } diff --git a/internal/kv/s3json_test.go b/internal/kv/s3json_test.go index 756e4e2..d18787b 100644 --- a/internal/kv/s3json_test.go +++ b/internal/kv/s3json_test.go @@ -19,12 +19,14 @@ func setup( /* t *testing.T */ ) func(t *testing.T) { os.Setenv("ENV", "LOCALHOST") env.InitGlobalEnv("testing_env") // we need to pass 
something - return func(t *testing.T) { + return func(_ *testing.T) { } } // TestHelloName calls greetings.Hello with a name, checking // for a valid return value. +// +//nolint:revive func TestKv(t *testing.T) { setup() log.Println(env.Env.ObjectStores) diff --git a/internal/kv/util.go b/internal/kv/util.go index 5c3ffec..47f6717 100644 --- a/internal/kv/util.go +++ b/internal/kv/util.go @@ -23,12 +23,12 @@ var s3cache sync.Map // carry the information so they can load/save. // //nolint:cyclop,funlen -func newS3FromBucketName(bucket_name string) S3 { - if !env.IsValidBucketName(bucket_name) { - log.Fatal("KV INVALID BUCKET NAME ", bucket_name) +func newS3FromBucketName(bucketName string) S3 { + if !env.IsValidBucketName(bucketName) { + log.Fatal("KV INVALID BUCKET NAME ", bucketName) } - if v, ok := s3cache.Load(bucket_name); ok { + if v, ok := s3cache.Load(bucketName); ok { cast, ok := v.(S3) if !ok { zap.L().Error("could not cast to s3 struct") @@ -40,13 +40,13 @@ func newS3FromBucketName(bucket_name string) S3 { s3 := S3{} // Grab a reference to our bucket from the config. - b, err := env.Env.GetObjectStore(bucket_name) + b, err := env.Env.GetObjectStore(bucketName) if err != nil { - zap.L().Error("could not get bucket from config", zap.String("bucket_name", bucket_name)) + zap.L().Error("could not get bucket from config", zap.String("bucket_name", bucketName)) os.Exit(1) } - if DEBUG_S3 { + if DebugS3 { zap.L().Debug("got reference to bucket from vcap", zap.String("name", b.Name), zap.String("bucket", b.CredentialString("bucket")), @@ -80,25 +80,25 @@ func newS3FromBucketName(bucket_name string) S3 { found, err := minioClient.BucketExists(ctx, s3.Bucket.CredentialString("bucket")) if err != nil { zap.L().Fatal("could not check if bucket exists", - zap.String("bucket_name", bucket_name), + zap.String("bucket_name", bucketName), zap.String("err", err.Error())) } if found { - if DEBUG_S3 { + if DebugS3 { zap.L().Debug("pre-existing bucket in S3", - zap.String("bucket_name", bucket_name)) + zap.String("bucket_name", bucketName)) } // Make sure to insert the metadata into the sync.Map // when we find a bucket that already exists. // buckets.Store(bucket_name, s3) - s3cache.Store(bucket_name, s3) + s3cache.Store(bucketName, s3) return s3 } if env.IsContainerEnv() { - log.Println("KV creating new bucket ", bucket_name) + log.Println("KV creating new bucket ", bucketName) // Try and make the bucket; if we're local, this is necessary. 
ctx := context.Background() err = minioClient.MakeBucket( @@ -108,11 +108,11 @@ func newS3FromBucketName(bucket_name string) S3 { if err != nil && !strings.Contains(err.Error(), "succeeded") { log.Println(err) - log.Fatal("KV could not create bucket ", bucket_name) + log.Fatal("KV could not create bucket ", bucketName) } } // Skip container creation in CF - s3cache.Store(bucket_name, s3) + s3cache.Store(bucketName, s3) return s3 } @@ -126,11 +126,11 @@ func containsAll(target string, pieces []string) bool { return allExist } -const BACKUOFF_MS = 50 +const BackoffMillis = 50 -const BACKOFF_OFFSET = 25 +const BackoffOffset = 25 -func store(s3 *S3, destination_key string, size int64, reader io.Reader, mime_type string) error { +func store(s3 *S3, destinationKey string, size int64, reader io.Reader, mimeType string) error { trying := true backoff := 50 @@ -139,11 +139,11 @@ func store(s3 *S3, destination_key string, size int64, reader io.Reader, mime_ty _, err := s3.MinioClient.PutObject( ctx, s3.Bucket.CredentialString("bucket"), - destination_key, + destinationKey, reader, size, minio.PutObjectOptions{ - ContentType: mime_type, + ContentType: mimeType, // This seems to set the *minimum* partsize for multipart uploads. // Which... makes writing JSON objects impossible. // PartSize: 5000000 @@ -152,28 +152,30 @@ func store(s3 *S3, destination_key string, size int64, reader io.Reader, mime_ty // We might be going too fast. if err != nil { zap.L().Warn("S3JSON could not PUT object", - zap.String("destination_key", destination_key), + zap.String("destination_key", destinationKey), zap.String("error", err.Error())) // Resource requested is unwritable, please reduce your request rate if containsAll(err.Error(), []string{"reduce", "rate"}) || containsAll(err.Error(), []string{"not", "store"}) { zap.L().Warn("reducing request rate") //nolint:gosec - sleepyTime := time.Duration((rand.IntN(BACKUOFF_MS) + backoff) * int(time.Millisecond)) + sleepyTime := time.Duration((rand.IntN(BackoffMillis) + backoff) * int(time.Millisecond)) - backoff += rand.IntN(BACKUOFF_MS) + BACKOFF_OFFSET + //nolint:gosec + backoff += rand.IntN(BackoffMillis) + BackoffOffset time.Sleep(sleepyTime) continue - } else { - zap.L().Error("s3 storage error", zap.String("err", err.Error())) - - return err } - } else { - trying = false + + zap.L().Error("s3 storage error", zap.String("err", err.Error())) + + //nolint:wrapcheck + return err } + + trying = false } return nil diff --git a/internal/postgres/postgres.go b/internal/postgres/postgres.go index d70e0ca..a8f2bf1 100644 --- a/internal/postgres/postgres.go +++ b/internal/postgres/postgres.go @@ -28,13 +28,13 @@ func NewJemisonDB() *JemisonDB { Pool: make(map[string]*pgxpool.Pool), } - for _, db_name := range []string{env.QueueDatabase, env.JemisonWorkDatabase, env.SearchDatabase} { - db_string, err := env.Env.GetDatabaseUrl(db_name) + for _, dbName := range []string{env.QueueDatabase, env.JemisonWorkDatabase, env.SearchDatabase} { + dbString, err := env.Env.GetDatabaseURL(dbName) if err != nil { - zap.L().Fatal("could not get db URL", zap.String("db_name", db_name)) + zap.L().Fatal("could not get db URL", zap.String("db_name", dbName)) } - cfg := Config(db_string) + cfg := Config(dbString) // Create database connection pool, err := pgxpool.NewWithConfig(context.Background(), cfg) if err != nil { @@ -47,8 +47,8 @@ func NewJemisonDB() *JemisonDB { zap.L().Error(err.Error()) } - jdb.Config[db_name] = cfg - jdb.Pool[db_name] = pool + jdb.Config[dbName] = cfg + jdb.Pool[dbName] = pool 
 	}
 
 	jdb.WorkDBQueries = work_db.New(jdb.Pool[env.JemisonWorkDatabase])
@@ -57,7 +57,7 @@
 	return &jdb
 }
 
-func Config(db_string string) *pgxpool.Config {
+func Config(dbString string) *pgxpool.Config {
 	const defaultMaxConns = int32(100)
 
 	const defaultMinConns = int32(0)
@@ -70,7 +70,7 @@
 
 	const defaultConnectTimeout = time.Second * 5
 
-	dbConfig, err := pgxpool.ParseConfig(db_string)
+	dbConfig, err := pgxpool.ParseConfig(dbString)
 	if err != nil {
 		log.Fatal("Failed to create a config, error: ", err)
 	}
@@ -92,50 +92,50 @@
 //nolint:gosec
 func (jdb *JemisonDB) GetScheme(scheme string) int32 {
 	if val, ok := jdb.constCache.Load("scheme:" + scheme); ok {
-		v, assert_ok := val.(int32)
-		if !assert_ok {
+		v, assertOK := val.(int32)
+		if !assertOK {
 			zap.L().Error("could not convert scheme integer")
 		}
 
 		return v
-	} else {
-		scheme_int := config.GetScheme(scheme)
-		// This is a guaranteed safe conversion
-		jdb.constCache.Store("scheme:"+scheme, int32(scheme_int))
-
-		return int32(scheme_int)
 	}
+
+	schemeInt := config.GetScheme(scheme)
+	// This is a guaranteed safe conversion
+	jdb.constCache.Store("scheme:"+scheme, int32(schemeInt))
+
+	return int32(schemeInt)
 }
 
 func (jdb *JemisonDB) GetContentType(ct string) int {
 	if val, ok := jdb.constCache.Load("contenttype:" + ct); ok {
-		v, assert_ok := val.(int)
-		if !assert_ok {
+		v, assertOK := val.(int)
+		if !assertOK {
 			zap.L().Error("could not convert content type integer")
 		}
 
 		return v
-	} else {
-		ct_int := config.GetContentType(ct)
-		jdb.constCache.Store("contenttype:"+ct, ct_int)
-
-		return ct_int
 	}
+
+	ctInt := config.GetContentType(ct)
+	jdb.constCache.Store("contenttype:"+ct, ctInt)
+
+	return ctInt
 }
 
-const HOURS_PER_DAY = 24
+const HoursPerDay = 24
 
-const DAYS_PER_WEEK = 7
+const DaysPerWeek = 7
 
-const DAYS_PER_BIWEEK = 14
+const DaysPerBiWeek = 14
 
-const DAYS_PER_MONTH = 30
+const DaysPerMonth = 30
 
-const DAYS_PER_QUARTER = 3 * 30
+const DaysPerQuarter = 3 * 30
 
-const DAYS_PER_BIANNUM = 6 * 30
+const DaysPerBiAnnum = 6 * 30
 
-const DAYS_PER_ANNUM = 12 * 30
+const DaysPerAnnum = 12 * 30
 
 func (jdb *JemisonDB) GetNextFetch(fqdn string) time.Time {
 	var delta time.Duration
@@ -144,27 +144,29 @@
 
 	switch schedule {
 	case config.Daily:
-		delta = time.Duration(HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(HoursPerDay * time.Hour)
 	case config.Weekly:
-		delta = time.Duration(DAYS_PER_WEEK * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerWeek * HoursPerDay * time.Hour)
 	case config.BiWeekly:
-		delta = time.Duration(DAYS_PER_BIWEEK * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerBiWeek * HoursPerDay * time.Hour)
 	case config.Monthly:
-		delta = time.Duration(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerMonth * HoursPerDay * time.Hour)
 	case config.Quarterly:
-		delta = time.Duration(DAYS_PER_QUARTER * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerQuarter * HoursPerDay * time.Hour)
 	case config.BiAnnually:
-		delta = time.Duration(DAYS_PER_BIANNUM * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerBiAnnum * HoursPerDay * time.Hour)
 	case config.Annually:
-		delta = time.Duration(DAYS_PER_ANNUM * HOURS_PER_DAY * time.Hour)
+		delta = time.Duration(DaysPerAnnum * HoursPerDay * time.Hour)
+	case config.Default:
+		delta = time.Duration(DaysPerMonth * HoursPerDay * time.Hour)
 	default:
 		// Default to monthly.
- delta = time.Duration(DAYS_PER_MONTH * HOURS_PER_DAY * time.Hour) + delta = time.Duration(DaysPerMonth * HoursPerDay * time.Hour) } - next_fetch := time.Now().Add(delta) + nextFetch := time.Now().Add(delta) - return next_fetch + return nextFetch } func (jdb *JemisonDB) InThePast(delta time.Duration) time.Time { diff --git a/internal/queueing/generic_insert.go b/internal/queueing/generic_insert.go index c38ec8a..7ed66c9 100644 --- a/internal/queueing/generic_insert.go +++ b/internal/queueing/generic_insert.go @@ -21,6 +21,7 @@ type QSHP struct { Filename string } +//nolint:revive func commonCommit(qshp QSHP, ctx context.Context, tx pgx.Tx) { if err := tx.Commit(ctx); err != nil { err = tx.Rollback(ctx) @@ -36,7 +37,7 @@ func commonCommit(qshp QSHP, ctx context.Context, tx pgx.Tx) { } //nolint:cyclop,funlen -func Enqueue(ch_qshp <-chan QSHP) { +func Enqueue(chQSHP <-chan QSHP) { // Can we leave one connection open for the entire life of a // service? Maybe. Maybe not. _, pool, _ := common.CommonQueueInit() @@ -49,17 +50,17 @@ func Enqueue(ch_qshp <-chan QSHP) { } for { - qshp := <-ch_qshp + qshp := <-chQSHP ctx, tx := common.CtxTx(pool) - var queue_to_match string + var queueToMatch string if strings.HasPrefix(qshp.Queue, "fetch") { - queue_to_match = "fetch" + queueToMatch = "fetch" } else { - queue_to_match = qshp.Queue + queueToMatch = qshp.Queue } - switch queue_to_match { + switch queueToMatch { case "entree": _, err := client.InsertTx(ctx, tx, common.EntreeArgs{ Scheme: qshp.Scheme, diff --git a/internal/queueing/periodic_clear.go b/internal/queueing/periodic_clear.go index 4d8ec85..ba27c71 100644 --- a/internal/queueing/periodic_clear.go +++ b/internal/queueing/periodic_clear.go @@ -8,13 +8,13 @@ import ( "go.uber.org/zap" ) -const PERIODIC_CLEANUP_MINUTES = 10 +const PeriodicCleanupMinutes = 10 func ClearCompletedPeriodically() { _, pool, _ := common.CommonQueueInit() defer pool.Close() - ticker := time.NewTicker(PERIODIC_CLEANUP_MINUTES * time.Minute) + ticker := time.NewTicker(PeriodicCleanupMinutes * time.Minute) for { <-ticker.C diff --git a/internal/queueing/river.go b/internal/queueing/river.go index a125dba..a8a3a86 100644 --- a/internal/queueing/river.go +++ b/internal/queueing/river.go @@ -13,7 +13,7 @@ import ( func InitializeRiverQueues() { // Set up a pool - connection_string, err := env.Env.GetDatabaseUrl(env.QueueDatabase) + connectionString, err := env.Env.GetDatabaseURL(env.QueueDatabase) if err != nil { zap.L().Fatal("cannot find db connection string", zap.String("database", env.QueueDatabase)) @@ -21,7 +21,7 @@ func InitializeRiverQueues() { ctx := context.Background() - pool, err := pgxpool.New(ctx, connection_string) + pool, err := pgxpool.New(ctx, connectionString) if err != nil { zap.L().Fatal("cannot create database pool for migrations") } @@ -43,13 +43,13 @@ func InitializeRiverQueues() { func RunRiverMigrator() { ctx := context.Background() // Set up a pool - connection_string, err := env.Env.GetDatabaseUrl(env.QueueDatabase) + connectionString, err := env.Env.GetDatabaseURL(env.QueueDatabase) if err != nil { log.Println("RIVER cannot find connection string for", env.QueueDatabase) log.Fatal(err) } - pool, err := pgxpool.New(ctx, connection_string) + pool, err := pgxpool.New(ctx, connectionString) if err != nil { zap.L().Fatal("could not get pool for river migrator") } diff --git a/internal/util/memuse.go b/internal/util/memuse.go index 15a3ecc..7301a21 100644 --- a/internal/util/memuse.go +++ b/internal/util/memuse.go @@ -19,8 +19,8 @@ func PrintMemUsage() 
{ fmt.Printf("\tNumGC = %v\n", m.NumGC) } -const BYTES_PER_SI = 1024 +const BytesPerSi = 1024 func bToMb(b uint64) uint64 { - return ((b / BYTES_PER_SI) / BYTES_PER_SI) + return ((b / BytesPerSi) / BytesPerSi) } diff --git a/internal/util/remove_stopwords.go b/internal/util/remove_stopwords.go index 76183f1..21397b3 100644 --- a/internal/util/remove_stopwords.go +++ b/internal/util/remove_stopwords.go @@ -10,34 +10,34 @@ import ( //go:embed stopwords.txt var stopwords string -var each_stopword []string +var eachStopword []string -var ws_re = regexp.MustCompile(`\s+`) +var wsRe = regexp.MustCompile(`\s+`) -var punc_re = regexp.MustCompile(`[-_\.!\?,]`) +var puncRe = regexp.MustCompile(`[-_\.!\?,]`) func removeStopwords(content string) string { - content = ws_re.ReplaceAllString(content, " ") + content = wsRe.ReplaceAllString(content, " ") each := strings.Split(content, " ") - new_content := make([]string, 0) + newContent := make([]string, 0) for _, e := range each { - e = punc_re.ReplaceAllString(e, " ") + e = puncRe.ReplaceAllString(e, " ") - if !slices.Contains(each_stopword, e) { - new_content = append(new_content, e) + if !slices.Contains(eachStopword, e) { + newContent = append(newContent, e) } } - return ws_re.ReplaceAllString(strings.Join(new_content, " "), " ") + return wsRe.ReplaceAllString(strings.Join(newContent, " "), " ") } func RemoveStopwords(content string) string { - if len(each_stopword) > 0 { - return removeStopwords(content) - } else { - each_stopword = strings.Split(stopwords, "\n") - + if len(eachStopword) > 0 { return removeStopwords(content) } + + eachStopword = strings.Split(stopwords, "\n") + + return removeStopwords(content) } diff --git a/internal/util/string_utilities.go b/internal/util/string_utilities.go index 8d4b36c..7c3b26e 100644 --- a/internal/util/string_utilities.go +++ b/internal/util/string_utilities.go @@ -43,7 +43,7 @@ func AtoZOnly(s string) string { return result.String() } -var mime_types = []string{ +var mimeTypes = []string{ "text/html", "text/plain", "application/pdf", @@ -51,7 +51,7 @@ var mime_types = []string{ } func CleanMimeType(mime string) string { - for _, m := range mime_types { + for _, m := range mimeTypes { if strings.Contains(mime, m) { return m } @@ -75,9 +75,9 @@ func GetMimeType(path string) string { // https://www.iana.org/assignments/media-types/application/zstd "zstd": "application/zstd", } - for tag, mime_type := range m { + for tag, mimeType := range m { if strings.HasSuffix(path, tag) { - return mime_type + return mimeType } } @@ -85,7 +85,7 @@ func GetMimeType(path string) string { } func IsSearchableMimeType(mime string) bool { - for _, m := range mime_types { + for _, m := range mimeTypes { if strings.Contains(mime, m) { return true } @@ -106,10 +106,10 @@ func TrimSuffix(s, suffix string) string { if strings.HasSuffix(s, suffix) { s = s[:len(s)-len(suffix)] - return s - } else { return s } + + return s } func CanonicalizeURL(s string) (string, error) { From 546bdbd7e0d2c86a70e88d2ca8e1d1f0b935f671 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 10:55:57 -0500 Subject: [PATCH 30/39] Linting I lied. Forgot a directory. 
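
The rule being chased across these lint commits is Go's mixedCaps
naming, with initialisms (ID, JSON, URL) kept upper-case. A tiny sketch
of the shape the linter wants; the identifiers are invented, not from
this package:

    package sketch

    import "os"

    // Lint-clean: mixedCaps instead of snake_case, initialisms upper-cased.
    type pageRecord struct {
    	GuestbookID int64  // not GuestbookId
    	PageJSON    []byte // not PageJson or page_json
    }

    func databaseURL(envVar string) string { // not database_url(env_var string)
    	return os.Getenv(envVar)
    }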
--- pkg/vcap/vcap.go | 20 +++++++++++--------- pkg/vcap/vcap_test.go | 6 +++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pkg/vcap/vcap.go b/pkg/vcap/vcap.go index b580244..ff01a6d 100644 --- a/pkg/vcap/vcap.go +++ b/pkg/vcap/vcap.go @@ -9,6 +9,7 @@ import ( "github.com/tidwall/gjson" ) +//nolint:revive type VcapServices struct { Source string VCAP gjson.Result @@ -17,21 +18,22 @@ type VcapServices struct { Databases []Database } -func VcapServicesFromEnv(env_var string) VcapServices { +//nolint:revive +func VcapServicesFromEnv(envVar string) VcapServices { vcs := VcapServices{} - vcs.EnvStringToJson(env_var) + vcs.EnvStringToJSON(envVar) vcs.ParseBuckets() vcs.ParseDatabases() return vcs } -func (vcs *VcapServices) EnvStringToJson(env_var string) { +func (vcs *VcapServices) EnvStringToJSON(envVar string) { // Read it in from the VCAP_SERVICES env var, // which will provide a large JSON structure. vcs.Source = "env" - vcs.VCAP = gjson.Parse(os.Getenv(env_var)) - vcs.Raw = os.Getenv(env_var) + vcs.VCAP = gjson.Parse(os.Getenv(envVar)) + vcs.Raw = os.Getenv(envVar) } type Bucket struct { @@ -92,9 +94,9 @@ func (vcs *VcapServices) ParseDatabases() { vcs.Databases = databases } -func (vcs *VcapServices) GetBucketByName(bucket_name string) *Bucket { +func (vcs *VcapServices) GetBucketByName(bucketName string) *Bucket { for _, b := range vcs.Buckets { - if b.ServiceName == bucket_name { + if b.ServiceName == bucketName { return &b } } @@ -102,12 +104,12 @@ func (vcs *VcapServices) GetBucketByName(bucket_name string) *Bucket { return nil } -func (vcs *VcapServices) ToS3Config(service_name string) *aws.Config { +func (vcs *VcapServices) ToS3Config(serviceName string) *aws.Config { cfg := aws.Config{} creds := aws.Credentials{} for _, b := range vcs.Buckets { - if b.ServiceName == service_name { + if b.ServiceName == serviceName { cfg.Region = b.Region creds.AccessKeyID = b.AccessKeyID creds.SecretAccessKey = b.SecretAccessKey diff --git a/pkg/vcap/vcap_test.go b/pkg/vcap/vcap_test.go index 4c5648c..21fd1ab 100644 --- a/pkg/vcap/vcap_test.go +++ b/pkg/vcap/vcap_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/assert" ) -var test_vcap = `{ +var testVCAP = `{ "s3": [ { "label": "s3", @@ -113,7 +113,7 @@ var test_vcap = `{ }` func TestReadEnv(t *testing.T) { - os.Setenv("VCAP_SERVICES", test_vcap) + os.Setenv("VCAP_SERVICES", testVCAP) vcs := VcapServicesFromEnv("VCAP_SERVICES") @@ -123,7 +123,7 @@ func TestReadEnv(t *testing.T) { } func TestDatbases(t *testing.T) { - os.Setenv("VCAP_SERVICES", test_vcap) + os.Setenv("VCAP_SERVICES", testVCAP) vcs := VcapServicesFromEnv("VCAP_SERVICES") From 4dd3ce95da5ad0109e7659f73b8b536db3c2bdce Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 11:18:16 -0500 Subject: [PATCH 31/39] Fixing common jobs --- .circleci/config.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 50ba021..0123e83 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,6 +4,7 @@ version: 2.1 commands: common-install: + description: Performs a common install in each job steps: - run: name: "common installs" @@ -18,6 +19,7 @@ commands: tree \ wget install-go-utilities: + description: Installs the golang utilities parameters: version: default: 1.63.4 @@ -65,8 +67,8 @@ jobs: steps: # Checkout the code as the first step. 
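       # NOTE: a reusable command declared under `commands:` is invoked by
       # name as a bare step (`- common-install`), not wrapped in `run:`.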
- checkout - - run: common-install - - run: install-go-utilities + - common-install + - install-go-utilities - run: make build From 8954f15d18e2cf290d746d369ed507282400f160 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 11:25:20 -0500 Subject: [PATCH 32/39] Splitting linter out --- .circleci/config.yml | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0123e83..b142a6e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,11 +3,11 @@ version: 2.1 commands: - common-install: + apt-packages: description: Performs a common install in each job steps: - run: - name: "common installs" + name: "common apt packages" command: | sudo apt-get update sudo apt-get -y install build-essential \ @@ -18,17 +18,22 @@ commands: software-properties-common \ tree \ wget - install-go-utilities: - description: Installs the golang utilities + + install-linter: + description: Installs the golangci linter parameters: - version: + linter-version: default: 1.63.4 type: string steps: - run: name: "install golangci-lint" command: | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v<< parameters.version >> + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v<< parameters.linter-version >> + + install-go-utilities: + description: Installs the golang utilities + steps: - run: name: "install jsonnet" command: | @@ -50,8 +55,8 @@ jobs: - image: cimg/go:1.23.3 steps: - checkout - - common-install - - install-go-utilities + - apt-packages + - install-linter - run: name: "prep the build" command: | @@ -67,7 +72,7 @@ jobs: steps: # Checkout the code as the first step. 
- checkout - - common-install + - apt-packages - install-go-utilities - run: make build From 66e4513541a287d369627d5ecc40139c88e0b567 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 11:30:01 -0500 Subject: [PATCH 33/39] Forgot we need to prep --- .circleci/config.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b142a6e..4786d5a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,6 +49,12 @@ commands: tar xvzf sqlc_1.27.0_linux_amd64.tar.gz chmod 755 sqlc sudo mv sqlc /bin/sqlc + prep-the-build: + description: Generates files required for the build + steps: + - run: + name: "generate sqlc models" + command: make generate jobs: lint: docker: @@ -56,11 +62,9 @@ jobs: steps: - checkout - apt-packages + - install-go-utilities - install-linter - - run: - name: "prep the build" - command: | - make generate + - prep-the-build - run: name: "find the lint" command: golangci-lint run -v From 6226645f44df789317514f8dcfe38de7a7896201 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 11:34:45 -0500 Subject: [PATCH 34/39] Indentation --- .circleci/config.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4786d5a..41a385d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,12 +49,13 @@ commands: tar xvzf sqlc_1.27.0_linux_amd64.tar.gz chmod 755 sqlc sudo mv sqlc /bin/sqlc - prep-the-build: - description: Generates files required for the build - steps: - - run: - name: "generate sqlc models" - command: make generate + + prep-the-build: + description: Generates files required for the build + steps: + - run: + name: "generate sqlc models" + command: make generate jobs: lint: docker: From b547790538c3538c4c9ea5cdbe50dedbb872f725 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 11:37:13 -0500 Subject: [PATCH 35/39] Indenting Errors on indenting don't make it back to the VSCode client. 
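
For the record, the shape the `commands:` block should settle into,
abbreviated to one command (the indentation is the whole point here):

    commands:
      prep-the-build:
        description: Generates files required for the build
        steps:
          - run:
              name: "generate sqlc models"
              command: make generate

Each nesting level is two spaces; one level off and YAML quietly parses
it as a different structure, which CircleCI then rejects.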
--- .circleci/config.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 41a385d..26ddf28 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,6 @@ commands: software-properties-common \ tree \ wget - install-linter: description: Installs the golangci linter parameters: @@ -30,7 +29,6 @@ commands: name: "install golangci-lint" command: | curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v<< parameters.linter-version >> - install-go-utilities: description: Installs the golang utilities steps: @@ -49,13 +47,12 @@ commands: tar xvzf sqlc_1.27.0_linux_amd64.tar.gz chmod 755 sqlc sudo mv sqlc /bin/sqlc - prep-the-build: description: Generates files required for the build steps: - run: - name: "generate sqlc models" - command: make generate + name: "generate sqlc models" + command: make generate jobs: lint: docker: From 1eb4f1e90bc8f5d91d364e83413861a473c4a378 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 12:00:04 -0500 Subject: [PATCH 36/39] Fixing ATOI conversion --- cmd/fetch/work.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go index 658e999..af7842b 100644 --- a/cmd/fetch/work.go +++ b/cmd/fetch/work.go @@ -5,6 +5,7 @@ import ( "context" _ "embed" "fmt" + "math" "net/url" "regexp" "strconv" @@ -203,7 +204,9 @@ func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs]) } } - cl, err := strconv.Atoi(pageJSON["content-length"]) + var cl int32 + + parsed, err := strconv.Atoi(pageJSON["content-length"]) if err != nil { zap.L().Warn("could not convert length to int", zap.String("host", job.Args.Host), @@ -211,12 +214,12 @@ func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs]) } // Make sure we stay within int32 - if cl > MaxInt32 { - cl = MaxInt32 - } - - if cl < MinInt32 { - cl = MinInt32 + if parsed >= math.MinInt32 && parsed <= math.MaxInt32 { + cl = int32(parsed) + } else if parsed > math.MaxInt32 { + cl = math.MaxInt32 + } else { + cl = math.MinInt32 } scheme := JDB.GetScheme("https") From baaa02a75d607b2989e013f8824dcdd7c41c431d Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 12:04:44 -0500 Subject: [PATCH 37/39] Fixes (again) conversion Feel like there must be a better way. Even with the fixes, the linter is angry. This does address CodeQL, however. 
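One better way, maybe, for a later pass (untested sketch, not part of this patch; the helper name clampToInt32 is made up, and "math" is already imported in work.go as of PATCH 36):

    // clampToInt32 clamps n into the int32 range so the final
    // conversion can never overflow.
    func clampToInt32(n int) int32 {
        switch {
        case n > math.MaxInt32:
            return math.MaxInt32
        case n < math.MinInt32:
            return math.MinInt32
        default:
            return int32(n) // in range per the checks above
        }
    }

The worker would then shrink to cl := clampToInt32(parsed), and if gosec still objects to the conversion, the nolint lives in exactly one place.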
--- cmd/fetch/work.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/fetch/work.go b/cmd/fetch/work.go index af7842b..012036a 100644 --- a/cmd/fetch/work.go +++ b/cmd/fetch/work.go @@ -215,6 +215,7 @@ func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs]) // Make sure we stay within int32 if parsed >= math.MinInt32 && parsed <= math.MaxInt32 { + //nolint:gosec cl = int32(parsed) } else if parsed > math.MaxInt32 { cl = math.MaxInt32 @@ -243,11 +244,10 @@ func (w *FetchWorker) Work(_ context.Context, job *river.Job[common.FetchArgs]) guestbookID, err := JDB.WorkDBQueries.UpdateGuestbookFetch( context.Background(), work_db.UpdateGuestbookFetchParams{ - Scheme: scheme, - Domain64: d64, - Path: job.Args.Path, - //nolint:gosec - ContentLength: int32(cl), + Scheme: scheme, + Domain64: d64, + Path: job.Args.Path, + ContentLength: cl, //nolint:gosec ContentType: int32(contentType), LastModified: pgtype.Timestamp{ From aab7024ae06e8d2136668691851de7bc81ef3ba1 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 12:08:48 -0500 Subject: [PATCH 38/39] Updating deps Dependabot is angry. --- go.mod | 6 +++--- go.sum | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 6c60323..4201d53 100644 --- a/go.mod +++ b/go.mod @@ -82,11 +82,11 @@ require ( go.uber.org/goleak v1.3.0 // indirect go.uber.org/multierr v1.10.0 // indirect golang.org/x/arch v0.11.0 // indirect - golang.org/x/crypto v0.31.0 // indirect + golang.org/x/crypto v0.32.0 // indirect golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect - golang.org/x/net v0.31.0 // indirect + golang.org/x/net v0.34.0 // indirect golang.org/x/sync v0.10.0 // indirect - golang.org/x/sys v0.28.0 // indirect + golang.org/x/sys v0.29.0 // indirect golang.org/x/text v0.21.0 // indirect google.golang.org/protobuf v1.35.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index 2b4f8a4..d77634d 100644 --- a/go.sum +++ b/go.sum @@ -153,8 +153,6 @@ github.com/riverqueue/river/rivershared v0.13.0 h1:AqRP54GgtwoLIvV5eoZmOGOCZXL8C github.com/riverqueue/river/rivershared v0.13.0/go.mod h1:vzvawQpDy2Z1U5chkvh1NykzWNkRhc9RLcURsJRhlbE= github.com/riverqueue/river/rivertype v0.13.0 h1:PkT3h9tP0ZV3h0EGy2MiwEhgZqpRMN4fXfj27UKc9Q0= github.com/riverqueue/river/rivertype v0.13.0/go.mod h1:wVOhGBeay6+JcIi0pTFlF4KtUgHYFkhMYv8dpxU46W0= -github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ= -github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= @@ -227,6 +225,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= +golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod 
h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -240,6 +240,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -255,6 +257,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= From 65893c6f859473a56fd19917f18864e13dc467ae Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sat, 11 Jan 2025 12:14:03 -0500 Subject: [PATCH 39/39] Fixing 1.N.P complaint --- go.mod | 2 +- go.sum | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 4201d53..025a5b7 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/GSA-TTS/jemison -go 1.23 +go 1.23.0 require ( github.com/PuerkitoBio/goquery v1.10.0 diff --git a/go.sum b/go.sum index d77634d..061370d 100644 --- a/go.sum +++ b/go.sum @@ -223,8 +223,6 @@ golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= @@ -238,8 +236,6 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= -golang.org/x/net v0.31.0/go.mod 
h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -255,8 +251,6 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=