package org.orcid.scheduler.autospam;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import javax.annotation.Resource;

import org.apache.commons.lang3.LocaleUtils;
import org.orcid.core.constants.EmailConstants;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.TemplateManager;
import org.orcid.core.manager.impl.OrcidUrlManager;
import org.orcid.core.manager.v3.RecordNameManager;
import org.orcid.core.manager.v3.read_only.EmailManagerReadOnly;
import org.orcid.core.utils.VerifyEmailUtils;
import org.orcid.jaxb.model.common.AvailableLocales;
import org.orcid.persistence.jpa.entities.ProfileEntity;
import org.orcid.utils.email.MailGunManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

/**
 * Sends the "your record has been locked" e-mail for the auto-spam job.
 *
 * <p>The message is rendered from the {@code locked_orcid_email.ftl} (plain text) and
 * {@code locked_orcid_email_html.ftl} (HTML) templates and handed to {@link MailGunManager}.
 */
//TODO refactor the RecordEmailSender for orcid-web and move it under orcid-core package as it was before jersey upgrade
@Component
public class AutospamEmailSender {

    private static final Logger LOGGER = LoggerFactory.getLogger(AutospamEmailSender.class);

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource(name = "emailManagerReadOnlyV3")
    private EmailManagerReadOnly emailManager;

    @Resource
    private OrcidUrlManager orcidUrlManager;

    @Resource
    private TemplateManager templateManager;

    @Resource(name = "recordNameManagerV3")
    private RecordNameManager recordNameManager;

    @Resource
    private MailGunManager mailgunManager;

    @Resource
    private VerifyEmailUtils verifyEmailUtils;

    /**
     * Renders and sends the locked-record notification to the primary e-mail address of the
     * given record.
     *
     * @param orcidToLock ORCID iD of the record that was locked
     */
    public void sendOrcidLockedEmail(String orcidToLock) {
        // Collaborators are consulted in the same order as before: profile, subject, address, name.
        Locale recipientLocale = getUserLocaleFromProfileEntity(profileEntityCacheManager.retrieve(orcidToLock));
        String subjectLine = verifyEmailUtils.getSubject("email.subject.locked", recipientLocale);
        String recipient = emailManager.findPrimaryEmail(orcidToLock).getEmail();
        String friendlyName = recordNameManager.deriveEmailFriendlyName(orcidToLock);

        Map<String, Object> model = new HashMap<String, Object>();
        model.put("emailName", friendlyName);
        model.put("orcid", orcidToLock);
        model.put("baseUri", orcidUrlManager.getBaseUrl());
        model.put("baseUriHttp", orcidUrlManager.getBaseUriHttp());
        model.put("subject", subjectLine);
        verifyEmailUtils.addMessageParams(model, recipientLocale);

        // Plain-text part first, then the HTML part, both from the same model.
        String textPart = templateManager.processTemplate("locked_orcid_email.ftl", model);
        String htmlPart = templateManager.processTemplate("locked_orcid_email_html.ftl", model);

        mailgunManager.sendEmail(EmailConstants.LOCKED_NOTIFY_ORCID_ORG, recipient, subjectLine, textPart, htmlPart);
    }

    /**
     * Resolves the locale stored on the profile, falling back to {@code en} when the profile has
     * no locale or the stored value cannot be mapped through {@link AvailableLocales}.
     */
    private Locale getUserLocaleFromProfileEntity(ProfileEntity profile) {
        String storedLocale = profile.getLocale();
        if (storedLocale == null) {
            return LocaleUtils.toLocale("en");
        }
        try {
            return LocaleUtils.toLocale(AvailableLocales.valueOf(storedLocale).value());
        } catch (Exception ex) {
            LOGGER.error("Locale is not supported in the available locales, defaulting to en", ex);
        }
        return LocaleUtils.toLocale("en");
    }
}
package org.orcid.scheduler.autospam.cli;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import org.orcid.core.admin.LockReason;
import org.orcid.core.manager.AffiliationsManager;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.v3.NotificationManager;
import org.orcid.core.manager.v3.ProfileEntityManager;
import org.orcid.core.togglz.OrcidTogglzConfiguration;
import org.orcid.core.utils.OrcidStringUtils;
import org.orcid.jaxb.model.record_v2.Affiliation;
import org.orcid.persistence.dao.OrcidOauth2TokenDetailDao;
import org.orcid.persistence.jpa.entities.ProfileEntity;
import org.orcid.pojo.ajaxForm.PojoUtil;
import org.orcid.scheduler.autospam.AutospamEmailSender;
import org.orcid.utils.alerting.SlackManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.togglz.core.context.ContextClassLoaderFeatureManagerProvider;
import org.togglz.core.manager.FeatureManager;
import org.togglz.core.manager.FeatureManagerBuilder;

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.IOUtils;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

/**
 * Locks records listed in a CSV of suspected spam ORCID iDs.
 *
 * <p>The job can run in two ways:
 * <ul>
 * <li>scheduled, through {@link #scheduledProcess()}, reading the CSV from S3 when
 * {@code org.orcid.scheduler.autospam.enabled} is {@code true};</li>
 * <li>manually, through {@link #main(String[])}, reading the CSV from the local file named by
 * {@code org.orcid.scheduler.autospam.file}.</li>
 * </ul>
 *
 * <p>The first column of each CSV row is taken as the ORCID iD. Ids are processed in batches of
 * {@code org.orcid.scheduler.autospam.daily.batch} with a one-day pause between batches. A record
 * is locked only when it is not reviewed, not already locked, has no OAuth token and has no
 * affiliations. The S3 path uses the AWS SDK ({@code com.amazonaws:aws-java-sdk-s3}).
 */
@Service
public class AutoLockSpamRecords {

    private static final Logger LOG = LoggerFactory.getLogger(AutoLockSpamRecords.class);

    /** Pause between two consecutive batches, in milliseconds. */
    private static final long ONE_DAY_MILLIS = 86_400_000L;

    @Resource
    private SlackManager slackManager;

    @Value("${org.orcid.core.orgs.load.slackChannel}")
    private String slackChannel;

    @Value("${org.orcid.core.orgs.load.slackUser}")
    private String slackUser;

    @Value("${org.orcid.message-listener.s3.accessKey}")
    private String s3AccessKey;

    @Value("${org.orcid.message-listener.s3.secretKey}")
    private String s3SecretKey;

    @Value("${org.orcid.scheduler.aws.bucket:auto-spam-folder}")
    private String spamBucket;

    @Value("${org.orcid.scheduler.aws.file:orcidspam.csv}")
    private String s3SpamFile;

    @Value("${org.orcid.scheduler.autospam.enabled:false}")
    private boolean autospamEnabled;

    @Value("${org.orcid.scheduler.autospam.file:orcidspam.csv}")
    private String localSpamFile;

    @Value("${org.orcid.scheduler.autospam.daily.batch:20000}")
    private int dailyBatchSize;

    @Resource(name = "notificationManagerV3")
    private NotificationManager notificationManager;

    @Resource(name = "orcidOauth2TokenDetailDao")
    private OrcidOauth2TokenDetailDao orcidOauthDao;

    // Previously had no injection annotation, so it stayed null when this class ran as a
    // Spring-managed bean (scheduled mode). The bean name is the one init() used to look up.
    @Resource(name = "profileEntityManagerV3")
    private ProfileEntityManager profileEntityManager;

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource
    private AutospamEmailSender autospamEmailSender;

    @Resource
    private AffiliationsManager affiliationsManager;

    /**
     * Manual entry point: loads the scheduler context, reads the local CSV and locks the listed
     * records. Exits with status 0 on success and 1 when the run failed.
     */
    public static void main(String[] args) {
        AutoLockSpamRecords autolockSpamRecords = new AutoLockSpamRecords();
        int exitCode = 0;
        try {
            autolockSpamRecords.init();
            autolockSpamRecords.process(false);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the flag so anything running during shutdown can still observe it.
                Thread.currentThread().interrupt();
            }
            LOG.error("Exception when locking spam records", e);
            System.err.println(e.getMessage());
            exitCode = 1;
        } finally {
            System.exit(exitCode);
        }
    }

    /**
     * Attempts to lock every record in the batch and reports the outcome to the log, stdout and
     * Slack. A failure on one record is reported and does not stop the batch.
     *
     * @param toLock ORCID iDs of the current batch
     */
    private void autolockRecords(List<String> toLock) {
        String lastOrcidProcessed = "";
        slackManager.sendAlert("Start time for batch: " + System.currentTimeMillis() + " the batch size is: " + toLock.size(), slackChannel, slackUser);
        System.out.println("Start for batch: " + System.currentTimeMillis() + " to lock batch is: " + toLock.size());
        int accountsLocked = 0;
        for (String orcidId : toLock) {
            try {
                LOG.info("Processing orcidId: {}", orcidId);
                if (OrcidStringUtils.isValidOrcid(orcidId)) {
                    if (isEligibleForLock(orcidId)
                            && profileEntityManager.lockProfile(orcidId, LockReason.SPAM_AUTO.getLabel(), "ML Detected", "")) {
                        // Count the lock before e-mailing: a mail failure must not hide a lock
                        // that already happened in the DB.
                        accountsLocked++;
                        autospamEmailSender.sendOrcidLockedEmail(orcidId);
                    }
                    lastOrcidProcessed = orcidId;
                }
            } catch (Exception e) {
                LOG.error("Exception when locking spam record " + orcidId, e);
                slackManager.sendAlert("Exception when locking spam record " + orcidId + ". LastOrcid processed is: " + lastOrcidProcessed, slackChannel, slackUser);
                LOG.info("LastOrcid processed is: " + lastOrcidProcessed);
            }
        }
        System.out.println("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
        LOG.info("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
        slackManager.sendAlert("Spam locking for the batch processed on the day ended. LastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked, slackChannel, slackUser);
    }

    /**
     * Tells whether a record may be auto-locked: it must not be reviewed, must not already be
     * locked, must have no OAuth token and must have no affiliations.
     */
    private boolean isEligibleForLock(String orcidId) {
        ProfileEntity profileEntity = profileEntityManager.findByOrcid(orcidId);
        if (profileEntity == null) {
            // NOTE(review): defensive guard; assumes findByOrcid can return null for an
            // unknown iD. Confirm against ProfileEntityManager.
            LOG.warn("No profile found for orcidId: {}", orcidId);
            return false;
        }
        if (profileEntity.isReviewed() || !profileEntity.isAccountNonLocked() || orcidOauthDao.hasToken(orcidId)) {
            return false;
        }
        List<Affiliation> affiliations = affiliationsManager.getAffiliations(orcidId);
        return affiliations == null || affiliations.isEmpty();
    }

    /**
     * Scheduled entry point: runs the S3-backed process when the job is enabled by configuration.
     */
    public void scheduledProcess() throws InterruptedException, IOException {
        if (autospamEnabled) {
            process(true);
        }
    }

    /**
     * Reads all spam ids and locks them batch by batch, pausing one day between batches.
     *
     * @param fromS3 {@code true} to read the CSV from S3, {@code false} to read the local file
     * @throws InterruptedException if the thread is interrupted while pausing between batches
     * @throws IOException if the CSV cannot be read
     */
    public void process(boolean fromS3) throws InterruptedException, IOException {
        List<String> allIDs = getAllSpamIDs(fromS3);
        System.out.println("Found " + allIDs.size() + " profiles for autolocking. Starting the autolocking process");
        slackManager.sendAlert("Found " + allIDs.size() + " profiles for autolocking.", slackChannel, slackUser);
        LOG.info("Found {} profiles for autolocking", allIDs.size());

        List<String> toLock = getNextIdSubset(allIDs);
        while (!toLock.isEmpty()) {
            autolockRecords(toLock);
            LOG.info("Locked {} profiles, {} remaining to lock", toLock.size(), allIDs.size());
            LOG.info("Profiles autolocked");
            // Stop as soon as nothing is left. The old check compared the remaining count with
            // the previous batch size, which dropped the final batch and still slept a day first.
            if (allIDs.isEmpty()) {
                break;
            }
            Thread.sleep(ONE_DAY_MILLIS);
            toLock = getNextIdSubset(allIDs);
        }
    }

    /**
     * Populates this manually created instance from the scheduler context so the {@code @Resource}
     * and {@code @Value} fields hold the same values they would have on the Spring-managed bean.
     */
    @SuppressWarnings("resource")
    private void init() {
        ApplicationContext context = new ClassPathXmlApplicationContext("orcid-scheduler-context.xml");
        // The instance was created with new, so run annotation-driven injection on it explicitly.
        context.getAutowireCapableBeanFactory().autowireBean(this);
        if (slackManager == null || profileEntityManager == null) {
            throw new IllegalStateException(
                    "Annotation-driven injection did not populate AutoLockSpamRecords; check that orcid-scheduler-context.xml enables annotation config");
        }
        bootstrapTogglz(context.getBean(OrcidTogglzConfiguration.class));
    }

    /**
     * Removes and returns the next batch (at most the configured batch size) from the head of
     * {@code ids}. Returns an empty list when nothing is left or the batch size is not positive.
     */
    private List<String> getNextIdSubset(List<String> ids) {
        int count = Math.max(0, Math.min(dailyBatchSize, ids.size()));
        List<String> head = ids.subList(0, count);
        List<String> subset = new ArrayList<>(head);
        // One range removal instead of repeated remove(0), which shifted the whole list each time.
        head.clear();
        return subset;
    }

    /**
     * Reads the CSV (header row, comma separated, no quote char) and returns the value of the
     * first column of every row. The returned list is mutable because batching consumes it.
     */
    private List<String> getAllSpamIDs(boolean fromS3) throws IOException {
        try (Reader reader = fromS3 ? openS3SpamFile() : openLocalSpamFile()) {
            Iterator<Map<String, String>> rows = new CsvMapper().readerFor(Map.class)
                    .with(CsvSchema.emptySchema().withHeader().withColumnSeparator(',').withoutQuoteChar())
                    .readValues(reader);
            List<String> spamList = new ArrayList<>();
            while (rows.hasNext()) {
                Map<String, String> row = rows.next();
                if (!row.isEmpty()) {
                    spamList.add(row.values().iterator().next());
                }
            }
            return spamList;
        }
    }

    /** Opens the local spam CSV as UTF-8. */
    private Reader openLocalSpamFile() throws IOException {
        return new InputStreamReader(new FileInputStream(localSpamFile), StandardCharsets.UTF_8);
    }

    /** Downloads the spam CSV from S3 into memory and returns a UTF-8 reader over it. */
    private Reader openS3SpamFile() throws IOException {
        // BasicAWSCredentials takes (accessKey, secretKey); the two were previously swapped.
        BasicAWSCredentials creds = new BasicAWSCredentials(s3AccessKey, s3SecretKey);
        AmazonS3 s3 = AmazonS3Client.builder()
                .withRegion(Regions.US_EAST_2)
                .withCredentials(new AWSStaticCredentialsProvider(creds))
                .build();
        try (S3Object response = s3.getObject(new GetObjectRequest(spamBucket, s3SpamFile))) {
            byte[] content = IOUtils.toByteArray(response.getObjectContent());
            return new InputStreamReader(new ByteArrayInputStream(content), StandardCharsets.UTF_8);
        } finally {
            s3.shutdown();
        }
    }

    /** Builds a Togglz feature manager from the given configuration and binds it for this class loader. */
    private static void bootstrapTogglz(OrcidTogglzConfiguration togglzConfig) {
        FeatureManager featureManager = new FeatureManagerBuilder().togglzConfig(togglzConfig).build();
        ContextClassLoaderFeatureManagerProvider.bind(featureManager);
    }

}
b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java @@ -89,7 +89,7 @@ public class AutoLockSpamRecords { @Resource(name = "notificationManagerV3") private NotificationManager notificationManager; - @Resource + @Resource(name="orcidOauth2TokenDetailDao") private OrcidOauth2TokenDetailDao orcidOauthDao; private static int ONE_DAY = 86400000; diff --git a/orcid-scheduler-web/src/main/resources/orcid-scheduler-context.xml b/orcid-scheduler-web/src/main/resources/orcid-scheduler-context.xml index 4a2bf9d2228..c6032d1aead 100644 --- a/orcid-scheduler-web/src/main/resources/orcid-scheduler-context.xml +++ b/orcid-scheduler-web/src/main/resources/orcid-scheduler-context.xml @@ -35,7 +35,7 @@ - + @@ -133,5 +133,5 @@ - + diff --git a/properties/development.properties b/properties/development.properties index ea463773b63..40cc352617c 100644 --- a/properties/development.properties +++ b/properties/development.properties @@ -236,3 +236,16 @@ org.orcid.scheduler.web.loadIssnCronConfig=0 0 0 * * FRI # Added the config to index all RORs not only the changed ones org.orcid.core.orgs.ror.indexAllEnabled=false + +#Autospam config +org.orcid.scheduler.autospam.daily.batch.size=20000 +org.orcid.message-listener.s3.accessKey=X +org.orcid.message-listener.s3.secretKey=X +org.orcid.scheduler.aws.bucket=auto-spam-folder +org.orcid.scheduler.aws.file=orcidspam.csv +org.orcid.scheduler.autospam.enabled=false +org.orcid.scheduler.autospam.file=orcidspam.csv +org.orcid.scheduler.autospam.daily.batch:20000 + + + From be13398dad70ab37d07132d55b0c94fee3116ec3 Mon Sep 17 00:00:00 2001 From: "c.dumitru@orcid.org" Date: Fri, 11 Aug 2023 17:41:06 +0100 Subject: [PATCH 3/4] removed duplicate config --- .../org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java | 3 --- properties/development.properties | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git 
a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java index ae063f3cb23..bc82144f45c 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/autospam/cli/AutoLockSpamRecords.java @@ -62,9 +62,6 @@ public class AutoLockSpamRecords { @Value("${org.orcid.core.orgs.load.slackUser}") private String slackUser; - @Value("${org.orcid.scheduler.autospam.daily.batch.size:20000}") - private int AUTOSPAM_DAILY_BATCH_SIZE; - @Value("${org.orcid.message-listener.s3.accessKey}") private String S3_ACCESS_KEY; diff --git a/properties/development.properties b/properties/development.properties index 40cc352617c..ce9e43aca70 100644 --- a/properties/development.properties +++ b/properties/development.properties @@ -238,14 +238,13 @@ org.orcid.scheduler.web.loadIssnCronConfig=0 0 0 * * FRI org.orcid.core.orgs.ror.indexAllEnabled=false #Autospam config -org.orcid.scheduler.autospam.daily.batch.size=20000 org.orcid.message-listener.s3.accessKey=X org.orcid.message-listener.s3.secretKey=X org.orcid.scheduler.aws.bucket=auto-spam-folder org.orcid.scheduler.aws.file=orcidspam.csv org.orcid.scheduler.autospam.enabled=false org.orcid.scheduler.autospam.file=orcidspam.csv -org.orcid.scheduler.autospam.daily.batch:20000 +org.orcid.scheduler.autospam.daily.batch=20000 From 93adc3de3b1d088ad64430797f503bb6fe14b879 Mon Sep 17 00:00:00 2001 From: "c.dumitru@orcid.org" Date: Fri, 11 Aug 2023 17:52:26 +0100 Subject: [PATCH 4/4] Added the config for automated job --- properties/development.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/properties/development.properties b/properties/development.properties index ce9e43aca70..2953770e5ad 100644 --- a/properties/development.properties +++ b/properties/development.properties @@ -245,6 +245,7 @@ 
org.orcid.scheduler.aws.file=orcidspam.csv org.orcid.scheduler.autospam.enabled=false org.orcid.scheduler.autospam.file=orcidspam.csv org.orcid.scheduler.autospam.daily.batch=20000 +org.orcid.scheduler.autospam.process=0 0 0 * * TUE