Skip to content

Commit

Permalink
Merge pull request #6866 from ORCID/changes_to_autospamlock
Browse files Browse the repository at this point in the history
Refactoring of the autospamcli so it can be run as scheduled or manual cli
  • Loading branch information
amontenegro committed Aug 11, 2023
2 parents 60088e4 + 93adc3d commit 8e8b7fc
Show file tree
Hide file tree
Showing 5 changed files with 362 additions and 1 deletion.
6 changes: 6 additions & 0 deletions orcid-scheduler-web/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
</dependency>

<!-- AWS -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</dependency>

<!-- javax.xml -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.orcid.scheduler.autospam;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import javax.annotation.Resource;

import org.apache.commons.lang3.LocaleUtils;
import org.orcid.core.constants.EmailConstants;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.TemplateManager;
import org.orcid.core.manager.impl.OrcidUrlManager;
import org.orcid.core.manager.v3.RecordNameManager;

import org.orcid.core.manager.v3.read_only.EmailManagerReadOnly;

import org.orcid.core.utils.VerifyEmailUtils;
import org.orcid.jaxb.model.common.AvailableLocales;


import org.orcid.persistence.jpa.entities.ProfileEntity;


import org.orcid.utils.email.MailGunManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Component;

//TODO refactor the RecordEmailSender for orcid-web and move it under orcid-core package as it was before jersey upgrade
/**
 * Builds and sends the "record locked" email for a given ORCID iD.
 *
 * <p>The message subject and template parameters are resolved for the locale stored on the
 * profile; both a plain-text and an HTML body are rendered from templates and handed to
 * {@link MailGunManager} for delivery.
 */
@Component
public class AutospamEmailSender {

    private static final Logger LOGGER = LoggerFactory.getLogger(AutospamEmailSender.class);

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource(name = "emailManagerReadOnlyV3")
    private EmailManagerReadOnly emailManager;

    @Resource
    private OrcidUrlManager orcidUrlManager;

    @Resource
    private TemplateManager templateManager;

    @Resource(name = "recordNameManagerV3")
    private RecordNameManager recordNameManager;

    @Resource
    private MailGunManager mailgunManager;

    @Resource
    private VerifyEmailUtils verifyEmailUtils;

    /**
     * Sends the locked-record email to the primary email address of the given record.
     *
     * @param orcidToLock the ORCID iD whose owner is notified
     */
    public void sendOrcidLockedEmail(String orcidToLock) {
        Locale locale = getUserLocaleFromProfileEntity(profileEntityCacheManager.retrieve(orcidToLock));

        String subject = verifyEmailUtils.getSubject("email.subject.locked", locale);
        String recipient = emailManager.findPrimaryEmail(orcidToLock).getEmail();

        Map<String, Object> model = buildTemplateModel(orcidToLock, subject);
        verifyEmailUtils.addMessageParams(model, locale);

        // The same model feeds both renderings: plain text first, then HTML.
        String plainTextBody = templateManager.processTemplate("locked_orcid_email.ftl", model);
        String htmlBody = templateManager.processTemplate("locked_orcid_email_html.ftl", model);

        mailgunManager.sendEmail(EmailConstants.LOCKED_NOTIFY_ORCID_ORG, recipient, subject, plainTextBody, htmlBody);
    }

    /**
     * Collects the template parameters that do not depend on the locale.
     *
     * @param orcid the ORCID iD the email is about
     * @param subject the already-resolved subject line
     * @return a new mutable map holding the base template parameters
     */
    private Map<String, Object> buildTemplateModel(String orcid, String subject) {
        Map<String, Object> model = new HashMap<>();
        model.put("emailName", recordNameManager.deriveEmailFriendlyName(orcid));
        model.put("orcid", orcid);
        model.put("baseUri", orcidUrlManager.getBaseUrl());
        model.put("baseUriHttp", orcidUrlManager.getBaseUriHttp());
        model.put("subject", subject);
        return model;
    }

    /**
     * Resolves the locale stored on the profile, falling back to {@code en} when the profile
     * has no locale or the stored value cannot be mapped through {@link AvailableLocales}.
     *
     * @param profile the profile whose locale is read
     * @return the resolved locale, or the {@code en} locale as a default
     */
    private Locale getUserLocaleFromProfileEntity(ProfileEntity profile) {
        String storedLocale = profile.getLocale();
        if (storedLocale == null) {
            return LocaleUtils.toLocale("en");
        }
        try {
            return LocaleUtils.toLocale(AvailableLocales.valueOf(storedLocale).value());
        } catch (Exception ex) {
            LOGGER.error("Locale is not supported in the available locales, defaulting to en", ex);
            return LocaleUtils.toLocale("en");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
package org.orcid.scheduler.autospam.cli;

import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import org.orcid.core.admin.LockReason;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.AffiliationsManager;
import org.orcid.core.manager.v3.NotificationManager;
import org.orcid.core.manager.v3.ProfileEntityManager;
import org.orcid.core.togglz.OrcidTogglzConfiguration;
import org.orcid.core.utils.OrcidStringUtils;
import org.orcid.jaxb.model.record_v2.Affiliation;
import org.orcid.persistence.dao.OrcidOauth2TokenDetailDao;
import org.orcid.persistence.jpa.entities.ProfileEntity;
import org.orcid.pojo.ajaxForm.PojoUtil;
import org.orcid.scheduler.autospam.AutospamEmailSender;
import org.orcid.utils.alerting.SlackManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.togglz.core.context.ContextClassLoaderFeatureManagerProvider;
import org.togglz.core.manager.FeatureManager;
import org.togglz.core.manager.FeatureManagerBuilder;

import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.IOUtils;

/**
 * Locks records listed in a spam CSV file, in batches, and notifies each locked record by email.
 *
 * <p>The list of ORCID iDs is read either from an S3 object (scheduled run, see
 * {@link #scheduledProcess()}) or from a local file (manual run, see {@link #main(String[])}).
 * A record is only locked when it is a valid ORCID iD, has not been reviewed, is not already
 * locked, has no OAuth token and has no affiliations.
 */
@Service
public class AutoLockSpamRecords {

    private static final Logger LOG = LoggerFactory.getLogger(AutoLockSpamRecords.class);

    /** Pause between two consecutive batches, in milliseconds (24 hours). */
    private static final int ONE_DAY = 86400000;

    @Resource
    private SlackManager slackManager;

    @Value("${org.orcid.core.orgs.load.slackChannel}")
    private String slackChannel;

    @Value("${org.orcid.core.orgs.load.slackUser}")
    private String slackUser;

    @Value("${org.orcid.message-listener.s3.accessKey}")
    private String S3_ACCESS_KEY;

    @Value("${org.orcid.message-listener.s3.secretKey}")
    private String S3_SECRET_KEY;

    @Value("${org.orcid.scheduler.aws.bucket:auto-spam-folder}")
    private String SPAM_BUCKET;

    @Value("${org.orcid.scheduler.aws.file:orcidspam.csv}")
    private String ORCID_S3_SPAM_FILE;

    @Value("${org.orcid.scheduler.autospam.enabled:false}")
    private boolean AUTOSPAM_ENABLED;

    @Value("${org.orcid.scheduler.autospam.file:orcidspam.csv}")
    private String ORCID_SPAM_FILE;

    @Value("${org.orcid.scheduler.autospam.daily.batch:20000}")
    private int DAILY_BATCH_SIZE;

    @Resource(name = "notificationManagerV3")
    private NotificationManager notificationManager;

    @Resource(name = "orcidOauth2TokenDetailDao")
    private OrcidOauth2TokenDetailDao orcidOauthDao;

    // Must be injected like every other collaborator; without the annotation the field stays
    // null in the Spring-managed (scheduled) bean and every record fails with an NPE.
    @Resource(name = "profileEntityManagerV3")
    private ProfileEntityManager profileEntityManager;

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource
    private AutospamEmailSender autospamEmailSender;

    @Resource
    private AffiliationsManager affiliationsManager;

    /**
     * Manual entry point: boots the scheduler Spring context, then processes the local spam file.
     * The JVM exits with status 0 on success and 1 when processing failed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        int exitCode = 0;
        AutoLockSpamRecords autolockSpamRecords = new AutoLockSpamRecords();
        try {
            autolockSpamRecords.init();
            autolockSpamRecords.process(false);
        } catch (Exception e) {
            LOG.error("Exception when locking spam records", e);
            System.err.println(e.getMessage());
            exitCode = 1;
        } finally {
            System.exit(exitCode);
        }
    }

    /**
     * Entry point for the scheduler. Does nothing unless
     * {@code org.orcid.scheduler.autospam.enabled} is true; otherwise processes the S3 spam file.
     *
     * @throws InterruptedException if the thread is interrupted while waiting between batches
     * @throws IOException if the spam file cannot be read
     */
    public void scheduledProcess() throws InterruptedException, IOException {
        if (AUTOSPAM_ENABLED) {
            process(true);
        }
    }

    /**
     * Loads every ORCID iD from the spam file and locks them batch by batch, waiting
     * {@link #ONE_DAY} milliseconds between batches.
     *
     * @param fromS3 {@code true} to read the list from S3, {@code false} to read the local file
     * @throws InterruptedException if the thread is interrupted while waiting between batches
     * @throws IOException if the spam file cannot be read
     */
    public void process(boolean fromS3) throws InterruptedException, IOException {
        List<String> allIDs = getAllSpamIDs(fromS3);
        System.out.println("Found " + allIDs.size() + " profiles for autolocking. Starting the autolocking process");
        slackManager.sendAlert("Found " + allIDs.size() + " profiles for autolocking.", slackChannel, slackUser);
        LOG.info("Found {} profiles for autolocking", allIDs.size());

        List<String> toLock = getNextIdSubset(allIDs);
        while (!toLock.isEmpty()) {
            autolockRecords(toLock);
            LOG.info("Locked {} profiles, {} remaining to lock", toLock.size(), allIDs.size());
            LOG.info("Profiles autolocked");
            // allIDs only holds what is still pending (each batch is removed from it), so an
            // empty list is the one reliable "done" signal. Checking it before sleeping also
            // avoids blocking the thread for a day when there is nothing left to do.
            if (allIDs.isEmpty()) {
                break;
            }
            Thread.sleep(ONE_DAY);
            toLock = getNextIdSubset(allIDs);
        }
    }

    /**
     * Processes one batch: locks every eligible record, emails its owner and reports progress
     * to the log, standard output and Slack. A failure on one record is reported and does not
     * stop the rest of the batch.
     *
     * @param toLock the ORCID iDs of the batch
     */
    private void autolockRecords(List<String> toLock) {
        String lastOrcidProcessed = "";
        slackManager.sendAlert("Start time for batch: " + System.currentTimeMillis() + " the batch size is: " + toLock.size(), slackChannel, slackUser);
        System.out.println("Start for batch: " + System.currentTimeMillis() + " to lock batch is: " + toLock.size());
        int accountsLocked = 0;
        for (String orcidId : toLock) {
            try {
                LOG.info("Processing orcidId: {}", orcidId);
                if (!OrcidStringUtils.isValidOrcid(orcidId)) {
                    continue;
                }
                if (lockIfEligible(orcidId)) {
                    // Count before emailing so a mail failure does not under-report the
                    // number of records that were actually locked in the DB.
                    accountsLocked++;
                    autospamEmailSender.sendOrcidLockedEmail(orcidId);
                }
                lastOrcidProcessed = orcidId;
            } catch (Exception e) {
                LOG.error("Exception when locking spam record " + orcidId, e);
                slackManager.sendAlert("Exception when locking spam record " + orcidId + ". LastOrcid processed is: " + lastOrcidProcessed, slackChannel, slackUser);
                LOG.info("LastOrcid processed is: {}", lastOrcidProcessed);
            }
        }
        String summary = "Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked;
        System.out.println(summary);
        LOG.info(summary);
        slackManager.sendAlert("Spam locking for the batch processed on the day ended. LastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked, slackChannel, slackUser);
    }

    /**
     * Locks the record when it exists, was not reviewed, is not already locked, has no OAuth
     * token and has no affiliations.
     *
     * @param orcidId a syntactically valid ORCID iD
     * @return {@code true} if the record was locked by this call
     */
    private boolean lockIfEligible(String orcidId) {
        ProfileEntity profileEntity = profileEntityManager.findByOrcid(orcidId);
        if (profileEntity == null) {
            LOG.warn("No profile found for orcidId: {}", orcidId);
            return false;
        }
        if (profileEntity.isReviewed() || !profileEntity.isAccountNonLocked() || orcidOauthDao.hasToken(orcidId)) {
            return false;
        }
        List<Affiliation> affiliations = affiliationsManager.getAffiliations(orcidId);
        if (affiliations != null && !affiliations.isEmpty()) {
            return false;
        }
        return profileEntityManager.lockProfile(orcidId, LockReason.SPAM_AUTO.getLabel(), "ML Detected", "");
    }

    /**
     * Prepares a manually constructed instance (CLI run): loads the scheduler context and lets
     * Spring inject every {@code @Resource} and {@code @Value} field of this object, so the CLI
     * instance is configured exactly like the scheduled bean.
     */
    @SuppressWarnings("resource")
    private void init() {
        ApplicationContext context = new ClassPathXmlApplicationContext("orcid-scheduler-context.xml");
        context.getAutowireCapableBeanFactory().autowireBean(this);
        bootstrapTogglz(context.getBean(OrcidTogglzConfiguration.class));
    }

    /**
     * Removes up to {@code DAILY_BATCH_SIZE} ids from the head of {@code ids} and returns them.
     *
     * @param ids mutable list of pending ids; the returned ids are removed from it
     * @return the next batch, empty when nothing is pending or the batch size is not positive
     */
    private List<String> getNextIdSubset(List<String> ids) {
        int count = Math.min(Math.max(DAILY_BATCH_SIZE, 0), ids.size());
        // Copy the head and clear it through the view: one bulk removal instead of
        // shifting the whole list once per element.
        List<String> head = ids.subList(0, count);
        List<String> subset = new ArrayList<>(head);
        head.clear();
        return subset;
    }

    /**
     * Reads the spam CSV (comma separated, header row, no quote character) and returns, for
     * each row, the value stored under the first key of the parsed row (presumably the first
     * CSV column - confirm against the file format).
     *
     * @param fromS3 {@code true} to read the S3 object, {@code false} to read the local file
     * @return a mutable list of the ids found, in file order
     * @throws IOException if the source cannot be read
     */
    private List<String> getAllSpamIDs(boolean fromS3) throws IOException {
        List<String> spamList = new ArrayList<>();
        try (Reader reader = openSpamReader(fromS3)) {
            Iterator<Map<String, String>> iterator = new CsvMapper().readerFor(Map.class)
                    .with(CsvSchema.emptySchema().withHeader().withColumnSeparator(',').withoutQuoteChar()).readValues(reader);
            while (iterator.hasNext()) {
                Map<String, String> row = iterator.next();
                if (row == null || row.isEmpty()) {
                    continue;
                }
                String orcidId = row.values().iterator().next();
                if (orcidId != null) {
                    spamList.add(orcidId);
                }
            }
        }
        return spamList;
    }

    /**
     * Opens the spam list. The S3 object is downloaded fully into memory so that the S3
     * connection can be released before parsing starts.
     *
     * @param fromS3 {@code true} to read the S3 object, {@code false} to read the local file
     * @return a reader over the CSV content; the caller must close it
     * @throws IOException if the source cannot be opened or downloaded
     */
    private Reader openSpamReader(boolean fromS3) throws IOException {
        if (!fromS3) {
            return new FileReader(ORCID_SPAM_FILE);
        }
        // BasicAWSCredentials takes (accessKey, secretKey), in that order.
        BasicAWSCredentials creds = new BasicAWSCredentials(S3_ACCESS_KEY, S3_SECRET_KEY);
        AmazonS3 s3 = AmazonS3Client.builder()
                .withRegion(Regions.US_EAST_2)
                .withCredentials(new AWSStaticCredentialsProvider(creds))
                .build();
        try (S3Object response = s3.getObject(new GetObjectRequest(SPAM_BUCKET, ORCID_S3_SPAM_FILE))) {
            byte[] byteArray = IOUtils.toByteArray(response.getObjectContent());
            return new InputStreamReader(new ByteArrayInputStream(byteArray));
        } finally {
            s3.shutdown();
        }
    }

    /** Builds a Togglz feature manager from the given configuration and binds it via the context class loader provider. */
    private static void bootstrapTogglz(OrcidTogglzConfiguration togglzConfig) {
        FeatureManager featureManager = new FeatureManagerBuilder().togglzConfig(togglzConfig).build();
        ContextClassLoaderFeatureManagerProvider.bind(featureManager);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
<task:scheduled ref="publicProfileValidator" method="processValidationCycle" cron="${org.orcid.scheduler.api.profile.validation.cronConfig:0 */10 * * * *}"/>
<task:scheduled ref="orgLoadManager" method="loadOrgs" cron="${org.orcid.scheduler.web.orgImportsCronConfig}" />
<task:scheduled ref="issnLoadManager" method="loadIssn" cron="${org.orcid.scheduler.web.loadIssnCronConfig:0 0 0 * * FRI}"/>
<task:scheduled ref="autoLockSpamRecords" method="scheduledProcess" cron="${org.orcid.scheduler.autospam.process:0 0 0 * * TUE}"/>

</task:scheduled-tasks>

<task:scheduler id="scheduler" pool-size="${org.orcid.scheduler.tasks.pool_size:20}"/>
Expand Down Expand Up @@ -77,6 +79,8 @@

<bean id="emailMessageSender" class="org.orcid.scheduler.email.cli.manager.EmailMessageSenderImpl" />

<bean id="autospamEmailSender" class="org.orcid.scheduler.autospam.AutospamEmailSender" />

<bean id="ringgoldFtpsFileDownloader" class="org.orcid.scheduler.loader.io.FtpsFileDownloader" />

<bean id="fundrefOrgDataClient" class="org.orcid.scheduler.loader.io.OrgDataClient" />
Expand Down Expand Up @@ -129,5 +133,5 @@
</bean>

<bean id="issnLoadManager" class="org.orcid.scheduler.loader.manager.impl.IssnLoadManagerImpl"/>

<bean id="autoLockSpamRecords" class="org.orcid.scheduler.autospam.cli.AutoLockSpamRecords" />
</beans>
13 changes: 13 additions & 0 deletions properties/development.properties
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,16 @@ org.orcid.scheduler.web.loadIssnCronConfig=0 0 0 * * FRI

# Added the config to index all RORs not only the changed ones
org.orcid.core.orgs.ror.indexAllEnabled=false

#Autospam config
org.orcid.message-listener.s3.accessKey=X
org.orcid.message-listener.s3.secretKey=X
org.orcid.scheduler.aws.bucket=auto-spam-folder
org.orcid.scheduler.aws.file=orcidspam.csv
org.orcid.scheduler.autospam.enabled=false
org.orcid.scheduler.autospam.file=orcidspam.csv
org.orcid.scheduler.autospam.daily.batch=20000
org.orcid.scheduler.autospam.process=0 0 0 * * TUE



0 comments on commit 8e8b7fc

Please sign in to comment.