Skip to content

Commit

Permalink
Refactoring of the autospamcli so it can be run as scheduled or manua…
Browse files Browse the repository at this point in the history
…l cli
  • Loading branch information
Camelia-Orcid committed Aug 11, 2023
1 parent beb67fd commit 3d731c7
Show file tree
Hide file tree
Showing 4 changed files with 351 additions and 0 deletions.
6 changes: 6 additions & 0 deletions orcid-scheduler-web/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
</dependency>

<!-- AWS -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</dependency>

<!-- javax.xml -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.orcid.scheduler.autospam;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import javax.annotation.Resource;

import org.apache.commons.lang3.LocaleUtils;
import org.orcid.core.constants.EmailConstants;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.TemplateManager;
import org.orcid.core.manager.impl.OrcidUrlManager;
import org.orcid.core.manager.v3.RecordNameManager;

import org.orcid.core.manager.v3.read_only.EmailManagerReadOnly;

import org.orcid.core.utils.VerifyEmailUtils;
import org.orcid.jaxb.model.common.AvailableLocales;


import org.orcid.persistence.jpa.entities.ProfileEntity;


import org.orcid.utils.email.MailGunManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Component;

//TODO refactor the RecordEmailSender for orcid-web and move it under orcid-core package as it was before jersey upgrade
/**
 * Sends the "your ORCID record has been locked" notification e-mail to the
 * primary address of a record, rendered in the record owner's locale.
 */
@Component
public class AutospamEmailSender {

    private static final Logger LOGGER = LoggerFactory.getLogger(AutospamEmailSender.class);

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource(name = "emailManagerReadOnlyV3")
    private EmailManagerReadOnly emailManager;

    @Resource
    private OrcidUrlManager orcidUrlManager;

    @Resource
    private TemplateManager templateManager;

    @Resource(name = "recordNameManagerV3")
    private RecordNameManager recordNameManager;

    @Resource
    private MailGunManager mailgunManager;

    @Resource
    private VerifyEmailUtils verifyEmailUtils;

    /**
     * Builds the plain-text and HTML "locked" e-mail from the FreeMarker
     * templates and sends it to the primary e-mail address of the record.
     *
     * @param orcidToLock the ORCID iD of the record that was locked
     */
    public void sendOrcidLockedEmail(String orcidToLock) {
        Locale userLocale = getUserLocaleFromProfileEntity(profileEntityCacheManager.retrieve(orcidToLock));

        String subject = verifyEmailUtils.getSubject("email.subject.locked", userLocale);
        String recipient = emailManager.findPrimaryEmail(orcidToLock).getEmail();
        String friendlyName = recordNameManager.deriveEmailFriendlyName(orcidToLock);

        // Values made available to both templates.
        Map<String, Object> model = new HashMap<String, Object>();
        model.put("emailName", friendlyName);
        model.put("orcid", orcidToLock);
        model.put("baseUri", orcidUrlManager.getBaseUrl());
        model.put("baseUriHttp", orcidUrlManager.getBaseUriHttp());
        model.put("subject", subject);
        verifyEmailUtils.addMessageParams(model, userLocale);

        // Plain-text part first, then the HTML part.
        String textBody = templateManager.processTemplate("locked_orcid_email.ftl", model);
        String htmlBody = templateManager.processTemplate("locked_orcid_email_html.ftl", model);

        mailgunManager.sendEmail(EmailConstants.LOCKED_NOTIFY_ORCID_ORG, recipient, subject, textBody, htmlBody);
    }

    /**
     * Resolves the locale stored on the profile, falling back to "en" when
     * the profile has no locale or the stored value cannot be converted.
     */
    private Locale getUserLocaleFromProfileEntity(ProfileEntity profile) {
        String storedLocale = profile.getLocale();
        if (storedLocale != null) {
            try {
                return LocaleUtils.toLocale(AvailableLocales.valueOf(storedLocale).value());
            } catch (Exception ex) {
                LOGGER.error("Locale is not supported in the available locales, defaulting to en", ex);
            }
        }
        return LocaleUtils.toLocale("en");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
package org.orcid.scheduler.autospam.cli;

import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import org.orcid.core.admin.LockReason;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.AffiliationsManager;
import org.orcid.core.manager.v3.NotificationManager;
import org.orcid.core.manager.v3.ProfileEntityManager;
import org.orcid.core.togglz.OrcidTogglzConfiguration;
import org.orcid.core.utils.OrcidStringUtils;
import org.orcid.jaxb.model.record_v2.Affiliation;
import org.orcid.persistence.dao.OrcidOauth2TokenDetailDao;
import org.orcid.persistence.jpa.entities.ProfileEntity;
import org.orcid.pojo.ajaxForm.PojoUtil;
import org.orcid.scheduler.autospam.AutospamEmailSender;
import org.orcid.utils.alerting.SlackManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.togglz.core.context.ContextClassLoaderFeatureManagerProvider;
import org.togglz.core.manager.FeatureManager;
import org.togglz.core.manager.FeatureManagerBuilder;

import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.IOUtils;

@Service
public class AutoLockSpamRecords {

private static final Logger LOG = LoggerFactory.getLogger(AutoLockSpamRecords.class);

// Used to post batch progress and failure alerts.
@Resource
private SlackManager slackManager;

// Slack destination for the alerts; bound to the org-load property keys.
@Value("${org.orcid.core.orgs.load.slackChannel}")
private String slackChannel;

@Value("${org.orcid.core.orgs.load.slackUser}")
private String slackUser;

// NOTE(review): not read anywhere in this class; batching uses DAILY_BATCH_SIZE
// below, which is bound to a different property key. Confirm which key is intended.
@Value("${org.orcid.scheduler.autospam.daily.batch.size:20000}")
private int AUTOSPAM_DAILY_BATCH_SIZE;

// Credentials used to read the spam list from S3.
@Value("${org.orcid.message-listener.s3.accessKey}")
private String S3_ACCESS_KEY;

@Value("${org.orcid.message-listener.s3.secretKey}")
private String S3_SECRET_KEY;

// S3 bucket and object key of the spam CSV (scheduled runs).
@Value("${org.orcid.scheduler.aws.bucket:auto-spam-folder}")
private String SPAM_BUCKET;

@Value("${org.orcid.scheduler.aws.file:orcidspam.csv}")
private String ORCID_S3_SPAM_FILE;

// Scheduled runs do nothing unless this flag is true.
@Value("${org.orcid.scheduler.autospam.enabled:false}")
private boolean AUTOSPAM_ENABLED;

// Local path of the spam CSV (manual runs).
@Value("${org.orcid.scheduler.autospam.file:orcidspam.csv}")
private String ORCID_SPAM_FILE;

// Maximum number of IDs taken from the list per batch.
@Value("${org.orcid.scheduler.autospam.daily.batch:20000}")
private int DAILY_BATCH_SIZE;

@Resource(name = "notificationManagerV3")
private NotificationManager notificationManager;

@Resource
private OrcidOauth2TokenDetailDao orcidOauthDao;

// Pause between two batches, in milliseconds (24 hours).
private static final int ONE_DAY = 24 * 60 * 60 * 1000;

// Previously this field had no injection annotation, so it stayed null when the
// class ran as a Spring-managed bean (scheduled mode) and every lookup failed
// with a NullPointerException. The bean name matches the one used by init().
@Resource(name = "profileEntityManagerV3")
private ProfileEntityManager profileEntityManager;

@Resource
private ProfileEntityCacheManager profileEntityCacheManager;

@Resource
private AutospamEmailSender autospamEmailSender;

@Resource
private AffiliationsManager affiliationsManager;

/**
 * Command-line entry point for running the spam auto-lock manually.
 * Reads the spam list from the local file (not S3) and always terminates
 * the JVM when done.
 *
 * The process exits with status 0 only when the whole run completed;
 * any failure yields status 1 so that calling scripts can detect it.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    AutoLockSpamRecords autolockSpamRecords = new AutoLockSpamRecords();
    // Assume failure until the run has completed without throwing.
    int exitCode = 1;
    try {
        autolockSpamRecords.init();
        autolockSpamRecords.process(false);
        exitCode = 0;
    } catch (Exception e) {
        LOG.error("Exception when locking spam records", e);
        System.err.println(e.getMessage());
    } finally {
        System.exit(exitCode);
    }
}

/**
 * Processes one batch of suspected spam records and reports the outcome to
 * Slack, stdout and the log.
 *
 * A record is locked only when its ORCID iD is well-formed, it has not been
 * reviewed, it is not already locked, it has no OAuth token and it has no
 * affiliations. A failure on one record is reported and does not stop the
 * batch.
 *
 * @param toLock the ORCID iDs in this batch
 */
private void autolockRecords(List<String> toLock) {
    String lastOrcidProcessed = "";
    slackManager.sendAlert("Start time for batch: " + System.currentTimeMillis() + " the batch size is: " + toLock.size(), slackChannel, slackUser);
    System.out.println("Start for batch: " + System.currentTimeMillis() + " to lock batch is: " + toLock.size());
    int accountsLocked = 0;
    for (String orcidId : toLock) {
        try {
            LOG.info("Processing orcidId: " + orcidId);
            if (OrcidStringUtils.isValidOrcid(orcidId)) {
                ProfileEntity profileEntity = profileEntityManager.findByOrcid(orcidId);
                // Only lock accounts that were not reviewed, are not already locked and have no auth token.
                if (!profileEntity.isReviewed() && profileEntity.isAccountNonLocked() && !orcidOauthDao.hasToken(orcidId)) {
                    List<Affiliation> affiliations = affiliationsManager.getAffiliations(orcidId);
                    // Lock only if the record doesn't have any affiliations.
                    if (affiliations == null || affiliations.size() < 1) {
                        boolean wasLocked = profileEntityManager.lockProfile(orcidId, LockReason.SPAM_AUTO.getLabel(), "ML Detected", "");
                        if (wasLocked) {
                            // Count before notifying: the record is locked even if the e-mail fails,
                            // so a notification error must not make the reported total too low.
                            accountsLocked++;
                            autospamEmailSender.sendOrcidLockedEmail(orcidId);
                        }
                    }
                }
                lastOrcidProcessed = orcidId;
            }
        } catch (Exception e) {
            // The logger already records the stack trace; no separate printStackTrace is needed.
            LOG.error("Exception when locking spam record " + orcidId, e);
            slackManager.sendAlert("Exception when locking spam record " + orcidId + ". LastOrcid processed is: " + lastOrcidProcessed , slackChannel, slackUser);
            LOG.info("LastOrcid processed is: " + lastOrcidProcessed);
        }
    }
    System.out.println("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
    LOG.info("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
    slackManager.sendAlert("Spam locking for the batch processed on the day ended. LastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked, slackChannel, slackUser);
}


/**
 * Entry point for the scheduler: runs the auto-lock process against the
 * S3-hosted spam list, but only when the feature is enabled by configuration.
 *
 * @throws InterruptedException if the pause between batches is interrupted
 * @throws IOException if the spam list cannot be read
 */
public void scheduledProcess() throws InterruptedException, IOException {
    if (!AUTOSPAM_ENABLED) {
        return;
    }
    process(true);
}


/**
 * Loads the full list of suspected spam ORCID iDs and processes it in
 * batches of at most DAILY_BATCH_SIZE, pausing one day between batches.
 *
 * getNextIdSubset removes each batch from the list, so the list itself
 * tracks what remains. The loop runs until the list is empty, so a final
 * batch smaller than the batch size is still processed, and there is no
 * pause after the last batch.
 *
 * @param fromS3 true to read the list from S3, false to read the local file
 * @throws InterruptedException if the pause between batches is interrupted
 * @throws IOException if the spam list cannot be read
 */
public void process(boolean fromS3) throws InterruptedException, IOException {
    List<String> allIDs = getAllSpamIDs(fromS3);
    System.out.println("Found " + allIDs.size() + " profiles for autolocking. Starting the autolocking process");
    slackManager.sendAlert("Found " + allIDs.size() + " profiles for autolocking.", slackChannel, slackUser);
    LOG.info("Found {} profiles for autolocking", allIDs.size());

    while (!allIDs.isEmpty()) {
        List<String> toLock = getNextIdSubset(allIDs);
        if (toLock == null || toLock.isEmpty()) {
            // Nothing could be taken (e.g. a non-positive batch size); stop instead of looping forever.
            break;
        }
        autolockRecords(toLock);
        LOG.info("Locked {} profiles, {} remaining to lock", toLock.size(), allIDs.size());
        LOG.info("Profiles autolocked");
        // Only wait when there is another batch to process.
        if (!allIDs.isEmpty()) {
            Thread.sleep(ONE_DAY);
        }
    }
}

/**
 * Wires this hand-constructed instance for a manual (command-line) run.
 *
 * The instance is created with "new", so Spring does not inject anything on
 * its own. Besides the explicit bean lookups, the annotated fields (Slack
 * manager, Slack channel/user, batch size, file names, ...) are populated by
 * asking the context to autowire this object; without that step process()
 * fails on the first Slack alert and the batch size stays 0.
 *
 * NOTE(review): this relies on the context having annotation-driven
 * injection enabled - confirm against orcid-scheduler-context.xml.
 */
@SuppressWarnings("resource")
private void init() {
    ApplicationContext context = new ClassPathXmlApplicationContext("orcid-scheduler-context.xml");
    // Populate the @Resource / @Value fields of this unmanaged instance.
    context.getAutowireCapableBeanFactory().autowireBean(this);
    profileEntityManager = (ProfileEntityManager) context.getBean("profileEntityManagerV3");
    profileEntityCacheManager = (ProfileEntityCacheManager) context.getBean("profileEntityCacheManager");
    notificationManager = (NotificationManager) context.getBean("notificationManagerV3");
    autospamEmailSender = (AutospamEmailSender) context.getBean("autospamEmailSender");
    orcidOauthDao = (OrcidOauth2TokenDetailDao) context.getBean("orcidOauth2TokenDetailDao");
    affiliationsManager = (AffiliationsManager) context.getBean("affiliationsManager");
    bootstrapTogglz(context.getBean(OrcidTogglzConfiguration.class));
}

/**
 * Removes up to DAILY_BATCH_SIZE entries from the front of the given list
 * and returns them, in order, as a new list.
 *
 * The head of the list is copied and then cleared in one range operation
 * instead of removing element 0 repeatedly, which on an ArrayList shifts
 * the whole remainder on every removal.
 *
 * @param ids the pending IDs; the returned entries are removed from it
 * @return the next batch, empty when nothing is left or the batch size is not positive
 */
private List<String> getNextIdSubset(List<String> ids) {
    int batchSize = Math.min(Math.max(DAILY_BATCH_SIZE, 0), ids.size());
    List<String> head = ids.subList(0, batchSize);
    List<String> subset = new ArrayList<>(head);
    // Clearing the view removes the same range from the backing list.
    head.clear();
    return subset;
}

/**
 * Reads the list of suspected spam ORCID iDs from a comma-separated file
 * with a header row. The value in the first column of every data row is
 * collected; rows without any column are skipped.
 *
 * @param fromS3 true to download the file from S3, false to read the local file
 * @return the IDs in file order
 * @throws IOException if the file cannot be read
 */
private ArrayList<String> getAllSpamIDs(boolean fromS3) throws IOException {
    ArrayList<String> spamList = new ArrayList<String>();
    // The reader is closed once parsing has finished, including on failure.
    try (Reader reader = openSpamListReader(fromS3)) {
        Iterator<Map<String, String>> iterator = new CsvMapper().readerFor(Map.class)
                .with(CsvSchema.emptySchema().withHeader().withColumnSeparator(',').withoutQuoteChar()).readValues(reader);
        while (iterator.hasNext()) {
            Map<String, String> row = iterator.next();
            if (row.isEmpty()) {
                // No columns at all: nothing to take from this row.
                continue;
            }
            // First column of the row, whatever its header is called.
            spamList.add(row.values().iterator().next());
        }
    }
    return spamList;
}

/**
 * Opens the spam CSV either from S3 (fully downloaded into memory so the
 * S3 connection can be released right away) or from the local file.
 */
private Reader openSpamListReader(boolean fromS3) throws IOException {
    if (!fromS3) {
        return new FileReader(ORCID_SPAM_FILE);
    }
    // BasicAWSCredentials takes the access key first and the secret key second.
    BasicAWSCredentials creds = new BasicAWSCredentials(S3_ACCESS_KEY, S3_SECRET_KEY);
    AmazonS3 s3 = AmazonS3Client.builder()
            .withRegion(Regions.US_EAST_2)
            .withCredentials(new AWSStaticCredentialsProvider(creds))
            .build();
    try (S3Object response = s3.getObject(new GetObjectRequest(SPAM_BUCKET, ORCID_S3_SPAM_FILE))) {
        byte[] byteArray = IOUtils.toByteArray(response.getObjectContent());
        return new InputStreamReader(new ByteArrayInputStream(byteArray));
    }
}


/**
 * Builds a Togglz feature manager from the given configuration and binds it
 * through ContextClassLoaderFeatureManagerProvider.
 */
private static void bootstrapTogglz(OrcidTogglzConfiguration togglzConfig) {
    FeatureManagerBuilder builder = new FeatureManagerBuilder();
    ContextClassLoaderFeatureManagerProvider.bind(builder.togglzConfig(togglzConfig).build());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
<task:scheduled ref="publicProfileValidator" method="processValidationCycle" cron="${org.orcid.scheduler.api.profile.validation.cronConfig:0 */10 * * * *}"/>
<task:scheduled ref="orgLoadManager" method="loadOrgs" cron="${org.orcid.scheduler.web.orgImportsCronConfig}" />
<task:scheduled ref="issnLoadManager" method="loadIssn" cron="${org.orcid.scheduler.web.loadIssnCronConfig:0 0 0 * * FRI}"/>
<!-- Spam auto-lock: scheduledProcess is declared on AutoLockSpamRecords. This needs a bean with id
     "autoLockSpamRecords" in this context (TODO confirm it is registered, via component scan or an explicit bean). -->
<task:scheduled ref="autoLockSpamRecords" method="scheduledProcess" cron="${org.orcid.scheduler.autospam.process:0 0 0 * * TUE}"/>

</task:scheduled-tasks>

<task:scheduler id="scheduler" pool-size="${org.orcid.scheduler.tasks.pool_size:20}"/>
Expand Down Expand Up @@ -77,6 +79,8 @@

<bean id="emailMessageSender" class="org.orcid.scheduler.email.cli.manager.EmailMessageSenderImpl" />

<bean id="autospamEmailSender" class="org.orcid.scheduler.autospam.AutospamEmailSender" />

<bean id="ringgoldFtpsFileDownloader" class="org.orcid.scheduler.loader.io.FtpsFileDownloader" />

<bean id="fundrefOrgDataClient" class="org.orcid.scheduler.loader.io.OrgDataClient" />
Expand Down

0 comments on commit 3d731c7

Please sign in to comment.