Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring of the autospamcli so it can be run as scheduled or manual cli #6866

Merged
merged 4 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions orcid-scheduler-web/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
</dependency>

<!-- AWS -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</dependency>

<!-- javax.xml -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.orcid.scheduler.autospam;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import javax.annotation.Resource;

import org.apache.commons.lang3.LocaleUtils;
import org.orcid.core.constants.EmailConstants;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.TemplateManager;
import org.orcid.core.manager.impl.OrcidUrlManager;
import org.orcid.core.manager.v3.RecordNameManager;

import org.orcid.core.manager.v3.read_only.EmailManagerReadOnly;

import org.orcid.core.utils.VerifyEmailUtils;
import org.orcid.jaxb.model.common.AvailableLocales;


import org.orcid.persistence.jpa.entities.ProfileEntity;


import org.orcid.utils.email.MailGunManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Component;

//TODO refactor the RecordEmailSender for orcid-web and move it under orcid-core package as it was before jersey upgrade
/**
 * Sends the "your record has been locked" notification for records that were
 * locked by the autospam process.
 */
@Component
public class AutospamEmailSender {

    private static final Logger LOGGER = LoggerFactory.getLogger(AutospamEmailSender.class);

    @Resource
    private ProfileEntityCacheManager profileEntityCacheManager;

    @Resource(name = "emailManagerReadOnlyV3")
    private EmailManagerReadOnly emailManager;

    @Resource
    private OrcidUrlManager orcidUrlManager;

    @Resource
    private TemplateManager templateManager;

    @Resource(name = "recordNameManagerV3")
    private RecordNameManager recordNameManager;

    @Resource
    private MailGunManager mailgunManager;

    @Resource
    private VerifyEmailUtils verifyEmailUtils;

    /**
     * Builds the plain-text and HTML "locked" messages in the owner's locale and
     * sends them to the primary email address of the given record.
     *
     * @param orcidToLock ORCID iD of the record that has been locked
     */
    public void sendOrcidLockedEmail(String orcidToLock) {
        final Locale recipientLocale = getUserLocaleFromProfileEntity(profileEntityCacheManager.retrieve(orcidToLock));

        final String subject = verifyEmailUtils.getSubject("email.subject.locked", recipientLocale);
        final String recipient = emailManager.findPrimaryEmail(orcidToLock).getEmail();
        final String friendlyName = recordNameManager.deriveEmailFriendlyName(orcidToLock);

        final Map<String, Object> model = buildTemplateParams(orcidToLock, friendlyName, subject);
        verifyEmailUtils.addMessageParams(model, recipientLocale);

        // Plain-text part first, then the HTML part, both rendered from the same model.
        final String textBody = templateManager.processTemplate("locked_orcid_email.ftl", model);
        final String htmlBody = templateManager.processTemplate("locked_orcid_email_html.ftl", model);

        mailgunManager.sendEmail(EmailConstants.LOCKED_NOTIFY_ORCID_ORG, recipient, subject, textBody, htmlBody);
    }

    /** Collects the values shared by the text and HTML templates. */
    private Map<String, Object> buildTemplateParams(String orcid, String friendlyName, String subject) {
        final Map<String, Object> model = new HashMap<>();
        model.put("emailName", friendlyName);
        model.put("orcid", orcid);
        model.put("baseUri", orcidUrlManager.getBaseUrl());
        model.put("baseUriHttp", orcidUrlManager.getBaseUriHttp());
        model.put("subject", subject);
        return model;
    }

    /**
     * Resolves the locale stored on the profile, falling back to {@code en} when
     * the profile has no locale or the stored value cannot be resolved.
     */
    private Locale getUserLocaleFromProfileEntity(ProfileEntity profile) {
        final String storedLocale = profile.getLocale();
        if (storedLocale != null) {
            try {
                return LocaleUtils.toLocale(AvailableLocales.valueOf(storedLocale).value());
            } catch (Exception ex) {
                LOGGER.error("Locale is not supported in the available locales, defaulting to en", ex);
            }
        }
        return LocaleUtils.toLocale("en");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
package org.orcid.scheduler.autospam.cli;

import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import org.orcid.core.admin.LockReason;
import org.orcid.core.manager.AffiliationsManager;
import org.orcid.core.manager.ProfileEntityCacheManager;
import org.orcid.core.manager.v3.NotificationManager;
import org.orcid.core.manager.v3.ProfileEntityManager;
import org.orcid.core.togglz.OrcidTogglzConfiguration;
import org.orcid.core.utils.OrcidStringUtils;
import org.orcid.jaxb.model.record_v2.Affiliation;
import org.orcid.persistence.dao.OrcidOauth2TokenDetailDao;
import org.orcid.persistence.jpa.entities.ProfileEntity;
import org.orcid.pojo.ajaxForm.PojoUtil;
import org.orcid.scheduler.autospam.AutospamEmailSender;
import org.orcid.utils.alerting.SlackManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.togglz.core.context.ContextClassLoaderFeatureManagerProvider;
import org.togglz.core.manager.FeatureManager;
import org.togglz.core.manager.FeatureManagerBuilder;

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.IOUtils;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

@Service
public class AutoLockSpamRecords {

private static final Logger LOG = LoggerFactory.getLogger(AutoLockSpamRecords.class);

@Resource
private SlackManager slackManager;

@Value("${org.orcid.core.orgs.load.slackChannel}")
private String slackChannel;

@Value("${org.orcid.core.orgs.load.slackUser}")
private String slackUser;

@Value("${org.orcid.message-listener.s3.accessKey}")
private String S3_ACCESS_KEY;

@Value("${org.orcid.message-listener.s3.secretKey}")
private String S3_SECRET_KEY;

@Value("${org.orcid.scheduler.aws.bucket:auto-spam-folder}")
private String SPAM_BUCKET;

@Value("${org.orcid.scheduler.aws.file:orcidspam.csv}")
private String ORCID_S3_SPAM_FILE;

@Value("${org.orcid.scheduler.autospam.enabled:false}")
private boolean AUTOSPAM_ENABLED;

@Value("${org.orcid.scheduler.autospam.file:orcidspam.csv}")
private String ORCID_SPAM_FILE;

@Value("${org.orcid.scheduler.autospam.daily.batch:20000}")
private int DAILY_BATCH_SIZE;

@Resource(name = "notificationManagerV3")
private NotificationManager notificationManager;

@Resource(name="orcidOauth2TokenDetailDao")
private OrcidOauth2TokenDetailDao orcidOauthDao;

private static int ONE_DAY = 86400000;

private ProfileEntityManager profileEntityManager;

@Resource
private ProfileEntityCacheManager profileEntityCacheManager;

@Resource
private AutospamEmailSender autospamEmailSender;

@Resource
private AffiliationsManager affiliationsManager;

//for running spam manually
public static void main(String[] args) {
AutoLockSpamRecords autolockSpamRecords = new AutoLockSpamRecords();
try {
autolockSpamRecords.init();
autolockSpamRecords.process(false);
} catch (Exception e) {
LOG.error("Exception when locking spam records", e);
System.err.println(e.getMessage());
} finally {
System.exit(0);
}

}

private void autolockRecords(List<String> toLock) {
String lastOrcidProcessed = "";
slackManager.sendAlert("Start time for batch: " + System.currentTimeMillis() + " the batch size is: " + toLock.size(), slackChannel, slackUser);
System.out.println("Start for batch: " + System.currentTimeMillis() + " to lock batch is: " + toLock.size());
int accountsLocked = 0;
for (String orcidId : toLock) {
try {
LOG.info("Processing orcidId: " + orcidId);
if(OrcidStringUtils.isValidOrcid(orcidId)) {
ProfileEntity profileEntity = profileEntityManager.findByOrcid(orcidId);
//only lock account was not reviewed and not already locked and not have an auth token

if(!profileEntity.isReviewed() && profileEntity.isAccountNonLocked() && !orcidOauthDao.hasToken(orcidId)) {
List<Affiliation> affiliations = affiliationsManager.getAffiliations(orcidId);
//Lock only if doesn't have any affiliations
if(affiliations == null || affiliations.size() < 1) {
boolean wasLocked = profileEntityManager.lockProfile(orcidId, LockReason.SPAM_AUTO.getLabel(), "ML Detected", "");
if(wasLocked) {
autospamEmailSender.sendOrcidLockedEmail(orcidId);
accountsLocked++;
}
}
}
lastOrcidProcessed = orcidId;
}
} catch (Exception e) {
LOG.error("Exception when locking spam record " + orcidId, e);
slackManager.sendAlert("Exception when locking spam record " + orcidId + ". LastOrcid processed is: " + lastOrcidProcessed , slackChannel, slackUser);
LOG.info("LastOrcid processed is: " + lastOrcidProcessed);
e.printStackTrace();
}
}
System.out.println("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
LOG.info("Spam locking for the batch processed on the day: " + System.currentTimeMillis() + " lastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked);
slackManager.sendAlert("Spam locking for the batch processed on the day ended. LastOrcid processed is: " + lastOrcidProcessed + " acccounts locked in DB: " + accountsLocked, slackChannel, slackUser);
}


public void scheduledProcess() throws InterruptedException, IOException {
if(AUTOSPAM_ENABLED) {
process(true);
}
}


public void process(boolean fromS3) throws InterruptedException, IOException {
List<String> allIDs = getAllSpamIDs(fromS3);
System.out.println("Found " + allIDs.size() + " profiles for autolocking. Starting the autolocking process");
slackManager.sendAlert("Found " + allIDs.size() + " profiles for autolocking.", slackChannel, slackUser);
LOG.info("Found {} profiles for autolocking", allIDs.size());

List<String> toLock = getNextIdSubset(allIDs);
while (toLock != null && !toLock.isEmpty()) {
autolockRecords(toLock);
LOG.info("Locked {} profiles, {} remaining to lock", new Object[] { toLock.size(), allIDs.size() });
LOG.info("Profiles autolocked");
Thread.sleep(ONE_DAY);
if(allIDs.size() - toLock.size() <=0) {
break;
}
else {
toLock = getNextIdSubset(allIDs);
}
}
}

@SuppressWarnings("resource")
private void init() {
ApplicationContext context = new ClassPathXmlApplicationContext("orcid-scheduler-context.xml");
profileEntityManager = (ProfileEntityManager) context.getBean("profileEntityManagerV3");
profileEntityCacheManager = (ProfileEntityCacheManager) context.getBean("profileEntityCacheManager");
notificationManager = (NotificationManager) context.getBean("notificationManagerV3");
autospamEmailSender = (AutospamEmailSender) context.getBean("autospamEmailSender");
orcidOauthDao = (OrcidOauth2TokenDetailDao) context.getBean("orcidOauth2TokenDetailDao");
affiliationsManager = (AffiliationsManager) context.getBean("affiliationsManager");
bootstrapTogglz(context.getBean(OrcidTogglzConfiguration.class));
}

private List<String> getNextIdSubset(List<String> ids) {
List<String> subset = new ArrayList<>();
for (int i = 0; i < DAILY_BATCH_SIZE && !ids.isEmpty(); i++) {
subset.add(ids.remove(0));
}
return subset;
}

private ArrayList<String> getAllSpamIDs(boolean fromS3) throws IOException {
Reader reader;
if(fromS3) {
BasicAWSCredentials creds = new BasicAWSCredentials(S3_SECRET_KEY, S3_ACCESS_KEY);
AmazonS3 s3 = AmazonS3Client.builder()
.withRegion(Regions.US_EAST_2)
.withCredentials(new AWSStaticCredentialsProvider(creds))
.build();

S3Object response = s3.getObject(new GetObjectRequest(SPAM_BUCKET, ORCID_S3_SPAM_FILE));
byte[] byteArray = IOUtils.toByteArray(response.getObjectContent());
reader = new InputStreamReader(new ByteArrayInputStream(byteArray));

} else {
reader = new FileReader(ORCID_SPAM_FILE);
}

Iterator<Map<String, String>> iterator = new CsvMapper().readerFor(Map.class)
.with(CsvSchema.emptySchema().withHeader().withColumnSeparator(',').withoutQuoteChar()).readValues(reader);
ArrayList<String> spamList = new ArrayList<String>();
Map<String, String> keyVals = null;
while (iterator.hasNext()) {
keyVals = iterator.next();
Object[] keys = keyVals.keySet().toArray();
spamList.add(keyVals.get(keys[0]));
}
return spamList;
}


private static void bootstrapTogglz(OrcidTogglzConfiguration togglzConfig) {
FeatureManager featureManager = new FeatureManagerBuilder().togglzConfig(togglzConfig).build();
ContextClassLoaderFeatureManagerProvider.bind(featureManager);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
<task:scheduled ref="publicProfileValidator" method="processValidationCycle" cron="${org.orcid.scheduler.api.profile.validation.cronConfig:0 */10 * * * *}"/>
<task:scheduled ref="orgLoadManager" method="loadOrgs" cron="${org.orcid.scheduler.web.orgImportsCronConfig}" />
<task:scheduled ref="issnLoadManager" method="loadIssn" cron="${org.orcid.scheduler.web.loadIssnCronConfig:0 0 0 * * FRI}"/>
<task:scheduled ref="autoLockSpamRecords" method="scheduledProcess" cron="${org.orcid.scheduler.autospam.process:0 0 0 * * TUE}"/>

</task:scheduled-tasks>

<task:scheduler id="scheduler" pool-size="${org.orcid.scheduler.tasks.pool_size:20}"/>
Expand Down Expand Up @@ -77,6 +79,8 @@

<bean id="emailMessageSender" class="org.orcid.scheduler.email.cli.manager.EmailMessageSenderImpl" />

<bean id="autospamEmailSender" class="org.orcid.scheduler.autospam.AutospamEmailSender" />

<bean id="ringgoldFtpsFileDownloader" class="org.orcid.scheduler.loader.io.FtpsFileDownloader" />

<bean id="fundrefOrgDataClient" class="org.orcid.scheduler.loader.io.OrgDataClient" />
Expand Down Expand Up @@ -129,5 +133,5 @@
</bean>

<bean id="issnLoadManager" class="org.orcid.scheduler.loader.manager.impl.IssnLoadManagerImpl"/>

<bean id="autoLockSpamRecords" class="org.orcid.scheduler.autospam.cli.AutoLockSpamRecords" />
</beans>
13 changes: 13 additions & 0 deletions properties/development.properties
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,16 @@ org.orcid.scheduler.web.loadIssnCronConfig=0 0 0 * * FRI

# Added the config to index all RORs not only the changed ones
org.orcid.core.orgs.ror.indexAllEnabled=false

#Autospam config
org.orcid.message-listener.s3.accessKey=X
org.orcid.message-listener.s3.secretKey=X
org.orcid.scheduler.aws.bucket=auto-spam-folder
org.orcid.scheduler.aws.file=orcidspam.csv
org.orcid.scheduler.autospam.enabled=false
org.orcid.scheduler.autospam.file=orcidspam.csv
org.orcid.scheduler.autospam.daily.batch=20000
org.orcid.scheduler.autospam.process=0 0 0 * * TUE