Skip to content

Commit

Permalink
Adding status priority to cleanDuplicated (ex cleanDoubled)
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldechorgnat committed Aug 22, 2023
1 parent 161da6b commit 7f5035d
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 62 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { logger } from '../../../utils';
import { cleanAssignations } from './cleanAssignations';
import { cleanAssignedDocuments } from './cleanAssignedDocuments';
import { cleanDoubledDocuments } from './cleanDoubledDocuments';
import { cleanDuplicatedDocuments } from './cleanDuplicatedDocuments';
import { cleanFreeDocuments } from './cleanFreeDocuments';
import { cleanLoadedDocuments } from './cleanLoadedDocuments';
import { cleanTreatments } from './cleanTreatments';
Expand All @@ -11,7 +11,7 @@ export { cleanDocuments };
async function cleanDocuments() {
logger.log(`cleanDocuments`);

await cleanDoubledDocuments();
await cleanDuplicatedDocuments();

await cleanAssignedDocuments();

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { documentModule, idModule } from '@label/core';
import { buildDocumentRepository } from '../../../modules/document';
import { cleanDoubledDocuments } from './cleanDoubledDocuments';
import { cleanDuplicatedDocuments } from './cleanDuplicatedDocuments';

describe('cleanDoubledDocuments', () => {
it('should clean the doubledDocuments', async () => {
describe('cleanDuplicatedDocuments', () => {
it('should clean the DuplicatedDocuments', async () => {
const originalDocument = documentModule.generator.generate();
const otherDocument = documentModule.generator.generate();
const doubledDocument = documentModule.generator.generate({
Expand All @@ -17,7 +17,7 @@ describe('cleanDoubledDocuments', () => {
doubledDocument,
]);

await cleanDoubledDocuments();
await cleanDuplicatedDocuments();

const fetchedDocuments = await documentRepository.findAll();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { documentType } from '@label/core';
import {
buildDocumentRepository,
documentService,
} from '../../../modules/document';
import { logger } from '../../../utils';

export { cleanDuplicatedDocuments };

/**
 * Removes duplicated documents from the document repository.
 *
 * Two documents are duplicates when `areDocumentsIdentical` accepts them
 * (same source, same documentNumber, same text). Every document is loaded,
 * the list is ordered with `compareDocumentsByStatus`, and each document that
 * is identical to the one right before it in that ordering is deleted through
 * `documentService.deleteDocument`. The first document of each run of
 * identical documents is the one that is kept.
 */
async function cleanDuplicatedDocuments() {
  logger.log(`cleanDuplicatedDocuments`);

  const repository = buildDocumentRepository();
  const documents = await repository.findAll();

  // `sort` reorders `documents` in place and hands back that same array.
  const sortedDocuments = documents.sort(compareDocumentsByStatus);

  logger.log(`${documents.length} documents found. Searching for duplicates...`);

  // A document is scheduled for deletion when it matches its predecessor.
  const documentsToDelete: documentType[] = sortedDocuments.filter(
    (candidate, position) =>
      position > 0 &&
      areDocumentsIdentical(sortedDocuments[position - 1], candidate),
  );

  logger.log(
    `Found ${documentsToDelete.length} documents to delete. Deleting...`,
  );

  // Deletions are awaited one at a time, in list order.
  for (const duplicate of documentsToDelete) {
    await documentService.deleteDocument(duplicate._id);
  }
  logger.log('cleanDuplicatedDocuments done!');
}

/**
 * Tells whether two documents are duplicates of each other.
 *
 * Only `documentNumber`, `source` and `text` are compared (with strict
 * equality); every other field, including the status, is ignored.
 *
 * @returns `true` when the three compared fields are all equal.
 */
function areDocumentsIdentical(
  document1: documentType,
  document2: documentType,
) {
  if (document1.documentNumber !== document2.documentNumber) {
    return false;
  }
  if (document1.source !== document2.source) {
    return false;
  }
  return document1.text === document2.text;
}


/**
 * Sort comparator that groups potential duplicates together and, inside each
 * group, puts the document with the highest status priority first.
 *
 * Documents are ordered by ascending `source`, then ascending
 * `documentNumber`, then ascending `text`. When those three fields are equal,
 * the document whose status has the HIGHER value in the priority table below
 * comes first (descending priority).
 *
 * @param document1 first document to compare
 * @param document2 second document to compare
 * @returns a negative number when `document1` must come before `document2`,
 * a positive number when it must come after, and `0` when they are equivalent
 * for sorting purposes.
 */
function compareDocumentsByStatus(
  document1: documentType,
  document2: documentType,
): number {
  // The higher the value, the earlier the document is placed among identical ones.
  const statusToPriorities: Record<string, number> = {
    loaded: 0,
    nlpAnnotating: 1,
    free: 2,
    pending: 3,
    saved: 4,
    toBePublished: 5,
    done: 6,
    toBeConfirmed: 7,
    locked: 8,
    rejected: 9,
  };

  // comparing source
  if (document1.source < document2.source) {
    return -1;
  }
  if (document1.source > document2.source) {
    return 1;
  }

  // comparing documentNumber
  // (the original compared document1.documentNumber with itself, so a smaller
  // documentNumber was never detected and the ordering was inconsistent)
  if (document1.documentNumber < document2.documentNumber) {
    return -1;
  }
  if (document1.documentNumber > document2.documentNumber) {
    return 1;
  }

  // comparing text
  if (document1.text < document2.text) {
    return -1;
  }
  if (document1.text > document2.text) {
    return 1;
  }

  // comparing statuses: a status missing from the table yields `undefined`,
  // for which both comparisons below are false, so the documents tie.
  const priority1 = statusToPriorities[document1.status];
  const priority2 = statusToPriorities[document2.status];
  if (priority1 < priority2) {
    return 1; // low priority goes after
  }
  if (priority1 > priority2) {
    return -1; // high priority goes first
  }
  return 0;
}

0 comments on commit 7f5035d

Please sign in to comment.