Skip to content

Commit

Permalink
Better organization existence checks and organizations without activi…
Browse files Browse the repository at this point in the history
…ties now synced to opensearch (#1939)

Co-authored-by: Gasper Grom <gasper.grom@gmail.com>
  • Loading branch information
epipav and gaspergrom authored Dec 13, 2023
1 parent 2e67f83 commit 78e6ea1
Show file tree
Hide file tree
Showing 10 changed files with 438 additions and 77 deletions.
4 changes: 3 additions & 1 deletion backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts",
"script:enrich-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-organizations-synchronous.ts",
"script:generate-merge-suggestions": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/generate-merge-suggestions.ts",
"script:generate-merge-suggestions-synchronous": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/generate-merge-suggestions-synchronous.ts",
"script:merge-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/merge-organizations.ts",
"script:get-member-enrichment-data": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/get-member-enrichment-data.ts",
"script:get-organization-enrichment-data": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/get-organization-enrichment-data.ts",
"script:refresh-materialized-views": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/refresh-materialized-views.ts",
"script:unmerge-members": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/unmerge-members.ts"
"script:unmerge-members": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/unmerge-members.ts",
"script:merge-similar-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/merge-similar-organizations.ts"
},
"dependencies": {
"@aws-sdk/client-comprehend": "^3.159.0",
Expand Down
102 changes: 102 additions & 0 deletions backend/src/bin/scripts/generate-merge-suggestions-synchronous.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import commandLineArgs from 'command-line-args'
import commandLineUsage from 'command-line-usage'
import { getOpensearchClient } from '@crowd/opensearch'
import { OrganizationMergeSuggestionType } from '@crowd/types'
import * as fs from 'fs'
import path from 'path'
import { IRepositoryOptions } from '@/database/repositories/IRepositoryOptions'
import getUserContext from '@/database/utils/getUserContext'
import SegmentService from '@/services/segmentService'
import { OPENSEARCH_CONFIG } from '@/conf'
import OrganizationService from '@/services/organizationService'
import TenantService from '@/services/tenantService'

/* eslint-disable no-console */

// Banner text used as the first section of the usage guide; read from the
// banner.txt file that sits in the same directory as this script.
const bannerFile = path.join(__dirname, 'banner.txt')
const banner = fs.readFileSync(bannerFile, 'utf8')

/**
 * Command line option definitions for the merge suggestion generator.
 *
 * The same array is used both to parse the arguments and to render the
 * "Options" table of the usage guide.
 */
const options = [
  {
    name: 'tenant',
    alias: 't',
    type: String,
    description:
      'The unique ID of the tenant that you would like to generate merge suggestions for. Multiple IDs can be passed as a comma separated list.',
  },
  {
    name: 'plan',
    alias: 'p',
    type: String,
    description:
      'Comma separated plans - works with allTenants flag. Only generate suggestions for tenants with specific plans. Available plans: Growth, Scale, Enterprise',
  },
  {
    name: 'allTenants',
    alias: 'a',
    type: Boolean,
    defaultValue: false,
    // This script only generates suggestions; it does not merge anything.
    description: 'Set this flag to generate merge suggestions for all tenants.',
  },
  {
    name: 'help',
    alias: 'h',
    type: Boolean,
    description: 'Print this usage guide.',
  },
]
// Usage guide layout: the banner, a short summary of the script, and the
// table of supported options.
const bannerSection = {
  content: banner,
  raw: true,
}
const summarySection = {
  header: 'Generate merge suggestions for a tenant',
  content: 'Generate merge suggestions for a tenant',
}
const optionListSection = {
  header: 'Options',
  optionList: options,
}
const sections = [bannerSection, summarySection, optionListSection]

// Rendered help text, printed when the script is invoked without a target.
const usage = commandLineUsage(sections)
// Arguments parsed according to the option definitions.
const parameters = commandLineArgs(options)

/** Splits a comma separated CLI value into its trimmed, non-empty entries. */
function parseCsvFlag(value: string): string[] {
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
}

/*
 * Script entry point.
 *
 * Prints the usage guide when --help is given or when neither --tenant nor
 * --allTenants selects a target. Otherwise it resolves the list of tenant IDs
 * (optionally restricted to the plans given with --plan when --allTenants is
 * set) and generates BY_IDENTITY organization merge suggestions for each
 * tenant in turn. The process exits with code 0 on success and 1 on failure.
 */
if (parameters.help || (!parameters.tenant && !parameters.allTenants)) {
  console.log(usage)
} else {
  setImmediate(async () => {
    try {
      let tenantIds: string[]

      if (parameters.allTenants) {
        const { rows: tenants } = await TenantService._findAndCountAllForEveryUser({})

        // Parse the plan filter once instead of once per tenant.
        const allowedPlans: string[] | undefined = parameters.plan
          ? parseCsvFlag(parameters.plan)
          : undefined

        const selectedTenants = allowedPlans
          ? tenants.filter((tenant) => allowedPlans.includes(tenant.plan))
          : tenants

        tenantIds = selectedTenants.map((tenant) => tenant.id)
      } else {
        // The guard above guarantees that --tenant is set on this path.
        tenantIds = parseCsvFlag(parameters.tenant)
      }

      for (const tenantId of tenantIds) {
        const userContext: IRepositoryOptions = await getUserContext(tenantId)
        const segmentService = new SegmentService(userContext)
        const { rows: segments } = await segmentService.querySubprojects({})
        userContext.currentSegments = segments
        userContext.opensearch = getOpensearchClient(OPENSEARCH_CONFIG)

        console.log(`Generating organization merge suggestions for tenant ${tenantId}!`)

        const organizationService = new OrganizationService(userContext)
        await organizationService.generateMergeSuggestions(
          OrganizationMergeSuggestionType.BY_IDENTITY,
        )

        console.log(`Done generating organization merge suggestions for tenant ${tenantId}!`)
      }

      process.exit(0)
    } catch (err) {
      // Without this the rejection is unhandled and the exit status of the
      // script depends on the Node.js version running it.
      console.error('Error generating organization merge suggestions', err)
      process.exit(1)
    }
  })
}
177 changes: 177 additions & 0 deletions backend/src/bin/scripts/merge-similar-organizations.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import commandLineArgs from 'command-line-args'
import commandLineUsage from 'command-line-usage'
import { QueryTypes } from 'sequelize'
import * as fs from 'fs'
import path from 'path'
import SequelizeRepository from '../../database/repositories/sequelizeRepository'
import TenantService from '@/services/tenantService'
import OrganizationService from '@/services/organizationService'
import getUserContext from '@/database/utils/getUserContext'
import { IRepositoryOptions } from '@/database/repositories/IRepositoryOptions'
import {
MergeActionState,
MergeActionType,
MergeActionsRepository,
} from '@/database/repositories/mergeActionsRepository'

/* eslint-disable no-console */

// Banner text used as the first section of the usage guide; read from the
// banner.txt file that sits in the same directory as this script.
const bannerFile = path.join(__dirname, 'banner.txt')
const banner = fs.readFileSync(bannerFile, 'utf8')

/**
 * Command line option definitions for the similar-organization merge script.
 *
 * The same array is used both to parse the arguments and to render the
 * "Options" table of the usage guide.
 *
 * The String-typed options intentionally have no default: an omitted flag is
 * simply absent, and the script falls back to its own defaults at the point of
 * use. (A boolean `false` default on a String option is a type mismatch.)
 */
const options = [
  {
    name: 'tenant',
    alias: 't',
    type: String,
    description:
      'The unique ID of the tenant. Multiple IDs can be passed as a comma separated list.',
  },
  {
    name: 'allTenants',
    alias: 'a',
    type: Boolean,
    defaultValue: false,
    description: 'Set this flag to merge similar organizations for all tenants.',
  },
  {
    name: 'similarityThreshold',
    alias: 's',
    type: String,
    description:
      'Similarity threshold of organization merge suggestions. Suggestions lower than this value will not be merged. Defaults to 0.95',
  },
  {
    name: 'hardLimit',
    alias: 'l',
    type: String,
    description: `Hard limit for # of organizations that'll be merged. Mostly a flag for testing purposes.`,
  },
  {
    name: 'help',
    alias: 'h',
    type: Boolean,
    description: 'Print this usage guide.',
  },
]
// Usage guide layout: the banner, a short summary of the script, and the
// table of supported options.
const bannerSection = {
  content: banner,
  raw: true,
}
const summarySection = {
  header: 'Merge organizations with similarity higher than given threshold.',
  content: 'Merge organizations with similarity higher than given threshold.',
}
const optionListSection = {
  header: 'Options',
  optionList: options,
}
const sections = [bannerSection, summarySection, optionListSection]

// Rendered help text, printed when the script is invoked without a target.
const usage = commandLineUsage(sections)
// Arguments parsed according to the option definitions.
const parameters = commandLineArgs(options)

/**
 * Converts an optional numeric CLI flag to a number.
 *
 * @param raw value produced by the argument parser
 * @param flagName flag name, used only for the error message
 * @returns the parsed number, or undefined when the flag was not provided
 * @throws Error when a value was provided but is not numeric
 */
function parseNumberFlag(raw: unknown, flagName: string): number | undefined {
  // `false` is accepted as "not provided" because the option definitions may
  // use it as the default for these flags.
  if (raw === undefined || raw === null || raw === false || raw === '') {
    return undefined
  }

  const parsed = Number(raw)
  if (Number.isNaN(parsed)) {
    throw new Error(`Invalid value "${String(raw)}" for --${flagName}: expected a number.`)
  }
  return parsed
}

/*
 * Script entry point.
 *
 * Prints the usage guide when --help is given or when neither --tenant nor
 * --allTenants selects a target. Otherwise, for every selected tenant, it
 * pages through the organization merge suggestions whose similarity is above
 * the threshold and whose display names match case-insensitively, records a
 * merge action for each pair and merges it synchronously. A failing merge is
 * logged, flagged with the ERROR state and skipped. The process exits with
 * code 0 when finished (or when the hard limit is reached) and 1 on an
 * unexpected failure.
 */
if (parameters.help || (!parameters.tenant && !parameters.allTenants)) {
  console.log(usage)
} else {
  setImmediate(async () => {
    try {
      const similarityThreshold =
        parseNumberFlag(parameters.similarityThreshold, 'similarityThreshold') ?? 0.95
      const hardLimit = parseNumberFlag(parameters.hardLimit, 'hardLimit')

      // Named repoOptions so it does not shadow the module-level CLI `options`.
      const repoOptions = await SequelizeRepository.getDefaultIRepositoryOptions()

      let tenantIds: string[]

      if (parameters.allTenants) {
        const { rows: tenants } = await TenantService._findAndCountAllForEveryUser({})
        tenantIds = tenants.map((t) => t.id)
      } else {
        // The guard above guarantees that --tenant is set on this path.
        tenantIds = parameters.tenant.split(',')
      }

      // Counted across all tenants: reaching the limit terminates the whole
      // script, so the limit is a global one.
      let mergeAttempts = 0

      for (const tenantId of tenantIds) {
        const userContext: IRepositoryOptions = await getUserContext(tenantId)
        const orgService = new OrganizationService(userContext)

        // Pairs already tried for this tenant. The query is normally re-run
        // from offset 0 (NOTE(review): this assumes successfully merged pairs
        // drop out of the result set - confirm). Pairs that stay in the result
        // set, e.g. because their merge failed, must not be retried forever.
        const attemptedPairs = new Set<string>()
        let offset = 0
        let hasMoreData = true

        while (hasMoreData) {
          // find organization merge suggestions of tenant
          const result = await repoOptions.database.sequelize.query(
            `
            SELECT
              "ot"."organizationId",
              "ot"."toMergeId",
              "ot".similarity,
              "ot".status,
              "org1"."displayName" AS "orgDisplayName",
              "org2"."displayName" AS "mergeDisplayName"
            FROM
              "organizationToMerge" "ot"
            LEFT JOIN
              "organizations" "org1"
            ON
              "ot"."organizationId" = "org1"."id"
            LEFT JOIN
              "organizations" "org2"
            ON
              "ot"."toMergeId" = "org2"."id"
            WHERE
              ("ot".similarity > :similarityThreshold) AND
              ("org1"."displayName" ilike "org2"."displayName") AND
              ("org1"."tenantId" = :tenantId) AND
              ("org2"."tenantId" = :tenantId)
            ORDER BY
              "ot".similarity DESC,
              "ot"."organizationId",
              "ot"."toMergeId"
            LIMIT 100
            OFFSET :offset;`,
            {
              replacements: {
                similarityThreshold,
                offset,
                tenantId,
              },
              type: QueryTypes.SELECT,
            },
          )

          if (result.length === 0) {
            hasMoreData = false
            continue
          }

          const pendingRows = result.filter(
            (row) => !attemptedPairs.has(`${row.organizationId}:${row.toMergeId}`),
          )

          if (pendingRows.length === 0) {
            // Every pair on this page was already tried: step past it instead
            // of fetching the same page again.
            offset += result.length
            continue
          }

          for (const row of pendingRows) {
            // Checked before the merge so that at most `hardLimit` merges run.
            if (hardLimit !== undefined && mergeAttempts >= hardLimit) {
              console.log(`Hard limit of ${parameters.hardLimit} reached. Exiting...`)
              process.exit(0)
            }

            attemptedPairs.add(`${row.organizationId}:${row.toMergeId}`)
            mergeAttempts += 1

            try {
              console.log(
                `Merging [${row.organizationId}] "${row.orgDisplayName}" into ${row.toMergeId} "${row.mergeDisplayName}"...`,
              )
              await MergeActionsRepository.add(
                MergeActionType.ORG,
                row.organizationId,
                row.toMergeId,
                userContext,
              )
              await orgService.mergeSync(row.organizationId, row.toMergeId)
            } catch (err) {
              console.log('Error merging organizations - continuing with the rest', err)
              await MergeActionsRepository.setState(
                MergeActionType.ORG,
                row.organizationId,
                row.toMergeId,
                MergeActionState.ERROR,
                userContext,
              )
            }
          }

          // Merges may have changed the result set, so start again from the
          // top; pairs that were already tried are filtered out above.
          offset = 0
        }
      }

      process.exit(0)
    } catch (err) {
      // Without this the rejection is unhandled and the exit status of the
      // script depends on the Node.js version running it.
      console.error('Error merging similar organizations', err)
      process.exit(1)
    }
  })
}
Loading

0 comments on commit 78e6ea1

Please sign in to comment.