Skip to content

Commit

Permalink
feat: move raw twitter data and entity logic into answer-engine.ts
Browse files Browse the repository at this point in the history
  • Loading branch information
transitive-bullshit committed Feb 25, 2024
1 parent 75fd1f5 commit 458db5b
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 118 deletions.
96 changes: 56 additions & 40 deletions src/answer-engine.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import { Msg } from '@dexaai/dexter'
import { Msg, stringifyForModel } from '@dexaai/dexter'
import pMap from 'p-map'

import * as config from '../src/config.js'
import * as db from './db.js'
import type * as types from './types.js'
import { BotError } from './bot-error.js'
import {
type EntitiesMap,
convertTweetToEntitiesMap,
type EntityMap,
convertTweetToEntityMap,
mergeEntityMaps
} from './entities.js'
import {
getPrunedTweet,
getPrunedTwitterUser,
sanitizeTweetText,
stripUserMentions
} from './twitter-utils.js'
Expand Down Expand Up @@ -165,10 +166,7 @@ export abstract class AnswerEngine {
...Msg.user(tweet.text, {
name: userIdToUsernameMap[tweet.author_id!]
}),

entities: {
tweetIds: [tweet.id]
}
tweetId: tweet.id
})
)

Expand All @@ -179,23 +177,15 @@ export abstract class AnswerEngine {
...Msg.user(message.prompt, {
name: userIdToUsernameMap[message.promptUserId]
}),

entities: {
tweetIds: [message.promptTweetId]
}
tweetId: message.promptTweetId
},

message.response && message !== leafMessage
? {
...Msg.assistant(message.response!, {
name: userIdToUsernameMap[ctx.twitterBotUserId]
}),

entities: {
tweetIds: message.responseTweetId
? [message.responseTweetId!]
: []
}
tweetId: message.responseTweetId!
}
: null
].filter(Boolean)
Expand All @@ -214,43 +204,34 @@ export abstract class AnswerEngine {
.reverse()
}

const chatMessages = answerEngineMessages.map(
({ tweetId, ...message }) => message
)

// Resolve all entity maps for the tweets and messages in the thread and then
// condense them into a single, normalized entity map
let entityMap: EntitiesMap = {}

for (const answerEngineMessage of answerEngineMessages) {
if (!answerEngineMessage.entities?.tweetIds) continue

for (const tweetId of answerEngineMessage.entities.tweetIds) {
if (entityMap.tweets?.[tweetId]) continue

const tweet = await db.tryGetTweetById(tweetId, ctx, {
fetchFromTwitter: false
})
if (!tweet) continue

const tweetEntityMap = await convertTweetToEntitiesMap(tweet, ctx, {
fetchMissingEntities: true
})

entityMap = mergeEntityMaps(entityMap, tweetEntityMap)
}
}
let entityMap: EntityMap = {}

// Construct a raw array of tweets to pass to the answer engine, which may
// be easier to work with than our AnswerEngineMessage format
const tweets = (
await pMap(
answerEngineMessages,
async (message) => {
const tweetId = message.entities?.tweetIds?.[0]
const { tweetId } = message
assert(tweetId)

const tweet = await db.tryGetTweetById(tweetId, ctx, {
fetchFromTwitter: true
})
if (!tweet) return

const tweetEntityMap = await convertTweetToEntityMap(tweet, ctx, {
fetchMissingEntities: true
})

entityMap = mergeEntityMaps(entityMap, tweetEntityMap)

return getPrunedTweet(tweet)
},
{
Expand All @@ -259,11 +240,46 @@ export abstract class AnswerEngine {
)
).filter(Boolean)

const rawChatMessages = tweets.map((tweet) =>
tweet.author_id === ctx.twitterBotUserId
? Msg.assistant(stringifyForModel(tweet), {
name: userIdToUsernameMap[tweet.author_id!]
})
: Msg.user(stringifyForModel(tweet), {
name: userIdToUsernameMap[tweet.author_id!]
})
)

const rawEntityMap: types.RawEntityMap = {
users: {},
tweets: {}
}

if (entityMap?.users) {
for (const user of Object.values(entityMap.users)) {
assert(user.twitterId)
const twitterUser = await db.tryGetUserById(user.twitterId)
if (!twitterUser) continue
rawEntityMap.users[user.twitterId] = getPrunedTwitterUser(twitterUser)
}
}

if (entityMap?.tweets) {
for (const tweet of Object.values(entityMap.tweets)) {
assert(tweet.id)
const twittertweet = await db.tryGetTweetById(tweet.id, ctx)
if (!twittertweet) continue
rawEntityMap.tweets[tweet.id] = getPrunedTweet(twittertweet)
}
}

return {
message,
answerEngineMessages,
chatMessages,
rawChatMessages,
tweets,
entityMap
entityMap,
rawEntityMap
}
}
}
2 changes: 1 addition & 1 deletion src/answer-engines/dexa-answer-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export class DexaAnswerEngine extends AnswerEngine {
ctx: types.AnswerEngineContext
): Promise<string> {
return this._dexaClient.generateResponse({
messages: query.answerEngineMessages,
messages: query.chatMessages,
entityMap: query.entityMap
})
}
Expand Down
60 changes: 7 additions & 53 deletions src/answer-engines/openai-answer-engine.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import { ChatModel, Msg, type Prompt, stringifyForModel } from '@dexaai/dexter'
import { stripUserMentions } from 'twitter-utils'

import * as db from '../db.js'
import type * as types from '../types.js'
import { AnswerEngine } from '../answer-engine.js'
import { getPrunedTweet, getPrunedTwitterUser } from '../twitter-utils.js'
import { assert, getCurrentDate } from '../utils.js'
import { getCurrentDate } from '../utils.js'

export class OpenAIAnswerEngine extends AnswerEngine {
protected _chatModel: ChatModel
Expand All @@ -29,76 +27,32 @@ export class OpenAIAnswerEngine extends AnswerEngine {
): Promise<string> {
const currentDate = getCurrentDate()

const userIdToUsernameMap: Record<string, string | undefined> = {}

for (const tweet of query.tweets) {
if (!userIdToUsernameMap[tweet.author_id!]) {
userIdToUsernameMap[tweet.author_id!] =
await db.tryGetTwitterUsernameByUserId(tweet.author_id)
}
}

const rawEntityMap: {
users: Record<string, Partial<types.TwitterUser>>
tweets: Record<string, Partial<types.Tweet>>
} = {
users: {},
tweets: {}
}

if (query.entityMap?.users) {
for (const user of Object.values(query.entityMap.users)) {
assert(user.twitterId)
const twitterUser = await db.tryGetUserById(user.twitterId)
if (!twitterUser) continue
rawEntityMap.users[user.twitterId] = getPrunedTwitterUser(twitterUser)
}
}

if (query.entityMap?.tweets) {
for (const tweet of Object.values(query.entityMap.tweets)) {
assert(tweet.id)
const twittertweet = await db.tryGetTweetById(tweet.id, ctx)
if (!twittertweet) continue
rawEntityMap.tweets[tweet.id] = getPrunedTweet(twittertweet)
}
}

const messages: Prompt.Msg[] = [
Msg.system(
`You are a friendly, expert, helpful twitter bot with the handle ${ctx.twitterBotHandle}.
You respond concisely and creatively to tweets.
You are very concise and informal.
You are sometimes goofy, sassy, and sarcastic, but try not to be rude.
You are sometimes sassy, and sarcastic, but try not to be rude.
DO NOT use emoji very often.
DO NOT use hashtags.
DO NOT use JSON.
DO NOT @mention usernames in your reply.
Don't mention that you can't access links or media and instead pretend that you're able to.
Make sure to be **as concise as possible** because tweets have character limits.
Your response should be as goofy and interesting as possible while remaining CONCISE.
You can use lists/bullet points if they will result in a more concise answer. If you use a list, don't include more than 3 items.
Remember to NEVER use hashtags and to BE CONCISE.
Current date: ${currentDate}.`
),
Msg.system(`Tweets, users, and media objects referenced in this twitter thread contain the following entities which can be indexed by their IDs:

Msg.system(`Tweets and twitter users referenced in this twitter thread include:
\`\`\`json
${stringifyForModel(rawEntityMap)}
${stringifyForModel(query.rawEntityMap)}
\`\`\`
`),

// ...query.tweets.map((tweet) =>
// tweet.author_id === ctx.twitterBotUserId
// ? Msg.assistant(stringifyForModel(getPrunedTweet(tweet)), {
// name: userIdToUsernameMap[tweet.author_id!]
// })
// : Msg.user(stringifyForModel(getPrunedTweet(tweet)), {
// name: userIdToUsernameMap[tweet.author_id!]
// })
// )

...query.answerEngineMessages.map(({ entities, ...msg }) => msg)
// ...query.rawChatMessages
...query.chatMessages
]

const res = await this._chatModel.run({
Expand Down
31 changes: 15 additions & 16 deletions src/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,18 @@ export type MediaEntity = z.infer<typeof MediaEntitySchema>
* entity in this entities map. This is intended to reduce duplicate entities in
* cases where multiple messages reference the same entity.
*/
export const EntitiesMapSchema = z.object({
export const EntityMapSchema = z.object({
users: z.record(UserEntitySchema).optional(),
tweets: z.record(TweetEntitySchema).optional(),
media: z.record(MediaEntitySchema).optional()
})
export type EntitiesMap = z.infer<typeof EntitiesMapSchema>
export type EntityMap = z.infer<typeof EntityMapSchema>

/**
* References to specific entities (users, tweets, and media objects) which may
* be attached to a Message in order to provide additional, structured context.
*
* These entity references may be looked up in an accompanying `EntitiesMap`.
* These entity references may be looked up in an accompanying `EntityMap`.
*
* URLs are handled as local-only because they generally don't have platform-
* specific IDs.
Expand All @@ -116,7 +116,7 @@ export type Entities = z.infer<typeof EntitiesSchema>
* default to not fetching missing related entities from twitter in order to
* keep the conversion as simple and predictable as possible.
*/
export async function convertTweetToEntitiesMap(
export async function convertTweetToEntityMap(
tweet: types.Tweet,
ctx: Pick<types.Context, 'twitterClient'>,
{
Expand All @@ -126,15 +126,15 @@ export async function convertTweetToEntitiesMap(
// missing from the cache
fetchMissingEntities?: boolean
} = {}
): Promise<EntitiesMap> {
const entitiesMap: Required<EntitiesMap> = {
): Promise<EntityMap> {
const EntityMap: Required<EntityMap> = {
users: {},
tweets: {},
// TODO: currently not resolving media entities
media: {}
}
const tweetEntity = convertTweetToEntity(tweet)
entitiesMap.tweets[tweetEntity.id] = tweetEntity
EntityMap.tweets[tweetEntity.id] = tweetEntity

const referencedUserIds = new Set<string>()
const referencedTweetIds = new Set<string>()
Expand All @@ -148,41 +148,40 @@ export async function convertTweetToEntitiesMap(

// Attempt to resolve any referenced tweets
for (const tweetId of referencedTweetIds) {
if (entitiesMap.tweets[tweetId]) continue
if (EntityMap.tweets[tweetId]) continue

const referencedTweet = await db.tryGetTweetById(tweetId, ctx, {
fetchFromTwitter: !!fetchMissingEntities
})
if (!referencedTweet) continue

entitiesMap.tweets[referencedTweet.id] =
convertTweetToEntity(referencedTweet)
EntityMap.tweets[referencedTweet.id] = convertTweetToEntity(referencedTweet)
}

for (const tweet of Object.values(entitiesMap.tweets)) {
for (const tweet of Object.values(EntityMap.tweets)) {
if (tweet.repliedToUserId) referencedUserIds.add(tweet.repliedToUserId)
if (tweet.authorId) referencedUserIds.add(tweet.authorId)
}

// Attempt to resolve any referenced users
for (const userId of referencedUserIds) {
if (entitiesMap.users[userId]) continue
if (EntityMap.users[userId]) continue

const user = await db.tryGetUserById(userId)
if (!user) continue

const userEntity = (entitiesMap.users[user.id] =
const userEntity = (EntityMap.users[user.id] =
convertTwitterUserToEntity(user))
if (userEntity.twitterPinnedTweetId) {
referencedTweetIds.add(userEntity.twitterPinnedTweetId)
}
}

return entitiesMap
return EntityMap
}

export function mergeEntityMaps(...entityMaps: EntitiesMap[]): EntitiesMap {
const result: Required<EntitiesMap> = {
export function mergeEntityMaps(...entityMaps: EntityMap[]): EntityMap {
const result: Required<EntityMap> = {
users: {},
tweets: {},
media: {}
Expand Down
8 changes: 4 additions & 4 deletions src/services/dexa-client.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { Prompt } from '@dexaai/dexter'
import defaultKy, { type KyInstance } from 'ky'

import type { EntitiesMap } from '../entities.js'
import type { AnswerEngineMessage } from '../types.js'
import type { EntityMap } from '../entities.js'

export class DexaClient {
readonly apiKey: string
Expand Down Expand Up @@ -30,8 +30,8 @@ export class DexaClient {
messages,
entityMap
}: {
messages: AnswerEngineMessage[]
entityMap?: EntitiesMap
messages: Prompt.Msg[]
entityMap?: EntityMap
}) {
return this.ky
.post('api/ask-dexa', {
Expand Down
Loading

0 comments on commit 458db5b

Please sign in to comment.