Skip to content

Commit

Permalink
Query time is improved, can now filter by score, system now predicts …
Browse files Browse the repository at this point in the history
…ratings for items already rated
  • Loading branch information
Jim Avery committed Nov 22, 2013
1 parent cbbd353 commit 74a0c3e
Show file tree
Hide file tree
Showing 25 changed files with 603 additions and 28,435 deletions.
38 changes: 38 additions & 0 deletions PostgreSQL/recdb_regression_test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
 * RecDB regression script: runs RECOMMEND queries against ml_ratings, once
 * for each recommendation method named in a USING clause below.
 *
 * Every per-method section follows the same four steps:
 *   1. CREATE RECOMMENDER MovieRec for that method.
 *   2. Run a RECOMMEND query for userid = 1 while MovieRec exists.
 *   3. DROP RECOMMENDER MovieRec.
 *   4. Run the identical query again, now that MovieRec has been dropped.
 * NOTE(review): step 4 presumably exercises the code path used when no
 * pre-built recommender is available; confirm against the executor.
 *
 * The script does not create its input tables. It reads ml_ratings
 * (userid, itemid, ratingval) and ml_items (itemid, name, genre), so both
 * must exist before it is run.
 */

/* ItemCosCF: recommender created with USING itemcoscf. */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itemcoscf;
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1;
DROP RECOMMENDER MovieRec;
/* Same query, repeated after the DROP. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1;

/* ItemPearCF: recommender created with USING itempearcf. */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itempearcf;
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itempearcf WHERE userid = 1;
DROP RECOMMENDER MovieRec;
/* Same query, repeated after the DROP. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itempearcf WHERE userid = 1;

/* UserCosCF: recommender created with USING usercoscf. */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING usercoscf;
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING usercoscf WHERE userid = 1;
DROP RECOMMENDER MovieRec;
/* Same query, repeated after the DROP. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING usercoscf WHERE userid = 1;

/* UserPearCF: recommender created with USING userpearcf. */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING userpearcf;
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING userpearcf WHERE userid = 1;
DROP RECOMMENDER MovieRec;
/* Same query, repeated after the DROP. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING userpearcf WHERE userid = 1;

/* SVD: recommender created with USING svd. */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING svd;
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING svd WHERE userid = 1;
DROP RECOMMENDER MovieRec;
/* Same query, repeated after the DROP. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING svd WHERE userid = 1;

/*
 * Miscellaneous: one itemcoscf recommender is created, then several query
 * shapes are run against it before it is dropped at the end.
 */
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itemcoscf;
/* Several users selected with IN (...), combined with an item ID range filter. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid IN (1,2,3,5,9) AND itemid < 7;
/* Join with ml_items on itemid, keeping rows whose genre matches ILIKE '%drama%'. */
SELECT r.itemid,r.ratingval,i.name,i.genre FROM ml_ratings r, ml_items i RECOMMEND r.itemid TO r.userid ON r.ratingval USING itemcoscf WHERE r.userid = 1 AND r.itemid = i.itemid AND i.genre ILIKE '%drama%';
/* Rows for user 1 ordered by ratingval, highest first, limited to 10. */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1 ORDER BY ratingval DESC LIMIT 10;
/* Join, genre filter ('%action%'), ORDER BY and LIMIT 5 combined in one query. */
SELECT r.itemid,r.ratingval,i.name,i.genre FROM ml_ratings r, ml_items i RECOMMEND r.itemid TO r.userid ON r.ratingval USING itemcoscf WHERE r.userid = 1 AND r.itemid = i.itemid AND i.genre ILIKE '%action%' ORDER BY ratingval DESC LIMIT 5;
/* Filter on the ratingval column itself (ratingval >= 4.5). */
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1 AND ratingval >= 4.5;
DROP RECOMMENDER MovieRec;
117 changes: 54 additions & 63 deletions PostgreSQL/src/backend/executor/execRecommend.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ ExecFilterRecommend(RecScanState *recnode,
ExecScanRecheckMtd recheckMtd)
{
ExprContext *econtext;
List *qual;
List *qual, *userqual;
ProjectionInfo *projInfo;
ExprDoneCond isDone;
TupleTableSlot *resultSlot;
Expand All @@ -319,6 +319,7 @@ ExecFilterRecommend(RecScanState *recnode,
* Fetch data from node
*/
qual = node->ps.qual;
userqual = recnode->userqual;
projInfo = node->ps.ps_ProjInfo;
econtext = node->ps.ps_ExprContext;

Expand Down Expand Up @@ -373,7 +374,7 @@ ExecFilterRecommend(RecScanState *recnode,
if (recnode->finished) {
recnode->finished = false;
recnode->userNum = 0;
recnode->itemNum = 0;
recnode->fullItemNum = 0;
return NULL;
}

Expand Down Expand Up @@ -434,51 +435,36 @@ ExecFilterRecommend(RecScanState *recnode,

/*
* We now have a blank tuple slot that we need to fill with data.
* We have a working user ID, but not a valid item list. We'd like to
* use the filter to determine if this is a good user, but we can't
* do that without an item, in many cases. The solution is to add in
* dummy items, then compare it against the filter. If a given user ID
* doesn't make it past the filter with any item ID, then that user is
* being filtered out, and we'll move on to the next.
* We have a working user ID, but not a valid item list. This is where
* we use our custom user-focused WHERE clause to filter out users that
* do not pass our qualifications.
*/
if (recnode->newUser) {
recnode->fullItemNum = 0;
itemindex = recnode->fullItemNum;
itemID = recnode->fullItemList[itemindex];

slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
slot->tts_values[recnode->itematt] = Int32GetDatum(-1);
slot->tts_values[recnode->eventatt] = Int32GetDatum(-1);

/* We have a preliminary slot - let's test it. */
while (qual && !ExecQual(qual, econtext, false)) {
/* We failed the test. Try the next item. */
recnode->fullItemNum++;
if (recnode->fullItemNum >= recnode->fullTotalItems) {
/* If we've reached the last item, move onto the next user.
* If we've reached the last user, we're done. */
InstrCountFiltered1(node, recnode->fullTotalItems);
recnode->userNum++;
recnode->newUser = true;
while (userqual && !ExecQual(userqual, econtext, false)) {
/* We failed the test. Try the next user.
* If we've reached the last user, we're done. */
InstrCountFiltered1(node, recnode->fullTotalItems);
recnode->userNum++;
recnode->newUser = true;

if (recnode->userNum >= recnode->totalUsers) {
recnode->userNum = 0;
recnode->fullItemNum = 0;
if (recnode->userNum >= recnode->totalUsers) {
recnode->userNum = 0;
recnode->itemNum = 0;
return NULL;
}
userindex = recnode->userNum;
userID = recnode->userList[userindex];
return NULL;
}

itemindex = recnode->fullItemNum;
itemID = recnode->fullItemList[itemindex];
userindex = recnode->userNum;
userID = recnode->userList[userindex];
slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
}

/* If we get here, then we found a user who will be actually
* returned in the results. One quick reset here. */
recnode->fullItemNum = 0;
* returned in the results. */
}

/* Mark the user ID and index. */
Expand All @@ -495,14 +481,26 @@ ExecFilterRecommend(RecScanState *recnode,
}

/* Now replace the item ID, if the user is valid. Otherwise,
* leave the item ID as is, as it doesn't matter what it is. */
if (recnode->validUser)
* leave the item ID as is, as it doesn't matter what it is. We'll
* move on to the next user, as well. */
if (recnode->validUser) {
itemID = recnode->fullItemList[recnode->fullItemNum];
itemindex = recnode->fullItemNum;
} else {
recnode->userNum++;
recnode->newUser = true;
recnode->fullItemNum = 0;
if (recnode->userNum >= recnode->totalUsers)
recnode->finished = true;
continue;
}
/* if (recnode->validUser)
itemID = recnode->itemList[recnode->itemNum];
while (recnode->fullItemList[recnode->fullItemNum] < itemID)
recnode->fullItemNum++;
itemindex = recnode->fullItemNum;
if (recnode->fullItemList[itemindex] > itemID)
elog(ERROR, "critical item mismatch in ExecRecommend");
elog(ERROR, "critical item mismatch in ExecRecommend");*/

/* Plug in the data, marking those columns full. We also need to
* mark the rating column with something temporary. */
Expand All @@ -514,21 +512,20 @@ ExecFilterRecommend(RecScanState *recnode,
* If that's the case, we need to calculate it before we do the
* qual filtering. Also, if we're doing a JoinRecommend, we should
* not calculate the RecScore in this node. In the current version
* of RecDB, an OP_NOFILTER shouldn't be allowed. */
if (attributes->opType == OP_NOFILTER)
* of RecDB, special joins don't exist, so that's no problem. */
if (attributes->noFilter)
applyRecScore(recnode, slot, itemID, itemindex);

/* Move onto the next item, for next time. If we're doing a RecJoin,
* though, we'll move onto the next user instead. */
recnode->itemNum++;
if (recnode->itemNum >= recnode->totalItems ||
recnode->fullItemNum++;
if (recnode->fullItemNum >= recnode->fullTotalItems ||
attributes->opType == OP_JOIN ||
attributes->opType == OP_GENERATEJOIN) {
/* If we've reached the last item, move onto the next user.
* If we've reached the last user, we're done. */
recnode->userNum++;
recnode->newUser = true;
recnode->itemNum = 0;
recnode->fullItemNum = 0;
if (recnode->userNum >= recnode->totalUsers)
recnode->finished = true;
Expand Down Expand Up @@ -558,7 +555,7 @@ ExecFilterRecommend(RecScanState *recnode,
* Found a satisfactory scan tuple. This is usually when
* we will calculate and apply the RecScore.
*/
if (attributes->opType == OP_FILTER || attributes->opType == OP_GENERATE)
if (!attributes->noFilter)
applyRecScore(recnode, slot, itemID, itemindex);

if (projInfo)
Expand Down Expand Up @@ -701,18 +698,16 @@ InitializeRecommender(RecScanState *recstate) {
recstate->totalUsers = getTupleInt(hslot,"count");
recathon_queryEnd(queryDesc,recathoncontext);

/* In the event that there are no user IDs, our ratings table is empty, so
* we can't do anything. */
/* In the event that there are no user IDs, we can't do anything. */
if (recstate->totalUsers <= 0)
elog(ERROR, "no ratings in table %s, cannot predict ratings",
attributes->eventtable);
elog(ERROR, "no users found, cannot predict ratings");

recstate->userList = (int*) palloc(recstate->totalUsers*sizeof(int));
recstate->userNum = 0;

/* Now for the actual query. */
sprintf(querystring,"select distinct %s from %s order by %s;",
attributes->userkey,attributes->eventtable,attributes->userkey);
sprintf(querystring,"select distinct %s from %s;",
attributes->userkey,attributes->eventtable);
queryDesc = recathon_queryStart(querystring,&recathoncontext);
planstate = queryDesc->planstate;

Expand All @@ -734,16 +729,14 @@ InitializeRecommender(RecScanState *recstate) {
/* Quick error protection. */
recstate->totalUsers = i;
if (recstate->totalUsers <= 0)
elog(ERROR, "no ratings in table %s, cannot predict ratings",
attributes->eventtable);
elog(ERROR, "no users found, cannot predict ratings");

/* Lastly, initialize the attributes->userID. */
attributes->userID = recstate->userList[0] - 1;
}

/* Next, for annoying and convoluted reasons, we need a full list of all the
* items in the rating table. This will help us circumvent some filter issues
* while remaining as efficient as we can manage. */
/* Next, we need a full list of all the items in the rating table. This will tell
* us what items to generate ratings for. */
if ((attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) ||
attributes->method == userCosCF ||
attributes->method == userPearCF) {
Expand All @@ -755,11 +748,9 @@ InitializeRecommender(RecScanState *recstate) {
recstate->fullTotalItems = getTupleInt(hslot,"count");
recathon_queryEnd(queryDesc,recathoncontext);

/* In the event that there are no item IDs, our ratings table is empty, so
* we can't do anything. */
/* In the event that there are no item IDs, we can't do anything. */
if (recstate->fullTotalItems <= 0)
elog(ERROR, "no ratings in table %s, cannot predict ratings",
attributes->eventtable);
elog(ERROR, "no items found, cannot predict ratings");

recstate->fullItemList = (int*) palloc(recstate->fullTotalItems*sizeof(int));
recstate->fullItemNum = 0;
Expand Down Expand Up @@ -788,8 +779,7 @@ InitializeRecommender(RecScanState *recstate) {
/* Quick error protection. */
recstate->fullTotalItems = i;
if (recstate->fullTotalItems <= 0)
elog(ERROR, "no ratings in table %s, cannot predict ratings",
attributes->eventtable);
elog(ERROR, "no items found, cannot predict ratings");
}

recstate->finished = false;
Expand All @@ -803,7 +793,6 @@ InitializeRecommender(RecScanState *recstate) {
recstate->ratedTable = NULL;
recstate->pendingTable = NULL;
recstate->simTable = NULL;
recstate->itemList = NULL;
recstate->userFeatures = NULL;

/* In case we don't have a pre-built recommender, we need to assemble
Expand Down Expand Up @@ -903,6 +892,10 @@ ExecInitRecScan(RecScan *node, EState *estate, int eflags)
* stuff out of Init and into Execute, to make EXPLAIN go faster. */
recstate->initialized = false;

/* Next we need to prep our user WHERE clause. */
recstate->userqual = (List *)
ExecInitExpr((Expr *) attributes->userWhereClause, NULL);

/* Code for a future version of RecDB. */
/* switch(attributes->cellType) {
case CELL_ALPHA:
Expand Down Expand Up @@ -992,8 +985,6 @@ ExecEndRecScan(RecScanState *node)
}

/* Now for extra stuff. */
if (node->itemList)
pfree(node->itemList);
if (node->fullItemList)
pfree(node->fullItemList);
if (node->userFeatures)
Expand Down
6 changes: 3 additions & 3 deletions PostgreSQL/src/backend/executor/nodeRecjoin.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ ExecRecJoin(RecJoinState *recjoin)
/* Otherwise, we need to construct our hash table, since
* we need info from the previous operator to do so. */
freeHash(recjoin->itemTable);
recjoin->itemTable = hashCreate(recjoin->recnode->totalItems);
for (i = 0; i < recjoin->recnode->totalItems; i++) {
int currentItem = recjoin->recnode->itemList[i];
recjoin->itemTable = hashCreate(recjoin->recnode->fullTotalItems);
for (i = 0; i < recjoin->recnode->fullTotalItems; i++) {
int currentItem = recjoin->recnode->fullItemList[i];

tempItem = (GenRating*) palloc(sizeof(GenRating));
tempItem->ID = currentItem;
Expand Down
10 changes: 4 additions & 6 deletions PostgreSQL/src/backend/nodes/copyfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,7 @@ _copyRangeVar(const RangeVar *from)
COPY_SCALAR_FIELD(relpersistence);
COPY_NODE_FIELD(alias);
COPY_LOCATION_FIELD(location);
COPY_NODE_FIELD(recommender);

return newnode;
}
Expand Down Expand Up @@ -2422,7 +2423,7 @@ _copyQuery(const Query *from)
COPY_SCALAR_FIELD(hasRecursive);
COPY_SCALAR_FIELD(hasModifyingCTE);
COPY_SCALAR_FIELD(hasForUpdate);
COPY_SCALAR_FIELD(isRecommendStmt);
COPY_NODE_FIELD(recommendStmt);
COPY_NODE_FIELD(cteList);
COPY_NODE_FIELD(rtable);
COPY_NODE_FIELD(jointree);
Expand Down Expand Up @@ -2525,7 +2526,6 @@ _copyRecommendInfo(const RecommendInfo *from)
COPY_NODE_FIELD(recommender);
COPY_NODE_FIELD(attributes);
COPY_SCALAR_FIELD(opType);
COPY_NODE_FIELD(next);

return newnode;
}
Expand All @@ -2548,13 +2548,11 @@ _copyAttributeInfo(const AttributeInfo *from)
COPY_STRING_FIELD(recModelName);
COPY_STRING_FIELD(recModelName2);
COPY_STRING_FIELD(recViewName);
COPY_SCALAR_FIELD(numAtts);
COPY_POINTER_FIELD(attNames, from->numAtts*sizeof(char*));
COPY_POINTER_FIELD(attValues, from->numAtts*sizeof(char*));
COPY_NODE_FIELD(target_val);
COPY_NODE_FIELD(userWhereClause);
COPY_SCALAR_FIELD(IDfound);
COPY_SCALAR_FIELD(cellType);
COPY_SCALAR_FIELD(opType);
COPY_SCALAR_FIELD(noFilter);

return newnode;
}
Expand Down
10 changes: 4 additions & 6 deletions PostgreSQL/src/backend/nodes/equalfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ _equalRangeVar(const RangeVar *a, const RangeVar *b)
COMPARE_SCALAR_FIELD(relpersistence);
COMPARE_NODE_FIELD(alias);
COMPARE_LOCATION_FIELD(location);
COMPARE_NODE_FIELD(recommender);

return true;
}
Expand Down Expand Up @@ -909,7 +910,7 @@ _equalQuery(const Query *a, const Query *b)
COMPARE_SCALAR_FIELD(hasRecursive);
COMPARE_SCALAR_FIELD(hasModifyingCTE);
COMPARE_SCALAR_FIELD(hasForUpdate);
COMPARE_SCALAR_FIELD(isRecommendStmt);
COMPARE_NODE_FIELD(recommendStmt);
COMPARE_NODE_FIELD(cteList);
COMPARE_NODE_FIELD(rtable);
COMPARE_NODE_FIELD(jointree);
Expand Down Expand Up @@ -1002,7 +1003,6 @@ _equalRecommendInfo(const RecommendInfo *a, const RecommendInfo *b)
COMPARE_NODE_FIELD(recommender);
COMPARE_NODE_FIELD(attributes);
COMPARE_SCALAR_FIELD(opType);
COMPARE_NODE_FIELD(next);

return true;
}
Expand All @@ -1023,13 +1023,11 @@ _equalAttributeInfo(const AttributeInfo *a, const AttributeInfo *b)
COMPARE_STRING_FIELD(recModelName);
COMPARE_STRING_FIELD(recModelName2);
COMPARE_STRING_FIELD(recViewName);
COMPARE_SCALAR_FIELD(numAtts);
COMPARE_POINTER_FIELD(attNames, a->numAtts*sizeof(char*));
COMPARE_POINTER_FIELD(attValues, a->numAtts*sizeof(char*));
COMPARE_NODE_FIELD(target_val);
COMPARE_NODE_FIELD(userWhereClause);
COMPARE_SCALAR_FIELD(IDfound);
COMPARE_SCALAR_FIELD(cellType);
COMPARE_SCALAR_FIELD(opType);
COMPARE_SCALAR_FIELD(noFilter);

return true;
}
Expand Down
Loading

0 comments on commit 74a0c3e

Please sign in to comment.