diff --git a/src/components/Cards/MeetingCard/MeetingCard.stories.tsx b/src/components/Cards/MeetingCard/MeetingCard.stories.tsx index daff1fa9..a6761abe 100644 --- a/src/components/Cards/MeetingCard/MeetingCard.stories.tsx +++ b/src/components/Cards/MeetingCard/MeetingCard.stories.tsx @@ -30,4 +30,5 @@ meetingSearchResult.args = { tags: ["bike", "adu", "accessories", "rental"], excerpt: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce interdum, lorem eget vestibulum tincidunt, augue eros gravida lectus, ut efficitur neque nisi eu metus.", + gram: "ipsum", }; diff --git a/src/components/Cards/MeetingCard/MeetingCard.tsx b/src/components/Cards/MeetingCard/MeetingCard.tsx index 9132f8f0..024c6c37 100644 --- a/src/components/Cards/MeetingCard/MeetingCard.tsx +++ b/src/components/Cards/MeetingCard/MeetingCard.tsx @@ -1,8 +1,11 @@ -import React from "react"; +import React, { useMemo } from "react"; import styled from "@emotion/styled"; +import Highlighter from "react-highlight-words"; +import { removeStopwords } from "stopword"; import { TAG_CONNECTOR } from "../../../constants/StyleConstants"; import "@mozilla-protocol/core/protocol/css/protocol.css"; import { strings } from "../../../assets/LocalizedStrings"; +import cleanText from "../../../utils/cleanText"; export type MeetingCardProps = { /** The static poster image src of the event */ @@ -19,6 +22,10 @@ export type MeetingCardProps = { tags: string[]; /** A context span if the event was found through searching */ excerpt?: string; + /** The highest value gram of the context span */ + gram?: string; + /** The query used to find this meeting */ + query?: string; }; const Meeting = styled.section({ @@ -41,9 +48,24 @@ const MeetingCard = ({ committee, tags, excerpt, + gram, + query, }: MeetingCardProps) => { const tagString = tags.map((tag) => tag.toLowerCase()).join(TAG_CONNECTOR); + const searchWords = useMemo(() => { + const cleanedQuery = cleanText(query || ""); + // Phrases that should be 
highlighted in the excerpt + const phrases = removeStopwords(cleanedQuery.split(" ")); + if (gram && gram.length > 0) { + phrases.push(gram); + } + if (phrases.length === 0) { + return []; + } + return [new RegExp(`\\b(${phrases.join("|")})`, "g")]; + }, [query, gram]); + return (
@@ -55,15 +77,21 @@ const MeetingCard = ({
{strings.committee}

{meetingDate}

{committee}

- {excerpt ? ( + {excerpt && (

{`"${excerpt}"`}

- ) : null} + > + +

+ )}

{strings.keywords}

{tagString}

diff --git a/src/components/Details/TranscriptItem/TranscriptItem.tsx b/src/components/Details/TranscriptItem/TranscriptItem.tsx index e3686c38..ba0c9efe 100644 --- a/src/components/Details/TranscriptItem/TranscriptItem.tsx +++ b/src/components/Details/TranscriptItem/TranscriptItem.tsx @@ -1,7 +1,9 @@ -import React, { FC, RefObject, RefAttributes, useRef, useImperativeHandle } from "react"; +import React, { FC, RefObject, RefAttributes, useRef, useImperativeHandle, useMemo } from "react"; import { Link } from "react-router-dom"; import Highlighter from "react-highlight-words"; import { Popup } from "semantic-ui-react"; +import { stem } from "stemr"; +import { removeStopwords } from "stopword"; import styled from "@emotion/styled"; import { strings } from "../../../assets/LocalizedStrings"; @@ -10,6 +12,7 @@ import DocumentTextIcon from "../../Shared/DocumentTextIcon"; import PlayIcon from "../../Shared/PlayIcon"; import { fontSizes } from "../../../styles/fonts"; +import cleanText from "../../../utils/cleanText"; import "@mozilla-protocol/core/protocol/css/protocol.css"; @@ -131,15 +134,30 @@ const TranscriptItem: FC = ({ ); + const searchWords = useMemo(() => { + const cleanedQuery = cleanText(searchQuery || ""); + const tokenizedQuery = removeStopwords(cleanedQuery.split(" ")); + if (!cleanedQuery || tokenizedQuery.length === 0) { + // no query or valid tokens to highlight + return []; + } + const stemmedQuery = tokenizedQuery.map((token) => stem(token)); + // highlight the token or the stem + const regExps = tokenizedQuery.map( + (token, i) => new RegExp(`\\b(${token}|${stemmedQuery[i]})`, "g") + ); + if (searchQuery && searchQuery.trim().length > 0) { + // highlight the original query too + regExps.push(new RegExp(searchQuery.trim(), "g")); + } + return regExps; + }, [searchQuery]); + return (
- + diff --git a/src/components/Details/TranscriptSearch/TranscriptSearch.tsx b/src/components/Details/TranscriptSearch/TranscriptSearch.tsx index d61fb888..a3d3f0a9 100644 --- a/src/components/Details/TranscriptSearch/TranscriptSearch.tsx +++ b/src/components/Details/TranscriptSearch/TranscriptSearch.tsx @@ -1,13 +1,15 @@ -import React, { ChangeEventHandler, FC, useState, useMemo } from "react"; +import React, { ChangeEventHandler, FC, useState, useMemo, FormEventHandler } from "react"; import styled from "@emotion/styled"; -import { strings } from "../../../assets/LocalizedStrings"; -import TranscriptItems from "./TranscriptItems"; +import { stem } from "stemr"; +import { removeStopwords } from "stopword"; +import TranscriptItems from "./TranscriptItems"; import { SentenceWithSessionIndex } from "../../../containers/EventContainer/types"; +import { strings } from "../../../assets/LocalizedStrings"; import { fontSizes } from "../../../styles/fonts"; import { screenWidths } from "../../../styles/mediaBreakpoints"; -import isSubstring from "../../../utils/isSubstring"; +import cleanText from "../../../utils/cleanText"; const Container = styled.div({ display: "flex", @@ -65,23 +67,50 @@ const TranscriptSearch: FC = ({ jumpToVideoClip, jumpToTranscript, }: TranscriptSearchProps) => { + // Update the query in the search bar as the user types const [searchTerm, setSearchTerm] = useState(searchQuery); - const onSearchChange: ChangeEventHandler = (event) => + const onSearchChange: ChangeEventHandler = (event) => { setSearchTerm(event.target.value); + }; + + // The query after a search form submit + const [searchedTerm, setSearchedTerm] = useState(searchQuery); + const onSearch: FormEventHandler = (event) => { + event.preventDefault(); + setSearchedTerm(searchTerm); + }; + + const stemmedSentences = useMemo(() => { + return sentences.map(({ text }) => { + const cleanedText = cleanText(text); + const tokens = removeStopwords(cleanedText.split(" ")); + const stems = 
tokens.map((token) => stem(token).toLowerCase()); + return new Set(stems); + }); + }, [sentences]); + //Update the visible sentences as the searched query changes const visibleSentences = useMemo(() => { - return sentences.filter(({ text }) => isSubstring(text, searchTerm)); - }, [sentences, searchTerm]); + if (!searchedTerm.trim()) { + return sentences; + } + const cleanedQuery = cleanText(searchedTerm); + const tokenizedQuery = removeStopwords(cleanedQuery.split(" ")); + if (!cleanedQuery || tokenizedQuery.length === 0) { + // empty query or no valid tokens to search + return []; + } + const stemmedQuery = tokenizedQuery.map((token) => stem(token).toLowerCase()); + return sentences.filter((_, i) => stemmedQuery.some((q) => stemmedSentences[i].has(q))); + }, [sentences, stemmedSentences, searchedTerm]); return (
{strings.search_transcript}
- {searchTerm && ( -
{strings.number_of_results.replace("{number}", `${visibleSentences.length}`)}
- )} +
{strings.number_of_results.replace("{number}", `${visibleSentences.length}`)}
-
+ = ({
= ({ cards }: CardsContainerProps) => { return ( - {cards.map(({ link, jsx }) => { + {cards.map(({ link, jsx, searchQuery }) => { return (
= ({ searchState }: SearchContain committee={renderableEvent.event.body?.name as string} tags={renderableEvent.keyGrams} excerpt={renderableEvent.selectedContextSpan} + //TODO: add the gram and queryRef.current /> ), }; diff --git a/src/containers/SearchEventsContainer/SearchEventsContainer.tsx b/src/containers/SearchEventsContainer/SearchEventsContainer.tsx index 0dae3922..a44c485f 100644 --- a/src/containers/SearchEventsContainer/SearchEventsContainer.tsx +++ b/src/containers/SearchEventsContainer/SearchEventsContainer.tsx @@ -167,8 +167,11 @@ const SearchEventsContainer: FC = ({ committee={renderableEvent.event.body?.name as string} tags={renderableEvent.keyGrams} excerpt={renderableEvent.selectedContextSpan} + gram={renderableEvent.selectedGram} + query={searchQueryRef.current} /> ), + searchQuery: searchQueryRef.current, }; }); return ( diff --git a/src/networking/EventSearchService.ts b/src/networking/EventSearchService.ts index 973733d5..bd24ffe8 100644 --- a/src/networking/EventSearchService.ts +++ b/src/networking/EventSearchService.ts @@ -12,6 +12,7 @@ import Event from "../models/Event"; import { createError } from "../utils/createError"; import { getStorage, ref, getDownloadURL } from "@firebase/storage"; import { FirebaseConfig } from "../app/AppConfigContext"; +import cleanText from "../utils/cleanText"; /** * The primary return of searchEvents. 
@@ -23,6 +24,7 @@ class MatchingEvent { pureRelevance: number; datetimeWeightedRelevance: number; containedGrams: string[]; + selectedGram: string; selectedContextSpan: string; constructor( @@ -30,12 +32,14 @@ class MatchingEvent { pureRelevance: number, datetimeWeightedRelevance: number, containedGrams: string[], + selectedGram: string, selectedContextSpan: string ) { this.eventRef = `${COLLECTION_NAME.Event}/${eventId}`; this.pureRelevance = pureRelevance; this.datetimeWeightedRelevance = datetimeWeightedRelevance; this.containedGrams = containedGrams; + this.selectedGram = selectedGram; this.selectedContextSpan = selectedContextSpan; } } @@ -50,6 +54,7 @@ export class RenderableEvent { pureRelevance: number; datetimeWeightedRelevance: number; containedGrams: string[]; + selectedGram: string; selectedContextSpan: string; keyGrams: string[]; staticThumbnailURL: string; @@ -60,6 +65,7 @@ export class RenderableEvent { pureRelevance: number, datetimeWeightedRelevance: number, containedGrams: string[], + selectedGram: string, selectedContextSpan: string, keyGrams: string[], staticThumbnailURL: string, @@ -69,6 +75,7 @@ export class RenderableEvent { this.pureRelevance = pureRelevance; this.datetimeWeightedRelevance = datetimeWeightedRelevance; this.containedGrams = containedGrams; + this.selectedGram = selectedGram; this.selectedContextSpan = selectedContextSpan; this.keyGrams = keyGrams; this.staticThumbnailURL = staticThumbnailURL; @@ -104,23 +111,10 @@ export default class EventSearchService { * Returns as an array of string instead of string to pass into ngrams */ cleanText(query: string): string[] { - // Replace new line and tab characters with a space - let cleanedQuery = query.replace(/[\t\n]+/g, " "); - - // Replace common strings used by documents on backend - // Not _really_ needed here but a nice safety measure to match the alg - cleanedQuery = cleanedQuery.replace(/[\-\-]/, " "); - - // Same as Python standard punctuation string - cleanedQuery = 
cleanedQuery.replace(/['!"#$%&\\'()\*+,\-\.\/:;<=>?@\[\\\]\^_`{|}~']/g, ""); - - // Remove extra spaces - cleanedQuery = cleanedQuery.replace(/\s{2,}/g, " "); - - // Remove leading and trailing spaces + const cleanedQuery = cleanText(query); // Remove stopwords // Return as list of terms - return removeStopwords(cleanedQuery.trim().split(" ")); + return removeStopwords(cleanedQuery.split(" ")); } getStemmedGrams(query: string): string[] { @@ -197,10 +191,10 @@ export default class EventSearchService { // Unpack matchingGram to protect from undefined let selectedContextSpan = ""; - if (matchingGramWithHighestValue && matchingGramWithHighestValue.context_span) { - selectedContextSpan = matchingGramWithHighestValue.context_span; - } else { - selectedContextSpan = ""; + let selectedGram = ""; + if (matchingGramWithHighestValue) { + selectedContextSpan = matchingGramWithHighestValue?.context_span || ""; + selectedGram = matchingGramWithHighestValue?.unstemmed_gram || ""; } // Get grams found in event from query @@ -217,6 +211,7 @@ export default class EventSearchService { sumBy(matchingIndexedEventGrams, "value"), sumBy(matchingIndexedEventGrams, "datetime_weighted_value"), containedGrams, + selectedGram, selectedContextSpan ) ); @@ -268,6 +263,7 @@ export default class EventSearchService { matchingEvent.pureRelevance, matchingEvent.datetimeWeightedRelevance, matchingEvent.containedGrams, + matchingEvent.selectedGram, matchingEvent.selectedContextSpan, keyUnstemmedGrams, staticThumbnailPathURL, diff --git a/src/pages/EventPage/EventPage.tsx b/src/pages/EventPage/EventPage.tsx index 8be8a864..d91f04aa 100644 --- a/src/pages/EventPage/EventPage.tsx +++ b/src/pages/EventPage/EventPage.tsx @@ -1,6 +1,6 @@ -import React, { FC, useCallback } from "react"; +import React, { FC, useCallback, useMemo } from "react"; -import { useParams } from "react-router-dom"; +import { useParams, useLocation } from "react-router-dom"; import { useAppConfigContext } from "../../app"; import 
EventService from "../../networking/EventService"; @@ -22,6 +22,14 @@ const EventPage: FC = () => { const { id } = useParams<{ id: string }>(); // Get the app config context const { firebaseConfig } = useAppConfigContext(); + //Get the query + const location = useLocation<{ query: string }>(); + const searchQuery = useMemo(() => { + if (location.state) { + return location.state.query; + } + return ""; + }, [location.state]); const fetchEventData = useCallback(async () => { const eventService = new EventService(firebaseConfig); @@ -115,7 +123,7 @@ const EventPage: FC = () => { return ( - {eventDataState.data && } + {eventDataState.data && } ); }; diff --git a/src/utils/cleanText.ts b/src/utils/cleanText.ts new file mode 100644 index 00000000..641f5715 --- /dev/null +++ b/src/utils/cleanText.ts @@ -0,0 +1,29 @@ +/** + * cleanText function is almost a mirror of the `clean_text` function from + * cdp-backend Python. The only major difference is that there is no boolean parameter + * for indicating whether to clean stopwords: this function never removes stopwords + * itself, so callers that need them removed must filter the tokens of the returned string. + * + * Replaces line breaks, tabs, and hyphens with spaces, removes punctuation, + * collapses extra spaces (2+ spaces become 1 space), and trims both ends. + * + */ +const cleanText = (text: string): string => { + // Replace new line and tab characters with a space + let cleanedText = text.replace(/[\t\n]+/g, " "); + + // Replace common strings used by documents on backend + // Not _really_ needed here but a nice safety measure to match the alg + cleanedText = cleanedText.replace(/[\-\-]/g, " "); + + // Same as Python standard punctuation string + cleanedText = cleanedText.replace(/['!"#$%&\\'()\*+,\-\.\/:;<=>?@\[\\\]\^_`{|}~']/g, ""); + + // Remove extra spaces + cleanedText = cleanedText.replace(/\s{2,}/g, " "); + + // Remove leading and trailing spaces + return cleanedText.trim(); +}; + +export default cleanText;