Skip to content

Commit

Permalink
Merge pull request #2 from essteer/venues
Browse files Browse the repository at this point in the history
Venues and addresses
  • Loading branch information
essteer authored Aug 18, 2024
2 parents e1a91e0 + 7361224 commit 00d586f
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 6 deletions.
93 changes: 93 additions & 0 deletions src/utils/maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,96 @@
"張蔓姿",
"lagchun",
]

# Maps a search key to a standardised venue name.
#
# Keys are matched as plain substrings of the raw address after it has been
# lower-cased and stripped of spaces, commas and full stops, so every key
# must be written in that same compressed form.  Lookup walks this dict in
# insertion order and stops at the first hit, so a key must not be a
# substring of an unrelated, commonly occurring word.
VENUE_MAP = {
    "28restaurant": "28 Restaurant",
    "aftermath": "The Aftermath",
    "alluremusic": "Allure Music Salon",
    "aqualand": "Aqualand",
    "cccpopupspace": "cccpopupspace",
    "champagnebarlobbygrandhyatt": "Grand Hyatt Champagne Bar",
    "cheektocheeksoho": "Cheek to Cheek Soho",
    "cheztrente": "Chez Trente",
    "dalecandela": "Dale Candela",
    "diesel": "Diesel's",
    "dragonfly": "Dragonfly",
    "ella26": "ELLA",
    "fairviewmansion": "Fairview Mansion",
    "fountaindechopin": "Fountain de Chopin",
    "foxglove": "Foxglove",
    "freespacethebox": "Freespace The Box",
    "fringedairy": "Fringe Dairy",
    "hollybrown": "Holly Brown Coffee Roasters",
    # Both spellings of "centre" are accepted.
    "hongkongculturalcentre": "Hong Kong Cultural Centre",
    "hongkongculturalcenter": "Hong Kong Cultural Centre",
    "ironfairies": "Iron Fairies",
    "kindofbrew": "Kind of Brew",
    "laubakfreespace": "Lau Bak Freespace Livehouse",
    "lumosrestaurant": "LUMOS Restaurant and Bar",
    "maggiechoo": "Maggie Choo's",
    "momlivehouse": "MOM Live House",
    # "ohmcafe" covers "Ohm... cafe" (full stops are stripped before
    # matching); "ohm…" covers the single-character ellipsis form.
    "ohmcafe": "Ohm… cafe and bar",
    "ohm…": "Ohm… cafe and bar",
    "oneness": "Oneness",
    "paksharoad": "Pak Sha Road",
    "pizzaexpressshopg31": "Pizza Express, Empire Centre",
    "ploungebyplaisance": "P Lounge by Plaisance",
    "ritatongliu": "Rita Tong Liu Drama Theatre",
    "shop222a": "K11 Art Mall Shop 222A",
    "shuntakexhibition": "Shun Tak Exhibition & Event Space",
    "sohohouse": "Soho House",
    "terriblebaby": "Terrible Baby",
    "thesanctum": "The Sanctum",
    "thesouthside": "The Southside",
    "thestage": "The Stage",
    "threesheets": "Three Sheets Marquee Bar",
    "urbansky": "Urban Sky",
    # Must include the article: a bare "wanch" is a substring of "wanchai",
    # so every otherwise-unknown address in Wan Chai would be resolved to
    # The Wanch.
    "thewanch": "The Wanch",
    "y-theatre": "Y-Theatre",
}

# Maps a standardised venue name (a value of VENUE_MAP) to its address as a
# two-item list: index 0 is the English address, index 1 the Chinese address.
ADDRESS_MAP = {
    "28 Restaurant": ["28 Restaurant, 28 Yi Chun St, Sai Kung", "西貢宜春街28號"],
    "Allure Music Salon": ["Allure Music Salon, 3 School St, Tai Hang, Causeway Bay", "銅鑼灣大坑書館街3號"],
    "Aqualand": ["Aqualand, Water World Ocean Park Hong Kong, 33 Ocean Drive, Aberdeen", "香港香港仔海洋徑33號 香港海洋公園水上樂園"],
    "cccpopupspace": ["cccpopupspace, G/F, 23 New Market St, Sheung Wan", "上環新街市街23號地舖"],
    "Cheek to Cheek Soho": ["Cheek to Cheek Soho, 17 Old Bailey St, Central", "中環奧卑利街17號"],
    "Chez Trente": ["Chez Trente, 39 Staunton St, Central", "中環士丹頓街39號"],
    "Dale Candela": ["Dale Candela, 23 Main St, Yung Shue Wan, Lamma Island", "南丫島榕樹灣大街23號"],
    "Diesel's": ["Diesel's, 51 Main St, Yung Shue Wan, Lamma Island", "南丫島榕樹灣大街51號地下"],
    "Dragonfly": ["Dragonfly, Shop 10-G1, Tai Kwun, Hollywood Rd, Central", "中環荷李活道10號 大館10-G1舖"],
    "ELLA": ["ELLA, 26/F, The Trilogy, H Code, 45 Pottinger St, Central", "中環砵甸乍街45號 H Code 26樓"],
    "Fairview Mansion": ["Shop A&C, G/F, Fairview Mansion, 51 Paterson St, Causeway Bay", "銅鑼灣百德新街51號華爾大廈 A&C地舖"],
    "Fountain de Chopin": ["Fountain de Chopin, 6/F, Block B, Kai Tak Factory Building, Stage 1, 22 Sam Chuk St, San Po Kong", "新蒲崗三祝街22號啟德工業大廈第一期B座六樓 翻騰三周半"],
    "Foxglove": ["Foxglove, 2/F Printing House, 6 Duddell St, Central", "中環都爹利街6號印刷行2樓"],
    "Freespace The Box": ["Freespace, Freespace The Box, West Kowloon", "西九文化區自由空間大盒"],
    "Fringe Dairy": ["Fringe Dairy, Fringe Club, 2 Lower Albert Rd, Central", "中環下亞厘畢道二號藝穗會賽奶庫"],
    "Grand Hyatt Champagne Bar": ["Champagne Bar, Lobby, Grand Hyatt Hong Kong, 1 Harbour Road, Wan Chai", "香檳吧,香港君悅酒店,灣仔港灣道1號"],
    "Holly Brown Coffee Roasters": ["Holly Brown Coffee Roasters, G01, G/F, D2 Place Two, 15 Cheung Shun St, Lai Chi Kok", "九龍荔枝角長順街15號 D2第二期地下G01號舖"],
    "Hong Kong Cultural Centre": ["Hong Kong Cultural Centre", "香港文化中心"],
    "Iron Fairies": ["Iron Fairies, 1-13 Hollywood Rd, Central", "中環荷李活道1-13號"],
    "K11 Art Mall Shop 222A": ["Shop 222A, Level 2, K11 Art Mall, 18 Hanoi Rd, Tsim Sha Tsui", "尖沙咀 河內道18號K11 購物藝術館 2樓222A號"],
    "Kind of Brew": ["Kind of Brew, G/F, 112 First St, Sai Ying Pun", "西營盤第一街112號地下"],
    "Lau Bak Freespace Livehouse": ["Lau Bak Freespace Livehouse G/F, Freespace, West Kowloon Cultural District, 18 Museum Drive, Tsim Sha Tsui", "尖沙咀西九文化區自由空間地舖 留白 Livehouse"],
    "LUMOS Restaurant and Bar": ["LUMOS Restaurant and Bar, Shop 13-14, G/F Lakeshore Building, 7 Tseng Choi St, Tuen Mun", "屯門井財街7號力生大廈地下13-14號舖"],
    "Maggie Choo's": ["Maggie Choo's, G/F Chinachem Hollywood Centre, Central", "中環華懋荷李活中心"],
    "MOM Live House": ["MOM Live House, Shop B38, Seven Seas Shopping Centre, 117-121 Kings Rd, North Point", "北角英皇道117-121號 七海商業中心 店鋪B38"],
    "Ohm… cafe and bar": ["Ohm… cafe and bar, 152 Yu Chau St, Sham Shui Po", "深水埗汝州街152號"],
    # NOTE(review): English says "Rm 11" while the Chinese says "C11室" - confirm the room number.
    "Oneness": ["Oneness, Rm 11 9/F, Wing Hing Industrial Building, 14 Hing Yip St, Kwun Tong, Kowloon", "觀塘興業街14號永興工業大廈9樓C11室太一"],
    "P Lounge by Plaisance": ["P Lounge by Plaisance, G/F, 1 Duddell St, Central", "中環都爹利街1號地舖"],
    "Pak Sha Road": ["Pak Sha Rd, Causeway Bay", "銅鑼灣白沙道"],
    "Pizza Express, Empire Centre": ["Pizza Express, Shop G31-33, 49-51 Empire Centre, 68 Mody Rd, Tsim Sha Tsui", "尖沙咀麼地道68號帝國中心地下G31-33及G49-51號舖"],
    "Rita Tong Liu Drama Theatre": ["Rita Tong Liu Drama Theatre, 1 Gloucester Rd, Wan Chai", "灣仔告士打道一號廖湯惠靄戲劇院"],
    "Shun Tak Exhibition & Event Space": ["Shun Tak Exhibition & Event Space, 4/F, Shun Tak Centre, 200 Connaught Rd Central", "香港上環干諾道中200號信德中心4樓信德展覽及活動空間"],
    "Soho House": ["Soho House, 33 Des Voeux Rd West, Sheung Wan", "上環德輔道西33號"],
    "Terrible Baby": ["Terrible Baby, 4/F, Easton HK, 380 Nathan Rd, Kowloon", "九龍彌敦道380號"],
    "The Aftermath": ["The Aftermath, L/G, 57-59 Wyndham St, Central", "中環雲咸街57-59號低層地下"],
    "The Sanctum": ["The Sanctum, 3/F, Stanley 11, 11 Stanley St, Central", "中環士丹利街11號Stanley 11 3樓"],
    "The Southside": ["The Southside, LG Atrium, 11 Heung Yip Rd, Wong Chuk Hang", "黃竹坑香葉道11號LG中庭"],
    # NOTE(review): English gives "60A-66 Johnston Rd" while the Chinese gives "62號" - confirm the street number.
    "The Stage": ["The Stage, 2/F, The Heritage Woo Cheong Pawn Shop, 60A-66 Johnston Rd, Wan Chai", "灣仔莊士敦道62號 2樓"],
    "The Wanch": ["The Wanch, 1/F, Henan Building, 90 Jaffe Rd, Wan Chai", "灣仔謝斐道90號豫港大廈1樓"],
    "Three Sheets Marquee Bar": ["Three Sheets Marquee Bar, Shop G06, G/F, D'Deck, 8-12 Plaza Lane, Discovery Bay", "愉景灣廣場徑8-12號D'Deck地下G06號舖"],
    "Urban Sky": ["Urban Sky, 9/F, Hysan Place", "希慎廣場9樓"],
    "Y-Theatre": ["Y-Theatre, LG1, Youth Square, 238 Chai Wan Rd", "香港柴灣道238號青年廣場LG1 Y-Theatre"],
}

32 changes: 30 additions & 2 deletions src/utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime

sys.path.append(os.path.join(os.path.dirname(__file__)))
from maps import DAY_MAP, GENRE_MAP, NOT_GENRES, TIER_MAP
from maps import ADDRESS_MAP, DAY_MAP, GENRE_MAP, NOT_GENRES, TIER_MAP, VENUE_MAP


def convert_days_to_digits(day_string) -> int:
Expand Down Expand Up @@ -261,6 +261,27 @@ def parse_all_bands_and_genres(bands_string: str) -> list[dict]:
return new_bands_list


def parse_known_venues(full_address: str) -> "str | None":
    """
    Searches an address string for a known venue name
    to facilitate getting a standardised address

    The address is lower-cased and stripped of spaces, commas and
    full stops, then each VENUE_MAP key is tested as a substring of
    the result. Keys are tried in VENUE_MAP's insertion order and the
    first hit wins.

    Parameters
    ----------
    full_address : str
        raw address string to search for venue name in

    Returns
    -------
    _ : str | None
        venue name from VENUE_MAP, or None when the address is empty
        or contains no known venue key
    """
    # Nothing to search in; treat the same as "no known venue".
    if not full_address:
        return None

    # One pass that deletes the separator characters the keys omit.
    compressed_address = full_address.lower().translate(str.maketrans("", "", " ,."))
    for key, venue in VENUE_MAP.items():
        if key in compressed_address:
            return venue
    return None


def format_matches(matches: list[re.Match]) -> list[dict]:
"""
Applies standard format to matched entities
Expand Down Expand Up @@ -304,7 +325,14 @@ def format_matches(matches: list[re.Match]) -> list[dict]:
event["open"] = convert_to_24_hour_time(event_temp["open"])
event["close"] = convert_to_24_hour_time(event_temp["close"])

event["venue"] = event_temp["venue"]
venue = parse_known_venues(event_temp["venue"])
if venue:
event["venue"] = venue
event["address_en"] = ADDRESS_MAP[venue][0]
event["address_cn"] = ADDRESS_MAP[venue][1]
else:
event["address_raw"] = event_temp["venue"]

if event_temp["desc"] == "":
event["desc"] = "Unknown"
else:
Expand Down
26 changes: 22 additions & 4 deletions tests/test_app.py → tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,31 @@ def test_expected_fields_present(self):
self.assertIn("month", match)
self.assertIn("date", match)
self.assertIn("desc", match)
self.assertIn("venue", match)
self.assertIn("open", match)
self.assertIn("close", match)
self.assertIn("bands", match)
self.assertIn("tickets", match)
if "venue" in match:
self.assertIn("address_en", match)
self.assertIn("address_cn", match)
else:
self.assertIn("address_raw", match)
# NOTE: this test relies on external source, check source if not found
matches = data_pipeline(TEST_URL)
for match in matches:
self.assertIn("weekday", match)
self.assertIn("month", match)
self.assertIn("date", match)
self.assertIn("desc", match)
self.assertIn("venue", match)
self.assertIn("open", match)
self.assertIn("close", match)
self.assertIn("bands", match)
self.assertIn("tickets", match)
if "venue" in match:
self.assertIn("address_en", match)
self.assertIn("address_cn", match)
else:
self.assertIn("address_raw", match)

def test_match_content_types_correct(self):
"""Test values in each match dict are of expected types"""
Expand All @@ -76,11 +84,16 @@ def test_match_content_types_correct(self):
self.assertIsInstance(match["month"], int)
self.assertIsInstance(match["date"], int)
self.assertIsInstance(match["desc"], str)
self.assertIsInstance(match["venue"], str)
self.assertIsInstance(match["open"], str)
self.assertIsInstance(match["close"], str)
self.assertIsInstance(match["bands"], list)
self.assertIsInstance(match["tickets"], dict)
if "venue" in match:
self.assertIsInstance(match["venue"], str)
self.assertIsInstance(match["address_en"], str)
self.assertIsInstance(match["address_cn"], str)
else:
self.assertIsInstance(match["address_raw"], str)
# NOTE: this test relies on external source, check source if not found
matches = data_pipeline(TEST_URL)
for match in matches:
Expand All @@ -91,11 +104,16 @@ def test_match_content_types_correct(self):
self.assertIsInstance(match["month"], int)
self.assertIsInstance(match["date"], int)
self.assertIsInstance(match["desc"], str)
self.assertIsInstance(match["venue"], str)
self.assertIsInstance(match["open"], str)
self.assertIsInstance(match["close"], str)
self.assertIsInstance(match["bands"], list)
self.assertIsInstance(match["tickets"], dict)
if "venue" in match:
self.assertIsInstance(match["venue"], str)
self.assertIsInstance(match["address_en"], str)
self.assertIsInstance(match["address_cn"], str)
else:
self.assertIsInstance(match["address_raw"], str)


if __name__ == "__main__":
Expand Down
16 changes: 16 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import unittest

sys.path.append(os.path.join(os.path.dirname(__file__)))
from src.utils.maps import ADDRESS_MAP
from src.utils.transform import (
convert_days_to_digits,
convert_to_24_hour_time,
Expand All @@ -12,6 +13,7 @@
parse_genres,
parse_band_name,
parse_all_bands_and_genres,
parse_known_venues
)


Expand Down Expand Up @@ -270,5 +272,19 @@ def test_output_values(self):
)


class TestParseKnownVenues(unittest.TestCase):
    """Tests for parse_known_venues venue-name lookup"""

    def test_no_matches(self):
        """Unknown venues should return None"""
        fake_venue = "qwerty"
        venue = parse_known_venues(fake_venue)
        self.assertIsNone(venue)

    def test_empty_address(self):
        """An empty address contains no venue key and should return None"""
        self.assertIsNone(parse_known_venues(""))

    def test_known_matches(self):
        """Known venues should return the expected name, which is a key in ADDRESS_MAP"""
        real_venue = "Maggie Choo’s, G/F Chinachem Hollywood Centre, Central, 中環華懋荷李活中心"
        venue = parse_known_venues(real_venue)
        # Membership alone would pass even if the wrong venue were matched,
        # so pin the exact standardised name as well.
        self.assertEqual(venue, "Maggie Choo's")
        self.assertIn(venue, ADDRESS_MAP)


if __name__ == "__main__":
unittest.main()

0 comments on commit 00d586f

Please sign in to comment.