Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Display photos for event #4

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions thisishappening/utils/tweet_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"\ufe0f", # Variation Selector-16 for emoji https://codepoints.net/U+FE0F
]

# Names of non-Twitter media hosts. get_media_urls() keeps a tweet's expanded
# URL under one of these names when the name occurs as a substring of the URL.
EXTERNAL_MEDIA_SOURCES = ["instagram"]

# Load the en_core_web_sm pipeline once at import time, leaving out the
# "parser" and "ner" components (presumably unused here — confirm before
# re-enabling either).
nlp = en_core_web_sm.load(exclude=["parser", "ner"])

Expand All @@ -70,6 +71,7 @@
"place_country",
"place_country_code",
"place_type",
"media_urls",
],
)

Expand Down Expand Up @@ -106,6 +108,37 @@ def get_tweet_body(status):
return tweet_body


def get_media_urls(status):
    """Collect the media URLs attached to a tweet payload, grouped by source.

    Args:
        status: Tweet payload dict (the caller passes either the status itself
            or its ``extended_tweet`` sub-dict).

    Returns:
        Dict mapping a source name to a non-empty list of URLs. The key
        ``"twitter"`` holds native media (``media_url_https`` values); every
        other key is an entry of ``EXTERNAL_MEDIA_SOURCES`` and holds the
        matching ``expanded_url`` values. Sources without URLs are omitted.
    """
    media_urls = {}

    # Twitter native media are stored in extended_entities
    key = "extended_entities" if "extended_entities" in status else "entities"
    # ``or {}`` also covers a container that is present but set to None.
    media = (status.get(key) or {}).get("media")
    if media is None:
        logger.debug(f"No media in {key}")
        media = []
    # Skip individual entries without a URL instead of abandoning the rest.
    twitter_urls = [
        medium["media_url_https"]
        for medium in media
        if medium.get("media_url_https")
    ]
    if twitter_urls:
        media_urls["twitter"] = twitter_urls

    # Other URLs are stored in entities
    key = "entities"
    links = (status.get(key) or {}).get("urls")
    if links is None:
        logger.debug(f"No urls in {key}")
        links = []
    # Extract the expanded URLs once; they do not depend on the source.
    expanded_urls = [
        expanded
        for expanded in (link.get("expanded_url") for link in links)
        if expanded is not None
    ]
    for source in EXTERNAL_MEDIA_SOURCES:
        # NOTE(review): this is a plain substring test on the whole URL, so a
        # URL whose path merely mentions the source name also matches —
        # confirm that this is intended.
        matches = [expanded for expanded in expanded_urls if source in expanded]
        if matches:
            media_urls[source] = matches

    return media_urls


def get_lon_lat(status):
has_coords = False
if status["coordinates"]:
Expand Down Expand Up @@ -154,6 +187,10 @@ def get_tweet_info(status: Dict) -> Dict:
place_country_code = status["place"].get("country_code")
# Possible place_type values: country, admin, city, neighborhood, poi
place_type = status["place"].get("place_type")
if "extended_tweet" in status:
media_urls = get_media_urls(status["extended_tweet"])
else:
media_urls = get_media_urls(status)

tweet_info = TweetInfo(
status_id_str=status_id_str,
Expand All @@ -175,6 +212,7 @@ def get_tweet_info(status: Dict) -> Dict:
place_country=place_country,
place_country_code=place_country_code,
place_type=place_type,
media_urls=media_urls,
)

return tweet_info
Expand Down