-
Notifications
You must be signed in to change notification settings - Fork 0
/
3.4.3.py
68 lines (48 loc) · 1.92 KB
/
3.4.3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pymongo
import re
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def main():
    """Draw one word cloud per country from the tweets collection."""
    tweets_collection = getCollectionMongoDB()
    # Same two countries, in the same order, as separate figures.
    for country in ("Argentina", "United States"):
        generarNube(tweets_collection, country)
def getCollectionMongoDB(
    uri="mongodb://localhost:27017/",
    db_name="test",
    collection_name="allTweets",
):
    """Return a handle to a MongoDB collection.

    The defaults reproduce the original hard-coded target (local server,
    database "test", collection "allTweets"), so existing zero-argument
    calls behave exactly as before.

    Args:
        uri: MongoDB connection string handed to pymongo.MongoClient.
        db_name: Name of the database to select on the client.
        collection_name: Name of the collection to select in that database.

    Returns:
        The collection object obtained from the client.

    Note:
        The MongoClient created here is not closed by this function.
    """
    client = pymongo.MongoClient(uri)
    return client[db_name][collection_name]
def generarNube(collection, pais, max_palabras=20):
    """Show a word cloud of the most frequent words in one country's tweets.

    Args:
        collection: Object handed to obtenerTweetsPorPais to fetch the tweets.
        pais: Country value used to filter the tweets; also shown in the
            figure title.
        max_palabras: Number of most frequent words to draw (default 20,
            the value that used to be hard-coded).

    Returns:
        None. Displays a matplotlib figure, or prints a notice and returns
        early when there are no words to draw.
    """
    # The query result is only iterated once, so it is passed straight
    # through instead of being copied into a list first.
    texto = procesarTexto(obtenerTweetsPorPais(collection, pais))
    frecuencias = dict(contarPalabras(texto).most_common(max_palabras))

    # WordCloud.generate_from_frequencies raises ValueError when given no
    # words; skip the plot instead of crashing the whole run.
    if not frecuencias:
        print(f'No hay palabras para generar la nube de {pais}')
        return

    wordcloud = WordCloud(width=800, height=400, background_color='white')
    wordcloud.generate_from_frequencies(frecuencias)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.title(f'Nube de Palabras para {pais}')
    plt.axis('off')
    plt.show()
def obtenerTweetsPorPais(collection, pais):
    """Query the collection for documents whose "pais" field equals pais.

    Returns whatever collection.find returns for that filter.
    """
    filtro = {"pais": pais}
    return collection.find(filtro)
def procesarTexto(tweets):
    """Merge tweet texts into one lowercase string with the noise removed.

    Args:
        tweets: Iterable of tweet mappings. Entries without a "text" key,
            or whose "text" value is not a string, are skipped.

    Returns:
        str: The texts joined by spaces and lowercased, with URLs,
        @mentions, punctuation/symbols and the standalone token "rt"
        removed. Accented and other non-ASCII letters are preserved so
        Spanish words such as "año" stay intact.
    """
    # A non-string "text" (e.g. None) would make str.join raise TypeError.
    text = " ".join(
        tweet["text"]
        for tweet in tweets
        if "text" in tweet and isinstance(tweet["text"], str)
    )
    text = text.lower()

    # Strip links ("http\S+" already covers "https...").
    text = re.sub(r"http\S+|www\S+", '', text)
    # Strip @mentions entirely; for hashtags only the "#" sign is dropped,
    # the tag word itself is kept.
    text = re.sub(r'\@\w+|\#', '', text)
    # Strip punctuation and symbols. \w is Unicode-aware for str patterns,
    # so accented letters survive; "_" is removed explicitly because \w
    # would otherwise keep it.
    text = re.sub(r'[^\w\s]+|_+', '', text)
    # Strip the retweet marker (already lowercased to "rt").
    text = re.sub(r'\brt\b', '', text)
    return text
def contarPalabras(texto, longitud_minima=3):
    """Count how often each sufficiently long word occurs in texto.

    Args:
        texto: String that is split on whitespace into words.
        longitud_minima: Minimum number of characters a word needs to be
            counted. The default of 3 keeps the original behaviour of
            discarding words of one or two characters.

    Returns:
        collections.Counter mapping each kept word to its number of
        occurrences.
    """
    # Feed the filtered words straight into Counter; no intermediate list.
    return Counter(
        palabra
        for palabra in texto.split()
        if len(palabra) >= longitud_minima
    )
if __name__ == "__main__":
    # Only run when this file is executed as a script, not when imported.
    main()