################################################
#                      Ex4                     #
#                  Project by:                 #
#                    Group 13                  #
#             Sofia Aparicio  81105            #
#             Rodrigo Lousada 81115            #
#             Rogerio Cabaco  81470            #
################################################
################################################
#                   imports                    #
#        pip install feedparser yattag         #
################################################
from exercice_2 import *          # expected to provide Graph and SENT_SUM
import feedparser
import webbrowser
import time
import re                         # used by striphtml()
import os                         # used by generateHTML() to build the file:// path
from yattag import Doc
from time import mktime
from datetime import datetime
################################################
#                   constants                  #
################################################
websites = [
    'http://rss.nytimes.com/services/xml/rss/nyt/World.xml',
    'http://rss.cnn.com/rss/edition_world.rss',
    'http://feeds.washingtonpost.com/rss/world',
    'http://www.latimes.com/world/rss2.0.xml',
]
template = """
<html>
<head>
<title>Template {{ title }}</title>
</head>
<body>
Body with {{ mystring }}.
</body>
</html>
"""
################################################
#                   functions                  #
################################################
def striphtml(data):
    """Strip HTML tags from a string."""
    p = re.compile(r'<.*?>')
    return p.sub('', data)

def getTaggs(tags):
    """Return the 'term' value of every tag in a feed entry's tag list."""
    listtags = []
    for tag in tags:
        listtags.append(tag['term'])
    return listtags

def getSiteData(site):
    """Parse one RSS feed into a dict keyed by (plain-text) article title."""
    dicSite, tags, summary, published = {}, [], '', ''
    pubparse = time.strptime("1990.01.01", "%Y.%m.%d")   # fallback date for entries with no 'published' field
    feed = feedparser.parse(site)
    for entrie in feed['entries']:
        if len(entrie['title']) > 0:
            if 'summary' in entrie.keys():
                summary = striphtml(entrie['summary'])
            if 'tags' in entrie.keys():
                tags = getTaggs(entrie['tags'])
            if 'published' in entrie.keys():
                published = entrie['published']
                pubparse = entrie['published_parsed']
            dicSite.update({striphtml(entrie['title']): [summary, tags, entrie['links'][0]['href'], published, pubparse]})
    return dicSite

def collectAllDataWeb():
    """Merge the entries of every feed listed in 'websites' into a single dict."""
    globalMatrix = {}
    for site in websites:
        globalMatrix.update(getSiteData(site))
    # print(globalMatrix)
    return globalMatrix

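# Illustrative only (hypothetical values): each entry in the dict returned by
# collectAllDataWeb() maps a headline to [summary, tags, link, published, published_parsed], e.g.
#   'Some headline': ['Short summary...', ['World'], 'http://example.com/article',
#                     'Mon, 01 Jan 2018 00:00:00 GMT', time.struct_time(...)]
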
def generateHTML(sentences, data):
    """Render the selected headlines to helloworld.html and open it in the browser."""
    urlPath = 'file://' + os.path.dirname(os.path.abspath('helloworld.html')) + '/helloworld.html'
    doc, tag, text = Doc().tagtext()
    with tag('h1'):
        text('World News!')
    for sent in sentences:
        with tag('h1'):
            with tag('a', href=data[sent][2]):
                text(sent)
        doc.stag('br')
        with tag('p'):
            text(data[sent][3])                     # publication date string
        if len(data[sent][0]) > 0:
            with tag('p'):
                text('Summary of the article: ' + data[sent][0])
        if len(data[sent][1]) > 0:
            with tag('p'):
                text('Tags: ')
                for t in data[sent][1]:
                    text(t + ' ')
    with open('helloworld.html', 'w') as f:
        f.write(doc.getvalue())
    webbrowser.open(urlPath, new=1, autoraise=True)

def getGlobalSummary(matrix):
    """Summarise the collected sentences with the Graph/getSummary machinery from exercice_2."""
    graph = Graph(matrix)
    sentSum = graph.getSummary(SENT_SUM)
    return sentSum

def orderCollectin(dictio):
    """Return the article titles sorted from newest to oldest publication date."""
    sent = []
    sorte = sorted(dictio.items(), key=lambda e: (e[1][4][0], e[1][4][1], e[1][4][2]), reverse=True)
    for el in sorte:
        sent.append(el[0])
    return sent

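# The sort key above reads the first three fields of feedparser's 'published_parsed'
# value, which is a time.struct_time: index 0 is tm_year, 1 is tm_mon, 2 is tm_mday,
# so entries are ordered by year, then month, then day, newest first.
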
def exercise_4_main():
    data = collectAllDataWeb()
    dataSortedTime = orderCollectin(data)
    summary = getGlobalSummary(dataSortedTime)
    generateHTML(summary, data)

################################################
#                      run                     #
################################################
if __name__ == '__main__':
    exercise_4_main()