From a8820e7c6e22dcaed043f1bbfbefcf9d6a1e0e54 Mon Sep 17 00:00:00 2001 From: tassoman Date: Wed, 29 Nov 2023 00:03:42 +0100 Subject: [PATCH] added feature sentiment analysis. Breaking Change! --- README.md | 29 +++++++++++++++++++++-------- jobs/create.py | 14 ++++++++++---- jobs/fetch.py | 3 ++- jobs/sentiment.py | 14 ++++++++++++++ requirements.txt | 1 + 5 files changed, 48 insertions(+), 13 deletions(-) create mode 100644 jobs/sentiment.py diff --git a/README.md b/README.md index 7ad3c3d..b4e4202 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # RSS Newsfeed reader bot for Misskey 😻 -This Python bot fetches RSS feeds every 5 minutes. Then "cherry pics" a news at time, each minute. Choosing from the freshest to the older posted. +This Python bot posts RSS news from your chosen feeds. You can choose the frequency of posting (in minutes) and the number of Notes to post each time. -News and Notes flows are asyncronous, so that it can pick up always the fresher news an Note them as soon as possible. +Before posting, it runs a **sentiment analysis** and flags the Note with a CW (Content Warning) and :NSFW: if the sentiment is negative (war, deaths, bad news). + +News and Notes flows are asynchronous, so that it can always pick up the freshest news and Note it as soon as possible. Notes will not bloat your Misskey profile, because get deleted if older than a month. @@ -19,7 +21,6 @@ Please, follow this instructions once, before starting: - Remember to set `isBot = True` - Visit the page: `https://your.misskey.instance/settings/api` - Create a new API-key having at minimum `notes:write` privilege. -- copy `.env-example` in `.env` file, fill in your configuration ### Prepare a Python virtual environment @@ -29,10 +30,26 @@ Please use python3. In latest GNU/Linux distros, it's already in as default. Oth 2. `python -m venv .venv` Python sandboxed enviroment creation 3. `. .venv/bin/activate` Environment activation 4. `pip install -r requirements.txt` Dependencies installation +5. 
`python -m spacy download en_core_web_lg` Downloads the spaCy language model used by the sentiment analysis (for NSFW posts) + +## Configuration + +Now that you have installed the software, it needs a small amount of configuration. ### Fill the bot with RSS feed -Edit file `sources.txt` and list a RSS url for every line. +First of all, put the RSS feed source URLs into the file `sources.txt`, one per line. + +### Environment variables + +Now copy `.env-example` to a `.env` file and fill in your personal configuration: + +- **HOST** your Misskey domain +- **APIKEY** the API key created earlier +- **VISIBILITY** choose [in which Timeline to post](https://misskey-hub.net/en/docs/features/timeline.html). +- **LOCAL** boolean for federated Notes +- **EVERY_MINUTES** posting frequency, in minutes +- **HOW_MANY** number of Notes posted each time ## Run! @@ -42,10 +59,6 @@ Inside your python environment: It will setup if needed. Then will start three scheduled jobs. -- Fetch RSS every 5 minutes -- Post Notes every minute -- Delete Posted notes older than one month. Hourly. - ### Service daemon configuration You probably want to run it detached from the console by using `nohup` or running into a `screen` command. So that you can close the ssh shell without stopping. diff --git a/jobs/create.py b/jobs/create.py index 7c5c1a7..00b721e 100644 --- a/jobs/create.py +++ b/jobs/create.py @@ -5,6 +5,7 @@ import os from misskey import Misskey from dotenv import load_dotenv +from jobs.sentiment import getSentiment load_dotenv() @@ -24,18 +25,23 @@ def publish_note(): mk = Misskey(os.getenv('HOST'), i=os.getenv('APIKEY')) c.execute(''' - SELECT * FROM news WHERE noted = 0 ORDER BY publishedAt DESC LIMIT ? + SELECT * FROM news + WHERE notedAt IS NULL OR notedAt = '' + ORDER BY publishedAt DESC LIMIT ? 
''', str(quantity)) data = c.fetchall() if data is not None: for d in data: - text = d[1] + "\n" + d[4] + "\n" + d[5] + "\n\n" + d[3] + sentiment = getSentiment(d[4] + d[5]) + text = "\n" + d[4] + "\n" + d[5] + "(" +d[1] + ")\n\n" + d[3] + cw = None if sentiment >= 0 else ":nsfw: News article" time.sleep(2) api = mk.notes_create( text=text, visibility=visibility, local_only=local_only, + cw=cw ) n_id = api['createdNote']['id'] n_at = int(datetime.strptime( @@ -43,7 +49,7 @@ def publish_note(): ).timestamp()) c.execute(''' - UPDATE news SET noted = 1, noteId = ?, notedAt = ? WHERE id = ? - ''', (n_id, n_at, d[0])) + UPDATE news SET sentiment = ?, noteId = ?, notedAt = ? WHERE id = ? + ''', (sentiment, n_id, n_at, d[0])) db.commit() db.close() diff --git a/jobs/fetch.py b/jobs/fetch.py index 0a2e2f6..58d1b93 100644 --- a/jobs/fetch.py +++ b/jobs/fetch.py @@ -20,7 +20,7 @@ def install(): "link" TEXT NOT NULL UNIQUE, "title" TEXT NOT NULL, "body" TEXT, - "noted" INTEGER NOT NULL DEFAULT 0, + "sentiment" DECIMAL(1,2), "noteId" TEXT, "notedAt" INTEGER, PRIMARY KEY("id" AUTOINCREMENT) @@ -32,6 +32,7 @@ def install(): CREATE TABLE IF NOT EXISTS "feeds" ( "id" INTEGER NOT NULL UNIQUE, "url" TEXT NOT NULL UNIQUE, + "title" TEXT, PRIMARY KEY("id" AUTOINCREMENT) ); ''') diff --git a/jobs/sentiment.py b/jobs/sentiment.py new file mode 100644 index 0000000..d3be3f9 --- /dev/null +++ b/jobs/sentiment.py @@ -0,0 +1,14 @@ +""" Sentiment Analysis Module """ +import asent # pylint: disable=unused-import +import spacy + +def getSentiment(text): + """ Sentiment analysis result """ + # load spacy pipeline + nlp = spacy.load("en_core_web_lg") + # add the rule-based sentiment model + nlp.add_pipe("asent_en_v1") + sentiment = nlp(text) + #print(f"totale: {doc._.polarity.compound}") + + return sentiment._.polarity.compound diff --git a/requirements.txt b/requirements.txt index 24d9df9..2d558a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ feedparser Misskey.py 
python-dotenv schedule +asent pip-review \ No newline at end of file