From cc027974817264005ab74da6cdaac1f182164b67 Mon Sep 17 00:00:00 2001 From: Felix Date: Sat, 24 Jul 2021 22:00:37 +0200 Subject: [PATCH 1/3] Allow downloading tweets by hashtag or cashtag --- nitter_scraper/nitter.py | 5 +++-- nitter_scraper/tweets.py | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/nitter_scraper/nitter.py b/nitter_scraper/nitter.py index de62f14..fcf1d6a 100644 --- a/nitter_scraper/nitter.py +++ b/nitter_scraper/nitter.py @@ -113,7 +113,7 @@ def get_profile(self, username: str, not_found_ok: bool = False): """ return get_profile(username=username, not_found_ok=not_found_ok, address=self.address) - def get_tweets(self, username: str, pages: int = 25, break_on_tweet_id: Optional[int] = None): + def get_tweets(self, query_string: str, query_type: str, pages: int = 25, break_on_tweet_id: Optional[int] = None): """Gets the target users tweets This is a modified version of nitter_scraper.tweets.get_tweets(). @@ -133,7 +133,8 @@ def get_tweets(self, username: str, pages: int = 25, break_on_tweet_id: Optional """ return get_tweets( - username=username, + query_string=query_string, + query_type=query_type, pages=pages, break_on_tweet_id=break_on_tweet_id, address=self.address, diff --git a/nitter_scraper/tweets.py b/nitter_scraper/tweets.py index 1e02335..213565b 100644 --- a/nitter_scraper/tweets.py +++ b/nitter_scraper/tweets.py @@ -123,13 +123,14 @@ def timeline_parser(html): return html.find(".timeline", first=True) -def pagination_parser(timeline, address, username) -> str: +def pagination_parser(timeline, url) -> str: next_page = list(timeline.find(".show-more")[-1].links)[0] - return f"{address}/{username}{next_page}" + return f"{url}{next_page}" def get_tweets( - username: str, + query_string: str, + query_type: str = 'user', pages: int = 25, break_on_tweet_id: Optional[int] = None, address="https://nitter.net", @@ -137,7 +138,8 @@ def get_tweets( """Gets the target users tweets Args: - username: Targeted 
users username. + query_string: Targeted username, hashtag or cashtag. + query_type: Type of former paremeter. Either one of 'user', 'hashtag' or 'cashtag'. pages: Max number of pages to lookback starting from the latest tweet. break_on_tweet_id: Gives the ability to break out of a loop if a tweets id is found. address: The address to scrape from. The default is https://nitter.net which should @@ -147,7 +149,14 @@ def get_tweets( Tweet Objects """ - url = f"{address}/{username}" + if query_type == 'user': + url = f"{address}/{query_string}" + elif query_type == 'hashtag': + url = f"{address}/search?q=%23{query_string}" + elif query_type == 'cashtag': + url = f"{address}/search?q=${query_string}" + else: + raise ValueError(f"Unknown query_type '{query_type}'") session = HTMLSession() def gen_tweets(pages): @@ -157,7 +166,7 @@ def gen_tweets(pages): if response.status_code == 200: timeline = timeline_parser(response.html) - next_url = pagination_parser(timeline, address, username) + next_url = pagination_parser(timeline, url) timeline_items = timeline.find(".timeline-item") From e1811f0a62062f6af338ff171a61ffbc592e5116 Mon Sep 17 00:00:00 2001 From: Felix Date: Sun, 25 Jul 2021 19:15:53 +0200 Subject: [PATCH 2/3] Make query type implicit --- nitter_scraper/nitter.py | 5 ++--- nitter_scraper/tweets.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/nitter_scraper/nitter.py b/nitter_scraper/nitter.py index fcf1d6a..54cc19a 100644 --- a/nitter_scraper/nitter.py +++ b/nitter_scraper/nitter.py @@ -113,7 +113,7 @@ def get_profile(self, username: str, not_found_ok: bool = False): """ return get_profile(username=username, not_found_ok=not_found_ok, address=self.address) - def get_tweets(self, query_string: str, query_type: str, pages: int = 25, break_on_tweet_id: Optional[int] = None): + def get_tweets(self, query_string: str, pages: int = 25, break_on_tweet_id: Optional[int] = None): """Gets the target users tweets This is a modified 
version of nitter_scraper.tweets.get_tweets(). @@ -121,7 +121,7 @@ def get_tweets(self, query_string: str, query_type: str, pages: int = 25, break_ address to scrape profile data. Args: - username: Targeted users username. + query_string: Hashtag if it starts with '#', cashtag if it starts with '$', username otherwise. pages: Max number of pages to lookback starting from the latest tweet. break_on_tweet_id: Gives the ability to break out of a loop if a tweets id is found. address: The address to scrape from. The default is https://nitter.net which should @@ -134,7 +134,6 @@ def get_tweets(self, query_string: str, query_type: str, pages: int = 25, break_ return get_tweets( query_string=query_string, - query_type=query_type, pages=pages, break_on_tweet_id=break_on_tweet_id, address=self.address, diff --git a/nitter_scraper/tweets.py b/nitter_scraper/tweets.py index 213565b..23efe3c 100644 --- a/nitter_scraper/tweets.py +++ b/nitter_scraper/tweets.py @@ -130,7 +130,6 @@ def pagination_parser(timeline, url) -> str: def get_tweets( query_string: str, - query_type: str = 'user', pages: int = 25, break_on_tweet_id: Optional[int] = None, address="https://nitter.net", @@ -138,8 +137,7 @@ def get_tweets( """Gets the target users tweets Args: - query_string: Targeted username, hashtag or cashtag. - query_type: Type of former paremeter. Either one of 'user', 'hashtag' or 'cashtag'. + query_string: Hashtag if it starts with '#', cashtag if it starts with '$', username otherwise. pages: Max number of pages to lookback starting from the latest tweet. break_on_tweet_id: Gives the ability to break out of a loop if a tweets id is found. address: The address to scrape from. 
The default is https://nitter.net which should @@ -149,16 +147,15 @@ def get_tweets( Tweet Objects """ - if query_type == 'user': - url = f"{address}/{query_string}" - elif query_type == 'hashtag': - url = f"{address}/search?q=%23{query_string}" - elif query_type == 'cashtag': - url = f"{address}/search?q=${query_string}" + if query_string.startswith('#'): + url = f"{address}/search?q=%23{query_string[1:]}" + elif query_string.startswith('$'): + url = f"{address}/search?q={query_string}" else: - raise ValueError(f"Unknown query_type '{query_type}'") + url = f"{address}/{query_string}" session = HTMLSession() + def gen_tweets(pages): response = session.get(url) From 73ca7b2a8551ed7e422fa80c1863235eb6797ff5 Mon Sep 17 00:00:00 2001 From: Felix Date: Sun, 25 Jul 2021 19:32:17 +0200 Subject: [PATCH 3/3] Add docs --- docs/content/examples.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/content/examples.md b/docs/content/examples.md index 046c786..c894920 100644 --- a/docs/content/examples.md +++ b/docs/content/examples.md @@ -59,6 +59,34 @@ for user in users: ``` +### How to scrape tweets related to hashtag or cashtag. +```python +from pprint import pprint + +import nitter_scraper +from nitter_scraper import NitterScraper + +queries = ["#ToTheMoon", "$USDT"] + +print("Scraping with local nitter docker instance.") + +with NitterScraper(host="0.0.0.0", port=8008) as nitter: + for query in queries: + for tweet in nitter.get_tweets(query, pages=2): + print() + pprint(tweet.dict()) + print(tweet.json(indent=4)) + +print("Scraping from https://www.nitter.net.") + +for query in queries: + for tweet in nitter_scraper.get_tweets(query, pages=2): + print() + pprint(tweet.dict()) + print(tweet.json(indent=4)) + +``` + ### How to poll a users profile for the latest tweet. ```python import time