From b1f368851712bcccf0e7b6dc487e0a44afdf8c3d Mon Sep 17 00:00:00 2001
From: asepscareer <aseps.career@gmail.com>
Date: Wed, 24 Jul 2024 15:59:07 +0700
Subject: [PATCH] update version to 1.0.5 - Excluded pandas dependency to
 streamline the library and reduce external dependencies.

---
 CHANGELOG.rst    |   4 +
 README.md        |  56 ++++++++---
 requirements.txt |   1 -
 setup.py         |  18 ++--
 test.py          |  59 ++++++++++--
 ycnbc/base.py    | 160 ++++++++++++++++---------------
 ycnbc/uri.py     |   6 +-
 ycnbc/utils.py   | 242 +++++++++++++++++++++++++++--------------------
 ycnbc/version.py |   2 +-
 9 files changed, 330 insertions(+), 218 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index caa261c..bcd07d2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,10 @@
 Change Log
 ===========
 
+1.0.5
+-------
+- Removed the pandas dependency to reduce external dependencies; news methods now return plain dicts instead of DataFrames.
+
 1.0.4
 -------
 - Remapping Query Data
diff --git a/README.md b/README.md
index e331e75..8746ee3 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,6 @@ ycnbc is **not** affiliated, endorsed, or vetted by CNBC, It's an open source to
 ### Requirements
 
 - Python >=3.5+
-- pandas>=0.24.0
 - requests>=2.23.0
 - lxml>=4.5.1
 
@@ -43,16 +42,49 @@ import ycnbc
 
 data = ycnbc.News()
 
-# get trending news
-trending_ = data.trending() # return DataFrame
-
-# get latest news
-latest_ = data.latest() # return DataFrame
-
-# get news by category
-economy_ = data.economy() # return DataFrame
-
-# etc.
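+# Get trending news
+trending_ = data.trending()  # dict: {'Title': [...], 'Link': [...]}
+
+# Get latest news
+latest_ = data.latest()  # dict: {'Headline': [...], 'Post Time': [...], 'Link': [...]}
+
+# Get news by category
+economy_ = data.economy()  # dict: {'Headline': [...], 'Post Time': [...], 'Link': [...]}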
+jobs_ = data.jobs()
+white_house_ = data.white_house()
+hospitals_ = data.hospitals()
+transportation_ = data.transportation()
+media_ = data.media()
+internet_ = data.internet()
+congress_ = data.congress()
+policy_ = data.policy()
+finance_ = data.finance()
+life_ = data.life()
+defense_ = data.defense()
+europe_politics_ = data.europe_politics()
+china_politics_ = data.china_politics()
+asia_politics_ = data.asia_politics()
+world_politics_ = data.world_politics()
+equity_opportunity_ = data.equity_opportunity()
+politics_ = data.politics()
+wealth_ = data.wealth()
+world_economy_ = data.world_economy()
+central_banks_ = data.central_banks()
+real_estate_ = data.real_estate()
+health_science_ = data.health_science()
+small_business_ = data.small_business()
+lifehealth_insurance_ = data.lifehealth_insurance()
+business_ = data.business()
+energy_ = data.energy()
+industrials_ = data.industrials()
+retail_ = data.retail()
+cybersecurity_ = data.cybersecurity()
+mobile_ = data.mobile()
+technology_ = data.technology()
+cnbc_disruptors_ = data.cnbc_disruptors()
+tech_guide_ = data.tech_guide()
+social_media_ = data.social_media()
+climate_ = data.climate()
 ```
 
 Note:
@@ -70,6 +102,6 @@ the [LICENSE.txt](./LICENSE.txt) file in the release for details.
 
 ### P.S.
 
-Please drop me an note with any feedback you have.
+Please drop me a note with any feedback you have.
 
 **Asep Saputra**
diff --git a/requirements.txt b/requirements.txt
index 94444b7..4a3f892 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-pandas>=0.24.0
 requests>=2.23.0
 lxml>=4.5.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 5f7a973..3565d28 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-#
-# ycnbc - CNBC data downloader
-# https://github.com/asepscareer/yfinance
 
-"""ycnbc - cnbc data downloader"""
+"""ycnbc - CNBC data downloader"""
 
 from setuptools import setup, find_packages
-# from codecs import open
 import io
 from os import path
 
@@ -18,7 +14,6 @@
     version = line.replace("version = ", "").replace('"', '')
 # --- /get version ---
 
-
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the README file
@@ -38,8 +33,6 @@
     classifiers=[
         'License :: OSI Approved :: Apache Software License',
         'Development Status :: 5 - Production/Stable',
-
-
         'Operating System :: OS Independent',
         'Intended Audience :: Developers',
         'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary',
@@ -48,7 +41,6 @@
         'Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator',
         'Topic :: Software Development :: Libraries',
         'Topic :: Software Development :: Libraries :: Python Modules',
-
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
@@ -59,9 +51,11 @@
     platforms=['any'],
     keywords='scrape news, cnbc library, cnbc python, cnbc api',
     packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
-    install_requires=['pandas>=0.24.0','requests>=2.23.0','lxml>=4.5.1'],
+    install_requires=['requests>=2.23.0', 'lxml>=4.5.1'],
     entry_points={
-        'console_scripts': ['sample=sample:main',],
+        'console_scripts': [
+            # 'sample=sample:main',
+        ],
     },
 )
 
@@ -70,4 +64,4 @@
     NOTE: ycnbc is not affiliated, endorsed, or vetted by CNBC.
     You should refer to CNBC!'s terms of use for details on your rights to use the actual data downloaded.
     """
-)
\ No newline at end of file
+)
diff --git a/test.py b/test.py
index c743897..f493f96 100644
--- a/test.py
+++ b/test.py
@@ -2,14 +2,57 @@
 import unittest
 
 data = ycnbc.News()
+
+
 class TestData(unittest.TestCase):
-    def test_trending(self):
-        assert(data.trending().empty is False)
-        assert(data.latest().empty is False)
-        assert(data.economy().empty is False)
-        assert(data.health_science().empty is False)
-        assert(data.finance().empty is False)
-    
+    def test_cnbc_news(self):
+        methods = [
+            'latest',
+            'trending',
+            'economy',
+            'jobs',
+            'white_house',
+            'hospitals',
+            'transportation',
+            'media',
+            'internet',
+            'congress',
+            'policy',
+            'finance',
+            'life',
+            'defense',
+            'europe_politics',
+            'china_politics',
+            'asia_politics',
+            'world_politics',
+            'equity_opportunity',
+            'politics',
+            'wealth',
+            'world_economy',
+            'central_banks',
+            'real_estate',
+            'health_science',
+            'small_business',
+            'lifehealth_insurance',
+            'business',
+            'energy',
+            'industrials',
+            'retail',
+            'cybersecurity',
+            'mobile',
+            'technology',
+            'cnbc_disruptors',
+            'tech_guide',
+            'social_media',
+            'climate'
+        ]
+
+        for method_name in methods:
+            with self.subTest(method=method_name):
+                method = getattr(data, method_name)
+                response = method()
+                self.assertNotIn("error", response, f"{method_name} returned an error")
+
 
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    unittest.main()
diff --git a/ycnbc/base.py b/ycnbc/base.py
index 6c79e9d..ad3e315 100644
--- a/ycnbc/base.py
+++ b/ycnbc/base.py
@@ -19,125 +19,123 @@
 # limitations under the License.
 #
 
-from __future__ import print_function
+from .utils import CNBCNews
 
-from .utils import getnews, latest, trending
 
-class News():
+class News:
+    def __init__(self):
+        self.news = CNBCNews()
 
     def latest(self):
-        return latest()
+        return self.news.latest()
 
     def trending(self):
-        return trending()
+        return self.news.trending()
 
     def economy(self):
-        return getnews('economy')
+        return self.news.by_category('economy')
 
-    def jobs(self):  
-        return getnews('jobs')
+    def jobs(self):
+        return self.news.by_category('jobs')
 
-    def white_house(self):  
-        return getnews('white-house')
+    def white_house(self):
+        return self.news.by_category('white-house')
 
-    def hospitals(self):  
-        return getnews('hospitals')
+    def hospitals(self):
+        return self.news.by_category('hospitals')
 
-    def transportation(self):  
-        return getnews('transportation')
+    def transportation(self):
+        return self.news.by_category('transportation')
 
-    def jobs(self):  
-        return getnews('jobs')
+    def media(self):
+        return self.news.by_category('media')
 
-    def climate(self):  
-        return getnews('climate')
+    def internet(self):
+        return self.news.by_category('internet')
 
-    def media(self):  
-        return getnews('media')
+    def congress(self):
+        return self.news.by_category('congress')
 
-    def internet(self):  
-        return getnews('internet')
+    def policy(self):
+        return self.news.by_category('policy')
 
-    def congress(self):  
-        return getnews('congress')
+    def finance(self):
+        return self.news.by_category('finance')
 
-    def policy(self):  
-        return getnews('policy')
+    def life(self):
+        return self.news.by_category('life')
 
-    def finance(self):  
-        return getnews('finance')
+    def defense(self):
+        return self.news.by_category('defense')
 
-    def life(self):  
-        return getnews('life')
-    
-    def defense(self):  
-        return getnews('defense')
-    
-    def europe_politics(self):  
-        return getnews('europe-politics')
-    
-    def china_politics(self):  
-        return getnews('china-politics')
-    
-    def asia_politics(self):  
-        return getnews('asia-politics')
-    
-    def world_politics(self):  
-        return getnews('world-politics')
-    
-    def equity_opportunity(self):  
-        return getnews('equity-opportunity')
-    
-    def politics(self):  
-        return getnews('politics')
+    def europe_politics(self):
+        return self.news.by_category('europe-politics')
 
-    def wealth(self):  
-        return getnews('wealth')   
+    def china_politics(self):
+        return self.news.by_category('china-politics')
 
-    def world_economy(self):  
-        return getnews('world-economy')  
+    def asia_politics(self):
+        return self.news.by_category('asia-politics')
 
-    def central_banks(self):  
-        return getnews('central-banks')  
+    def world_politics(self):
+        return self.news.by_category('world-politics')
 
-    def real_estate(self):  
-        return getnews('real-estate')   
+    def equity_opportunity(self):
+        return self.news.by_category('equity-opportunity')
 
-    def health_science(self):  
-        return getnews('health-and-science')   
+    def politics(self):
+        return self.news.by_category('politics')
 
-    def small_business(self):  
-        return getnews('small-business')  
+    def wealth(self):
+        return self.news.by_category('wealth')
+
+    def world_economy(self):
+        return self.news.by_category('world-economy')
+
+    def central_banks(self):
+        return self.news.by_category('central-banks')
+
+    def real_estate(self):
+        return self.news.by_category('real-estate')
+
+    def health_science(self):
+        return self.news.by_category('health-and-science')
+
+    def small_business(self):
+        return self.news.by_category('small-business')
 
     def lifehealth_insurance(self):
-        return getnews('life-and-health-insurance')
+        return self.news.by_category('life-and-health-insurance')
 
     def business(self):
-        return getnews('business')
-        
+        return self.news.by_category('business')
+
     def energy(self):
-        return getnews('energy')
+        return self.news.by_category('energy')
 
     def industrials(self):
-        return getnews('industrials')
+        return self.news.by_category('industrials')
 
     def retail(self):
-        return getnews('retail')
-    
+        return self.news.by_category('retail')
+
     def cybersecurity(self):
-        return getnews('cybersecurity')
-    
-    def mobile(self):
-        return getnews('mobile')
+        return self.news.by_category('cybersecurity')
 
     def mobile(self):
-        return getnews('technology')
-    
+        return self.news.by_category('mobile')
+
+    def technology(self):
+        return self.news.by_category('technology')
+
     def cnbc_disruptors(self):
-        return getnews('cnbc-disruptors')
-    
+        return self.news.by_category('cnbc-disruptors')
+
     def tech_guide(self):
-        return getnews('tech-guide')
-    
+        return self.news.by_category('tech-guide')
+
     def social_media(self):
-        return getnews('social-media')
\ No newline at end of file
+        return self.news.by_category('social-media')
+
+    def climate(self):
+        return self.news.by_category('climate')
diff --git a/ycnbc/uri.py b/ycnbc/uri.py
index 4331382..a522230 100644
--- a/ycnbc/uri.py
+++ b/ycnbc/uri.py
@@ -21,8 +21,10 @@
 
 _BASE_URL_ = 'https://www.cnbc.com'
 _HEADERS_ = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,'
+              'application/signed-exchange;v=b3',
     'Accept-Encoding': 'gzip, deflate, br',
     'DNT': '1',
-    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
+    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '
+                  'Chrome/39.0.2171.95 Safari/537.36'
 }
\ No newline at end of file
diff --git a/ycnbc/utils.py b/ycnbc/utils.py
index 5693188..dd0592f 100644
--- a/ycnbc/utils.py
+++ b/ycnbc/utils.py
@@ -18,107 +18,147 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from .uri import _BASE_URL_, _HEADERS_
-from requests import get
+from __future__ import print_function
 from lxml import html
-from pandas import DataFrame
-
-def trending():
-    try:
-        page = get(_BASE_URL_, headers=_HEADERS_)
-        tree = html.fromstring(page.content)
-    except Exception:
-        pass
-
-    trending_news = tree.xpath("//li[contains(@class, 'TrendingNowItem')]")
-    assert len(trending_news) > 0, 'Data Not Found'
-    title, source = [], []
-
-    for i in trending_news:
-        text = i.xpath(".//a/text()")
-        link = list(i.iterlinks())[0][2]
-
-        title.append(' '.join(text))
-        source.append(link)
-
-    data = {
-        'Title': title,
-        'Link': source
-    }
-    df = build_df(data)
-    return df
-
-
-def latest():
-    try:
-        page = get(_BASE_URL_, headers=_HEADERS_)
-        tree = html.fromstring(page.content)
-    except Exception:
-        pass
-
-    source, title, posttime = [], [], []
-
-    links = tree.xpath("//a[contains(@class, 'LatestNews')]")
-    assert len(links) > 0, 'Data Not Found'
-
-    latest_news = tree.xpath("//ul[contains(@class, 'LatestNews')]")
-    assert len(latest_news) > 0, 'Data Not Found'
-
-    for i in links:
-        source.append(list(i.iterlinks())[0][2])
-    for i in latest_news:
-        el = i.xpath("li")
-        for rs in el:
-            text = rs.xpath(".//a/text()")
-            posttime_ = rs.xpath(".//span/time/text()")
-
-            title.append(' '.join(text))
-            posttime.append(' '.join(posttime_))
-
-    data = {
-        'Headline': title,
-        'Post Time': posttime,
-        'Link': source
-    }
-    df = build_df(data)
-    return df
-
-def getnews(category):
-    try:
-        page = get("{}/{}".format(_BASE_URL_, category), headers=_HEADERS_)
-        tree = html.fromstring(page.content)
-
-        source, title, posttime = [], [], []
-        news = tree.xpath("//div[contains(@class, 'Card-titleContainer')]")
-        assert len(news)>0, 'Data Not Found'
-
-        posttime_news = tree.xpath("//span[contains(@class, 'Card-time')]")
-        assert len(posttime_news)>0, 'Data Not Found'
-
-        for i in posttime_news:
-            text = i.xpath(".//text()")
-            posttime.append(' '.join(text))
-        for i in news:
-            text = i.xpath("..//div/text()")
-
-            source.append(list(i.iterlinks())[0][2])
-            title.append(' '.join(text))
-
-        data = {
-            'Headline': title,
-            'Post Time': posttime,
-            'Link': source
-            }
+from requests import get
+from .uri import _BASE_URL_, _HEADERS_
 
-        return build_df(data)
-    except:
-        msg = {
-            'data': [None],
-            'msg': ['This page or category contains news with PRO tags.']
-            }
-        return build_df(msg)
 
-def build_df(values):
-    df = DataFrame(data=values)
-    df = df.convert_dtypes()
-    return df
\ No newline at end of file
+class CNBCNews:
+    """Scrapes cnbc.com pages; public methods return a dict of lists or {"error": message}."""
+
+    def __init__(self):
+        self.base_url = _BASE_URL_
+        self.headers = _HEADERS_
+
+    def _fetch_page(self, endpoint=""):
+        """
+        Fetches and parses the web page content.
+
+        Args:
+            endpoint (str): Path appended to the base URL; an empty string fetches the base URL itself.
+
+        Returns:
+            html.Element: Parsed HTML tree if successful, otherwise a dict of the form {"error": message}.
+        """
+        try:
+            url = f"{self.base_url}/{endpoint}" if endpoint else self.base_url
+            page = get(url, headers=self.headers, timeout=30)  # requests never times out by default
+            page.raise_for_status()  # Ensure we raise an error for bad HTTP responses
+            return html.fromstring(page.content)
+        except Exception as e:
+            return {"error": str(e)}
+
+    def trending(self):
+        """
+        Fetches trending news.
+
+        Returns:
+            dict: Dictionary containing titles and links of trending news, or an error message.
+        """
+        try:
+            tree = self._fetch_page()
+            if isinstance(tree, dict):  # _fetch_page signals failure with an error dict
+                return tree
+
+            trending_news = tree.xpath("//li[contains(@class, 'TrendingNowItem')]")
+            if not trending_news:
+                return {"error": "Data Not Found"}
+
+            title, source = [], []
+            for i in trending_news:
+                text = i.xpath(".//a/text()")
+                link = next((ref[2] for ref in i.iterlinks()), None)  # first URL in the item, if any
+                title.append(' '.join(text))
+                source.append(link)
+
+            return {
+                'Title': title,
+                'Link': source
+            }
+        except Exception as e:
+            return {"error": str(e)}
+
+    def latest(self):
+        """
+        Fetches the latest news.
+
+        Returns:
+            dict: Dictionary containing headlines, post times, and links of the latest news, or an error message.
+        """
+        try:
+            tree = self._fetch_page()
+            if isinstance(tree, dict):  # _fetch_page signals failure with an error dict
+                return tree
+
+            source, title, posttime = [], [], []
+            links = tree.xpath("//a[contains(@class, 'LatestNews')]")
+            if not links:
+                return {"error": "No Latest News links found"}
+
+            latest_news = tree.xpath("//ul[contains(@class, 'LatestNews')]")
+            if not latest_news:
+                return {"error": "No Latest News list found"}
+
+            for i in links:  # NOTE(review): collected separately from headlines; list lengths may differ
+                link = next((ref[2] for ref in i.iterlinks()), None)  # first URL in the anchor, if any
+                source.append(link)
+
+            for i in latest_news:
+                el = i.xpath("li")
+                for rs in el:
+                    text = rs.xpath(".//a/text()")
+                    posttime_ = rs.xpath(".//span/time/text()")
+
+                    title.append(' '.join(text))
+                    posttime.append(' '.join(posttime_))
+
+            return {
+                'Headline': title,
+                'Post Time': posttime,
+                'Link': source
+            }
+        except Exception as e:
+            return {"error": str(e)}
+
+    def by_category(self, category):
+        """
+        Fetches news based on the category.
+
+        Args:
+            category (str): URL path segment of the category to fetch, e.g. 'economy' or 'white-house'.
+
+        Returns:
+            dict: Dictionary containing headlines, post times, and links for the category, or an error message.
+        """
+        try:
+            tree = self._fetch_page(category)
+            if isinstance(tree, dict):  # _fetch_page signals failure with an error dict
+                return tree
+
+            source, title, posttime = [], [], []
+            news = tree.xpath("//div[contains(@class, 'Card-titleContainer')]")
+            if not news:
+                return {"error": "No news items found"}
+
+            posttime_news = tree.xpath("//span[contains(@class, 'Card-time')]")
+            if not posttime_news:
+                return {"error": "No post time found"}
+
+            for i in posttime_news:  # NOTE(review): collected separately from headlines; list lengths may differ
+                text = i.xpath(".//text()")
+                posttime.append(' '.join(text))
+
+            for i in news:
+                text = i.xpath("..//div/text()")
+                link = next((ref[2] for ref in i.iterlinks()), None)  # first URL in the card, if any
+                source.append(link)
+                title.append(' '.join(text))
+
+            return {
+                'Headline': title,
+                'Post Time': posttime,
+                'Link': source
+            }
+        except Exception as e:
+            return {"error": str(e)}
diff --git a/ycnbc/version.py b/ycnbc/version.py
index 2745891..4920851 100644
--- a/ycnbc/version.py
+++ b/ycnbc/version.py
@@ -1 +1 @@
-version = "1.0.4"
\ No newline at end of file
+version = "1.0.5"