From 9ba82fbd199203ebd498a7b5fc2a01eb0933e5e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Bartolom=C3=A9=20del=20Canto?= Date: Mon, 23 Sep 2019 20:36:05 +0200 Subject: [PATCH] indices functions implemented #36 - indices retrieval functions implemented for data retrieval to fill `indices.csv` file and to list available data - minor fixes - tests/ need to be updated --- investpy/__init__.py | 185 +++++++++++++++++++++++++++++++++++++------ investpy/equities.py | 51 ------------ investpy/etfs.py | 34 -------- investpy/funds.py | 32 -------- investpy/indices.py | 3 +- 5 files changed, 164 insertions(+), 141 deletions(-) diff --git a/investpy/__init__.py b/investpy/__init__.py index 6e4c0f6b..f971372b 100644 --- a/investpy/__init__.py +++ b/investpy/__init__.py @@ -17,7 +17,7 @@ import unidecode from lxml.html import fromstring -from investpy import user_agent as ua, equities as ts, funds as fs, etfs as es +from investpy import user_agent, equities as eq, funds as fs, etfs as es, indices as ic from investpy.Data import Data @@ -49,7 +49,7 @@ def get_equities(country=None): IOError: raised if equities retrieval failed, both for missing file or empty file, after and before retrieval. """ - return ts.equities_as_df(country) + return eq.equities_as_df(country) def get_equities_list(country=None): @@ -76,7 +76,7 @@ def get_equities_list(country=None): IOError: raised if equities retrieval failed, both for missing file or empty file, after and before retrieval. """ - return ts.equities_as_list(country) + return eq.equities_as_list(country) def get_equities_dict(country=None, columns=None, as_json=False): @@ -116,7 +116,7 @@ def get_equities_dict(country=None, columns=None, as_json=False): IOError: raised when `equities.csv` file is missing or empty. 
""" - return ts.equities_as_dict(country=country, columns=columns, as_json=as_json) + return eq.equities_as_dict(country=country, columns=columns, as_json=as_json) def get_equity_countries(): @@ -134,7 +134,7 @@ def get_equity_countries(): IndexError: if `equity_countries.csv` was unavailable or not found. """ - return ts.equity_countries_as_list() + return eq.equity_countries_as_list() def get_recent_data(equity, country, as_json=False, order='ascending', debug=False): @@ -274,7 +274,7 @@ def get_recent_data(equity, country, as_json=False, order='ascending', debug=Fal } head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -490,7 +490,7 @@ def get_historical_data(equity, country, from_date, to_date, as_json=False, orde if pkg_resources.resource_exists(resource_package, resource_path): equities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path)) else: - equities = ts.retrieve_equities() + equities = eq.retrieve_equities() if equities is None: raise IOError("ERR#0001: equities object not found or unable to retrieve.") @@ -546,7 +546,7 @@ def get_historical_data(equity, country, from_date, to_date, as_json=False, orde } head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -699,7 +699,7 @@ def get_equity_company_profile(equity, country='spain', language='english'): if pkg_resources.resource_exists(resource_package, resource_path): equities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path)) else: - equities = ts.retrieve_equities() + equities = eq.retrieve_equities() if equities is None: raise IOError("ERR#0001: equities object not found or unable to retrieve.") @@ -724,7 +724,7 @@ def get_equity_company_profile(equity, country='spain', 
language='english'): company_profile['url'] = url head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -761,7 +761,7 @@ def get_equity_company_profile(equity, country='spain', language='english'): company_profile['url'] = url head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -830,7 +830,7 @@ def search_equities(by, value): if pkg_resources.resource_exists(resource_package, resource_path): equities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path)) else: - equities = ts.retrieve_equities() + equities = eq.retrieve_equities() if equities is None: raise IOError("ERR#0001: equities object not found or unable to retrieve.") @@ -1102,7 +1102,7 @@ def get_fund_recent_data(fund, country, as_json=False, order='ascending', debug= } head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -1361,7 +1361,7 @@ def get_fund_historical_data(fund, country, from_date, to_date, as_json=False, o } head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -1525,7 +1525,7 @@ def get_fund_information(fund, country, as_json=False): url = "https://es.investing.com/funds/" + tag head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -1980,7 +1980,7 @@ def get_etf_recent_data(etf, country, as_json=False, order='ascending', debug=Fa } head = { - "User-Agent": ua.get_random(), + "User-Agent": 
user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -2242,7 +2242,7 @@ def get_etf_historical_data(etf, country, from_date, to_date, as_json=False, ord } head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -2360,7 +2360,7 @@ def get_etfs_overview(country, as_json=False): raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.") head = { - "User-Agent": ua.get_random(), + "User-Agent": user_agent.get_random(), "X-Requested-With": "XMLHttpRequest", "Accept": "text/html", "Accept-Encoding": "gzip, deflate, br", @@ -2447,7 +2447,7 @@ def search_etfs(by, value): RuntimeError: raised if no results were found for the introduced value in the introduced field. """ - available_search_fields = ['name', 'full_name', 'isin'] + available_search_fields = ['name', 'symbol'] if not by: raise ValueError('ERR#0006: the introduced field to search is mandatory and should be a str.') @@ -2520,9 +2520,148 @@ def get_indices(country=None): IOError: raised when `indices.csv` file is missing. """ - # return es.etfs_as_df(country=country) - return True + return ic.indices_as_df(country=country) + + +def get_indices_list(country=None): + """ + This function retrieves all the available indices and returns a list of each one of them. + All the available indices can be found at: https://es.investing.com/indices/ + + Args: + country (:obj:`str`, optional): name of the country to retrieve all its available indices from. + + Returns: + :obj:`list` - indices_list: + The resulting :obj:`list` contains the retrieved data, which corresponds to the index names of + every index listed on Investing.com. + + In case the information was successfully retrieved from the CSV file, the :obj:`list` will look like:: + + indices = [...] 
+ + Raises: + ValueError: raised when the introduced arguments are not correct. + IOError: raised if the indices file from `investpy` is missing or errored. + """ + + return ic.indices_as_list(country=country) + + +def get_indices_dict(country=None, columns=None, as_json=False): + """ + This function retrieves all the available indices on Investing.com and returns them as a :obj:`dict` containing the + `country`, `name`, `full_name`, `symbol`, `tag` and `currency`. All the available indices can be found at: + https://es.investing.com/indices/ + + Args: + country (:obj:`str`, optional): name of the country to retrieve all its available indices from. + columns (:obj:`list` of :obj:`str`, optional): description + a :obj:`list` containing the column names from which the data is going to be retrieved. + as_json (:obj:`bool`, optional): description + value to determine the format of the output data (:obj:`dict` or :obj:`json`). + + Returns: + :obj:`dict` or :obj:`json` - indices_dict: + The resulting :obj:`dict` contains the retrieved data if found, if not, the corresponding + fields are filled with `None` values. + + In case the information was successfully retrieved, the :obj:`dict` will look like:: + + { + 'country': country, + 'name': name, + 'full_name': full_name, + 'symbol': symbol, + 'tag': tag, + 'currency': currency + } + + Raises: + ValueError: raised when the introduced arguments are not correct. + IOError: raised if the indices file from `investpy` is missing or errored. + """ + + return ic.indices_as_dict(country=country, columns=columns, as_json=as_json) + + +def get_index_countries(): + """ + This function retrieves all the country names indexed in Investing.com with available indices to retrieve data + from, via reading the index countries file from the resources directory. 
So on, this function will display a + listing containing a set of countries, in order to let the user know which countries are taken into account and also + the return listing from this function can be used for country param check if needed. + Returns: + :obj:`list` - countries: + The resulting :obj:`list` contains all the available countries with indices as indexed in Investing.com + + Raises: + IndexError: if the index countries file was unavailable or not found. + """ + + return ic.index_countries_as_list() + + +def search_indices(by, value): + """ + This function searches indices by the introduced value for the specified field. This means that this function + is going to search if there is a value that matches the introduced value for the specified field which is the + `indices.csv` column name to search in. Available fields to search indices are 'name' and 'full_name'. + + Args: + by (:obj:`str`): name of the field to search for, which is the column name ('name' or 'full_name'). + value (:obj:`str`): value of the field to search for, which is the str that is going to be searched. + + Returns: + :obj:`pandas.DataFrame` - search_result: + The resulting `pandas.DataFrame` contains the search results from the given query (the specified value + in the specified field). If there are no results an error will be raised, but otherwise this + `pandas.DataFrame` will contain all the available field values that match the introduced query. + + Raises: + ValueError: raised if any of the introduced params is not valid or errored. + IOError: raised if data could not be retrieved due to file error. + RuntimeError: raised if no results were found for the introduced value in the introduced field. 
+ """ + + available_search_fields = ['name', 'full_name'] + + if not by: + raise ValueError('ERR#0006: the introduced field to search is mandatory and should be a str.') + + if not isinstance(by, str): + raise ValueError('ERR#0006: the introduced field to search is mandatory and should be a str.') + + if isinstance(by, str) and by not in available_search_fields: + raise ValueError('ERR#0026: the introduced field to search can either just be ' + + ' or '.join(available_search_fields)) + + if not value: + raise ValueError('ERR#0017: the introduced value to search is mandatory and should be a str.') + + if not isinstance(value, str): + raise ValueError('ERR#0017: the introduced value to search is mandatory and should be a str.') + + resource_package = __name__ + resource_path = '/'.join(('resources', 'indices', 'indices.csv')) + if pkg_resources.resource_exists(resource_package, resource_path): + indices = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path)) + else: + indices = ic.retrieve_indices() + + if indices is None: + raise IOError("ERR#0037: indices not found or unable to retrieve.") + + indices['matches'] = indices[by].str.contains(value, case=False) + + search_result = indices.loc[indices['matches'] == True].copy() + + if len(search_result) == 0: + raise RuntimeError('ERR#0043: no results were found for the introduced ' + str(by) + '.') + + search_result.drop(columns=['tag', 'id', 'matches'], inplace=True) + search_result.reset_index(drop=True, inplace=True) + + return search_result -if __name__ == '__main__': - print(search_equities(by='name', value='error')) diff --git a/investpy/equities.py b/investpy/equities.py index 34c7cdff..a9ae3958 100644 --- a/investpy/equities.py +++ b/investpy/equities.py @@ -482,54 +482,3 @@ def equities_as_dict(country=None, columns=None, as_json=False): return json.dumps(equities[equities['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records')) else: return 
equities[equities['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records') - - -# Aux Function to Fill Missing equities.csv Data -# ---------------------------------------------- -# def fill_missing_equities(): -# df = equities_as_df() -# -# df = df.where((pd.notnull(df)), None) -# -# for index, row in df.iterrows(): -# if row['symbol'] is None: -# print('Retrieving symbol of... ' + str(row['full_name'])) -# symbol = None -# while symbol is None: -# try: -# symbol = retrieve_symbol(row['tag']) -# except: -# pass -# df.loc[index, 'symbol'] = symbol -# print('Symbol of ' + str(row['full_name']) + ' is ... ' + str(symbol)) -# -# resource_package = __name__ -# resource_path = '/'.join(('resources', 'equities', 'equities.csv')) -# file = pkg_resources.resource_filename(resource_package, resource_path) -# -# df.to_csv(file, index=False) -# -# -# def retrieve_symbol(tag): -# url = "https://es.investing.com/equities/" + tag -# -# head = { -# "User-Agent": ua.get_random(), -# "X-Requested-With": "XMLHttpRequest", -# "Accept": "text/html", -# "Accept-Encoding": "gzip, deflate, br", -# "Connection": "keep-alive", -# } -# -# req = requests.get(url, headers=head) -# -# if req.status_code != 200: -# raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.") -# -# root_ = fromstring(req.text) -# path_ = root_.xpath(".//div[@class='instrumentHeader']" -# "/h2") -# -# for element_ in path_: -# result = element_.text_content().replace('Resumen ', '').strip() -# return result diff --git a/investpy/etfs.py b/investpy/etfs.py index c0849480..e4de813b 100644 --- a/investpy/etfs.py +++ b/investpy/etfs.py @@ -379,37 +379,3 @@ def etfs_as_dict(country=None, columns=None, as_json=False): return json.dumps(etfs[etfs['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records')) else: return etfs[etfs['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records') - - -# Aux Function to 
Fill Missing etfs.csv Data -# ------------------------------------------ -# def fill_missing_data(): -# df = etfs_as_df() -# -# df = df.where((pd.notnull(df)), None) -# -# resource_package = __name__ -# resource_path = '/'.join(('resources', 'etfs', 'etfs.csv')) -# file = pkg_resources.resource_filename(resource_package, resource_path) -# -# for index, row in df.iterrows(): -# if row['currency'] is None: -# print('Retrieving currency of... ' + str(row['name'])) -# currency = None -# -# print(row['tag']) -# -# while currency is None: -# try: -# info = retrieve_etf_info(row['tag']) -# currency = info['currency'] -# except Exception as e: -# if str(e) == 'ERR#0015: error 404, try again later.': -# df.drop(df.index[index], inplace=True) -# break -# pass -# df.loc[index, 'currency'] = currency -# print('Currency of ' + str(row['name']) + ' is ... ' + str(currency)) -# print('\n----------------------------------------------------------\n') -# -# df.to_csv(file, index=False) diff --git a/investpy/funds.py b/investpy/funds.py index 226cdf74..d9344e9e 100644 --- a/investpy/funds.py +++ b/investpy/funds.py @@ -467,35 +467,3 @@ def funds_as_dict(country=None, columns=None, as_json=False): return json.dumps(funds[funds['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records')) else: return funds[funds['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records') - - -# def fill_missing_data(): -# df = funds_as_df() -# -# df = df.where((pd.notnull(df)), None) -# -# resource_package = __name__ -# resource_path = '/'.join(('resources', 'funds', 'funds.csv')) -# file = pkg_resources.resource_filename(resource_package, resource_path) -# -# for index, row in df.iterrows(): -# if row['tag'] is None: -# -# if row['currency'] is None: -# print('Retrieving currency of... 
' + str(row['name'])) -# currency = None -# -# while currency is None: -# try: -# info = retrieve_fund_info(row['tag']) -# currency = info['currency'] -# except Exception as e: -# if str(e) == 'ERR#0015: error 404, try again later.': -# df.drop(df.index[index], inplace=True) -# break -# pass -# df.loc[index, 'currency'] = currency -# print('Currency of ' + str(row['name']) + ' is ... ' + str(currency)) -# print('\n----------------------------------------------------------\n') -# -# df.to_csv(file, index=False) diff --git a/investpy/indices.py b/investpy/indices.py index 20dcf3e1..37d6caf6 100644 --- a/investpy/indices.py +++ b/investpy/indices.py @@ -2,6 +2,7 @@ # Copyright 2018-2019 Alvaro Bartolome # See LICENSE for details. + import json import pandas as pd @@ -429,4 +430,4 @@ def indices_as_dict(country=None, columns=None, as_json=False): if as_json: return json.dumps(indices[indices['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records')) else: - return indices[indices['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records') \ No newline at end of file + return indices[indices['country'] == unidecode.unidecode(country.lower())][columns].to_dict(orient='records')