# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: LogHandler.py
Description : Log handling module
Author : JHao
date: 2017/3/6
-------------------------------------------------
Change Activity:
2017/03/06: log handler
2017/09/21: console output / file output are selectable (both enabled by default)
2020/07/13: TimedRotatingFileHandler is not thread-safe on Windows, so it is no longer used on that platform
-------------------------------------------------
"""
__author__ = 'JHao'
import os
import logging
import platform
from logging.handlers import TimedRotatingFileHandler
# Log levels, lowest to highest severity
NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL = 0, 10, 20, 30, 40, 50
# Alternate spellings of the same thresholds
WARN = WARNING
FATAL = CRITICAL
# Log files live in a "log" directory one level above CURRENT_PATH, where
# CURRENT_PATH is the parent directory of the process working directory.
CURRENT_PATH = os.path.split(os.path.abspath(os.getcwd()))[0]
ROOT_PATH = os.path.join(CURRENT_PATH, os.pardir)
LOG_PATH = os.path.join(CURRENT_PATH, os.pardir, 'log')

# Create the directory on first use; another process creating it between the
# existence check and mkdir is not an error.
try:
    if not os.path.exists(LOG_PATH):
        os.mkdir(LOG_PATH)
except FileExistsError:
    pass
class LogHandler(logging.Logger):
    """
    Logger that attaches its own console and/or daily-rotating file handler.

    :param name: logger name; also used as the base name of the log file
    :param level: threshold of the logger and default threshold of its handlers
    :param stream: attach a ``logging.StreamHandler`` when true
    :param file: attach a ``TimedRotatingFileHandler`` when true (skipped on Windows)
    """

    # Shared by both handlers so console and file records are formatted alike.
    _LOG_FORMAT = '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'

    def __init__(self, name, level=DEBUG, stream=True, file=True):
        # Logger.__init__ stores both ``name`` and ``level`` on the instance,
        # so they do not need to be assigned beforehand.
        logging.Logger.__init__(self, name, level=level)
        if stream:
            self.__setStreamHandler__()
        # The module change log notes that TimedRotatingFileHandler is not
        # thread-safe on Windows, so no file handler is attached there.
        if file and platform.system() != "Windows":
            self.__setFileHandler__()

    def __setFileHandler__(self, level=None):
        """
        Attach a file handler writing to ``LOG_PATH/<name>.log``.

        The file rolls over once a day and 15 rotated files are kept.

        :param level: handler threshold; ``None`` means "use the logger's level"
        :return: None
        """
        import re  # local import: only needed when a file handler is created

        file_name = os.path.join(LOG_PATH, '{name}.log'.format(name=self.name))
        file_handler = TimedRotatingFileHandler(filename=file_name, when='D', interval=1, backupCount=15)
        file_handler.suffix = '%Y%m%d.log'
        # The handler finds rotated files to delete by matching their suffix
        # against ``extMatch``. The default pattern expects YYYY-MM-DD, so it must
        # be replaced along with ``suffix`` or backupCount is never enforced.
        file_handler.extMatch = re.compile(r'^\d{8}\.log$')
        # ``is None`` (not truthiness) so an explicit NOTSET (0) is honoured.
        file_handler.setLevel(self.level if level is None else level)
        file_handler.setFormatter(logging.Formatter(self._LOG_FORMAT))
        self.file_handler = file_handler
        self.addHandler(file_handler)

    def __setStreamHandler__(self, level=None):
        """
        Attach a ``logging.StreamHandler`` (default stream) to this logger.

        :param level: handler threshold; ``None`` means "use the logger's level"
        :return: None
        """
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(self._LOG_FORMAT))
        # ``is None`` (not truthiness) so an explicit NOTSET (0) is honoured.
        stream_handler.setLevel(self.level if level is None else level)
        self.addHandler(stream_handler)
if __name__ == '__main__':
    # Smoke test: emit one INFO and one ERROR record through a fresh logger.
    log = LogHandler('test')
    log.info('this is a test msg')
    # No exception is being handled at this point, so exc_info=True would only
    # append a meaningless "NoneType: None" line to the record.
    log.error('redis connection time out: %s', 1)
2022-09-10 20:22:00,069 4045635131.py[line:98] INFO this is a test msg
2022-09-10 20:22:00,073 4045635131.py[line:99] ERROR redis connection time out: 1
NoneType: None
# -*- coding: utf-8 -*-
from requests.models import Response
from lxml import etree
import requests
import random
import time
# from handler.logHandler import LogHandler
# Turn off urllib3's warnings for the whole process, using the urllib3 module
# that requests exposes as requests.packages.urllib3.
requests.packages.urllib3.disable_warnings()
class WebRequest(object):
    """
    Small wrapper around ``requests.get`` with retries and response helpers.

    ``get`` returns the instance itself so that the result can be read through
    the ``tree``, ``text`` and ``json`` properties.
    """

    name = "web_request"

    # Pool of User-Agent strings; built once instead of on every property access.
    _USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    )

    def __init__(self, *args, **kwargs):
        # Console-only logger; the positional/keyword arguments are accepted
        # but not used.
        self.log = LogHandler(self.name, file=False)
        # Empty placeholder until the first call to get().
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return: one of the strings in ``_USER_AGENTS``
        """
        return random.choice(self._USER_AGENTS)

    @property
    def header(self):
        """
        basic header
        :return: a new dict on every access, so callers may mutate it freely
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }

    def get(self,
            url,
            header=None,
            retry_time=3,
            retry_interval=5,
            timeout=5,
            *args,
            **kwargs):
        """
        get method
        :param url: target url
        :param header: extra headers (dict) merged over the basic header
        :param retry_time: maximum number of attempts
        :param retry_interval: seconds to sleep between attempts
        :param timeout: network timeout passed to ``requests.get``
        :param args: extra positional arguments forwarded to ``requests.get``
        :param kwargs: extra keyword arguments forwarded to ``requests.get``
        :return: self; ``self.response`` holds the result of this call
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url,
                                             headers=headers,
                                             timeout=timeout,
                                             *args,
                                             **kwargs)
                return self
            except Exception as e:
                # Best effort by design: every failure is logged and retried.
                self.log.error("requests: %s error: %s", url, e)
                retry_time -= 1
                if retry_time <= 0:
                    # Out of attempts: publish a placeholder response so that a
                    # reused instance never exposes the result of an earlier
                    # request. The 200 status is kept from the original code.
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("retry %s second after", retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """
        :return: ``self.response.content`` parsed with ``lxml.etree.HTML``
        """
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """
        :return: ``self.response.text``
        """
        return self.response.text

    @property
    def json(self):
        """
        :return: the decoded JSON body, or an empty dict when decoding fails
        """
        try:
            return self.response.json()
        except Exception as e:
            self.log.error(str(e))
            return {}
# Demo: fetch the Baidu home page and parse the body into an lxml element.
# NOTE: this statement sits at module level, so it runs (and performs the
# request) whenever the file is executed or imported.
html_tree = WebRequest().get('https://www.baidu.com/').tree
# `tree` is a property, so the call form below is not how it is accessed:
# WebRequest().get('https://www.baidu.com/').tree()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/var/folders/w6/9k4dzqlj617f06dfby_vk1pr0000gn/T/ipykernel_58890/553297056.py in <module>
104
105 html_tree = WebRequest().get('https://www.baidu.com/').tree
--> 106 html_tree.to_string()
107 # WebRequest().get('https://www.baidu.com/').tree()
AttributeError: 'lxml.etree._Element' object has no attribute 'to_string'