-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsettings.py
executable file
·64 lines (46 loc) · 1.61 KB
/
settings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-
# Scrapy settings for the alice project.
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html

BOT_NAME = 'alice'

SPIDER_MODULES = ['alice.spiders']
NEWSPIDER_MODULE = 'alice.spiders'

# Disable Scrapy's built-in user-agent middleware and let
# random_useragent rotate a random user agent per request instead.
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'random_useragent.RandomUserAgentMiddleware': 400
}

# Newline-delimited file of user agents to rotate through.
# NOTE(review): machine-specific absolute path — consider deriving this
# from the project directory or an environment variable. Alternate paths
# for other machines are kept below, commented out.
USER_AGENT_LIST = "C:/alice/useragents.txt"
#USER_AGENT_LIST = "C:/Users/Carlos/Google Drive/ZetUp/Buscato/alice/useragents.txt"
#USER_AGENT_LIST = "/home/kaco/Desktop/alice/useragents.txt"

# Be less detectable/blockable: no cookies, 2 seconds between requests.
COOKIES_ENABLED = False
DOWNLOAD_DELAY = 2

# MySQL connection settings read by alice.pipelines.MySQLStorePipeline.
# SECURITY: credentials are hard-coded in source; move them to environment
# variables or an untracked local config before deploying or publishing.
# Server profile (disabled):
#MYSQL_HOST = '192.168.1.34'
#MYSQL_DBNAME = 'quevale_bd'
#MYSQL_USER = 'pma'
#MYSQL_PASSWD = 's3rv3r'
# Localhost profile (active):
MYSQL_HOST = '127.0.0.1'
MYSQL_DBNAME = 'buscato'
MYSQL_USER = 'root'
MYSQL_PASSWD = ''

# Persist scraped items to MySQL.
ITEM_PIPELINES = {
    'alice.pipelines.MySQLStorePipeline': 0,
}

# Disable the S3 download handler (unused; avoids requiring boto).
DOWNLOAD_HANDLERS = {
    's3': None,
}

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'abcdin (+http://www.yourdomain.com)'

# Operational notes -----------------------------------------------------
# Run every spider from Windows (single command line):
#scrapy crawl abcdin & scrapy crawl casaximena & scrapy crawl corona & scrapy crawl easy & scrapy crawl falabella & scrapy crawl hites & scrapy crawl lapolar & scrapy crawl paris & scrapy crawl pcfactory & scrapy crawl ripley & scrapy crawl sodimac & scrapy crawl zmart & scrapy crawl linio & scrapy crawl dafiti
# Linux:
#scrapy crawl
#python core.py
# In the log, search for "Spider error processing" to find failed spiders.