-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlearn about and download WRDS data.R
111 lines (94 loc) · 3.81 KB
/
learn about and download WRDS data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# SETUP ######################################################################
## Load Packages ####
library(pacman)
library(RPostgres)
library(glue)
library(arrow)
library(tictoc)
## Load Helper Scripts ####
source("_Global Parameters.R")
#source("utils.R")
# LOG INTO WRDS ##############################################################
if(exists("wrds")){
dbDisconnect(wrds) # because otherwise WRDS might time out
}
wrds <- dbConnect(Postgres(),
host='wrds-pgdata.wharton.upenn.edu',
port=9737,
user=rstudioapi::askForSecret("WRDS user"),
password=rstudioapi::askForSecret("WRDS pw"),
sslmode='require',
dbname='wrds')
wrds # checks whether WRDS connection exists
# LIST WRDS TABLES AND INFORMATION ###########################################
## List all the data libraries available at WRDS ####
res <- dbSendQuery(wrds, "select
distinct table_schema
from
information_schema.tables
where
table_type ='VIEW' or table_type ='FOREIGN TABLE'
order by
table_schema")
WRDSlibraries <- dbFetch(res, n=-1) #n=-1 includes all rows
dbClearResult(res)
WRDSlibraries
## List the tables within Audit Analytics ####
res <- dbSendQuery(wrds, "select
distinct table_name
from
information_schema.columns
where
table_schema='audit'
order by
table_name")
AAtables <- dbFetch(res, n=-1)
dbClearResult(res)
AAtables
## Determine the variables available within a given table ####
res <- dbSendQuery(wrds, "select
column_name
from
information_schema.columns
where
table_schema='audit' and table_name='auditopin'
order by
column_name")
AAOpinVars<- dbFetch(res, n=-1)
dbClearResult(res)
AAOpinVars
### start timer
tictoc::tic()
# PULL DATA FROM AUDIT ANALYTICS AUDIT OPINION DATA ##########################
res <- dbSendQuery(wrds, "select
audit_op_key, auditor_affil_fkey,
auditor_affilname, auditor_fkey, auditor_name,
sig_date_of_op_x, sig_date_of_op_s, going_concern,
auditor_city, auditor_state, auditor_state_name,
auditor_country, auditor_region,
auditor_con_sup_reg, fiscal_year_of_op,
fiscal_year_end_op, op_aud_pcaob, pcaob_reg_num,
note_1_date, note_2_date, note_3_date,note_4_date,
note_5_date, accnt_basis, ftp_file_fkey,
form_fkey, file_date, file_accepted, file_size,
http_name_html, http_name_text, company_fkey,
best_edgar_ticker, eventdate_aud_fkey,
eventdate_aud_name, opinion_text1, opinion_text2,
opinion_text3
from
audit.auditopin"
)
AAOpinData <- dbFetch(res, n=-1)
dbClearResult(res)
AAOpinData
## Download Data from WRDS and Save to Disk ####
write_parquet(AAOpinData,glue("{data_path}/auditopinion.parquet"))
### stop timer
tictoc::toc()
# CLEAN UP ###################################################################
# Clear environment
rm(list = ls())
# Clear packages
p_unload(all) # Remove all add-ons
# Clear console
cat("\014") # ctrl+L