Read a table of fixed-width formatted data of different types into a data.frame for each type. This package is for people who need to load a file that contains lines of different fixed-width formats. Think of it as an extension to the read.fwf function. One use case is reading a log file containing messages of a fixed-width text protocol.
library(multifwf)
# Create a temp file with a few lines from a simple exchange protocol.
# Every record is padded to the exact field widths of its message type:
# the symbol is right-aligned in 12 characters, while clientId, accountId and
# the rejection text are left-aligned and space-padded (16, 16 and 48
# characters). The padding is significant - without it the fixed-width
# columns no longer line up with the specs.
ff <- tempfile()
cat(file = ff,
  '10:15:03:279NOSLMT0000666    EVILCORP00010.77SomeClientId    SomeAccountId   ',
  '10:15:03:793OC000001BLMT0000666    EVILCORP00010.77SomeClientId    SomeAccountId   ',
  '10:17:45:153NOBLMT0000666    EVILCORP00001.10AnotherClientId AnotherAccountId',
  '10:17:45:487RJAnotherClientId 004price out of range                              ',
  '10:18:28:045NOBLMT0000666    EVILCORP00011.00AnotherClientId AnotherAccountId',
  '10:18:28:472OC000002BLMT0000666    EVILCORP00011.00AnotherClientId AnotherAccountId',
  '10:18:28:642TR0000010000010000666    EVILCORP00010.77',
  '10:18:28:687TR0000010000020000666    EVILCORP00010.77',
  sep = '\n')
# Field layouts of the protocol, keyed by message type. Each entry is a
# data.frame pairing the width of every fixed-width field (`widths`) with the
# column name it is given (`col.names`), in the order the fields appear.
specs <- list(
  newOrder = data.frame(
    widths = c(12, 2, 1, 3, 7, 12, 8, 16, 16),
    col.names = c('timestamp', 'msgType', 'side', 'type', 'volume',
                  'symbol', 'price', 'clientId', 'accountId')
  ),
  orderConf = data.frame(
    widths = c(12, 2, 6, 1, 3, 7, 12, 8, 16, 16),
    col.names = c('timestamp', 'msgType', 'orderId', 'side', 'type',
                  'volume', 'symbol', 'price', 'clientId', 'accountId')
  ),
  rejection = data.frame(
    widths = c(12, 2, 16, 3, 48),
    col.names = c('timestamp', 'msgType', 'clientId', 'rejectionCode', 'text')
  ),
  trade = data.frame(
    widths = c(12, 2, 6, 6, 7, 12, 8),
    col.names = c('timestamp', 'msgType', 'tradeId', 'orderId', 'volume',
                  'symbol', 'price')
  )
)
# The selector function is responsible for identifying the message type of a line.
#
# Arguments:
#   line  - a single line of the input file, as a character string.
#   specs - the list of specs; not used here, but kept because it is part of
#           the selector signature (presumably supplied by read.multi.fwf -
#           confirm against the package documentation).
#
# Returns the name of the spec describing `line`, or '' when the two-character
# message code in columns 13-14 is not recognised. Lines that are too short to
# contain a code, and NA lines, also yield '' instead of raising an error.
myselector <- function(line, specs) {
  # Message code -> name of the matching entry in the specs list.
  spec_names <- c(NO = 'newOrder', OC = 'orderConf', TR = 'trade', RJ = 'rejection')
  # The code immediately follows the 12-character timestamp.
  code <- substr(line, 13, 14)
  if (code %in% names(spec_names)) {
    spec_names[[code]]
  } else {
    ''
  }
}
# Parse the file: `myselector` names the spec to apply to each line, and the
# result (printed below) is a list holding one data.frame per message type.
read.multi.fwf(ff, multi.specs = specs, select = myselector)
#> $newOrder
#> timestamp msgType side type volume symbol price
#> 1 10:15:03:279 NO S LMT 666 EVILCORP 10.77
#> 2 10:17:45:153 NO B LMT 666 EVILCORP 1.10
#> 3 10:18:28:045 NO B LMT 666 EVILCORP 11.00
#> clientId accountId
#> 1 SomeClientId SomeAccountId
#> 2 AnotherClientId AnotherAccountId
#> 3 AnotherClientId AnotherAccountId
#>
#> $orderConf
#> timestamp msgType orderId side type volume symbol price
#> 1 10:15:03:793 OC 1 B LMT 666 EVILCORP 10.77
#> 2 10:18:28:472 OC 2 B LMT 666 EVILCORP 11.00
#> clientId accountId
#> 1 SomeClientId SomeAccountId
#> 2 AnotherClientId AnotherAccountId
#>
#> $rejection
#> timestamp msgType clientId rejectionCode
#> 1 10:17:45:487 RJ AnotherClientId 4
#> text
#> 1 price out of range
#>
#> $trade
#> timestamp msgType tradeId orderId volume symbol price
#> 1 10:18:28:642 TR 1 1 666 EVILCORP 10.77
#> 2 10:18:28:687 TR 1 2 666 EVILCORP 10.77
# Delete the temporary file created for this example.
unlink(ff)
The easiest way to install multifwf is from CRAN with install.packages('multifwf').
To install directly from this repo you can use the devtools package (install.packages('devtools')).
You can also clone the repo and build it yourself. The easiest way to do so is by opening multifwf.Rproj with RStudio. To build the package you will also need:
- roxygen2 (for the documentation)
- testthat (to test the package)
- rstudio (optional)