Skip to content

Commit

Permalink
Merge PR #1154 (v2017.06 release) into master
Browse files Browse the repository at this point in the history
  • Loading branch information
eugeneia committed Jun 23, 2017
2 parents a54da2e + f9b5363 commit 4e27ab6
Show file tree
Hide file tree
Showing 271 changed files with 24,949 additions and 4,859 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ obj
*.so
*.o
*#
__pycache__
/src/snabbswitch
/src/snabb
/src/testlog
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ dist: all
mkdir "$(DISTDIR)"
git clone "$(BUILDDIR)" "$(DISTDIR)/snabbswitch"
rm -rf "$(DISTDIR)/snabbswitch/.git"
cp "$(BUILDDIR)/src/snabb" "$(DISTDIR)/$(DIST_BINARY)"
cp "$(BUILDDIR)/src/snabb" "$(DISTDIR)/"
if test "$(DIST_BINARY)" != "snabb"; then ln -s "snabb" "$(DISTDIR)/$(DIST_BINARY)"; fi
cd "$(DISTDIR)/.." && tar cJvf "`basename '$(DISTDIR)'`.tar.xz" "`basename '$(DISTDIR)'`"
rm -rf "$(DISTDIR)"

Expand Down
3 changes: 2 additions & 1 deletion default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{ pkgs ? (import <nixpkgs> {})
, source ? ./.
, version ? "dev"
, supportOpenstack ? true
}:

with pkgs;
Expand All @@ -23,7 +24,7 @@ stdenv.mkDerivation rec {
for f in $(find src/program/snabbnfv/ -type f); do
substituteInPlace $f --replace "/bin/bash" "${bash}/bin/bash"
done
'' + lib.optionalString supportOpenstack ''
# We need a way to pass $PATH to the scripts
sed -i '2iexport PATH=${git}/bin:${mariadb}/bin:${which}/bin:${procps}/bin:${coreutils}/bin' src/program/snabbnfv/neutron_sync_master/neutron_sync_master.sh.inc
sed -i '2iexport PATH=${git}/bin:${coreutils}/bin:${diffutils}/bin:${nettools}/bin' src/program/snabbnfv/neutron_sync_agent/neutron_sync_agent.sh.inc
Expand Down
8 changes: 7 additions & 1 deletion lib/ljsyscall/syscall/syscalls.lua
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@ local function retiter(ret, err, array)
end

-- generic system calls
function S.close(fd) return retbool(C.close(getfd(fd))) end
-- Close a file descriptor, given either as a plain fd number or as an fd
-- object. getfd() is used to obtain the underlying descriptor value.
function S.close(fd)
  local num = getfd(fd)
  -- fd object (it is not equal to its own descriptor value): hand off to the
  -- object's close method, to avoid closing the descriptor multiple times.
  if fd ~= num then return fd:close() end
  -- plain fd number: close it directly
  return retbool(C.close(num))
end
function S.chdir(path) return retbool(C.chdir(path)) end
function S.fchdir(fd) return retbool(C.fchdir(getfd(fd))) end
function S.fchmod(fd, mode) return retbool(C.fchmod(getfd(fd), c.MODE[mode])) end
Expand Down
28 changes: 22 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ RMSRC = $(shell find . -name '*.md' -not -regex './obj.*' -printf '%P ')
PROGRAM = $(shell find program -regex '^[^/]+/[^/]+' -type d -printf '%P ')
# sort to eliminate potential duplicate of programs.inc
INCSRC = $(sort $(shell find . -regex '[^\#]*\.inc' -printf '%P ') programs.inc)
YANGSRC= $(shell find . -regex '[^\#]*\.yang' -printf '%P ')

LUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(LUASRC))
PFLUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(PFLUASRC))
Expand All @@ -36,14 +37,15 @@ JITOBJS:= $(patsubst %,obj/jit_%.o,$(JITSRC))
EXTRAOBJS := obj/jit_tprof.o obj/jit_vmprof.o obj/strict.o
RMOBJS := $(patsubst %,obj/%,$(RMSRC))
INCOBJ := $(patsubst %.inc,obj/%_inc.o, $(INCSRC))
YANGOBJ:= $(patsubst %.yang,obj/%_yang.o, $(YANGSRC))
EXE := bin/snabb $(patsubst %,bin/%,$(PROGRAM))

# TESTMODS expands to:
# core.memory core.lib ...
# for each module that has a top-level selftest () function.
TESTMODS = $(shell find . -regex '[^\#]*\.lua' -printf '%P ' | \
TESTMODS = $(shell find . -regex '[^\#]*\.\(lua\|dasl\)' -printf '%P ' | \
xargs grep -s -l '^function selftest *[[:punct:]]' | \
sed -e 's_\.lua__' -e 's_/_._g')
sed -e 's_\.lua__' -e 's_\.dasl__' -e 's_/_._g')

# TESTSCRIPTS expands to:
# lib/watchdog/selftest.sh ...
Expand All @@ -52,7 +54,7 @@ TESTSCRIPTS = $(shell find . -name "selftest.sh" -executable | xargs)

PATH := ../lib/luajit/usr/local/bin:$(PATH)

snabb: $(LUAOBJ) $(PFLUAOBJ) $(HOBJ) $(COBJ) $(ARCHOBJ) $(ASMOBJ) $(PFLUAASMOBJ) $(INCOBJ) $(LUAJIT_A)
snabb: $(LUAOBJ) $(PFLUAOBJ) $(HOBJ) $(COBJ) $(ARCHOBJ) $(ASMOBJ) $(PFLUAASMOBJ) $(INCOBJ) $(YANGOBJ) $(LUAJIT_A)
$(E) "LINK $@"
$(Q) $(CC) $(DEBUG) -Wl,--no-as-needed -Wl,-E -Werror -Wall -o $@ $^ \
../lib/luajit/src/libluajit.a \
Expand Down Expand Up @@ -171,6 +173,13 @@ $(INCOBJ): obj/%_inc.o: %.inc Makefile | $(OBJDIR)
echo "]=============]") > $(basename $@).luainc
$(Q) luajit -bg -n $(subst /,.,$*)_inc $(basename $@).luainc $@

# Embed each .yang source file into the binary as a Lua module.
# The recipe wraps the file's text in a Lua long-bracket string literal
# ("return [=============[ ... ]=============]"), writes that chunk to
# obj/<path>_yang.luayang, and then has luajit -b turn the chunk into the
# object file $@. The module name passed with -n is the file's path with
# "/" replaced by "." and a "_yang" suffix (e.g. lib/yang/foo.yang ->
# lib.yang.foo_yang).
# NOTE(review): a .yang file containing the literal text "]=============]"
# would end the Lua string early -- presumably never the case in practice.
# $(E) and $(Q) are defined elsewhere in this Makefile (presumably the
# quiet/verbose output helpers).
$(YANGOBJ): obj/%_yang.o: %.yang Makefile | $(OBJDIR)
$(E) "YANG $@"
@(echo -n "return [=============["; \
cat $<; \
echo "]=============]") > $(basename $@).luayang
$(Q) luajit -bg -n $(subst /,.,$*)_yang $(basename $@).luayang $@

# Create list of programs that exist
programs.inc: program
@(for d in program/*/; do basename $$d; done) > $@
Expand Down Expand Up @@ -209,7 +218,15 @@ obj/doc/snabb.epub: obj/doc/snabb.markdown

CLEAN = snabb obj bin testlog programs.inc

clean:
# Run the `clean` target of every program directory that ships its own
# Makefile. The sub-make is invoked through $(MAKE) rather than a literal
# `make`, so that it uses the same make binary as the parent and inherits
# its flags (-n, -j/jobserver, command-line variable overrides).
clean_programs:
	@(for d in program/*/; do \
		if [ -f "$$d/Makefile" ]; then \
			echo "CLEAN $$d"; \
			$(MAKE) -s -C "$$d" clean; \
		fi \
	done)

# Remove everything listed in $(CLEAN), after the per-program clean targets
# have run (clean_programs is a prerequisite).
# The `-` in front of rm tells make to ignore a non-zero exit status from
# that command. $(E) and $(Q) are defined elsewhere in this Makefile
# (presumably the quiet/verbose output helpers).
clean: clean_programs
$(E) "RM $(CLEAN)"
$(Q)-rm -rf $(CLEAN)

Expand All @@ -220,5 +237,4 @@ mrproper: clean
benchmarks:
$(Q) (scripts/bench.sh)

.PHONY: clean $(TESTMODS) $(TESTSCRIPTS) benchmarks

.PHONY: clean_programs clean $(TESTMODS) $(TESTSCRIPTS) benchmarks
91 changes: 91 additions & 0 deletions src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,10 @@ If *readonly* is non-nil the shared object is mapped in read-only mode.
*Readonly* defaults to nil. Fails if the shared object does not already exist.
Returns a pointer to the mapped object.

— Function **shm.alias** *new-path* *existing-path*

Create an alias (symbolic link) for an object.

— Function **shm.exists** *name*

Returns a true value if shared object by *name* exists.
Expand Down Expand Up @@ -910,6 +914,93 @@ lib.parse({foo=42, bar=43}, {foo={required=true}, bar={}, baz={default=44}})
```


## Multiprocess operation (core.worker)

Snabb can operate as a _group_ of cooperating processes. The _main_
process is the initial one that you start directly. The optional
_worker_ processes are children spawned when the main process calls
the `core.worker` module.

DIAGRAM: Multiprocessing
+----------+
+-------+ Main +-------+
| +----------+ |
: : :
+-----+----+ +----+-----+ +----+-----+
| worker 1 | : .... | | worker N |
+----------+ +----------+ +----------+

Each worker is a complete Snabb process. They can define app networks,
run the engine, and do everything else that ordinary Snabb processes
do. The exact behavior of each worker is determined by a Lua
expression provided upon creation.

Groups of Snabb processes each have the following special properties:

- **Group termination**: Terminating the main process automatically terminates all of the
workers. This works for all process termination scenarios including
`kill -9`.
- **Shared DMA memory**: DMA memory pointers obtained with `memory.dma_alloc()` are usable by
all processes in the group. This means that you can share DMA memory
pointers between processes, for example via `shm` shared memory
objects, and reference them from any process. (The memory is
automatically mapped at the expected address via a `SEGV` signal
handler.)
- **PCI device shutdown**: For each PCI device opened by a process within the group, bus
mastering (DMA) is disabled upon termination before any DMA memory
is returned to the kernel. This prevents "dangling" DMA requests
from corrupting memory that has been freed and reused.

The `core.worker` API functions are available in the main process only:

— Function **worker.start** *name* *luacode*

Start a named worker process. The worker starts with a completely
fresh Snabb process image (`fork()+execve()`) and then executes the
string *luacode* as a Lua source code expression.

Example:

```
worker.start("myworker", [[
print("hello world, from a Snabb worker process!")
print("could configure and run the engine now...")
]])
```

— Function **worker.stop** *name*

Stop a named worker process. The worker is abruptly killed.

Example:

```
worker.stop("myworker")
```

— Function **worker.status**

Return a table summarizing the status of all workers. The table key is
the worker name and the value is a table with `pid` and `alive`
attributes.

Example:

```
for w, s in pairs(worker.status()) do
print((" worker %s: pid=%s alive=%s"):format(
w, s.pid, s.alive))
end
```

Output:

```
worker w3: pid=21949 alive=true
worker w1: pid=21947 alive=true
worker w2: pid=21948 alive=true
```

## Main

Snabb designs can be run either with:
Expand Down
145 changes: 145 additions & 0 deletions src/apps/config/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Config leader and follower

Sometimes you want to query the state or configuration of a running
Snabb data plane, or reload its configuration, or incrementally update
that configuration. However, you want to minimize the impact of
configuration query and update on data plane performance. The
`Leader` and `Follower` apps are here to fulfill this need, while
minimizing performance overhead.

The high-level design is that a `Leader` app is responsible for
knowing the state and configuration of a data plane. The leader
offers an interface to allow the outside world to query the
configuration and state, and to request configuration updates. To
avoid data-plane overhead, the `Leader` app should be deployed in a
separate process. Because it knows the data-plane state, it can
respond to queries directly, without involving the data plane. It
processes update requests into a form that the data plane can handle,
and feeds those requests to the data plane via a high-performance
back-channel.

The data plane runs a `Follower` app that reads and applies update
messages sent to it from the leader. Checking for update availability
requires just a memory access, not a system call, so the overhead of
including a follower in the data plane is very low.

## Two protocols

The leader communicates with its followers using a private protocol.
Because the leader and the follower always come from the same Snabb version,
the details of this protocol are free to change between releases.  The private
protocol's only design constraint is that it should cause the lowest
overhead for the data plane.

The leader communicates with the world via a public protocol. The
"snabb config" command-line tool speaks this protocol. "snabb config
get foo /bar" will find the local Snabb instance named "foo", open the
UNIX socket that the "foo" instance is listening on, issue a request,
then read the response, then close the socket.

## Public protocol

The design constraint on the public protocol is that it be expressive
and future-proof. We also want to enable the leader to talk to more
than one "snabb config" at a time. In particular someone should be
able to have a long-lived "snabb config listen" session open, and that
shouldn't impede someone else from doing a "snabb config get" to read
state.

To this end the public protocol container is very simple:

```
Message = Length "\n" RPC*
```

Length is a base-10 string of characters indicating the length of the
message. There may be a maximum length restriction. This requires
that "snabb config" build up the whole message as a string and measure
its length, but that's OK. Knowing the length ahead of time allows
"snabb config" to use nonblocking operations to slurp up the whole
message as a string. A partial read can be resumed later. The
message can then be parsed without fear of blocking the main process.

The RPC is an RPC request or response for the
[`snabb-config-leader-v1` YANG
schema](../../lib/yang/snabb-config-leader-v1.yang), expressed in the
Snabb [textual data format for YANG data](../../lib/yang/README.md).
For example the `snabb-config-leader-v1` schema supports a
`get-config` RPC defined like this in the schema:

```yang
rpc get-config {
input {
leaf schema { type string; mandatory true; }
leaf revision { type string; }
leaf path { type string; default "/"; }
}
output {
leaf config { type string; }
}
}
```

A request to this RPC might look like:

```yang
get-config {
schema snabb-softwire-v1;
path "/foo";
}
```

As you can see, non-mandatory inputs can be left out. A response
might look like:

```yang
get-config {
config "blah blah blah";
}
```

Responses are prefixed by the RPC name. One message can include a
number of RPCs; the RPCs will be made in order. See the
[`snabb-config-leader-v1` YANG
schema](../../lib/yang/snabb-config-leader-v1.yang) for full details
of available RPCs.

## Private protocol

The leader maintains a configuration for the program as a whole. As
it gets requests, it computes the set of changes to app graphs that
would be needed to apply that configuration. These changes are then
passed through the private protocol to the follower. No response from
the follower is necessary.

In some remote or perhaps not so remote future, all Snabb apps will
have associated YANG schemas describing their individual
configurations. In this happy future, the generic way to ship
configurations from the leader to a follower is by the binary
serialization of YANG data, implemented already in the YANG modules.
Until then however, there is also generic Lua data without a schema.
The private protocol supports both kinds of information transfer.

In the meantime, the way to indicate that an app's configuration data
conforms to a YANG schema is to set the `schema_name` property on the
app's class.

The private protocol consists of binary messages passed over a ring
buffer. A follower's leader writes to the buffer, and the follower
reads from it. There are no other readers or writers. Given that a
message may in general be unbounded in size, whereas a ring buffer is
naturally fixed, messages which may include arbitrary-sized data may be
forced to put that data in the filesystem, and refer to it from the
messages in the ring buffer. Since this file system is backed by
`tmpfs`, stalls will be minimal.

## User interface

The above sections document how the leader and follower apps are
implemented so that a data-plane developer can understand the overhead
of run-time (re)configuration. End users won't be typing at a UNIX
socket though; we include the `snabb config` program as a command-line
interface to this functionality.

See [the `snabb config` documentation](../../program/config/README.md)
for full details.
Loading

0 comments on commit 4e27ab6

Please sign in to comment.