Commit ca8e96629441556abfbada83d22ce1ad28f51402

Authored by Julien LANGLOIS
1 parent ca2ee5a6

controller, datapath

Showing 85 changed files with 14979 additions and 0 deletions

Too many changes to show.

To preserve performance only 24 of 85 files are displayed.

controller/.deps/.dirstamp 0 → 100755
controller/.deps/controller.Po 0 → 100644
  1 +controller/controller.o: controller/controller.c \
  2 + /usr/include/stdc-predef.h config.h /usr/include/errno.h \
  3 + /usr/include/features.h /usr/include/x86_64-linux-gnu/sys/cdefs.h \
  4 + /usr/include/x86_64-linux-gnu/bits/wordsize.h \
  5 + /usr/include/x86_64-linux-gnu/gnu/stubs.h \
  6 + /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
  7 + /usr/include/x86_64-linux-gnu/bits/errno.h /usr/include/linux/errno.h \
  8 + /usr/include/x86_64-linux-gnu/asm/errno.h \
  9 + /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \
  10 + /usr/include/getopt.h \
  11 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed/limits.h \
  12 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed/syslimits.h \
  13 + /usr/include/limits.h /usr/include/x86_64-linux-gnu/bits/posix1_lim.h \
  14 + /usr/include/x86_64-linux-gnu/bits/local_lim.h \
  15 + /usr/include/linux/limits.h \
  16 + /usr/include/x86_64-linux-gnu/bits/posix2_lim.h \
  17 + /usr/include/x86_64-linux-gnu/bits/xopen_lim.h \
  18 + /usr/include/x86_64-linux-gnu/bits/stdio_lim.h /usr/include/signal.h \
  19 + /usr/include/x86_64-linux-gnu/bits/sigset.h \
  20 + /usr/include/x86_64-linux-gnu/bits/types.h \
  21 + /usr/include/x86_64-linux-gnu/bits/typesizes.h \
  22 + /usr/include/x86_64-linux-gnu/bits/signum.h /usr/include/time.h \
  23 + /usr/include/x86_64-linux-gnu/bits/siginfo.h \
  24 + /usr/include/x86_64-linux-gnu/bits/sigaction.h \
  25 + /usr/include/x86_64-linux-gnu/bits/sigcontext.h \
  26 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include/stddef.h \
  27 + /usr/include/x86_64-linux-gnu/bits/sigstack.h \
  28 + /usr/include/x86_64-linux-gnu/sys/ucontext.h \
  29 + /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \
  30 + /usr/include/x86_64-linux-gnu/bits/sigthread.h /usr/include/stdlib.h \
  31 + /usr/include/x86_64-linux-gnu/bits/waitflags.h \
  32 + /usr/include/x86_64-linux-gnu/bits/waitstatus.h /usr/include/endian.h \
  33 + /usr/include/x86_64-linux-gnu/bits/endian.h \
  34 + /usr/include/x86_64-linux-gnu/bits/byteswap.h \
  35 + /usr/include/x86_64-linux-gnu/bits/byteswap-16.h /usr/include/xlocale.h \
  36 + /usr/include/x86_64-linux-gnu/sys/types.h \
  37 + /usr/include/x86_64-linux-gnu/sys/select.h \
  38 + /usr/include/x86_64-linux-gnu/bits/select.h \
  39 + /usr/include/x86_64-linux-gnu/bits/time.h \
  40 + /usr/include/x86_64-linux-gnu/bits/select2.h \
  41 + /usr/include/x86_64-linux-gnu/sys/sysmacros.h /usr/include/alloca.h \
  42 + /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \
  43 + /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \
  44 + /usr/include/x86_64-linux-gnu/bits/stdlib.h /usr/include/string.h \
  45 + /usr/include/x86_64-linux-gnu/bits/string.h \
  46 + /usr/include/x86_64-linux-gnu/bits/string2.h \
  47 + /usr/include/x86_64-linux-gnu/bits/string3.h lib/command-line.h \
  48 + lib/compiler.h lib/daemon.h \
  49 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdbool.h lib/fault.h \
  50 + lib/learning-switch.h lib/ofpbuf.h include/openflow/openflow.h \
  51 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdint.h /usr/include/stdint.h \
  52 + /usr/include/x86_64-linux-gnu/bits/wchar.h lib/poll-loop.h \
  53 + /usr/include/poll.h /usr/include/x86_64-linux-gnu/sys/poll.h \
  54 + /usr/include/x86_64-linux-gnu/bits/poll.h \
  55 + /usr/include/x86_64-linux-gnu/bits/poll2.h lib/rconn.h lib/queue.h \
  56 + /usr/include/x86_64-linux-gnu/bits/timex.h lib/timeval.h \
  57 + lib/type-props.h lib/util.h \
  58 + /usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdarg.h /usr/include/stdio.h \
  59 + /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
  60 + /usr/include/x86_64-linux-gnu/bits/sys_errlist.h \
  61 + /usr/include/x86_64-linux-gnu/bits/stdio.h \
  62 + /usr/include/x86_64-linux-gnu/bits/stdio2.h lib/compiler.h lib/util.h \
  63 + lib/vconn-ssl.h lib/vconn.h lib/vlog-socket.h lib/vlog.h \
  64 + lib/vlog-modules.def
  65 +
  66 +/usr/include/stdc-predef.h:
  67 +
  68 +config.h:
  69 +
  70 +/usr/include/errno.h:
  71 +
  72 +/usr/include/features.h:
  73 +
  74 +/usr/include/x86_64-linux-gnu/sys/cdefs.h:
  75 +
  76 +/usr/include/x86_64-linux-gnu/bits/wordsize.h:
  77 +
  78 +/usr/include/x86_64-linux-gnu/gnu/stubs.h:
  79 +
  80 +/usr/include/x86_64-linux-gnu/gnu/stubs-64.h:
  81 +
  82 +/usr/include/x86_64-linux-gnu/bits/errno.h:
  83 +
  84 +/usr/include/linux/errno.h:
  85 +
  86 +/usr/include/x86_64-linux-gnu/asm/errno.h:
  87 +
  88 +/usr/include/asm-generic/errno.h:
  89 +
  90 +/usr/include/asm-generic/errno-base.h:
  91 +
  92 +/usr/include/getopt.h:
  93 +
  94 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed/limits.h:
  95 +
  96 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include-fixed/syslimits.h:
  97 +
  98 +/usr/include/limits.h:
  99 +
  100 +/usr/include/x86_64-linux-gnu/bits/posix1_lim.h:
  101 +
  102 +/usr/include/x86_64-linux-gnu/bits/local_lim.h:
  103 +
  104 +/usr/include/linux/limits.h:
  105 +
  106 +/usr/include/x86_64-linux-gnu/bits/posix2_lim.h:
  107 +
  108 +/usr/include/x86_64-linux-gnu/bits/xopen_lim.h:
  109 +
  110 +/usr/include/x86_64-linux-gnu/bits/stdio_lim.h:
  111 +
  112 +/usr/include/signal.h:
  113 +
  114 +/usr/include/x86_64-linux-gnu/bits/sigset.h:
  115 +
  116 +/usr/include/x86_64-linux-gnu/bits/types.h:
  117 +
  118 +/usr/include/x86_64-linux-gnu/bits/typesizes.h:
  119 +
  120 +/usr/include/x86_64-linux-gnu/bits/signum.h:
  121 +
  122 +/usr/include/time.h:
  123 +
  124 +/usr/include/x86_64-linux-gnu/bits/siginfo.h:
  125 +
  126 +/usr/include/x86_64-linux-gnu/bits/sigaction.h:
  127 +
  128 +/usr/include/x86_64-linux-gnu/bits/sigcontext.h:
  129 +
  130 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include/stddef.h:
  131 +
  132 +/usr/include/x86_64-linux-gnu/bits/sigstack.h:
  133 +
  134 +/usr/include/x86_64-linux-gnu/sys/ucontext.h:
  135 +
  136 +/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h:
  137 +
  138 +/usr/include/x86_64-linux-gnu/bits/sigthread.h:
  139 +
  140 +/usr/include/stdlib.h:
  141 +
  142 +/usr/include/x86_64-linux-gnu/bits/waitflags.h:
  143 +
  144 +/usr/include/x86_64-linux-gnu/bits/waitstatus.h:
  145 +
  146 +/usr/include/endian.h:
  147 +
  148 +/usr/include/x86_64-linux-gnu/bits/endian.h:
  149 +
  150 +/usr/include/x86_64-linux-gnu/bits/byteswap.h:
  151 +
  152 +/usr/include/x86_64-linux-gnu/bits/byteswap-16.h:
  153 +
  154 +/usr/include/xlocale.h:
  155 +
  156 +/usr/include/x86_64-linux-gnu/sys/types.h:
  157 +
  158 +/usr/include/x86_64-linux-gnu/sys/select.h:
  159 +
  160 +/usr/include/x86_64-linux-gnu/bits/select.h:
  161 +
  162 +/usr/include/x86_64-linux-gnu/bits/time.h:
  163 +
  164 +/usr/include/x86_64-linux-gnu/bits/select2.h:
  165 +
  166 +/usr/include/x86_64-linux-gnu/sys/sysmacros.h:
  167 +
  168 +/usr/include/alloca.h:
  169 +
  170 +/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h:
  171 +
  172 +/usr/include/x86_64-linux-gnu/bits/stdlib-float.h:
  173 +
  174 +/usr/include/x86_64-linux-gnu/bits/stdlib.h:
  175 +
  176 +/usr/include/string.h:
  177 +
  178 +/usr/include/x86_64-linux-gnu/bits/string.h:
  179 +
  180 +/usr/include/x86_64-linux-gnu/bits/string2.h:
  181 +
  182 +/usr/include/x86_64-linux-gnu/bits/string3.h:
  183 +
  184 +lib/command-line.h:
  185 +
  186 +lib/compiler.h:
  187 +
  188 +lib/daemon.h:
  189 +
  190 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdbool.h:
  191 +
  192 +lib/fault.h:
  193 +
  194 +lib/learning-switch.h:
  195 +
  196 +lib/ofpbuf.h:
  197 +
  198 +include/openflow/openflow.h:
  199 +
  200 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdint.h:
  201 +
  202 +/usr/include/stdint.h:
  203 +
  204 +/usr/include/x86_64-linux-gnu/bits/wchar.h:
  205 +
  206 +lib/poll-loop.h:
  207 +
  208 +/usr/include/poll.h:
  209 +
  210 +/usr/include/x86_64-linux-gnu/sys/poll.h:
  211 +
  212 +/usr/include/x86_64-linux-gnu/bits/poll.h:
  213 +
  214 +/usr/include/x86_64-linux-gnu/bits/poll2.h:
  215 +
  216 +lib/rconn.h:
  217 +
  218 +lib/queue.h:
  219 +
  220 +/usr/include/x86_64-linux-gnu/bits/timex.h:
  221 +
  222 +lib/timeval.h:
  223 +
  224 +lib/type-props.h:
  225 +
  226 +lib/util.h:
  227 +
  228 +/usr/lib/gcc/x86_64-linux-gnu/4.8/include/stdarg.h:
  229 +
  230 +/usr/include/stdio.h:
  231 +
  232 +/usr/include/libio.h:
  233 +
  234 +/usr/include/_G_config.h:
  235 +
  236 +/usr/include/wchar.h:
  237 +
  238 +/usr/include/x86_64-linux-gnu/bits/sys_errlist.h:
  239 +
  240 +/usr/include/x86_64-linux-gnu/bits/stdio.h:
  241 +
  242 +/usr/include/x86_64-linux-gnu/bits/stdio2.h:
  243 +
  244 +lib/compiler.h:
  245 +
  246 +lib/util.h:
  247 +
  248 +lib/vconn-ssl.h:
  249 +
  250 +lib/vconn.h:
  251 +
  252 +lib/vlog-socket.h:
  253 +
  254 +lib/vlog.h:
  255 +
  256 +lib/vlog-modules.def:
... ...
controller/.dirstamp 0 → 100755
controller/.gitignore 0 → 100755
  1 +/Makefile
  2 +/Makefile.in
  3 +/controller
  4 +/controller.8
... ...
controller/automake.mk 0 → 100755
  1 +bin_PROGRAMS += controller/controller
  2 +man_MANS += controller/controller.8
  3 +DISTCLEANFILES += controller/controller.8
  4 +
  5 +controller_controller_SOURCES = controller/controller.c
  6 +controller_controller_LDADD = lib/libopenflow.a $(FAULT_LIBS) $(SSL_LIBS)
  7 +
  8 +EXTRA_DIST += controller/controller.8.in
... ...
controller/controller.8.in 0 → 100755
  1 +.ds PN controller
  2 +
  3 +.TH controller 8 "May 2008" "OpenFlow" "OpenFlow Manual"
  4 +
  5 +.SH NAME
  6 +controller \- simple OpenFlow controller reference implementation
  7 +
  8 +.SH SYNOPSIS
  9 +.B controller
  10 +[\fIoptions\fR] \fImethod\fR \fB[\fImethod\fR]\&...
  11 +
  12 +.SH DESCRIPTION
  13 +A sample OpenFlow controller which functions as an L2 MAC-learning
  14 +switch or hub. \fBcontroller\fR can manage a remote datapath through
  15 +a secure channel (see \fBofprotocol(8)\fR). It can also connect directly
  16 +to a local datapath via Netlink.
  17 +
  18 +\fBcontroller\fR controls one or more OpenFlow switches, specified as
  19 +one or more of the following OpenFlow connection methods:
  20 +
  21 +.TP
  22 +\fBpssl:\fR[\fIport\fR]
  23 +Listens for SSL connections from remote OpenFlow switches on
  24 +\fIport\fR (default: 6633). The \fB--private-key\fR,
  25 +\fB--certificate\fR, and \fB--ca-cert\fR options are mandatory when
  26 +this form is used.
  27 +
  28 +.TP
  29 +\fBptcp:\fR[\fIport\fR]
  30 +Listens for TCP connections from remote OpenFlow switches on
  31 +\fIport\fR (default: 6633).
  32 +
  33 +.TP
  34 +\fBpunix:\fIfile\fR
  35 +Listens for connections from OpenFlow switches on the Unix domain
  36 +server socket named \fIfile\fR.
  37 +
  38 +.TP
  39 +\fBnl:\fIdp_idx\fR
  40 +The local Netlink datapath numbered \fIdp_idx\fR, as configured with
  41 +.BR dpctl (8).
  42 +This form requires that the local host has the OpenFlow kernel
  43 +module for Linux loaded.
  44 +
  45 +.TP
  46 +\fBssl:\fIhost\fR[\fB:\fIport\fR]
  47 +The specified SSL \fIport\fR (default: 6633) on the given remote
  48 +\fIhost\fR. The \fB--private-key\fR, \fB--certificate\fR, and
  49 +\fB--ca-cert\fR options are mandatory when this form is used.
  50 +
  51 +.TP
  52 +\fBtcp:\fIhost\fR[\fB:\fIport\fR]
  53 +The specified TCP \fIport\fR (default: 6633) on the given remote
  54 +\fIhost\fR.
  55 +
  56 +.TP
  57 +\fBunix:\fIfile\fR
  58 +The Unix domain server socket named \fIfile\fR.
  59 +
  60 +.SH OPTIONS
  61 +.TP
  62 +\fB-p\fR, \fB--private-key=\fIprivkey.pem\fR
  63 +Specifies a PEM file containing the private key used as the controller's
  64 +identity for SSL connections to switches.
  65 +
  66 +.TP
  67 +\fB-c\fR, \fB--certificate=\fIcert.pem\fR
  68 +Specifies a PEM file containing a certificate, signed by the
  69 +controller's certificate authority (CA), that certifies the switch's
  70 +private key to identify a trustworthy switch.
  71 +
  72 +.TP
  73 +\fB-C\fR, \fB--ca-cert=\fIswitch-cacert.pem\fR
  74 +Specifies a PEM file containing the CA certificate used to verify that
  75 +the connecting switches are trustworthy.
  76 +
  77 +.TP
  78 +\fB--peer-ca-cert=\fIcontroller-cacert.pem\fR
  79 +Specifies a PEM file that contains one or more additional certificates
  80 +to send to switches. \fIcontroller-cacert.pem\fR should be the CA
  81 +certificate used to sign the controller's own certificate (the
  82 +certificate specified on \fB-c\fR or \fB--certificate\fR).
  83 +
  84 +This option is not useful in normal operation, because the switch must
  85 +already have the controller CA certificate for it to have any
  86 +confidence in the controller's identity. However, this option allows
  87 +a newly installed switch to obtain the controller CA certificate on
  88 +first boot using, e.g., the \fB--bootstrap-ca-cert\fR option to
  89 +\fBofprotocol\fR(8).
  90 +
  91 +.TP
  92 +.BR \-n ", " \-\^\-noflow
  93 +By default, the controller sets up a flow in each OpenFlow switch
  94 +whenever it receives a packet whose destination is known through
  95 +MAC learning. This option disables flow setup, so that every packet
  96 +in the network passes through the controller.
  97 +
  98 +This option is most useful for debugging. It reduces switching
  99 +performance, so it should not be used in production.
  100 +
  101 +.TP
  102 +\fB--max-idle=\fIsecs\fR|\fBpermanent\fR
  103 +Sets \fIsecs\fR as the number of seconds that a flow set up by the
  104 +controller will remain in the switch's flow table without any matching
  105 +packets being seen. If \fBpermanent\fR is specified, which is not
  106 +recommended, flows will never expire. The default is 60 seconds.
  107 +
  108 +This option affects only flows set up by the OpenFlow controller. In
  109 +some configurations, the OpenFlow secure channel can set up some flows
  110 +on its own. To set the idle time for those flows, pass
  111 +\fB--max-idle\fR to \fBofprotocol\fR(8).
  112 +
  113 +This option has no effect when \fB-n\fR (or \fB--noflow\fR) is in use
  114 +(because the controller does not set up flows in that case).
  115 +
  116 +.TP
  117 +.BR \-H ", " \-\^\-hub
  118 +By default, the controller acts as an L2 MAC-learning switch. This
  119 +option changes its behavior to that of a hub that floods packets on
  120 +all but the incoming port.
  121 +
  122 +If \fB-H\fR (or \fB--hub\fR) and \fB-n\fR (or \fB--noflow\fR) are used
  123 +together, then the cumulative effect is that every packet passes
  124 +through the controller and every packet is flooded.
  125 +
  126 +This option is most useful for debugging. It reduces switching
  127 +performance, so it should not be used in production.
  128 +
  129 +.so lib/daemon.man
  130 +.so lib/vlog.man
  131 +.so lib/common.man
  132 +
  133 +.SH EXAMPLES
  134 +
  135 +.TP
  136 +To connect directly to local datapath 0 over netlink (Linux only):
  137 +
  138 +.B % controller nl:0
  139 +
  140 +.TP
  141 +To bind locally to port 6633 (the default) and wait for incoming connections from OpenFlow switches:
  142 +
  143 +.B % controller ptcp:
  144 +
  145 +.SH "SEE ALSO"
  146 +
  147 +.BR dpctl (8),
  148 +.BR ofprotocol (8),
  149 +.BR ofdatapath (8),
  150 +.BR vlogconf (8)
... ...
controller/controller.c 0 → 100755
  1 +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford
  2 + * Junior University
  3 + *
  4 + * We are making the OpenFlow specification and associated documentation
  5 + * (Software) available for public use and benefit with the expectation
  6 + * that others will use, modify and enhance the Software and contribute
  7 + * those enhancements back to the community. However, since we would
  8 + * like to make the Software available for broadest use, with as few
  9 + * restrictions as possible permission is hereby granted, free of
  10 + * charge, to any person obtaining a copy of this Software to deal in
  11 + * the Software under the copyrights without restriction, including
  12 + * without limitation the rights to use, copy, modify, merge, publish,
  13 + * distribute, sublicense, and/or sell copies of the Software, and to
  14 + * permit persons to whom the Software is furnished to do so, subject to
  15 + * the following conditions:
  16 + *
  17 + * The above copyright notice and this permission notice shall be
  18 + * included in all copies or substantial portions of the Software.
  19 + *
  20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  24 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27 + * SOFTWARE.
  28 + *
  29 + * The name and trademarks of copyright holder(s) may NOT be used in
  30 + * advertising or publicity pertaining to the Software or any
  31 + * derivatives without specific, written prior permission.
  32 + */
  33 +
  34 +#include <config.h>
  35 +
  36 +#include <errno.h>
  37 +#include <getopt.h>
  38 +#include <limits.h>
  39 +#include <signal.h>
  40 +#include <stdlib.h>
  41 +#include <string.h>
  42 +
  43 +#include "command-line.h"
  44 +#include "compiler.h"
  45 +#include "daemon.h"
  46 +#include "fault.h"
  47 +#include "learning-switch.h"
  48 +#include "ofpbuf.h"
  49 +#include "openflow/openflow.h"
  50 +#include "poll-loop.h"
  51 +#include "rconn.h"
  52 +#include "timeval.h"
  53 +#include "util.h"
  54 +#include "vconn-ssl.h"
  55 +#include "vconn.h"
  56 +#include "vlog-socket.h"
  57 +
  58 +#include "vlog.h"
  59 +#define THIS_MODULE VLM_controller
  60 +
  61 +#define MAX_SWITCHES 4096
  62 +#define MAX_LISTENERS 4096
  63 +
  64 +struct switch_ {
  65 + struct lswitch *lswitch;
  66 + struct rconn *rconn;
  67 +};
  68 +
  69 +/* Learn the ports on which MAC addresses appear? */
  70 +static bool learn_macs = true;
  71 +
  72 +/* Set up flows? (If not, every packet is processed at the controller.) */
  73 +static bool setup_flows = true;
  74 +
  75 +/* --max-idle: Maximum idle time, in seconds, before flows expire. */
  76 +static int max_idle = 60;
  77 +
  78 +static int do_switching(struct switch_ *);
  79 +static void new_switch(struct switch_ *, struct vconn *, const char *name);
  80 +static void parse_options(int argc, char *argv[]);
  81 +static void usage(void) NO_RETURN;
  82 +
/* Entry point.  Opens each vconn name on the command line as either an
 * active switch connection or a passive listener, daemonizes, and then
 * services all connections until every switch and listener is gone. */
int
main(int argc, char *argv[])
{
    struct switch_ switches[MAX_SWITCHES];   /* Active switch connections. */
    struct pvconn *listeners[MAX_LISTENERS]; /* Passive listening vconns. */
    int n_switches, n_listeners;             /* Occupied counts of the above. */
    int retval;
    int i;

    set_program_name(argv[0]);
    register_fault_handlers();
    time_init();
    vlog_init();
    parse_options(argc, argv);
    /* A peer that closes its connection must not kill us with SIGPIPE;
     * the failed write is reported through the connection instead. */
    signal(SIGPIPE, SIG_IGN);

    if (argc - optind < 1) {
        ofp_fatal(0, "at least one vconn argument required; "
                  "use --help for usage");
    }

    /* Sort the non-option arguments into active connections and listeners:
     * each name is tried as an active vconn first; EAFNOSUPPORT means the
     * name is only valid as a passive vconn, so it is retried as one. */
    n_switches = n_listeners = 0;
    for (i = optind; i < argc; i++) {
        const char *name = argv[i];
        struct vconn *vconn;
        int retval;             /* NOTE(review): shadows the outer 'retval'. */

        retval = vconn_open(name, OFP_VERSION, &vconn);
        if (!retval) {
            if (n_switches >= MAX_SWITCHES) {
                ofp_fatal(0, "max %d switch connections", n_switches);
            }
            new_switch(&switches[n_switches++], vconn, name);
            continue;
        } else if (retval == EAFNOSUPPORT) {
            struct pvconn *pvconn;
            retval = pvconn_open(name, &pvconn);
            if (!retval) {
                if (n_listeners >= MAX_LISTENERS) {
                    ofp_fatal(0, "max %d passive connections", n_listeners);
                }
                listeners[n_listeners++] = pvconn;
            }
        }
        if (retval) {
            /* Nonfatal: other arguments may still succeed. */
            VLOG_ERR("%s: connect: %s", name, strerror(retval));
        }
    }
    if (n_switches == 0 && n_listeners == 0) {
        ofp_fatal(0, "no active or passive switch connections");
    }

    die_if_already_running();
    daemonize();

    retval = vlog_server_listen(NULL, NULL);
    if (retval) {
        ofp_fatal(retval, "Could not listen for vlog connections");
    }

    /* Main loop: exits only when every switch and every listener is gone. */
    while (n_switches > 0 || n_listeners > 0) {
        int iteration;
        int i;

        /* Accept connections on listening vconns.  A listener that fails
         * with anything other than EAGAIN is closed and removed by swapping
         * the last listener into its slot (hence 'i' is not advanced). */
        for (i = 0; i < n_listeners && n_switches < MAX_SWITCHES; ) {
            struct vconn *new_vconn;
            int retval;

            retval = pvconn_accept(listeners[i], OFP_VERSION, &new_vconn);
            if (!retval || retval == EAGAIN) {
                if (!retval) {
                    new_switch(&switches[n_switches++], new_vconn, "tcp");
                }
                i++;
            } else {
                pvconn_close(listeners[i]);
                listeners[i] = listeners[--n_listeners];
            }
        }

        /* Do some switching work.  Limit the number of iterations so that
         * callbacks registered with the poll loop don't starve. */
        for (iteration = 0; iteration < 50; iteration++) {
            bool progress = false;
            for (i = 0; i < n_switches; ) {
                struct switch_ *this = &switches[i];
                int retval = do_switching(this);
                if (!retval || retval == EAGAIN) {
                    if (!retval) {
                        progress = true;    /* A packet was handled. */
                    }
                    i++;
                } else {
                    /* Connection dead: tear it down and swap-remove. */
                    rconn_destroy(this->rconn);
                    lswitch_destroy(this->lswitch);
                    switches[i] = switches[--n_switches];
                }
            }
            if (!progress) {
                break;          /* Everything would block; stop early. */
            }
        }
        for (i = 0; i < n_switches; i++) {
            struct switch_ *this = &switches[i];
            lswitch_run(this->lswitch, this->rconn);
        }

        /* Wait for something to happen.  Listeners are only polled while
         * there is room for another switch connection. */
        if (n_switches < MAX_SWITCHES) {
            for (i = 0; i < n_listeners; i++) {
                pvconn_wait(listeners[i]);
            }
        }
        for (i = 0; i < n_switches; i++) {
            struct switch_ *sw = &switches[i];
            rconn_run_wait(sw->rconn);
            rconn_recv_wait(sw->rconn);
            lswitch_wait(sw->lswitch);
        }
        poll_block();
    }

    return 0;
}
  208 +
  209 +static void
  210 +new_switch(struct switch_ *sw, struct vconn *vconn, const char *name)
  211 +{
  212 + sw->rconn = rconn_new_from_vconn(name, vconn);
  213 + sw->lswitch = lswitch_create(sw->rconn, learn_macs,
  214 + setup_flows ? max_idle : -1);
  215 +}
  216 +
  217 +static int
  218 +do_switching(struct switch_ *sw)
  219 +{
  220 + unsigned int packets_sent;
  221 + struct ofpbuf *msg;
  222 +
  223 + packets_sent = rconn_packets_sent(sw->rconn);
  224 +
  225 + msg = rconn_recv(sw->rconn);
  226 + if (msg) {
  227 + lswitch_process_packet(sw->lswitch, sw->rconn, msg);
  228 + ofpbuf_delete(msg);
  229 + }
  230 + rconn_run(sw->rconn);
  231 +
  232 + return (!rconn_is_alive(sw->rconn) ? EOF
  233 + : rconn_packets_sent(sw->rconn) != packets_sent ? 0
  234 + : EAGAIN);
  235 +}
  236 +
/* Parses the command-line options in 'argv', setting the file-scope option
 * variables (learn_macs, setup_flows, max_idle) and handing subsystem
 * options to the vlog/daemon/SSL handler macros.  Exits on --help,
 * --version, or an invalid option. */
static void
parse_options(int argc, char *argv[])
{
    enum {
        /* Long-only option values, chosen above UCHAR_MAX so they cannot
         * collide with any single-character option. */
        OPT_MAX_IDLE = UCHAR_MAX + 1,
        OPT_PEER_CA_CERT,
        VLOG_OPTION_ENUMS
    };
    static struct option long_options[] = {
        {"hub", no_argument, 0, 'H'},
        {"noflow", no_argument, 0, 'n'},
        {"max-idle", required_argument, 0, OPT_MAX_IDLE},
        {"help", no_argument, 0, 'h'},
        {"version", no_argument, 0, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
#ifdef HAVE_OPENSSL
        VCONN_SSL_LONG_OPTIONS
        {"peer-ca-cert", required_argument, 0, OPT_PEER_CA_CERT},
#endif
        {0, 0, 0, 0},
    };
    /* Derived short-option string; freed before returning. */
    char *short_options = long_options_to_short_options(long_options);

    for (;;) {
        int indexptr;
        int c;

        c = getopt_long(argc, argv, short_options, long_options, &indexptr);
        if (c == -1) {
            break;
        }

        switch (c) {
        case 'H':
            /* --hub: flood everything instead of learning MAC bindings. */
            learn_macs = false;
            break;

        case 'n':
            /* --noflow: never install flows; every packet visits the
             * controller. */
            setup_flows = false;
            break;

        case OPT_MAX_IDLE:
            if (!strcmp(optarg, "permanent")) {
                max_idle = OFP_FLOW_PERMANENT;
            } else {
                /* NOTE(review): atoi() reports no errors; non-numeric input
                 * yields 0 and is rejected by the range check below. */
                max_idle = atoi(optarg);
                if (max_idle < 1 || max_idle > 65535) {
                    ofp_fatal(0, "--max-idle argument must be between 1 and "
                              "65535 or the word 'permanent'");
                }
            }
            break;

        case 'h':
            usage();            /* Declared NO_RETURN; does not fall through. */

        case 'V':
            printf("%s "VERSION" compiled "__DATE__" "__TIME__"\n", argv[0]);
            exit(EXIT_SUCCESS);

        VLOG_OPTION_HANDLERS
        DAEMON_OPTION_HANDLERS

#ifdef HAVE_OPENSSL
        VCONN_SSL_OPTION_HANDLERS

        case OPT_PEER_CA_CERT:
            vconn_ssl_set_peer_ca_cert_file(optarg);
            break;
#endif

        case '?':
            /* getopt_long() already printed an error message. */
            exit(EXIT_FAILURE);

        default:
            abort();
        }
    }
    free(short_options);
}
  318 +
/* Prints a usage message for the controller program, including the shared
 * vconn/daemon/vlog option help, and exits successfully.  Declared
 * NO_RETURN in the prototype above. */
static void
usage(void)
{
    printf("%s: OpenFlow controller\n"
           "usage: %s [OPTIONS] METHOD\n"
           "where METHOD is any OpenFlow connection method.\n",
           program_name, program_name);
    /* Active and passive connection methods; no bootstrap help. */
    vconn_usage(true, true, false);
    daemon_usage();
    vlog_usage();
    printf("\nOther options:\n"
           "  -H, --hub               act as hub instead of learning switch\n"
           "  -n, --noflow            pass traffic, but don't add flows\n"
           "  --max-idle=SECS         max idle time for new flows\n"
           "  -h, --help              display this help message\n"
           "  -V, --version           display version information\n");
    exit(EXIT_SUCCESS);
}
... ...
controller/controller.o 0 → 100644
No preview for this file type
datapath/.gitignore 0 → 100755
  1 +/Makefile
  2 +/Makefile.in
  3 +*.cmd
  4 +*.ko
  5 +*.mod.c
  6 +Module.symvers
  7 +
... ...
datapath/Makefile.am 0 → 100755
  1 +SUBDIRS =
  2 +if L26_ENABLED
  3 +SUBDIRS += linux-2.6
  4 +endif
  5 +
  6 +EXTRA_DIST = $(dist_headers) $(dist_sources)
  7 +EXTRA_DIST += hwtable_dummy/Modules.mk hwtable_dummy/hwtable_dummy.c
  8 +EXTRA_DIST += \
  9 + hwtable_nf2/Modules.mk \
  10 + hwtable_nf2/nf2.h \
  11 + hwtable_nf2/nf2_hwapi.h \
  12 + hwtable_nf2/nf2_reg.h \
  13 + hwtable_nf2/nf2_flowtable.c \
  14 + hwtable_nf2/nf2_flowtable.h \
  15 + hwtable_nf2/nf2_lib.c \
  16 + hwtable_nf2/nf2_lib.h \
  17 + hwtable_nf2/nf2_procfs.c \
  18 + hwtable_nf2/nf2_procfs.h \
  19 + hwtable_nf2/nf2_openflow.c \
  20 + hwtable_nf2/nf2_openflow.h \
  21 + hwtable_nf2/openflow_switch.bit
  22 +
  23 +# Suppress warnings about GNU extensions in Modules.mk files.
  24 +AUTOMAKE_OPTIONS = -Wno-portability
  25 +
  26 +include Modules.mk
  27 +include linux-2.6/Modules.mk
... ...
datapath/Modules.mk 0 → 100755
  1 +# Some modules should be built and distributed, e.g. openflow.
  2 +#
  3 +# Some modules should be distributed but not built, e.g. we do not build
  4 +# veth if the kernel in question already has it.
  5 +#
  6 +# Some modules should be built but not distributed, e.g. third-party
  7 +# hwtable modules.
  8 +both_modules = ofdatapath
  9 +build_modules = $(both_modules) # Modules to build
  10 +dist_modules = $(both_modules) # Modules to distribute
  11 +
  12 +ofdatapath_sources = \
  13 + chain.c \
  14 + crc32.c \
  15 + datapath.c \
  16 + dp_act.c \
  17 + dp_dev.c \
  18 + dp_notify.c \
  19 + flow.c \
  20 + forward.c \
  21 + private-msg.c \
  22 + table-hash.c \
  23 + table-linear.c
  24 +
  25 +ofdatapath_headers = \
  26 + chain.h \
  27 + compat.h \
  28 + crc32.h \
  29 + datapath.h \
  30 + dp_dev.h \
  31 + flow.h \
  32 + forward.h \
  33 + dp_act.h \
  34 + private-msg.h \
  35 + table.h
  36 +
  37 +dist_sources = $(foreach module,$(dist_modules),$($(module)_sources))
  38 +dist_headers = $(foreach module,$(dist_modules),$($(module)_headers))
  39 +build_sources = $(foreach module,$(build_modules),$($(module)_sources))
  40 +build_headers = $(foreach module,$(build_modules),$($(module)_headers))
  41 +build_links = $(notdir $(build_sources))
  42 +build_objects = $(notdir $(patsubst %.c,%.o,$(build_sources)))
... ...
datapath/chain.c 0 → 100755
  1 +/*
  2 + * Distributed under the terms of the GNU GPL version 2.
  3 + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
  4 + * Stanford Junior University
  5 + */
  6 +
  7 +#include "chain.h"
  8 +#include "datapath.h"
  9 +#include "flow.h"
  10 +#include "table.h"
  11 +#include <linux/module.h>
  12 +#include <linux/rcupdate.h>
  13 +#include <linux/slab.h>
  14 +#include <linux/spinlock.h>
  15 +
  16 +static struct sw_table *(*create_hw_table_hook)(void);
  17 +static struct module *hw_table_owner;
  18 +static DEFINE_SPINLOCK(hook_lock);
  19 +
  20 +/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or
  21 + * negative error. If 'table' is null it is assumed that table creation failed
  22 + * due to out-of-memory. */
  23 +static int add_table(struct sw_chain *chain, struct sw_table *table, int emerg)
  24 +{
  25 + if (table == NULL)
  26 + return -ENOMEM;
  27 + if (chain->n_tables >= CHAIN_MAX_TABLES) {
  28 + printk(KERN_EMERG "%s: too many tables in chain\n",
  29 + chain->dp->netdev->name);
  30 + table->destroy(table);
  31 + return -ENOBUFS;
  32 + }
  33 + if (emerg)
  34 + chain->emerg_table = table;
  35 + else
  36 + chain->tables[chain->n_tables++] = table;
  37 + return 0;
  38 +}
  39 +
/* Creates and returns a new chain associated with 'dp'.  Returns NULL if the
 * chain cannot be created. */
struct sw_chain *chain_create(struct datapath *dp)
{
	struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL);
	if (chain == NULL)
		goto error;
	chain->dp = dp;
	/* Pin the hardware-table module (if one registered itself) so it
	 * cannot be unloaded while this chain may hold tables it created.
	 * NOTE(review): create_hw_table_hook/hw_table_owner are read here
	 * without taking hook_lock -- confirm registration cannot race with
	 * chain creation. */
	chain->owner = try_module_get(hw_table_owner) ? hw_table_owner : NULL;
	if (chain->owner && create_hw_table_hook) {
		struct sw_table *hwtable = create_hw_table_hook();
		if (!hwtable || add_table(chain, hwtable, 0))
			goto error;
	}

	/* Standard software tables: a hash table (the two constants are
	 * presumably seeds/polynomials for its two hash functions -- see
	 * table-hash.c), a linear table for wildcarded flows, and a linear
	 * emergency table. */
	if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS,
						0x741B8CD7, TABLE_HASH_MAX_FLOWS),
		      0)
	    || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS), 0)
	    || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS), 1))
		goto error;
	return chain;

error:
	/* chain_destroy() is assumed to release any tables already added and
	 * the module reference taken above. */
	if (chain)
		chain_destroy(chain);
	return NULL;
}
  68 +
  69 +/* Searches 'chain' for a flow matching 'key', which must not have any wildcard
  70 + * fields. Returns the flow if successful, otherwise a null pointer.
  71 + *
  72 + * Caller must hold rcu_read_lock or dp_mutex. */
  73 +struct sw_flow *chain_lookup(struct sw_chain *chain,
  74 + const struct sw_flow_key *key, int emerg)
  75 +{
  76 + int i;
  77 +
  78 + BUG_ON(key->wildcards);
  79 + if (emerg) {
  80 + struct sw_table *t = chain->emerg_table;
  81 + struct sw_flow *flow = t->lookup(t, key);
  82 + t->n_lookup++;
  83 + if (flow) {
  84 + t->n_matched++;
  85 + return flow;
  86 + }
  87 + } else {
  88 + for (i = 0; i < chain->n_tables; i++) {
  89 + struct sw_table *t = chain->tables[i];
  90 + struct sw_flow *flow = t->lookup(t, key);
  91 + t->n_lookup++;
  92 + if (flow) {
  93 + t->n_matched++;
  94 + return flow;
  95 + }
  96 + }
  97 + }
  98 + return NULL;
  99 +}
  100 +
  101 +/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if
  102 + * successful or a negative error.
  103 + *
  104 + * If successful, 'flow' becomes owned by the chain, otherwise it is retained
  105 + * by the caller.
  106 + *
  107 + * Caller must hold dp_mutex. */
  108 +int chain_insert(struct sw_chain *chain, struct sw_flow *flow, int emerg)
  109 +{
  110 + int i;
  111 +
  112 + might_sleep();
  113 + if (emerg) {
  114 + struct sw_table *t = chain->emerg_table;
  115 + if (t->insert(t, flow))
  116 + return 0;
  117 + } else {
  118 + for (i = 0; i < chain->n_tables; i++) {
  119 + struct sw_table *t = chain->tables[i];
  120 + if (t->insert(t, flow))
  121 + return 0;
  122 + }
  123 + }
  124 + return -ENOBUFS;
  125 +}
  126 +
  127 +/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards
  128 + * and priority must match. Returns the number of flows that were modified.
  129 + *
  130 + * Expensive in the general case as currently implemented, since it requires
  131 + * iterating through the entire contents of each table for keys that contain
  132 + * wildcards. Relatively cheap for fully specified keys. */
  133 +int
  134 +chain_modify(struct sw_chain *chain, const struct sw_flow_key *key,
  135 + uint16_t priority, int strict,
  136 + const struct ofp_action_header *actions, size_t actions_len,
  137 + int emerg)
  138 +{
  139 + int count = 0;
  140 + int i;
  141 +
  142 + if (emerg) {
  143 + struct sw_table *t = chain->emerg_table;
  144 + count += t->modify(t, key, priority, strict,
  145 + actions, actions_len);
  146 + } else {
  147 + for (i = 0; i < chain->n_tables; i++) {
  148 + struct sw_table *t = chain->tables[i];
  149 + count += t->modify(t, key, priority, strict,
  150 + actions, actions_len);
  151 + }
  152 + }
  153 + return count;
  154 +}
  155 +
  156 +/* Checks whether the chain has an entry with the same priority which conflicts
  157 + * with 'key'. If 'strict' set, wildcards should also match. If 'strict' is not
  158 + * set, comparison is done 'module wildcards'.
  159 + *
  160 + * Returns 'true' if such an entry exists, 'false' otherwise. */
  161 +int
  162 +chain_has_conflict(struct sw_chain *chain, const struct sw_flow_key *key,
  163 + uint16_t priority, int strict)
  164 +{
  165 + int i;
  166 +
  167 + for (i = 0; i < chain->n_tables; i++) {
  168 + struct sw_table *t = chain->tables[i];
  169 + if (t->has_conflict(t, key, priority, strict)) {
  170 + return true;
  171 + }
  172 + }
  173 +
  174 + return false;
  175 +}
  176 +
  177 +/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port'
  178 + * is not OFPP_NONE, then matching entries must have that port as an
  179 + * argument for an output action. If 'strict" is set, then wildcards and
  180 + * priority must match. Returns the number of flows that were deleted.
  181 + *
  182 + * Expensive in the general case as currently implemented, since it requires
  183 + * iterating through the entire contents of each table for keys that contain
  184 + * wildcards. Relatively cheap for fully specified keys.
  185 + *
  186 + * Caller must hold dp_mutex. */
  187 +int chain_delete(struct sw_chain *chain, const struct sw_flow_key *key,
  188 + uint16_t out_port, uint16_t priority, int strict, int emerg)
  189 +{
  190 + int count = 0;
  191 + int i;
  192 +
  193 + might_sleep();
  194 + if (emerg) {
  195 + struct sw_table *t = chain->emerg_table;
  196 + count += t->delete(chain->dp, t, key,
  197 + out_port, priority, strict);
  198 + } else {
  199 + for (i = 0; i < chain->n_tables; i++) {
  200 + struct sw_table *t = chain->tables[i];
  201 + count += t->delete(chain->dp, t, key,
  202 + out_port, priority, strict);
  203 + }
  204 + }
  205 + return count;
  206 +}
  207 +
  208 +/* Performs timeout processing on all the tables in 'chain'. Returns the
  209 + * number of flow entries deleted through expiration.
  210 + *
  211 + * Expensive as currently implemented, since it iterates through the entire
  212 + * contents of each table.
  213 + *
  214 + * Caller must not hold dp_mutex, because individual tables take and release it
  215 + * as necessary. */
  216 +int chain_timeout(struct sw_chain *chain)
  217 +{
  218 + int count = 0;
  219 + int i;
  220 +
  221 + might_sleep();
  222 + for (i = 0; i < chain->n_tables; i++) {
  223 + struct sw_table *t = chain->tables[i];
  224 + count += t->timeout(chain->dp, t);
  225 + }
  226 + return count;
  227 +}
  228 +
  229 +/* Destroys 'chain', which must not have any users. */
  230 +void chain_destroy(struct sw_chain *chain)
  231 +{
  232 + int i;
  233 + struct sw_table *t = NULL;
  234 +
  235 + synchronize_rcu();
  236 + for (i = 0; i < chain->n_tables; i++) {
  237 + t = chain->tables[i];
  238 + if (t->destroy)
  239 + t->destroy(t);
  240 + }
  241 + t = chain->emerg_table;
  242 + if (t->destroy)
  243 + t->destroy(t);
  244 + module_put(chain->owner);
  245 + kfree(chain);
  246 +}
  247 +
  248 +int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void),
  249 + struct module *owner)
  250 +{
  251 + int retval = -EBUSY;
  252 +
  253 + spin_lock(&hook_lock);
  254 + if (!create_hw_table_hook) {
  255 + create_hw_table_hook = create_hw_table;
  256 + hw_table_owner = owner;
  257 + retval = 0;
  258 + }
  259 + spin_unlock(&hook_lock);
  260 +
  261 + return retval;
  262 +}
  263 +EXPORT_SYMBOL(chain_set_hw_hook);
  264 +
  265 +void chain_clear_hw_hook(void)
  266 +{
  267 + create_hw_table_hook = NULL;
  268 + hw_table_owner = NULL;
  269 +}
  270 +EXPORT_SYMBOL(chain_clear_hw_hook);
... ...
datapath/chain.h 0 → 100755
#ifndef CHAIN_H
#define CHAIN_H 1

#include <linux/types.h>

struct sw_flow;
struct sw_flow_key;
struct ofp_action_header;
struct datapath;


/* Capacity limits applied when chain_create() builds the default tables. */
#define TABLE_LINEAR_MAX_FLOWS 100
#define TABLE_HASH_MAX_FLOWS 65536

/* Set of tables chained together in sequence from cheap to expensive. */
#define CHAIN_MAX_TABLES 4
struct sw_chain {
	int n_tables;		/* Number of valid entries in tables[]. */
	struct sw_table *tables[CHAIN_MAX_TABLES]; /* Searched in order. */
	struct sw_table *emerg_table;	/* Emergency flows only; consulted
					 * when the 'emerg' flag is set. */

	struct datapath *dp;	/* Datapath this chain belongs to. */
	struct module *owner;	/* Module providing the hardware table via
				 * chain_set_hw_hook(), or NULL. */
};

/* Construction/destruction. */
struct sw_chain *chain_create(struct datapath *);
/* Exact-match lookup; final int selects the emergency table. */
struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *,
			     int);
int chain_insert(struct sw_chain *, struct sw_flow *, int);
int chain_modify(struct sw_chain *, const struct sw_flow_key *,
		 uint16_t, int, const struct ofp_action_header *, size_t, int);
int chain_has_conflict(struct sw_chain *, const struct sw_flow_key *,
		       uint16_t, int);
int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t,
		 uint16_t, int, int);
int chain_timeout(struct sw_chain *);
void chain_destroy(struct sw_chain *);

/* Vendor hardware-table registration (see chain.c). */
int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void),
		      struct module *owner);
void chain_clear_hw_hook(void);

#endif /* chain.h */
... ...
datapath/compat.h 0 → 100755
#ifndef COMPAT_H
#define COMPAT_H 1

#include <linux/version.h>

/* Pull in backward-compatibility shims for the kernel series being built
 * against; only the 2.6 series has shims at present. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)

#include "compat26.h"

#endif

#endif /* compat.h */
... ...
datapath/crc32.c 0 → 100755
  1 +/*
  2 + * Distributed under the terms of the GNU GPL version 2.
  3 + * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
  4 + * Stanford Junior University
  5 + */
  6 +
  7 +#include <linux/module.h>
  8 +#include "crc32.h"
  9 +
  10 +void crc32_init(struct crc32 *crc, unsigned int polynomial)
  11 +{
  12 + int i;
  13 +
  14 + for (i = 0; i < CRC32_TABLE_SIZE; ++i) {
  15 + unsigned int reg = i << 24;
  16 + int j;
  17 + for (j = 0; j < CRC32_TABLE_BITS; j++) {
  18 + int topBit = (reg & 0x80000000) != 0;
  19 + reg <<= 1;
  20 + if (topBit)
  21 + reg ^= polynomial;
  22 + }
  23 + crc->table[i] = reg;
  24 + }
  25 +}
  26 +
  27 +EXPORT_SYMBOL(crc32_init);
  28 +
  29 +unsigned int crc32_calculate(const struct crc32 *crc,
  30 + const void *data_, size_t n_bytes)
  31 +{
  32 + // FIXME: this can be optimized by unrolling, see linux-2.6/lib/crc32.c.
  33 + const uint8_t *data = data_;
  34 + unsigned int result = 0;
  35 + size_t i;
  36 +
  37 + for (i = 0; i < n_bytes; i++) {
  38 + unsigned int top = result >> 24;
  39 + top ^= data[i];
  40 + result = (result << 8) ^ crc->table[top];
  41 + }
  42 + return result;
  43 +}
  44 +
  45 +EXPORT_SYMBOL(crc32_calculate);
... ...
datapath/crc32.h 0 → 100755
#ifndef CRC32_H
#define CRC32_H 1

#include <linux/types.h>
#ifndef __KERNEL__
#include <stdint.h>
#endif
#include <stddef.h>

/* Table-driven CRC-32: one precomputed remainder per possible top byte. */
#define CRC32_TABLE_BITS 8
#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS)

struct crc32 {
	unsigned int table[CRC32_TABLE_SIZE];	/* Filled by crc32_init(). */
};

/* Precomputes the table for 'polynomial' (MSB-first, unreflected form). */
void crc32_init(struct crc32 *, unsigned int polynomial);
/* Returns the CRC (initial value 0, no final XOR) of 'n_bytes' at 'data_'. */
unsigned int crc32_calculate(const struct crc32 *,
			     const void *data_, size_t n_bytes);


#endif /* crc32.h */
... ...
datapath/datapath.c 0 → 100755
  1 +/*
  2 + * Distributed under the terms of the GNU GPL version 2.
  3 + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
  4 + * Stanford Junior University
  5 + */
  6 +
  7 +/* Functions for managing the dp interface/device. */
  8 +
  9 +#include <linux/init.h>
  10 +#include <linux/module.h>
  11 +#include <linux/if_arp.h>
  12 +#include <linux/if_bridge.h>
  13 +#include <linux/if_vlan.h>
  14 +#include <linux/in.h>
  15 +#include <net/genetlink.h>
  16 +#include <linux/ip.h>
  17 +#include <linux/delay.h>
  18 +#include <linux/time.h>
  19 +#include <linux/etherdevice.h>
  20 +#include <linux/kernel.h>
  21 +#include <linux/kthread.h>
  22 +#include <linux/mutex.h>
  23 +#include <linux/rtnetlink.h>
  24 +#include <linux/rcupdate.h>
  25 +#include <linux/version.h>
  26 +#include <linux/ethtool.h>
  27 +#include <linux/random.h>
  28 +#include <linux/utsname.h>
  29 +#include <asm/system.h>
  30 +#include <asm/div64.h>
  31 +#include <linux/netfilter_bridge.h>
  32 +#include <linux/netfilter_ipv4.h>
  33 +#include <linux/inetdevice.h>
  34 +#include <linux/list.h>
  35 +#include <linux/rculist.h>
  36 +#include <linux/workqueue.h>
  37 +#include <linux/dmi.h>
  38 +
  39 +#include "openflow/nicira-ext.h"
  40 +#include "openflow/openflow-netlink.h"
  41 +#include "datapath.h"
  42 +#include "table.h"
  43 +#include "chain.h"
  44 +#include "dp_dev.h"
  45 +#include "forward.h"
  46 +#include "flow.h"
  47 +
  48 +#include "compat.h"
  49 +
  50 +
/* Strings to describe the manufacturer, hardware, and software. This data
 * is queriable through the switch description stats message. */
static char mfr_desc[DESC_STR_LEN] = "Stanford University";
static char hw_desc[DESC_STR_LEN] = "Reference Kernelspace Switch";
static char sw_desc[DESC_STR_LEN] = VERSION BUILDNR;
static char serial_num[SERIAL_NUM_LEN] = "None";

/* All four are overridable as read-only module parameters at load time. */
module_param_string(mfr_desc, mfr_desc, sizeof mfr_desc, 0444);
module_param_string(hw_desc, hw_desc, sizeof hw_desc, 0444);
module_param_string(sw_desc, sw_desc, sizeof sw_desc, 0444);
module_param_string(serial_num, serial_num, sizeof serial_num, 0444);

/* Optional callbacks that other modules may install; each is checked for
 * NULL before use at its call site. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/* Invoked after a datapath is created (see new_dp()). */
int (*dp_add_dp_hook)(struct datapath *dp);
EXPORT_SYMBOL(dp_add_dp_hook);

/* Invoked while a datapath is being torn down (see del_dp()). */
int (*dp_del_dp_hook)(struct datapath *dp);
EXPORT_SYMBOL(dp_del_dp_hook);

/* Invoked after a port is attached (see add_switch_port()). */
int (*dp_add_if_hook)(struct net_bridge_port *p);
EXPORT_SYMBOL(dp_add_if_hook);

/* Invoked for non-local ports being detached; when set, the hook takes over
 * freeing the port (see dp_del_switch_port()). */
int (*dp_del_if_hook)(struct net_bridge_port *p);
EXPORT_SYMBOL(dp_del_if_hook);

/* Number of milliseconds between runs of the maintenance thread. */
#define MAINT_SLEEP_MSECS 1000

static struct genl_family dp_genl_family;

/*
 * Datapath multicast groups.
 *
 * Really we want one multicast group per in-use datapath (or even more than
 * one).  Locking issues, however, mean that we can't allocate a multicast
 * group at the point in the code where we we actually create a datapath[*], so
 * we have to pre-allocate them.  It's massive overkill to allocate DP_MAX of
 * them in advance, since we will hardly ever actually create DP_MAX datapaths,
 * so instead we allocate a few multicast groups at startup and choose one for
 * each datapath by hashing its datapath index.
 *
 * [*] dp_genl_add, to add a new datapath, is called under the genl_lock
 *     mutex, and genl_register_mc_group, called to acquire a new multicast
 *     group ID, also acquires genl_lock, thus deadlock.
 */
#define N_MC_GROUPS 16 /* Must be power of 2. */
static struct genl_multicast_group mc_groups[N_MC_GROUPS];

/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
 * by dp_mutex.  dp_mutex is almost completely redundant with genl_mutex
 * maintained by the Generic Netlink code, but the timeout path needs mutual
 * exclusion too.
 *
 * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
 * lock first.
 *
 * It is safe to access the datapath and net_bridge_port structures with just
 * dp_mutex.
 */
static struct datapath *dps[DP_MAX];
DEFINE_MUTEX(dp_mutex);
EXPORT_SYMBOL(dp_mutex);

/* Forward declarations for definitions later in this file. */
static int dp_maint_func(void *data);
static void init_port_status(struct net_bridge_port *p);
static int dp_genl_openflow_done(struct netlink_callback *);
static struct net_bridge_port *new_nbp(struct datapath *,
				       struct net_device *, int port_no);
  121 +
  122 +/* nla_shrink - reduce amount of space reserved by nla_reserve
  123 + * @skb: socket buffer from which to recover room
  124 + * @nla: netlink attribute to adjust
  125 + * @len: new length of attribute payload
  126 + *
  127 + * Reduces amount of space reserved by a call to nla_reserve.
  128 + *
  129 + * No other attributes may be added between calling nla_reserve and this
  130 + * function, since it will create a hole in the message.
  131 + */
  132 +void nla_shrink(struct sk_buff *skb, struct nlattr *nla, int len)
  133 +{
  134 + int delta = nla_total_size(len) - nla_total_size(nla_len(nla));
  135 + BUG_ON(delta > 0);
  136 + skb->tail += delta;
  137 + skb->len += delta;
  138 + nla->nla_len = nla_attr_size(len);
  139 +}
  140 +
/* Puts a set of openflow headers for a message of the given 'type' into 'skb'.
 * If 'sender' is nonnull, then it is used as the message's destination.  'dp'
 * must specify the datapath to use.
 *
 * '*max_openflow_len' receives the maximum number of bytes that are available
 * for the embedded OpenFlow message.  The caller must call
 * resize_openflow_skb() to set the actual size of the message to this number
 * of bytes or less.
 *
 * Returns the openflow header if successful, otherwise (if 'skb' is too
 * small) an ERR_PTR-encoded error code. */
static void *
put_openflow_headers(struct datapath *dp, struct sk_buff *skb, uint8_t type,
		     const struct sender *sender, int *max_openflow_len)
{
	struct ofp_header *oh;
	struct nlattr *attr;
	int openflow_len;

	/* Assemble the Generic Netlink wrapper. */
	if (!genlmsg_put(skb,
			 sender ? sender->pid : 0,
			 sender ? sender->seq : 0,
			 &dp_genl_family, 0, DP_GENL_C_OPENFLOW))
		return ERR_PTR(-ENOBUFS);
	if (nla_put_u32(skb, DP_GENL_A_DP_IDX, dp->dp_idx) < 0)
		return ERR_PTR(-ENOBUFS);
	/* Reserve all remaining tailroom, rounded down to NLA alignment,
	 * for the OpenFlow payload attribute; resize_openflow_skb() trims
	 * it to the real size later. */
	openflow_len = (skb_tailroom(skb) - NLA_HDRLEN) & ~(NLA_ALIGNTO - 1);
	if (openflow_len < sizeof *oh)
		return ERR_PTR(-ENOBUFS);
	*max_openflow_len = openflow_len;
	attr = nla_reserve(skb, DP_GENL_A_OPENFLOW, openflow_len);
	BUG_ON(!attr);	/* Cannot fail: tailroom was checked just above. */

	/* Fill in the header.  The caller is responsible for the length. */
	oh = nla_data(attr);
	oh->version = OFP_VERSION;
	oh->type = type;
	oh->xid = sender ? sender->xid : 0;

	return oh;
}
  183 +
/* Resizes OpenFlow header 'oh', which must be at the tail end of 'skb', to new
 * length 'new_length' (in bytes), adjusting pointers and size values as
 * necessary.  Also stamps the final length into the OpenFlow header and
 * closes out the netlink message. */
static void
resize_openflow_skb(struct sk_buff *skb,
		    struct ofp_header *oh, size_t new_length)
{
	/* 'oh' is the payload of the DP_GENL_A_OPENFLOW attribute, so the
	 * nlattr header sits immediately before it. */
	struct nlattr *attr = ((void *) oh) - NLA_HDRLEN;
	nla_shrink(skb, attr, new_length);
	oh->length = htons(new_length);
	nlmsg_end(skb, (struct nlmsghdr *) skb->data);
}
  196 +
/* Allocates a new skb to contain an OpenFlow message 'openflow_len' bytes in
 * length.  Returns a null pointer if memory is unavailable, otherwise returns
 * the OpenFlow header and stores a pointer to the skb in '*pskb'.
 *
 * 'type' is the OpenFlow message type.  If 'sender' is nonnull, then it is
 * used as the message's destination.  'dp' must specify the datapath to
 * use. */
static void *
alloc_openflow_skb(struct datapath *dp, size_t openflow_len, uint8_t type,
		   const struct sender *sender, struct sk_buff **pskb)
{
	struct ofp_header *oh;
	size_t genl_len;
	struct sk_buff *skb;
	int max_openflow_len;

	/* The OpenFlow header carries a 16-bit length field, so refuse
	 * anything that cannot be represented in it. */
	if ((openflow_len + sizeof(struct ofp_header)) > UINT16_MAX) {
		if (net_ratelimit())
			printk(KERN_ERR "%s: alloc_openflow_skb: openflow "
					"message too large: %zu\n",
					dp->netdev->name, openflow_len);
		return NULL;
	}

	/* Size the buffer for the genetlink wrapper plus both attributes. */
	genl_len = nlmsg_total_size(GENL_HDRLEN + dp_genl_family.hdrsize);
	genl_len += nla_total_size(sizeof(uint32_t)); /* DP_GENL_A_DP_IDX */
	genl_len += nla_total_size(openflow_len); /* DP_GENL_A_OPENFLOW */
	skb = *pskb = genlmsg_new(genl_len, GFP_ATOMIC);
	if (!skb) {
		return NULL;
	}

	/* Cannot fail: the skb was sized for exactly this content. */
	oh = put_openflow_headers(dp, skb, type, sender, &max_openflow_len);
	BUG_ON(!oh || IS_ERR(oh));
	resize_openflow_skb(skb, oh, openflow_len);

	return oh;
}
  235 +
  236 +/* Returns the ID of the multicast group used by datapath 'dp'. */
  237 +static u32
  238 +dp_mc_group(const struct datapath *dp)
  239 +{
  240 + return mc_groups[dp->dp_idx & (N_MC_GROUPS - 1)].id;
  241 +}
  242 +
  243 +/* Sends 'skb' to 'sender' if it is nonnull, otherwise multicasts 'skb' to all
  244 + * listeners. */
  245 +static int
  246 +send_openflow_skb(const struct datapath *dp,
  247 + struct sk_buff *skb, const struct sender *sender)
  248 +{
  249 + return (sender
  250 + ? genlmsg_unicast(skb, sender->pid)
  251 + : genlmsg_multicast(skb, 0, dp_mc_group(dp), GFP_ATOMIC));
  252 +}
  253 +
  254 +/* Retrieves the datapath id, which is the MAC address of the "of" device. */
  255 +static
  256 +uint64_t get_datapath_id(struct net_device *dev)
  257 +{
  258 + uint64_t id = 0;
  259 + int i;
  260 +
  261 + for (i=0; i<ETH_ALEN; i++)
  262 + id |= (uint64_t)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i));
  263 +
  264 + return id;
  265 +}
  266 +
  267 +/* Find the first free datapath index. Return the index or -1 if a free
  268 + * index could not be found. */
  269 +int gen_dp_idx(void)
  270 +{
  271 + int i;
  272 +
  273 + for (i=0; i<DP_MAX; i++) {
  274 + if (!dps[i])
  275 + return i;
  276 + }
  277 +
  278 + return -1;
  279 +}
  280 +
  281 +/* Creates a new datapath numbered 'dp_idx'. If 'dp_idx' is -1, it
  282 + * allocates the lowest numbered index available. If 'dp_name' is not
  283 + * null, it is used as the device name instead of the default one.
  284 + * Returns 0 for success or a negative error code. */
  285 +static int new_dp(int dp_idx, const char *dp_name)
  286 +{
  287 + struct datapath *dp;
  288 + struct new_utsname *u;
  289 + int err;
  290 +
  291 + rtnl_lock();
  292 + mutex_lock(&dp_mutex);
  293 + if (dp_idx == -1)
  294 + dp_idx = gen_dp_idx();
  295 +
  296 + err = -EINVAL;
  297 + if (dp_idx < 0 || dp_idx >= DP_MAX)
  298 + goto err_unlock;
  299 +
  300 + err = -ENODEV;
  301 + if (!try_module_get(THIS_MODULE))
  302 + goto err_unlock;
  303 +
  304 + /* Exit early if a datapath with that number already exists. */
  305 + err = -EEXIST;
  306 + if (dps[dp_idx])
  307 + goto err_put;
  308 +
  309 + err = -ENOMEM;
  310 + dp = kzalloc(sizeof *dp, GFP_KERNEL);
  311 + if (dp == NULL)
  312 + goto err_put;
  313 +
  314 + dp->dp_idx = dp_idx;
  315 + /* copied from sys_gethostname() */
  316 + u = utsname();
  317 + /* shouldn't need to lock b/c no userspace interactions */
  318 + snprintf(dp->dp_desc, sizeof dp->dp_desc, "%s idx=%d", u->nodename, dp_idx);
  319 +
  320 + /* Setup our datapath device */
  321 + err = dp_dev_setup(dp, dp_name);
  322 + if (err)
  323 + goto err_free_dp;
  324 +
  325 + dp->chain = chain_create(dp);
  326 + if (dp->chain == NULL)
  327 + goto err_destroy_dp_dev;
  328 + INIT_LIST_HEAD(&dp->port_list);
  329 +
  330 + dp->local_port = new_nbp(dp, dp->netdev, OFPP_LOCAL);
  331 + if (IS_ERR(dp->local_port)) {
  332 + err = PTR_ERR(dp->local_port);
  333 + goto err_destroy_local_port;
  334 + }
  335 +
  336 + dp->flags = 0;
  337 + dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
  338 +
  339 + dp->dp_task = kthread_run(dp_maint_func, dp, "dp%d", dp_idx);
  340 + if (IS_ERR(dp->dp_task))
  341 + goto err_destroy_chain;
  342 +
  343 + dps[dp_idx] = dp;
  344 + mutex_unlock(&dp_mutex);
  345 + rtnl_unlock();
  346 +
  347 + if (dp_add_dp_hook)
  348 + dp_add_dp_hook(dp);
  349 +
  350 + return 0;
  351 +
  352 +err_destroy_local_port:
  353 + dp_del_switch_port(dp->local_port);
  354 +err_destroy_chain:
  355 + chain_destroy(dp->chain);
  356 +err_destroy_dp_dev:
  357 + dp_dev_destroy(dp);
  358 +err_free_dp:
  359 + kfree(dp);
  360 +err_put:
  361 + module_put(THIS_MODULE);
  362 +err_unlock:
  363 + mutex_unlock(&dp_mutex);
  364 + rtnl_unlock();
  365 + return err;
  366 +}
  367 +
  368 +/* Find and return a free port number under 'dp'. */
  369 +static int find_portno(struct datapath *dp)
  370 +{
  371 + int i;
  372 + for (i = 1; i < DP_MAX_PORTS; i++)
  373 + if (dp->ports[i] == NULL)
  374 + return i;
  375 + return -EXFULL;
  376 +}
  377 +
/* Allocates a net_bridge_port attaching 'dev' as port 'port_no' of 'dp' and
 * publishes it (via RCU) on the device, the datapath port array, and the
 * datapath port list.  Returns the new port or an ERR_PTR on failure.
 *
 * Called with RTNL lock and dp_mutex. */
static struct net_bridge_port *new_nbp(struct datapath *dp,
				       struct net_device *dev, int port_no)
{
	struct net_bridge_port *p;

	/* A device may belong to at most one bridge/datapath at a time. */
	if (dev->br_port != NULL)
		return ERR_PTR(-EBUSY);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	/* Receive all frames, not just those addressed to the device, and
	 * hold a device reference for the lifetime of the port. */
	dev_set_promiscuity(dev, 1);
	dev_hold(dev);
	p->dp = dp;
	p->dev = dev;
	p->port_no = port_no;
	spin_lock_init(&p->lock);
	/* The internal local port is not hooked up as a bridge port, and is
	 * presumably out of range of ports[] (OFPP_LOCAL >= DP_MAX_PORTS) —
	 * hence the two separate guards below. */
	if (port_no != OFPP_LOCAL)
		rcu_assign_pointer(dev->br_port, p);
	if (port_no < DP_MAX_PORTS)
		rcu_assign_pointer(dp->ports[port_no], p);
	list_add_rcu(&p->node, &dp->port_list);

	return p;
}
  405 +
  406 +/* Called with RTNL lock and dp_mutex. */
  407 +int add_switch_port(struct datapath *dp, struct net_device *dev)
  408 +{
  409 + struct net_bridge_port *p;
  410 + int port_no;
  411 +
  412 + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER
  413 + || is_dp_dev(dev))
  414 + return -EINVAL;
  415 +
  416 + port_no = find_portno(dp);
  417 + if (port_no < 0)
  418 + return port_no;
  419 +
  420 + p = new_nbp(dp, dev, port_no);
  421 + if (IS_ERR(p))
  422 + return PTR_ERR(p);
  423 +
  424 + init_port_status(p);
  425 +
  426 + if (dp_add_if_hook)
  427 + dp_add_if_hook(p);
  428 +
  429 + /* Notify the ctlpath that this port has been added */
  430 + dp_send_port_status(p, OFPPR_ADD);
  431 +
  432 + return 0;
  433 +}
  434 +
  435 +/* Delete 'p' from switch.
  436 + * Called with RTNL lock and dp_mutex. */
  437 +int dp_del_switch_port(struct net_bridge_port *p)
  438 +{
  439 +
  440 +#if CONFIG_SYSFS
  441 + if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook)
  442 + sysfs_remove_link(&p->dp->ifobj, p->dev->name);
  443 +#endif
  444 +
  445 + /* First drop references to device. */
  446 + dev_set_promiscuity(p->dev, -1);
  447 + list_del_rcu(&p->node);
  448 + if (p->port_no != OFPP_LOCAL)
  449 + rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
  450 + rcu_assign_pointer(p->dev->br_port, NULL);
  451 +
  452 + /* Then wait until no one is still using it, and destroy it. */
  453 + synchronize_rcu();
  454 +
  455 + /* Notify the ctlpath that this port no longer exists */
  456 + dp_send_port_status(p, OFPPR_DELETE);
  457 +
  458 + if ((p->port_no != OFPP_LOCAL) && dp_del_if_hook) {
  459 + dp_del_if_hook(p);
  460 + } else {
  461 + dev_put(p->dev);
  462 + kfree(p);
  463 + }
  464 +
  465 + return 0;
  466 +}
  467 +
/* Tears down datapath 'dp': stops the maintenance thread, detaches every
 * switch port, unpublishes the datapath from dps[], flushes buffered
 * packets, destroys the flow chain, and frees everything.
 *
 * NOTE(review): presumably called with RTNL lock and dp_mutex held, like the
 * port functions it calls — confirm at call sites. */
static void del_dp(struct datapath *dp)
{
	struct net_bridge_port *p, *n;

	/* dp_maint_func() sleeps interruptibly; SIGKILL kicks it out of the
	 * sleep so kthread_stop() does not have to wait a full period. */
	send_sig(SIGKILL, dp->dp_task, 0);
	kthread_stop(dp->dp_task);

	/* Drop references to DP. */
	list_for_each_entry_safe (p, n, &dp->port_list, node)
		dp_del_switch_port(p);

	if (dp_del_dp_hook)
		dp_del_dp_hook(dp);

	rcu_assign_pointer(dps[dp->dp_idx], NULL);

	/* Kill off local_port dev references from buffered packets that have
	 * associated dst entries. */
	synchronize_rcu();
	fwd_discard_all();

	/* Destroy dp->netdev.  (Must follow deleting switch ports since
	 * dp->local_port has a reference to it.) */
	dp_dev_destroy(dp);

	/* Wait until no longer in use, then destroy it. */
	synchronize_rcu();
	chain_destroy(dp->chain);
	kfree(dp);
	module_put(THIS_MODULE);
}
  499 +
/* Body of the per-datapath maintenance kthread: expires old flow entries
 * roughly once per MAINT_SLEEP_MSECS until del_dp() delivers SIGKILL, then
 * parks until kthread_stop() reaps it.  The return value is unused. */
static int dp_maint_func(void *data)
{
	struct datapath *dp = (struct datapath *) data;

	/* Kthreads ignore signals by default; opt in so del_dp() can
	 * interrupt our sleep with SIGKILL. */
	allow_signal(SIGKILL);
	while (!signal_pending(current)) {
		/* Timeout old entries */
		chain_timeout(dp->chain);
		msleep_interruptible(MAINT_SLEEP_MSECS);
	}
	/* Signalled: idle uninterruptibly until kthread_stop() is called
	 * (kthread_stop() requires the thread to still be alive). */
	while (!kthread_should_stop()) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}
	return 0;
}
  516 +
  517 +static void
  518 +do_port_input(struct net_bridge_port *p, struct sk_buff *skb)
  519 +{
  520 + /* Make our own copy of the packet. Otherwise we will mangle the
  521 + * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
  522 + * (No one comes after us, since we tell handle_bridge() that we took
  523 + * the packet.) */
  524 + skb = skb_share_check(skb, GFP_ATOMIC);
  525 + if (!skb)
  526 + return;
  527 +
  528 + /* Push the Ethernet header back on. */
  529 + skb_push(skb, ETH_HLEN);
  530 + skb_reset_mac_header(skb);
  531 + fwd_port_input(p->dp->chain, skb, p);
  532 +}
  533 +
/*
 * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
 * different set of devices!)
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/* Called with rcu_read_lock. */
static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
				     struct sk_buff *skb)
{
	do_port_input(p, skb);
	/* NULL tells the bridge code we consumed the packet. */
	return NULL;
}
#else
/* Pre-2.6.22 hook signature; returning 1 means we consumed the packet. */
static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
{
	do_port_input(p, *pskb);
	return 1;
}
#endif
  553 +
  554 +/* Forwarding output path.
  555 + * Based on net/bridge/br_forward.c. */
  556 +
  557 +static inline unsigned packet_length(const struct sk_buff *skb)
  558 +{
  559 + unsigned length = skb->len - ETH_HLEN;
  560 + if (skb->protocol == htons(ETH_P_8021Q))
  561 + length -= VLAN_HLEN;
  562 + return length;
  563 +}
  564 +
  565 +/* Send packets out all the ports except the originating one. If the
  566 + * "flood" argument is set, only send along the minimum spanning tree.
  567 + */
  568 +static int
  569 +output_all(struct datapath *dp, struct sk_buff *skb, int flood)
  570 +{
  571 + u32 disable = flood ? OFPPC_NO_FLOOD : 0;
  572 + struct net_bridge_port *p;
  573 + int prev_port = -1;
  574 +
  575 + list_for_each_entry_rcu (p, &dp->port_list, node) {
  576 + if (skb->dev == p->dev || p->config & disable)
  577 + continue;
  578 + if (prev_port != -1) {
  579 + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
  580 + if (!clone) {
  581 + kfree_skb(skb);
  582 + return -ENOMEM;
  583 + }
  584 + dp_output_port(dp, clone, prev_port, 0);
  585 + }
  586 + prev_port = p->port_no;
  587 + }
  588 + if (prev_port != -1)
  589 + dp_output_port(dp, skb, prev_port, 0);
  590 + else
  591 + kfree_skb(skb);
  592 +
  593 + return 0;
  594 +}
  595 +
  596 +/* Marks 'skb' as having originated from 'in_port' in 'dp'.
  597 + FIXME: how are devices reference counted? */
  598 +void dp_set_origin(struct datapath *dp, uint16_t in_port,
  599 + struct sk_buff *skb)
  600 +{
  601 + struct net_bridge_port *p;
  602 + p = (in_port < DP_MAX_PORTS ? dp->ports[in_port]
  603 + : in_port == OFPP_LOCAL ? dp->local_port
  604 + : NULL);
  605 + if (p)
  606 + skb->dev = p->dev;
  607 + else
  608 + skb->dev = NULL;
  609 +}
  610 +
/* Transmits 'skb' out its output device (skb->dev must already be set and be
 * attached to a datapath port).  Consumes 'skb' in all cases.  Returns the
 * packet length handed to the device, or -E2BIG for a non-GSO packet that
 * exceeds the device MTU.
 *
 * NOTE(review): the return value of dev_queue_xmit() is ignored, so
 * device-level drops are still reported as success. */
int
dp_xmit_skb(struct sk_buff *skb)
{
	struct datapath *dp = skb->dev->br_port->dp;
	/* Capture the length now; the skb is gone after transmission. */
	int len = skb->len;

	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
		printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
		       dp->netdev->name, packet_length(skb), skb->dev->mtu);
		kfree_skb(skb);
		return -E2BIG;
	}

	dev_queue_xmit(skb);

	return len;
}
  628 +
/* Takes ownership of 'skb' and transmits it to 'out_port' on 'dp'.
 *
 * 'out_port' is either a physical port number or one of the reserved OFPP_*
 * pseudo-ports, each dispatched below.  'ignore_no_fwd' overrides the
 * OFPPC_NO_FWD port flag.  Returns 0 or a positive byte count on success,
 * or a negative errno; 'skb' is consumed on every path. */
int dp_output_port(struct datapath *dp, struct sk_buff *skb, int out_port,
		   int ignore_no_fwd)
{
	BUG_ON(!skb);
	switch (out_port){
	case OFPP_IN_PORT:
		/* Send it out the port it came in on, which is already set in
		 * the skb. */
		if (!skb->dev) {
			if (net_ratelimit())
				printk(KERN_NOTICE "%s: skb device not set "
				       "forwarding to in_port\n",
				       dp->netdev->name);
			kfree_skb(skb);
			return -ESRCH;
		}
		return dp_xmit_skb(skb);

	case OFPP_TABLE: {
		/* Re-submit the packet to the flow tables. */
		int retval = run_flow_through_tables(dp->chain, skb,
						     skb->dev->br_port);
		if (retval)
			kfree_skb(skb);
		return retval;
	}

	case OFPP_FLOOD:
		/* All ports except ingress and OFPPC_NO_FLOOD ports. */
		return output_all(dp, skb, 1);

	case OFPP_ALL:
		/* All ports except ingress. */
		return output_all(dp, skb, 0);

	case OFPP_CONTROLLER:
		return dp_output_control(dp, skb, UINT16_MAX, OFPR_ACTION);

	case OFPP_LOCAL: {
		/* Deliver to the datapath's own network device. */
		struct net_device *dev = dp->netdev;
		return dev ? dp_dev_recv(dev, skb) : -ESRCH;
	}

	case 0 ... DP_MAX_PORTS - 1: {
		/* A plain physical port. */
		struct net_bridge_port *p = dp->ports[out_port];
		if (p == NULL)
			goto bad_port;
		if (p->dev == skb->dev) {
			/* To send to the input port, must use OFPP_IN_PORT */
			kfree_skb(skb);
			if (net_ratelimit())
				printk(KERN_NOTICE "%s: can't directly "
				       "forward to input port\n",
				       dp->netdev->name);
			return -EINVAL;
		}
		if (p->config & OFPPC_NO_FWD && !ignore_no_fwd) {
			/* Port is administratively down: drop silently. */
			kfree_skb(skb);
			return 0;
		}
		skb->dev = p->dev;
		return dp_xmit_skb(skb);
	}

	default:
		goto bad_port;
	}

bad_port:
	kfree_skb(skb);
	if (net_ratelimit())
		printk(KERN_NOTICE "%s: can't forward to bad port %d\n",
		       dp->netdev->name, out_port);
	return -ENOENT;
}
  703 +
/* Takes ownership of 'skb' and transmits it to 'dp''s control path. 'reason'
 * indicates why 'skb' is being sent. 'max_len' sets the maximum number of
 * bytes that the caller wants to be sent.
 *
 * Builds an OFPT_PACKET_IN message around (a prefix of) the packet and sends
 * it with send_openflow_skb().  The original 'skb' is always freed before
 * returning.  Returns 0 on success or a negative errno.
 */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb,
		  size_t max_len, int reason)
{
	/* FIXME? Can we avoid creating a new skbuff in the case where we
	 * forward the whole packet? */
	struct sk_buff *f_skb;
	struct ofp_packet_in *opi;
	size_t fwd_len, opi_len;
	uint32_t buffer_id;
	int err;

	WARN_ON_ONCE(skb_shared(skb));

	/* Try to stash the packet so the controller can later refer to it by
	 * buffer id instead of resending it. */
	buffer_id = fwd_save_skb(skb);

	fwd_len = skb->len;
	if (buffer_id != (uint32_t) -1)
		/* Buffered: only a 'max_len' prefix needs to go up.  When
		 * buffering failed ((uint32_t) -1) the whole packet goes. */
		fwd_len = min(fwd_len, max_len);

	opi_len = offsetof(struct ofp_packet_in, data) + fwd_len;
	opi = alloc_openflow_skb(dp, opi_len, OFPT_PACKET_IN, NULL, &f_skb);
	if (!opi) {
		err = -ENOMEM;
		goto out;
	}
	opi->buffer_id = htonl(buffer_id);
	opi->total_len = htons(skb->len);
	/* Packets from the datapath's own device have no bridge port and are
	 * reported as coming from OFPP_LOCAL. */
	opi->in_port = htons(skb->dev && skb->dev->br_port
			     ? skb->dev->br_port->port_no
			     : OFPP_LOCAL);
	opi->reason = reason;
	opi->pad = 0;
	skb_copy_bits(skb, 0, opi->data, fwd_len);
	err = send_openflow_skb(dp, f_skb, NULL);

out:
	kfree_skb(skb);
	return err;
}
  748 +
/* Fills 'desc' with the OpenFlow description of port 'p': number, name,
 * hardware address, config/state flags and (where ethtool is available) the
 * current/supported/advertised link features.  All multi-byte fields are in
 * network byte order on return. */
static void fill_port_desc(struct net_bridge_port *p, struct ofp_phy_port *desc)
{
	unsigned long flags;
	desc->port_no = htons(p->port_no);
	/* Force NUL termination: strncpy alone does not guarantee it. */
	strncpy(desc->name, p->dev->name, OFP_MAX_PORT_NAME_LEN);
	desc->name[OFP_MAX_PORT_NAME_LEN-1] = '\0';
	memcpy(desc->hw_addr, p->dev->dev_addr, ETH_ALEN);
	/* The feature words are accumulated in host order and byte-swapped
	 * once at the end of the function. */
	desc->curr = 0;
	desc->supported = 0;
	desc->advertised = 0;
	desc->peer = 0;

	/* p->lock guards config/state (cf. dp_update_port_flags). */
	spin_lock_irqsave(&p->lock, flags);
	desc->config = htonl(p->config);
	desc->state = htonl(p->state);
	spin_unlock_irqrestore(&p->lock, flags);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,24)
	if (p->dev->ethtool_ops && p->dev->ethtool_ops->get_settings) {
		struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };

		if (!p->dev->ethtool_ops->get_settings(p->dev, &ecmd)) {
			/* Set the supported features */
			if (ecmd.supported & SUPPORTED_10baseT_Half)
				desc->supported |= OFPPF_10MB_HD;
			if (ecmd.supported & SUPPORTED_10baseT_Full)
				desc->supported |= OFPPF_10MB_FD;
			if (ecmd.supported & SUPPORTED_100baseT_Half)
				desc->supported |= OFPPF_100MB_HD;
			if (ecmd.supported & SUPPORTED_100baseT_Full)
				desc->supported |= OFPPF_100MB_FD;
			if (ecmd.supported & SUPPORTED_1000baseT_Half)
				desc->supported |= OFPPF_1GB_HD;
			if (ecmd.supported & SUPPORTED_1000baseT_Full)
				desc->supported |= OFPPF_1GB_FD;
			if (ecmd.supported & SUPPORTED_10000baseT_Full)
				desc->supported |= OFPPF_10GB_FD;
			if (ecmd.supported & SUPPORTED_TP)
				desc->supported |= OFPPF_COPPER;
			if (ecmd.supported & SUPPORTED_FIBRE)
				desc->supported |= OFPPF_FIBER;
			if (ecmd.supported & SUPPORTED_Autoneg)
				desc->supported |= OFPPF_AUTONEG;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
			/* Pause-frame bits only exist in newer kernels. */
			if (ecmd.supported & SUPPORTED_Pause)
				desc->supported |= OFPPF_PAUSE;
			if (ecmd.supported & SUPPORTED_Asym_Pause)
				desc->supported |= OFPPF_PAUSE_ASYM;
#endif /* kernel >= 2.6.14 */

			/* Set the advertised features */
			if (ecmd.advertising & ADVERTISED_10baseT_Half)
				desc->advertised |= OFPPF_10MB_HD;
			if (ecmd.advertising & ADVERTISED_10baseT_Full)
				desc->advertised |= OFPPF_10MB_FD;
			if (ecmd.advertising & ADVERTISED_100baseT_Half)
				desc->advertised |= OFPPF_100MB_HD;
			if (ecmd.advertising & ADVERTISED_100baseT_Full)
				desc->advertised |= OFPPF_100MB_FD;
			if (ecmd.advertising & ADVERTISED_1000baseT_Half)
				desc->advertised |= OFPPF_1GB_HD;
			if (ecmd.advertising & ADVERTISED_1000baseT_Full)
				desc->advertised |= OFPPF_1GB_FD;
			if (ecmd.advertising & ADVERTISED_10000baseT_Full)
				desc->advertised |= OFPPF_10GB_FD;
			if (ecmd.advertising & ADVERTISED_TP)
				desc->advertised |= OFPPF_COPPER;
			if (ecmd.advertising & ADVERTISED_FIBRE)
				desc->advertised |= OFPPF_FIBER;
			if (ecmd.advertising & ADVERTISED_Autoneg)
				desc->advertised |= OFPPF_AUTONEG;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
			if (ecmd.advertising & ADVERTISED_Pause)
				desc->advertised |= OFPPF_PAUSE;
			if (ecmd.advertising & ADVERTISED_Asym_Pause)
				desc->advertised |= OFPPF_PAUSE_ASYM;
#endif /* kernel >= 2.6.14 */

			/* Set the current features */
			if (ecmd.speed == SPEED_10)
				desc->curr = (ecmd.duplex) ? OFPPF_10MB_FD : OFPPF_10MB_HD;
			else if (ecmd.speed == SPEED_100)
				desc->curr = (ecmd.duplex) ? OFPPF_100MB_FD : OFPPF_100MB_HD;
			else if (ecmd.speed == SPEED_1000)
				desc->curr = (ecmd.duplex) ? OFPPF_1GB_FD : OFPPF_1GB_HD;
			else if (ecmd.speed == SPEED_10000)
				desc->curr = OFPPF_10GB_FD;

			if (ecmd.port == PORT_TP)
				desc->curr |= OFPPF_COPPER;
			else if (ecmd.port == PORT_FIBRE)
				desc->curr |= OFPPF_FIBER;

			if (ecmd.autoneg)
				desc->curr |= OFPPF_AUTONEG;
		}
	}
#endif
	/* Swap the accumulated host-order feature words to network order. */
	desc->curr = htonl(desc->curr);
	desc->supported = htonl(desc->supported);
	desc->advertised = htonl(desc->advertised);
	desc->peer = htonl(desc->peer);
}
  852 +
/* Fills the body of an OFPT_FEATURES_REPLY from 'dp' and appends one
 * ofp_phy_port per switch port.  Returns the number of ports written.
 * Presumably the caller allocated room for DP_MAX_PORTS entries and holds
 * rcu_read_lock or dp_mutex for the RCU list walk — TODO confirm. */
static int
fill_features_reply(struct datapath *dp, struct ofp_switch_features *ofr)
{
	struct net_bridge_port *p;
	uint64_t dpid = get_datapath_id(dp->netdev);
	int port_count = 0;

	ofr->datapath_id = cpu_to_be64(dpid);

	ofr->n_buffers = htonl(N_PKT_BUFFERS);
	/* No byte swap here — presumably n_tables is a single byte; confirm
	 * against the ofp_switch_features definition. */
	ofr->n_tables = dp->chain->n_tables;
	ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES);
	ofr->actions = htonl(OFP_SUPPORTED_ACTIONS);
	memset(ofr->pad, 0, sizeof ofr->pad);

	list_for_each_entry_rcu (p, &dp->port_list, node) {
		fill_port_desc(p, &ofr->ports[port_count]);
		port_count++;
	}

	return port_count;
}
  875 +
  876 +int
  877 +dp_send_features_reply(struct datapath *dp, const struct sender *sender)
  878 +{
  879 + struct sk_buff *skb;
  880 + struct ofp_switch_features *ofr;
  881 + size_t ofr_len, port_max_len;
  882 + int port_count;
  883 +
  884 + /* Overallocate. */
  885 + port_max_len = sizeof(struct ofp_phy_port) * DP_MAX_PORTS;
  886 + ofr = alloc_openflow_skb(dp, sizeof(*ofr) + port_max_len,
  887 + OFPT_FEATURES_REPLY, sender, &skb);
  888 + if (!ofr)
  889 + return -ENOMEM;
  890 +
  891 + /* Fill. */
  892 + port_count = fill_features_reply(dp, ofr);
  893 +
  894 + /* Shrink to fit. */
  895 + ofr_len = sizeof(*ofr) + (sizeof(struct ofp_phy_port) * port_count);
  896 + resize_openflow_skb(skb, &ofr->header, ofr_len);
  897 + return send_openflow_skb(dp, skb, sender);
  898 +}
  899 +
  900 +int
  901 +dp_send_config_reply(struct datapath *dp, const struct sender *sender)
  902 +{
  903 + struct sk_buff *skb;
  904 + struct ofp_switch_config *osc;
  905 +
  906 + osc = alloc_openflow_skb(dp, sizeof *osc, OFPT_GET_CONFIG_REPLY, sender,
  907 + &skb);
  908 + if (!osc)
  909 + return -ENOMEM;
  910 +
  911 + osc->flags = htons(dp->flags);
  912 + osc->miss_send_len = htons(dp->miss_send_len);
  913 +
  914 + return send_openflow_skb(dp, skb, sender);
  915 +}
  916 +
  917 +int
  918 +dp_send_hello(struct datapath *dp, const struct sender *sender,
  919 + const struct ofp_header *request)
  920 +{
  921 + if (request->version < OFP_VERSION) {
  922 + char err[64];
  923 + sprintf(err, "Only version 0x%02x supported", OFP_VERSION);
  924 + dp_send_error_msg(dp, sender, OFPET_HELLO_FAILED,
  925 + OFPHFC_INCOMPATIBLE, err, strlen(err));
  926 + return -EINVAL;
  927 + } else {
  928 + struct sk_buff *skb;
  929 + struct ofp_header *reply;
  930 +
  931 + reply = alloc_openflow_skb(dp, sizeof *reply,
  932 + OFPT_HELLO, sender, &skb);
  933 + if (!reply)
  934 + return -ENOMEM;
  935 +
  936 + return send_openflow_skb(dp, skb, sender);
  937 + }
  938 +}
  939 +
  940 +int
  941 +dp_send_barrier_reply(struct datapath *dp, const struct sender *sender,
  942 + const struct ofp_header *request)
  943 +{
  944 + struct sk_buff *skb;
  945 + struct ofp_header *reply;
  946 +
  947 + reply = alloc_openflow_skb(dp, sizeof *reply,
  948 + OFPT_BARRIER_REPLY, sender, &skb);
  949 + if (!reply)
  950 + return -ENOMEM;
  951 +
  952 + return send_openflow_skb(dp, skb, sender);
  953 +}
  954 +
  955 +int
  956 +dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
  957 +{
  958 + unsigned long int flags;
  959 + int port_no = ntohs(opm->port_no);
  960 + struct net_bridge_port *p;
  961 + p = (port_no < DP_MAX_PORTS ? dp->ports[port_no]
  962 + : port_no == OFPP_LOCAL ? dp->local_port
  963 + : NULL);
  964 +
  965 + /* Make sure the port id hasn't changed since this was sent */
  966 + if (!p || memcmp(opm->hw_addr, p->dev->dev_addr, ETH_ALEN))
  967 + return -1;
  968 +
  969 + spin_lock_irqsave(&p->lock, flags);
  970 + if (opm->mask) {
  971 + uint32_t config_mask = ntohl(opm->mask);
  972 + p->config &= ~config_mask;
  973 + p->config |= ntohl(opm->config) & config_mask;
  974 + }
  975 + spin_unlock_irqrestore(&p->lock, flags);
  976 +
  977 + return 0;
  978 +}
  979 +
  980 +/* Initialize the port status field of the bridge port. */
  981 +static void
  982 +init_port_status(struct net_bridge_port *p)
  983 +{
  984 + unsigned long int flags;
  985 +
  986 + spin_lock_irqsave(&p->lock, flags);
  987 +
  988 + if (p->dev->flags & IFF_UP)
  989 + p->config &= ~OFPPC_PORT_DOWN;
  990 + else
  991 + p->config |= OFPPC_PORT_DOWN;
  992 +
  993 + if (netif_carrier_ok(p->dev))
  994 + p->state &= ~OFPPS_LINK_DOWN;
  995 + else
  996 + p->state |= OFPPS_LINK_DOWN;
  997 +
  998 + spin_unlock_irqrestore(&p->lock, flags);
  999 +}
  1000 +
  1001 +int
  1002 +dp_send_port_status(struct net_bridge_port *p, uint8_t status)
  1003 +{
  1004 + struct sk_buff *skb;
  1005 + struct ofp_port_status *ops;
  1006 +
  1007 + ops = alloc_openflow_skb(p->dp, sizeof *ops, OFPT_PORT_STATUS, NULL,
  1008 + &skb);
  1009 + if (!ops)
  1010 + return -ENOMEM;
  1011 + ops->reason = status;
  1012 + memset(ops->pad, 0, sizeof ops->pad);
  1013 + fill_port_desc(p, &ops->desc);
  1014 +
  1015 + return send_openflow_skb(p->dp, skb, NULL);
  1016 +}
  1017 +
  1018 +/* Convert jiffies_64 to seconds. */
  1019 +static u32 inline jiffies_64_to_secs(u64 j)
  1020 +{
  1021 + /* Call to do_div is necessary as we can't do a 64-bit division in a
  1022 + * 32-bit kernel (at least not without linking to libgcc) */
  1023 + do_div(j, HZ);
  1024 + return j;
  1025 +}
  1026 +
  1027 +/* Convert jiffies_64 to just the nanosec part between seconds. */
  1028 +static u32 inline jiffies_64_to_nsecs(u64 j)
  1029 +{
  1030 + /* Call to do_div is necessary as we can't do a 64-bit division in a
  1031 + * 32-bit kernel (at least not without linking to libgcc) */
  1032 + return (j - jiffies_64_to_secs(j));
  1033 +}
  1034 +
  1035 +int
  1036 +dp_send_flow_end(struct datapath *dp, struct sw_flow *flow,
  1037 + enum ofp_flow_removed_reason reason)
  1038 +{
  1039 + struct sk_buff *skb;
  1040 + struct ofp_flow_removed *ofr;
  1041 +
  1042 + if (!flow->send_flow_rem)
  1043 + return 0;
  1044 +
  1045 + if (flow->emerg_flow)
  1046 + return 0;
  1047 +
  1048 + ofr = alloc_openflow_skb(dp, sizeof *ofr, OFPT_FLOW_REMOVED, 0, &skb);
  1049 + if (!ofr)
  1050 + return -ENOMEM;
  1051 +
  1052 + flow_fill_match(&ofr->match, &flow->key);
  1053 +
  1054 + ofr->priority = htons(flow->priority);
  1055 + ofr->reason = reason;
  1056 +
  1057 + ofr->duration_sec = htonl(jiffies_64_to_secs(get_jiffies_64()-flow->created));
  1058 + ofr->duration_nsec = htonl(jiffies_64_to_nsecs(get_jiffies_64()-flow->created));
  1059 + ofr->idle_timeout = htons(flow->idle_timeout);
  1060 +
  1061 + ofr->packet_count = cpu_to_be64(flow->packet_count);
  1062 + ofr->byte_count = cpu_to_be64(flow->byte_count);
  1063 +
  1064 + return send_openflow_skb(dp, skb, NULL);
  1065 +}
  1066 +EXPORT_SYMBOL(dp_send_flow_end);
  1067 +
  1068 +int
  1069 +dp_send_error_msg(struct datapath *dp, const struct sender *sender,
  1070 + uint16_t type, uint16_t code, const void *data, size_t len)
  1071 +{
  1072 + struct sk_buff *skb;
  1073 + struct ofp_error_msg *oem;
  1074 +
  1075 +
  1076 + oem = alloc_openflow_skb(dp, sizeof(*oem)+len, OFPT_ERROR,
  1077 + sender, &skb);
  1078 + if (!oem)
  1079 + return -ENOMEM;
  1080 +
  1081 + oem->type = htons(type);
  1082 + oem->code = htons(code);
  1083 + memcpy(oem->data, data, len);
  1084 +
  1085 + return send_openflow_skb(dp, skb, sender);
  1086 +}
  1087 +
  1088 +int
  1089 +dp_send_echo_reply(struct datapath *dp, const struct sender *sender,
  1090 + const struct ofp_header *rq)
  1091 +{
  1092 + struct sk_buff *skb;
  1093 + struct ofp_header *reply;
  1094 +
  1095 + reply = alloc_openflow_skb(dp, ntohs(rq->length), OFPT_ECHO_REPLY,
  1096 + sender, &skb);
  1097 + if (!reply)
  1098 + return -ENOMEM;
  1099 +
  1100 + memcpy(reply + 1, rq + 1, ntohs(rq->length) - sizeof *rq);
  1101 + return send_openflow_skb(dp, skb, sender);
  1102 +}
  1103 +
  1104 +/* Generic Netlink interface.
  1105 + *
  1106 + * See netlink(7) for an introduction to netlink. See
  1107 + * http://linux-net.osdl.org/index.php/Netlink for more information and
  1108 + * pointers on how to work with netlink and Generic Netlink in the kernel and
  1109 + * in userspace. */
  1110 +
/* Generic netlink family for datapath management; GENL_ID_GENERATE lets
 * the kernel pick a free family id at registration time. */
static struct genl_family dp_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = DP_GENL_FAMILY_NAME,
	.version = 1,
	.maxattr = DP_GENL_A_MAX,
};
  1118 +
/* Attribute policy: what each attribute may contain. */
static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },	   /* datapath index */
	[DP_GENL_A_DP_NAME] = { .type = NLA_NUL_STRING },  /* datapath name */
	[DP_GENL_A_MC_GROUP] = { .type = NLA_U32 },	   /* mcast group id */
	[DP_GENL_A_PORTNAME] = { .type = NLA_NUL_STRING }  /* net_device name */
};
  1126 +
  1127 +static int dp_genl_add(struct sk_buff *skb, struct genl_info *info)
  1128 +{
  1129 + int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ?
  1130 + nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1;
  1131 + const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ?
  1132 + nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL;
  1133 +
  1134 + if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME]))
  1135 + return -EINVAL;
  1136 +
  1137 + if ((dp_idx == -1) && (!dp_name))
  1138 + return -EINVAL;
  1139 +
  1140 + return new_dp(dp_idx, dp_name);
  1141 +}
  1142 +
/* Operation table entry binding DP_GENL_C_ADD_DP to dp_genl_add(). */
static struct genl_ops dp_genl_ops_add_dp = {
	.cmd = DP_GENL_C_ADD_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_add,
	.dumpit = NULL,
};
  1150 +
  1151 +/* Must be called with rcu_read_lock or dp_mutex. */
  1152 +struct datapath *dp_get_by_idx(int dp_idx)
  1153 +{
  1154 + if (dp_idx < 0 || dp_idx >= DP_MAX)
  1155 + return NULL;
  1156 + return rcu_dereference(dps[dp_idx]);
  1157 +}
  1158 +EXPORT_SYMBOL(dp_get_by_idx);
  1159 +
  1160 +/* Must be called with rcu_read_lock or dp_mutex. */
  1161 +struct datapath *dp_get_by_name(const char *dp_name)
  1162 +{
  1163 + int i;
  1164 + for (i=0; i<DP_MAX; i++) {
  1165 + struct datapath *dp = rcu_dereference(dps[i]);
  1166 + if (dp && !strcmp(dp->netdev->name, dp_name))
  1167 + return dp;
  1168 + }
  1169 + return NULL;
  1170 +}
  1171 +
  1172 +/* Must be called with rcu_read_lock or dp_mutex. */
  1173 +static struct datapath *
  1174 +lookup_dp(struct genl_info *info)
  1175 +{
  1176 + int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ?
  1177 + nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1;
  1178 + const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ?
  1179 + nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL;
  1180 +
  1181 + if (VERIFY_NUL_STRING(info->attrs[DP_GENL_A_DP_NAME]))
  1182 + return ERR_PTR(-EINVAL);
  1183 +
  1184 + if (dp_idx != -1) {
  1185 + struct datapath *dp = dp_get_by_idx(dp_idx);
  1186 + if (!dp)
  1187 + return ERR_PTR(-ENOENT);
  1188 + else if (dp_name && strcmp(dp->netdev->name, dp_name))
  1189 + return ERR_PTR(-EINVAL);
  1190 + else
  1191 + return dp;
  1192 + } else if (dp_name) {
  1193 + struct datapath *dp = dp_get_by_name(dp_name);
  1194 + return dp ? dp : ERR_PTR(-ENOENT);
  1195 + } else {
  1196 + return ERR_PTR(-EINVAL);
  1197 + }
  1198 +}
  1199 +
/* Generic netlink handler for DP_GENL_C_DEL_DP: tears down the datapath
 * identified by the request's attributes. */
static int dp_genl_del(struct sk_buff *skb, struct genl_info *info)
{
	struct net_device *dev = NULL;
	struct datapath *dp;
	int err;

	/* Lock order: rtnl before dp_mutex (same as dp_genl_add_del_port). */
	rtnl_lock();
	mutex_lock(&dp_mutex);
	dp = lookup_dp(info);
	if (IS_ERR(dp))
		err = PTR_ERR(dp);
	else {
		dev = dp->netdev;
		del_dp(dp);
		err = 0;
	}
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
	/* The netdev is freed only after both locks are released. */
	if (dev)
		free_netdev(dev);
	return err;
}
  1222 +
/* Operation table entry binding DP_GENL_C_DEL_DP to dp_genl_del(). */
static struct genl_ops dp_genl_ops_del_dp = {
	.cmd = DP_GENL_C_DEL_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_del,
	.dumpit = NULL,
};
  1230 +
/* Queries a datapath for related information. Currently the only relevant
 * information is the datapath's multicast group ID, datapath ID, and
 * datapath device name. */
static int dp_genl_query(struct sk_buff *skb, struct genl_info *info)
{
	struct datapath *dp;
	struct sk_buff *ans_skb = NULL;
	int err;

	rcu_read_lock();
	dp = lookup_dp(info);
	if (IS_ERR(dp))
		err = PTR_ERR(dp);
	else {
		void *data;
		ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
		if (!ans_skb) {
			err = -ENOMEM;
			goto err;
		}
		/* -ENOMEM covers both a genlmsg_put_reply() failure and the
		 * NLA_PUT* macros below, which jump to nla_put_failure when
		 * the skb runs out of tailroom. */
		err = -ENOMEM;
		data = genlmsg_put_reply(ans_skb, info, &dp_genl_family,
					 0, DP_GENL_C_QUERY_DP);
		if (data == NULL)
			goto err;
		NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx);
		NLA_PUT_STRING(ans_skb, DP_GENL_A_DP_NAME, dp->netdev->name);
		NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, dp_mc_group(dp));

		genlmsg_end(ans_skb, data);
		err = genlmsg_reply(ans_skb, info);
		/* genlmsg_reply consumed the skb: clear the pointer so the
		 * cleanup below does not free it a second time. */
		ans_skb = NULL;
	}
err:
nla_put_failure:
	kfree_skb(ans_skb);	/* kfree_skb(NULL) is a no-op. */
	rcu_read_unlock();
	return err;
}
  1270 +
/* Operation table entry binding DP_GENL_C_QUERY_DP to dp_genl_query(). */
static struct genl_ops dp_genl_ops_query_dp = {
	.cmd = DP_GENL_C_QUERY_DP,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_query,
	.dumpit = NULL,
};
  1278 +
/* Shared handler for DP_GENL_C_ADD_PORT and DP_GENL_C_DEL_PORT; the
 * command in genlhdr->cmd selects which operation to perform. */
static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info)
{
	struct datapath *dp;
	struct net_device *port;
	int err;

	if (!info->attrs[DP_GENL_A_PORTNAME] ||
	    VERIFY_NUL_STRING(info->attrs[DP_GENL_A_PORTNAME]))
		return -EINVAL;

	rtnl_lock();
	mutex_lock(&dp_mutex);

	/* Get datapath. */
	dp = lookup_dp(info);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto out_unlock;
	}

	/* Get interface to add/remove.  dev_get_by_name() takes a device
	 * reference, released at out_put. */
	port = dev_get_by_name(&init_net,
			       nla_data(info->attrs[DP_GENL_A_PORTNAME]));
	if (!port) {
		err = -ENOENT;
		goto out_unlock;
	}

	/* Execute operation. */
	if (info->genlhdr->cmd == DP_GENL_C_ADD_PORT)
		err = add_switch_port(dp, port);
	else {
		/* Deleting: the device must currently belong to this dp. */
		if (port->br_port == NULL || port->br_port->dp != dp) {
			err = -ENOENT;
			goto out_put;
		}
		err = dp_del_switch_port(port->br_port);
	}

out_put:
	dev_put(port);
out_unlock:
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
	return err;
}
  1325 +
/* Operation table entry: DP_GENL_C_ADD_PORT, served by the shared
 * add/del handler. */
static struct genl_ops dp_genl_ops_add_port = {
	.cmd = DP_GENL_C_ADD_PORT,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_add_del_port,
	.dumpit = NULL,
};
  1333 +
/* Operation table entry: DP_GENL_C_DEL_PORT, served by the shared
 * add/del handler. */
static struct genl_ops dp_genl_ops_del_port = {
	.cmd = DP_GENL_C_DEL_PORT,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_policy,
	.doit = dp_genl_add_del_port,
	.dumpit = NULL,
};
  1341 +
/* Generic netlink handler for OpenFlow messages sent from userspace:
 * hands the embedded OpenFlow payload to the forwarding/control layer. */
static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *va = info->attrs[DP_GENL_A_OPENFLOW];
	struct datapath *dp;
	struct ofp_header *oh;
	struct sender sender;
	int err;

	if (!info->attrs[DP_GENL_A_DP_IDX] || !va)
		return -EINVAL;

	/* NOTE(review): dp_get_by_idx() is documented to require
	 * rcu_read_lock or dp_mutex, but neither is held yet at this
	 * point — confirm datapaths cannot be removed concurrently with
	 * genl handlers. */
	dp = dp_get_by_idx(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]));
	if (!dp)
		return -ENOENT;

	/* Reject payloads too short to contain an OpenFlow header. */
	if (nla_len(va) < sizeof(struct ofp_header))
		return -EINVAL;
	oh = nla_data(va);

	/* Record who asked so replies can be routed back to them. */
	sender.xid = oh->xid;
	sender.pid = info->snd_pid;
	sender.seq = info->snd_seq;

	mutex_lock(&dp_mutex);
	err = fwd_control_input(dp->chain, &sender,
				nla_data(va), nla_len(va));
	mutex_unlock(&dp_mutex);
	return err;
}
  1371 +
/* Policy for OpenFlow-over-netlink messages: only the datapath index is
 * validated here; the OpenFlow payload length is checked by hand in
 * dp_genl_openflow(). */
static struct nla_policy dp_genl_openflow_policy[DP_GENL_A_MAX + 1] = {
	[DP_GENL_A_DP_IDX] = { .type = NLA_U32 },
};
  1375 +
/* Stats hook: fills 'body' with an ofp_desc_stats (manufacturer, hardware,
 * software, datapath and serial-number descriptions) and updates *body_len
 * to the bytes actually used. */
static int desc_stats_dump(struct datapath *dp, void *state,
			   void *body, int *body_len)
{
	struct ofp_desc_stats *ods = body;
	int n_bytes = sizeof *ods;

	if (n_bytes > *body_len) {
		return -ENOBUFS;
	}
	*body_len = n_bytes;

	/* NOTE(review): strncpy() leaves the field unterminated when the
	 * source exactly fills it — presumably the description strings are
	 * always shorter than the fields; confirm. */
	strncpy(ods->mfr_desc, mfr_desc, sizeof ods->mfr_desc);
	strncpy(ods->hw_desc, hw_desc, sizeof ods->hw_desc);
	strncpy(ods->sw_desc, sw_desc, sizeof ods->sw_desc);
	strncpy(ods->dp_desc, dp->dp_desc, sizeof ods->dp_desc);
	strncpy(ods->serial_num, serial_num, sizeof ods->serial_num);

	return 0;
}
  1395 +
/* Cursor carried across successive flow-stats dump calls. */
struct flow_stats_state {
	int table_idx;			   /* Table currently being walked. */
	struct sw_table_position position; /* Resume point within the table. */
	const struct ofp_flow_stats_request *rq; /* Original request body. */

	void *body;			   /* Output buffer for this chunk. */
	int bytes_used, bytes_allocated;   /* Fill level / capacity of body. */
};

/* Pseudo table id addressing the emergency flow table in stats requests. */
#define EMERG_TABLE_ID_FOR_STATS 0xfe
  1406 +
  1407 +static int flow_stats_init(struct datapath *dp, const void *body, int body_len,
  1408 + void **state)
  1409 +{
  1410 + const struct ofp_flow_stats_request *fsr = body;
  1411 + struct flow_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC);
  1412 + if (!s)
  1413 + return -ENOMEM;
  1414 + s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id;
  1415 + memset(&s->position, 0, sizeof s->position);
  1416 + s->rq = fsr;
  1417 + *state = s;
  1418 + return 0;
  1419 +}
  1420 +
  1421 +static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
  1422 +{
  1423 + struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts);
  1424 + struct flow_stats_state *s = private;
  1425 + struct ofp_flow_stats *ofs;
  1426 + int length;
  1427 + uint64_t duration;
  1428 +
  1429 + length = sizeof *ofs + sf_acts->actions_len;
  1430 + if (length + s->bytes_used > s->bytes_allocated)
  1431 + return 1;
  1432 +
  1433 + ofs = s->body + s->bytes_used;
  1434 + ofs->length = htons(length);
  1435 + ofs->table_id = s->table_idx;
  1436 + ofs->pad = 0;
  1437 + ofs->match.wildcards = htonl(flow->key.wildcards);
  1438 + ofs->match.in_port = flow->key.in_port;
  1439 + memcpy(ofs->match.dl_src, flow->key.dl_src, ETH_ALEN);
  1440 + memcpy(ofs->match.dl_dst, flow->key.dl_dst, ETH_ALEN);
  1441 + ofs->match.dl_vlan = flow->key.dl_vlan;
  1442 + ofs->match.dl_type = flow->key.dl_type;
  1443 + ofs->match.nw_tos = flow->key.nw_tos;
  1444 + ofs->match.nw_proto = flow->key.nw_proto;
  1445 + ofs->match.nw_src = flow->key.nw_src;
  1446 + ofs->match.nw_dst = flow->key.nw_dst;
  1447 + ofs->match.dl_vlan_pcp = flow->key.dl_vlan_pcp;
  1448 + ofs->match.tp_src = flow->key.tp_src;
  1449 + ofs->match.tp_dst = flow->key.tp_dst;
  1450 +
  1451 + /* The kernel doesn't support 64-bit division, so use the 'do_div'
  1452 + * macro instead. The first argument is replaced with the quotient,
  1453 + * while the remainder is the return value. */
  1454 + duration = get_jiffies_64() - flow->created;
  1455 + do_div(duration, HZ);
  1456 + ofs->duration_sec = htonl(jiffies_64_to_secs(duration));
  1457 + ofs->duration_nsec = htonl(jiffies_64_to_nsecs(duration));
  1458 +
  1459 + ofs->priority = htons(flow->priority);
  1460 + ofs->idle_timeout = htons(flow->idle_timeout);
  1461 + ofs->hard_timeout = htons(flow->hard_timeout);
  1462 + memset(&ofs->pad2, 0, sizeof ofs->pad2);
  1463 + ofs->packet_count = cpu_to_be64(flow->packet_count);
  1464 + ofs->byte_count = cpu_to_be64(flow->byte_count);
  1465 + memcpy(ofs->actions, sf_acts->actions, sf_acts->actions_len);
  1466 +
  1467 + s->bytes_used += length;
  1468 + return 0;
  1469 +}
  1470 +
/* Stats hook: emits as many ofp_flow_stats records as fit into 'body',
 * resuming from the cursor in 'state'.  On return *body_len holds the
 * bytes written.  Returns 0 when done, 1 when more flows remain, or a
 * negative errno. */
static int flow_stats_dump(struct datapath *dp, void *state,
			   void *body, int *body_len)
{
	struct flow_stats_state *s = state;
	struct sw_flow_key match_key;
	int error = 0;

	s->bytes_used = 0;
	s->bytes_allocated = *body_len;
	s->body = body;

	flow_extract_match(&match_key, &s->rq->match);
	if (s->rq->table_id == EMERG_TABLE_ID_FOR_STATS) {
		/* The emergency table must be addressed explicitly; it is
		 * not part of the 0xff "all tables" walk below. */
		struct sw_table *table = dp->chain->emerg_table;

		error = table->iterate(table, &match_key, s->rq->out_port,
				       &s->position, flow_stats_dump_callback,
				       s);
	} else {
		/* Walk one table (specific id) or all of them (0xff). */
		while (s->table_idx < dp->chain->n_tables
		       && (s->rq->table_id == 0xff
			   || s->rq->table_id == s->table_idx)) {
			struct sw_table *table = dp->chain->tables[s->table_idx];

			error = table->iterate(table, &match_key,
					       s->rq->out_port, &s->position,
					       flow_stats_dump_callback, s);
			if (error)
				break;

			s->table_idx++;
			memset(&s->position, 0, sizeof s->position);
		}
	}
	*body_len = s->bytes_used;

	/* If error is 0, we're done.
	 * Otherwise, if some bytes were used, there are more flows to come.
	 * Otherwise, we were not able to fit even a single flow in the body,
	 * which indicates that we have a single flow with too many actions to
	 * fit. We won't ever make any progress at that rate, so give up. */
	return !error ? 0 : s->bytes_used ? 1 : -ENOMEM;
}
  1514 +
/* Stats hook: releases the cursor allocated by flow_stats_init(). */
static void flow_stats_done(void *state)
{
	kfree(state);
}
  1519 +
/* Stats hook: aggregate stats need no extra state — just remember the
 * request body itself.  (The cast deliberately drops const; the request is
 * only ever read through this pointer in aggregate_stats_dump().) */
static int aggregate_stats_init(struct datapath *dp,
				const void *body, int body_len,
				void **state)
{
	*state = (void *)body;
	return 0;
}
  1527 +
  1528 +static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private)
  1529 +{
  1530 + struct ofp_aggregate_stats_reply *rpy = private;
  1531 + rpy->packet_count += flow->packet_count;
  1532 + rpy->byte_count += flow->byte_count;
  1533 + rpy->flow_count++;
  1534 + return 0;
  1535 +}
  1536 +
/* Stats hook: walks the selected table(s) and produces a single
 * ofp_aggregate_stats_reply summing packet/byte/flow counts over all
 * matching flows.  *body_len is set to the reply size. */
static int aggregate_stats_dump(struct datapath *dp, void *state,
				void *body, int *body_len)
{
	struct ofp_aggregate_stats_request *rq = state;
	struct ofp_aggregate_stats_reply *rpy;
	struct sw_table_position position;
	struct sw_flow_key match_key;
	int table_idx;
	int error = 0;

	if (*body_len < sizeof *rpy)
		return -ENOBUFS;
	rpy = body;
	*body_len = sizeof *rpy;

	/* Counters accumulate in host order; converted at the end. */
	memset(rpy, 0, sizeof *rpy);

	flow_extract_match(&match_key, &rq->match);
	/* 0xff selects all tables, starting from 0. */
	table_idx = rq->table_id == 0xff ? 0 : rq->table_id;
	memset(&position, 0, sizeof position);

	if (rq->table_id == EMERG_TABLE_ID_FOR_STATS) {
		/* Emergency table is only reached by its explicit id. */
		struct sw_table *table = dp->chain->emerg_table;

		error = table->iterate(table, &match_key, rq->out_port,
				       &position,
				       aggregate_stats_dump_callback, rpy);
		if (error)
			return error;
	} else {
		while (table_idx < dp->chain->n_tables
		       && (rq->table_id == 0xff || rq->table_id == table_idx)) {
			struct sw_table *table = dp->chain->tables[table_idx];

			error = table->iterate(table, &match_key, rq->out_port,
					       &position,
					       aggregate_stats_dump_callback,
					       rpy);
			if (error)
				return error;

			table_idx++;
			memset(&position, 0, sizeof position);
		}
	}

	/* Convert the accumulated totals to network byte order in place. */
	rpy->packet_count = cpu_to_be64(rpy->packet_count);
	rpy->byte_count = cpu_to_be64(rpy->byte_count);
	rpy->flow_count = htonl(rpy->flow_count);
	return 0;
}
  1588 +
/* Stats hook: emits one ofp_table_stats record per flow table in the
 * chain.  *body_len is set to the bytes written. */
static int table_stats_dump(struct datapath *dp, void *state,
			    void *body, int *body_len)
{
	struct ofp_table_stats *ots;
	int n_bytes = dp->chain->n_tables * sizeof *ots;
	int i;
	if (n_bytes > *body_len)
		return -ENOBUFS;
	*body_len = n_bytes;
	for (i = 0, ots = body; i < dp->chain->n_tables; i++, ots++) {
		struct sw_table_stats stats;
		dp->chain->tables[i]->stats(dp->chain->tables[i], &stats);
		/* NOTE(review): strncpy may leave ots->name unterminated if
		 * stats.name exactly fills the field — confirm table names
		 * are always shorter than the field. */
		strncpy(ots->name, stats.name, sizeof ots->name);
		ots->table_id = i;
		ots->wildcards = htonl(stats.wildcards);
		memset(ots->pad, 0, sizeof ots->pad);
		ots->max_entries = htonl(stats.max_flows);
		ots->active_count = htonl(stats.n_flows);
		ots->lookup_count = cpu_to_be64(stats.n_lookup);
		ots->matched_count = cpu_to_be64(stats.n_matched);
	}
	return 0;
}
  1612 +
/* Cursor carried across successive port-stats dump calls. */
struct port_stats_state {
	int start_port; /* port to start dumping from */
	int port_no;	/* from ofp_port_stats_request */
};
  1617 +
  1618 +static int port_stats_init(struct datapath *dp, const void *body, int body_len,
  1619 + void **state)
  1620 +{
  1621 + struct port_stats_state *s = kmalloc(sizeof *s, GFP_ATOMIC);
  1622 + struct ofp_port_stats_request *psr
  1623 + = (struct ofp_port_stats_request *)body;
  1624 +
  1625 + if (!s)
  1626 + return -ENOMEM;
  1627 + s->start_port = 1;
  1628 + s->port_no = ntohs(psr->port_no);
  1629 + *state = s;
  1630 + return 0;
  1631 +}
  1632 +
/* Copies the kernel's interface counters for port 'p' into the OpenFlow
 * port-stats record 'ops', converting to network byte order. */
static void
dump_port_stats(struct ofp_port_stats *ops, struct net_bridge_port *p)
{
	struct net_device_stats *stats;

	/* The get-stats entry point moved into net_device_ops in 2.6.31. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
	stats = p->dev->netdev_ops->ndo_get_stats(p->dev);
#else
	stats = p->dev->get_stats(p->dev);
#endif
	ops->port_no = htons(p->port_no);
	memset(ops->pad, 0, sizeof ops->pad);
	ops->rx_packets = cpu_to_be64(stats->rx_packets);
	ops->tx_packets = cpu_to_be64(stats->tx_packets);
	ops->rx_bytes = cpu_to_be64(stats->rx_bytes);
	ops->tx_bytes = cpu_to_be64(stats->tx_bytes);
	ops->rx_dropped = cpu_to_be64(stats->rx_dropped);
	ops->tx_dropped = cpu_to_be64(stats->tx_dropped);
	ops->rx_errors = cpu_to_be64(stats->rx_errors);
	ops->tx_errors = cpu_to_be64(stats->tx_errors);
	ops->rx_frame_err = cpu_to_be64(stats->rx_frame_errors);
	ops->rx_over_err = cpu_to_be64(stats->rx_over_errors);
	ops->rx_crc_err = cpu_to_be64(stats->rx_crc_errors);
	ops->collisions = cpu_to_be64(stats->collisions);
}
  1658 +
  1659 +static struct net_bridge_port *
  1660 +lookup_port(struct datapath *dp, uint16_t port_no)
  1661 +{
  1662 + return (port_no < DP_MAX_PORTS ? dp->ports[port_no]
  1663 + : port_no == OFPP_LOCAL ? dp->local_port
  1664 + : NULL);
  1665 +}
  1666 +
  1667 +
  1668 +static int port_stats_dump(struct datapath *dp, void *state,
  1669 + void *body, int *body_len)
  1670 +{
  1671 + struct port_stats_state *s = state;
  1672 + struct net_bridge_port *p = NULL;
  1673 + struct ofp_port_stats *ops = NULL;
  1674 + int n_ports = 0;
  1675 + int max_ports = 0;
  1676 + int i = 0;
  1677 +
  1678 + max_ports = *body_len / sizeof *ops;
  1679 + if (!max_ports)
  1680 + return -ENOMEM;
  1681 + ops = body;
  1682 +
  1683 + if (s->port_no == OFPP_NONE) {
  1684 + for (i = s->start_port; i < DP_MAX_PORTS && n_ports < max_ports;
  1685 + i++) {
  1686 + p = dp->ports[i];
  1687 + if (!p)
  1688 + continue;
  1689 + dump_port_stats(ops, p);
  1690 + n_ports++;
  1691 + ops++;
  1692 + }
  1693 + s->start_port = i;
  1694 + if (dp->local_port) {
  1695 + dump_port_stats(ops, dp->local_port);
  1696 + n_ports++;
  1697 + ops++;
  1698 + s->start_port = OFPP_LOCAL + 1; /* == OFPP_NONE, > DP_MAX_PORTS */
  1699 + }
  1700 + } else {
  1701 + p = lookup_port(dp, s->port_no);
  1702 + if (p) {
  1703 + dump_port_stats(ops, p);
  1704 + n_ports++;
  1705 + ops++;
  1706 + }
  1707 + }
  1708 +
  1709 + *body_len = n_ports * sizeof *ops;
  1710 + return n_ports >= max_ports;
  1711 +}
  1712 +
/* Releases the iterator state allocated by port_stats_init(). */
static void port_stats_done(void *state)
{
	kfree(state);
}
  1717 +
  1718 +/*
  1719 + * We don't define any vendor_stats_state, we let the actual
  1720 + * vendor implementation do that.
  1721 + * The only requirement is that the first member of that object
  1722 + * should be the vendor id.
  1723 + * Jean II
  1724 + *
  1725 + * Basically, it would look like :
  1726 + * struct acme_stats_state {
  1727 + * uint32_t vendor; // ACME_VENDOR_ID.
  1728 + * <...> // Other stuff.
  1729 + * };
  1730 + */
  1731 +static int vendor_stats_init(struct datapath *dp, const void *body,
  1732 + int body_len, void **state)
  1733 +{
  1734 + /* min_body was checked, this is safe */
  1735 + const uint32_t vendor = ntohl(*((uint32_t *)body));
  1736 + int err;
  1737 +
  1738 + switch (vendor) {
  1739 + default:
  1740 + err = -EINVAL;
  1741 + }
  1742 +
  1743 + return err;
  1744 +}
  1745 +
  1746 +static int vendor_stats_dump(struct datapath *dp, void *state, void *body,
  1747 + int *body_len)
  1748 +{
  1749 + const uint32_t vendor = *((uint32_t *)state);
  1750 + int newbuf;
  1751 +
  1752 + switch (vendor) {
  1753 + default:
  1754 + /* Should never happen */
  1755 + newbuf = 0;
  1756 + }
  1757 +
  1758 + return newbuf;
  1759 +}
  1760 +
  1761 +static void vendor_stats_done(void *state)
  1762 +{
  1763 + const uint32_t vendor = *((uint32_t *)state);
  1764 +
  1765 + switch (vendor) {
  1766 + default:
  1767 + /* Should never happen */
  1768 + kfree(state);
  1769 + }
  1770 +
  1771 + return;
  1772 +}
  1773 +
/* A kind of OFPST_* statistics request, driven by
 * dp_genl_openflow_dumpit(). */
struct stats_type {
	/* Minimum and maximum acceptable number of bytes in body member of
	 * struct ofp_stats_request. */
	size_t min_body, max_body;

	/* Prepares to dump some kind of statistics on 'dp'.  'body' and
	 * 'body_len' are the 'body' member of the struct ofp_stats_request.
	 * Returns zero if successful, otherwise a negative error code.
	 * May initialize '*state' to state information.  May be null if no
	 * initialization is required.*/
	int (*init)(struct datapath *dp, const void *body, int body_len,
		    void **state);

	/* Dumps statistics for 'dp' into the '*body_len' bytes at 'body', and
	 * modifies '*body_len' to reflect the number of bytes actually used.
	 * ('body' will be transmitted as the 'body' member of struct
	 * ofp_stats_reply.)  Returns 0 when the dump is complete, a
	 * positive value when more data remains (the caller sets
	 * OFPSF_REPLY_MORE and calls again), or a negative error code. */
	int (*dump)(struct datapath *dp, void *state,
		    void *body, int *body_len);

	/* Cleans any state created by the init or dump functions.  May be null
	 * if no cleanup is required. */
	void (*done)(void *state);
};
  1798 +
  1799 +static const struct stats_type stats[] = {
  1800 + [OFPST_DESC] = {
  1801 + 0,
  1802 + 0,
  1803 + NULL,
  1804 + desc_stats_dump,
  1805 + NULL
  1806 + },
  1807 + [OFPST_FLOW] = {
  1808 + sizeof(struct ofp_flow_stats_request),
  1809 + sizeof(struct ofp_flow_stats_request),
  1810 + flow_stats_init,
  1811 + flow_stats_dump,
  1812 + flow_stats_done
  1813 + },
  1814 + [OFPST_AGGREGATE] = {
  1815 + sizeof(struct ofp_aggregate_stats_request),
  1816 + sizeof(struct ofp_aggregate_stats_request),
  1817 + aggregate_stats_init,
  1818 + aggregate_stats_dump,
  1819 + NULL
  1820 + },
  1821 + [OFPST_TABLE] = {
  1822 + 0,
  1823 + 0,
  1824 + NULL,
  1825 + table_stats_dump,
  1826 + NULL
  1827 + },
  1828 + [OFPST_PORT] = {
  1829 + sizeof(struct ofp_port_stats_request),
  1830 + sizeof(struct ofp_port_stats_request),
  1831 + port_stats_init,
  1832 + port_stats_dump,
  1833 + port_stats_done
  1834 + },
  1835 +};
  1836 +
  1837 +/* For OFPST_VENDOR... Jean II */
  1838 +static const struct stats_type stats_vendor = {
  1839 + 8, /* vendor + subtype */
  1840 + 32, /* whatever */
  1841 + vendor_stats_init,
  1842 + vendor_stats_dump,
  1843 + vendor_stats_done
  1844 +};
  1845 +
  1846 +static int
  1847 +dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
  1848 +{
  1849 + struct datapath *dp;
  1850 + struct sender sender;
  1851 + const struct stats_type *s;
  1852 + struct ofp_stats_reply *osr;
  1853 + int dp_idx;
  1854 + int max_openflow_len, body_len;
  1855 + void *body;
  1856 + int err;
  1857 +
  1858 + /* Set up the cleanup function for this dump. Linux 2.6.20 and later
  1859 + * support setting up cleanup functions via the .doneit member of
  1860 + * struct genl_ops. This kluge supports earlier versions also. */
  1861 + cb->done = dp_genl_openflow_done;
  1862 +
  1863 + sender.pid = NETLINK_CB(cb->skb).pid;
  1864 + sender.seq = cb->nlh->nlmsg_seq;
  1865 + if (!cb->args[0]) {
  1866 + struct nlattr *attrs[DP_GENL_A_MAX + 1];
  1867 + struct ofp_stats_request *rq;
  1868 + struct nlattr *va;
  1869 + size_t len, body_len;
  1870 + int type;
  1871 +
  1872 + err = nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, DP_GENL_A_MAX,
  1873 + dp_genl_openflow_policy);
  1874 + if (err < 0)
  1875 + return err;
  1876 +
  1877 + if (!attrs[DP_GENL_A_DP_IDX])
  1878 + return -EINVAL;
  1879 + dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]);
  1880 + dp = dp_get_by_idx(dp_idx);
  1881 + if (!dp)
  1882 + return -ENOENT;
  1883 +
  1884 + va = attrs[DP_GENL_A_OPENFLOW];
  1885 + len = nla_len(va);
  1886 + if (!va || len < sizeof *rq)
  1887 + return -EINVAL;
  1888 +
  1889 + rq = nla_data(va);
  1890 + sender.xid = rq->header.xid;
  1891 + type = ntohs(rq->type);
  1892 + if (rq->header.version != OFP_VERSION) {
  1893 + dp_send_error_msg(dp, &sender, OFPET_BAD_REQUEST,
  1894 + OFPBRC_BAD_VERSION, rq, len);
  1895 + return -EINVAL;
  1896 + }
  1897 + if (rq->header.type != OFPT_STATS_REQUEST
  1898 + || ntohs(rq->header.length) != len)
  1899 + return -EINVAL;
  1900 +
  1901 + if (type == OFPST_VENDOR) {
  1902 + /* Vendor is not in the array, take care of it */
  1903 + s = &stats_vendor;
  1904 + } else {
  1905 + if (type >= ARRAY_SIZE(stats) || !stats[type].dump) {
  1906 + dp_send_error_msg(dp, &sender,
  1907 + OFPET_BAD_REQUEST,
  1908 + OFPBRC_BAD_STAT, rq, len);
  1909 + return -EINVAL;
  1910 + }
  1911 + s = &stats[type];
  1912 + }
  1913 + body_len = len - offsetof(struct ofp_stats_request, body);
  1914 + if (body_len < s->min_body || body_len > s->max_body)
  1915 + return -EINVAL;
  1916 +
  1917 + cb->args[0] = 1;
  1918 + cb->args[1] = dp_idx;
  1919 + cb->args[2] = type;
  1920 + cb->args[3] = rq->header.xid;
  1921 + if (s->init) {
  1922 + void *state;
  1923 + err = s->init(dp, rq->body, body_len, &state);
  1924 + if (err)
  1925 + return err;
  1926 + cb->args[4] = (long) state;
  1927 + }
  1928 + } else if (cb->args[0] == 1) {
  1929 + sender.xid = cb->args[3];
  1930 + dp_idx = cb->args[1];
  1931 + if (cb->args[2] == OFPST_VENDOR) {
  1932 + /* Vendor is not in the array, take care of it */
  1933 + s = &stats_vendor;
  1934 + } else {
  1935 + s = &stats[cb->args[2]];
  1936 + }
  1937 +
  1938 + dp = dp_get_by_idx(dp_idx);
  1939 + if (!dp)
  1940 + return -ENOENT;
  1941 + } else {
  1942 + return 0;
  1943 + }
  1944 +
  1945 + osr = put_openflow_headers(dp, skb, OFPT_STATS_REPLY, &sender,
  1946 + &max_openflow_len);
  1947 + if (IS_ERR(osr))
  1948 + return PTR_ERR(osr);
  1949 + osr->type = htons(cb->args[2]);
  1950 + osr->flags = 0;
  1951 + resize_openflow_skb(skb, &osr->header, max_openflow_len);
  1952 + body = osr->body;
  1953 + body_len = max_openflow_len - offsetof(struct ofp_stats_reply, body);
  1954 +
  1955 + err = s->dump(dp, (void *) cb->args[4], body, &body_len);
  1956 + if (err >= 0) {
  1957 + if (!err)
  1958 + cb->args[0] = 2;
  1959 + else
  1960 + osr->flags = ntohs(OFPSF_REPLY_MORE);
  1961 + resize_openflow_skb(skb, &osr->header,
  1962 + (offsetof(struct ofp_stats_reply, body)
  1963 + + body_len));
  1964 + err = skb->len;
  1965 + }
  1966 +
  1967 + return err;
  1968 +}
  1969 +
  1970 +static int
  1971 +dp_genl_openflow_done(struct netlink_callback *cb)
  1972 +{
  1973 + if (cb->args[0]) {
  1974 + const struct stats_type *s;
  1975 + if (cb->args[2] == OFPST_VENDOR) {
  1976 + /* Vendor is not in the array, take care of it */
  1977 + s = &stats_vendor;
  1978 + } else {
  1979 + s = &stats[cb->args[2]];
  1980 + }
  1981 + if (s->done)
  1982 + s->done((void *) cb->args[4]);
  1983 + }
  1984 + return 0;
  1985 +}
  1986 +
/* Generic Netlink operation carrying OpenFlow messages: .doit handles
 * single requests, .dumpit handles multi-part stats replies. */
static struct genl_ops dp_genl_ops_openflow = {
	.cmd = DP_GENL_C_OPENFLOW,
	.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	.policy = dp_genl_openflow_policy,
	.doit = dp_genl_openflow,
	.dumpit = dp_genl_openflow_dumpit,
};
  1994 +
/* All Generic Netlink operations registered by dp_init_netlink(). */
static struct genl_ops *dp_genl_all_ops[] = {
	/* Keep this operation first.  Generic Netlink dispatching
	 * looks up operations with linear search, so we want it at the
	 * front. */
	&dp_genl_ops_openflow,

	&dp_genl_ops_add_dp,
	&dp_genl_ops_del_dp,
	&dp_genl_ops_query_dp,
	&dp_genl_ops_add_port,
	&dp_genl_ops_del_port,
};
  2007 +
  2008 +static int dp_init_netlink(void)
  2009 +{
  2010 + int err;
  2011 + int i;
  2012 +
  2013 + err = genl_register_family(&dp_genl_family);
  2014 + if (err)
  2015 + return err;
  2016 +
  2017 + for (i = 0; i < ARRAY_SIZE(dp_genl_all_ops); i++) {
  2018 + err = genl_register_ops(&dp_genl_family, dp_genl_all_ops[i]);
  2019 + if (err)
  2020 + goto err_unregister;
  2021 + }
  2022 +
  2023 + for (i = 0; i < N_MC_GROUPS; i++) {
  2024 + snprintf(mc_groups[i].name, sizeof mc_groups[i].name,
  2025 + "openflow%d", i);
  2026 + err = genl_register_mc_group(&dp_genl_family, &mc_groups[i]);
  2027 + if (err < 0)
  2028 + goto err_unregister;
  2029 + }
  2030 +
  2031 + return 0;
  2032 +
  2033 +err_unregister:
  2034 + genl_unregister_family(&dp_genl_family);
  2035 + return err;
  2036 +}
  2037 +
/* Unregisters the Generic Netlink family (and, with it, all the
 * operations and multicast groups registered by dp_init_netlink()). */
static void dp_uninit_netlink(void)
{
	genl_unregister_family(&dp_genl_family);
}
  2042 +
  2043 +/* Set the description strings if appropriate values are available from
  2044 + * the DMI. */
  2045 +static void set_desc(void)
  2046 +{
  2047 + const char *uuid = dmi_get_system_info(DMI_PRODUCT_UUID);
  2048 + const char *vendor = dmi_get_system_info(DMI_SYS_VENDOR);
  2049 + const char *name = dmi_get_system_info(DMI_PRODUCT_NAME);
  2050 + const char *version = dmi_get_system_info(DMI_PRODUCT_VERSION);
  2051 + const char *serial = dmi_get_system_info(DMI_PRODUCT_SERIAL);
  2052 + const char *uptr;
  2053 +
  2054 + if (!uuid || *uuid == '\0' || strlen(uuid) != 36)
  2055 + return;
  2056 +
  2057 + /* We are only interested version 1 UUIDs, since the last six bytes
  2058 + * are an IEEE 802 MAC address. */
  2059 + if (uuid[14] != '1')
  2060 + return;
  2061 +
  2062 + /* Only set if the UUID is from Nicira. */
  2063 + uptr = uuid + 24;
  2064 + if (strncmp(uptr, NICIRA_OUI_STR, strlen(NICIRA_OUI_STR)))
  2065 + return;
  2066 +
  2067 + if (vendor)
  2068 + strlcpy(mfr_desc, vendor, sizeof(mfr_desc));
  2069 + if (name || version)
  2070 + snprintf(hw_desc, sizeof(hw_desc), "%s %s",
  2071 + name ? name : "",
  2072 + version ? version : "");
  2073 + if (serial)
  2074 + strlcpy(serial_num, serial, sizeof(serial_num));
  2075 +}
  2076 +
  2077 +static int __init dp_init(void)
  2078 +{
  2079 + int err;
  2080 +
  2081 + printk("OpenFlow %s, built "__DATE__" "__TIME__", "
  2082 + "protocol 0x%02x\n", VERSION BUILDNR, OFP_VERSION);
  2083 +
  2084 + err = flow_init();
  2085 + if (err)
  2086 + goto error;
  2087 +
  2088 + err = register_netdevice_notifier(&dp_device_notifier);
  2089 + if (err)
  2090 + goto error_flow_exit;
  2091 +
  2092 + err = dp_init_netlink();
  2093 + if (err)
  2094 + goto error_unreg_notifier;
  2095 +
  2096 + dp_ioctl_hook = NULL;
  2097 + dp_add_dp_hook = NULL;
  2098 + dp_del_dp_hook = NULL;
  2099 + dp_add_if_hook = NULL;
  2100 + dp_del_if_hook = NULL;
  2101 +
  2102 + /* Check if better descriptions of the switch are available than the
  2103 + * defaults. */
  2104 + set_desc();
  2105 +
  2106 + /* Hook into callback used by the bridge to intercept packets.
  2107 + * Parasites we are. */
  2108 + if (br_handle_frame_hook)
  2109 + printk("openflow: hijacking bridge hook\n");
  2110 + br_handle_frame_hook = dp_frame_hook;
  2111 +
  2112 + return 0;
  2113 +
  2114 +error_unreg_notifier:
  2115 + unregister_netdevice_notifier(&dp_device_notifier);
  2116 +error_flow_exit:
  2117 + flow_exit();
  2118 +error:
  2119 + printk(KERN_EMERG "openflow: failed to install!");
  2120 + return err;
  2121 +}
  2122 +
/* Module exit point: tears down the forwarding subsystem, the netlink
 * interface, the netdevice notifier, and the flow subsystem, then
 * releases the bridge frame hook taken in dp_init(). */
static void dp_cleanup(void)
{
	fwd_exit();
	dp_uninit_netlink();
	unregister_netdevice_notifier(&dp_device_notifier);
	flow_exit();
	br_handle_frame_hook = NULL;
}
  2131 +
/* Module entry/exit points and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("OpenFlow Switching Datapath");
MODULE_AUTHOR("Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University");
MODULE_LICENSE("GPL");
... ...
datapath/datapath.h 0 → 100755
  1 +/* Interface exported by OpenFlow module. */
  2 +
  3 +#ifndef DATAPATH_H
  4 +#define DATAPATH_H 1
  5 +
  6 +#include <linux/kernel.h>
  7 +#include <linux/mutex.h>
  8 +#include <linux/netlink.h>
  9 +#include <linux/netdevice.h>
  10 +#include <linux/workqueue.h>
  11 +#include <linux/skbuff.h>
  12 +#include "openflow/openflow.h"
  13 +#include "openflow/nicira-ext.h"
  14 +#include "flow.h"
  15 +
  16 +
  17 +#define NL_FLOWS_PER_MESSAGE 100
  18 +
  19 +/* Capabilities supported by this implementation. */
  20 +#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \
  21 + | OFPC_TABLE_STATS \
  22 + | OFPC_PORT_STATS )
  23 +
  24 +/* Actions supported by this implementation. */
  25 +#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \
  26 + | (1 << OFPAT_SET_VLAN_VID) \
  27 + | (1 << OFPAT_SET_VLAN_PCP) \
  28 + | (1 << OFPAT_STRIP_VLAN) \
  29 + | (1 << OFPAT_SET_DL_SRC) \
  30 + | (1 << OFPAT_SET_DL_DST) \
  31 + | (1 << OFPAT_SET_NW_SRC) \
  32 + | (1 << OFPAT_SET_NW_DST) \
  33 + | (1 << OFPAT_SET_TP_SRC) \
  34 + | (1 << OFPAT_SET_TP_DST) )
  35 +
  36 +struct sk_buff;
  37 +
  38 +#define DP_MAX_PORTS 255
  39 +
/* State of one OpenFlow datapath (logical switch). */
struct datapath {
	int dp_idx;			/* Unique index of this datapath. */

	struct timer_list timer;	/* Expiration timer. */
	struct sw_chain *chain;	 /* Forwarding rules. */
	struct task_struct *dp_task; /* Kernel thread for maintenance. */

	/* Data related to the "of" device of this datapath */
	struct net_device *netdev;
	char dp_desc[DESC_STR_LEN];	/* human readible comment to ID this DP */

	/* Configuration set from controller */
	uint16_t flags;
	uint16_t miss_send_len;		/* Max bytes of a table-miss packet
					 * sent to the controller. */

	struct kobject ifobj;		/* sysfs representation. */

	/* Switch ports. */
	struct net_bridge_port *ports[DP_MAX_PORTS];
	struct net_bridge_port *local_port; /* OFPP_LOCAL port. */
	struct list_head port_list; /* All ports, including local_port. */
};
  62 +
/* Information necessary to reply to the sender of an OpenFlow message. */
struct sender {
	uint32_t xid;		/* OpenFlow transaction ID of request. */
	uint32_t pid;		/* Netlink process ID of sending socket. */
	uint32_t seq;		/* Netlink sequence ID of request. */
};
  69 +
/* One switch port attached to a datapath. */
struct net_bridge_port {
	u16	port_no;	/* OpenFlow port number. */
	u32 config;		/* Some subset of OFPPC_* flags. */
	u32 state;		/* Some subset of OFPPS_* flags. */
	spinlock_t lock;	/* Protects this port's mutable fields. */
	struct datapath	*dp;	/* Owning datapath. */
	struct net_device *dev;	/* Underlying network device. */
	struct kobject kobj;	/* sysfs representation. */
	struct list_head node; /* Element in datapath.ports. */
};
  80 +
  81 +extern struct mutex dp_mutex;
  82 +extern struct notifier_block dp_device_notifier;
  83 +extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
  84 +extern int (*dp_add_dp_hook)(struct datapath *dp);
  85 +extern int (*dp_del_dp_hook)(struct datapath *dp);
  86 +extern int (*dp_add_if_hook)(struct net_bridge_port *p);
  87 +extern int (*dp_del_if_hook)(struct net_bridge_port *p);
  88 +
  89 +int dp_del_switch_port(struct net_bridge_port *);
  90 +int dp_xmit_skb(struct sk_buff *skb);
  91 +int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
  92 + int ignore_no_fwd);
  93 +int dp_output_control(struct datapath *, struct sk_buff *, size_t, int);
  94 +void dp_set_origin(struct datapath *, uint16_t, struct sk_buff *);
  95 +int dp_send_features_reply(struct datapath *, const struct sender *);
  96 +int dp_send_config_reply(struct datapath *, const struct sender *);
  97 +int dp_send_port_status(struct net_bridge_port *p, uint8_t status);
  98 +int dp_send_flow_end(struct datapath *, struct sw_flow *,
  99 + enum ofp_flow_removed_reason);
  100 +int dp_send_error_msg(struct datapath *, const struct sender *,
  101 + uint16_t, uint16_t, const void *, size_t);
  102 +int dp_update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm);
  103 +int dp_send_echo_reply(struct datapath *, const struct sender *,
  104 + const struct ofp_header *);
  105 +int dp_send_hello(struct datapath *, const struct sender *,
  106 + const struct ofp_header *);
  107 +int dp_send_barrier_reply(struct datapath *, const struct sender *,
  108 + const struct ofp_header *);
  109 +
  110 +/* Should hold at least RCU read lock when calling */
  111 +struct datapath *dp_get_by_idx(int dp_idx);
  112 +struct datapath *dp_get_by_name(const char *dp_name);
  113 +
  114 +#endif /* datapath.h */
... ...
datapath/dp_act.c 0 → 100755
  1 +/*
  2 + * Distributed under the terms of the GNU GPL version 2.
  3 + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
  4 + * Stanford Junior University
  5 + */
  6 +
  7 +/* Functions for executing OpenFlow actions. */
  8 +
  9 +#include <linux/skbuff.h>
  10 +#include <linux/in.h>
  11 +#include <linux/ip.h>
  12 +#include <linux/tcp.h>
  13 +#include <linux/udp.h>
  14 +#include <linux/in6.h>
  15 +#include <linux/if_vlan.h>
  16 +#include <net/checksum.h>
  17 +#include "forward.h"
  18 +#include "dp_act.h"
  19 +#include "openflow/nicira-ext.h"
  20 +#include "flow.h"
  21 +
  22 +
  23 +static uint16_t
  24 +validate_output(struct datapath *dp, const struct sw_flow_key *key,
  25 + const struct ofp_action_header *ah)
  26 +{
  27 + struct ofp_action_output *oa = (struct ofp_action_output *)ah;
  28 +
  29 + if (oa->port == htons(OFPP_NONE) ||
  30 + (!(key->wildcards & OFPFW_IN_PORT) && oa->port == key->in_port))
  31 + return OFPBAC_BAD_OUT_PORT;
  32 +
  33 + return ACT_VALIDATION_OK;
  34 +}
  35 +
  36 +static int
  37 +do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len,
  38 + int out_port, int ignore_no_fwd)
  39 +{
  40 + if (!skb)
  41 + return -ENOMEM;
  42 + return (likely(out_port != OFPP_CONTROLLER)
  43 + ? dp_output_port(dp, skb, out_port, ignore_no_fwd)
  44 + : dp_output_control(dp, skb, max_len, OFPR_ACTION));
  45 +}
  46 +
  47 +
/* Removes the 802.1Q tag from 'skb' in place, shifting the Ethernet
 * addresses forward over the 4-byte VLAN header, and returns 'skb'.
 * An untagged packet is returned unchanged. */
static struct sk_buff *
vlan_pull_tag(struct sk_buff *skb)
{
	struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
	struct ethhdr *eh;


	/* Verify we were given a vlan packet */
	if (vh->h_vlan_proto != htons(ETH_P_8021Q))
		return skb;

	/* Slide the dst+src MAC addresses (2 * VLAN_ETH_ALEN bytes) up by
	 * VLAN_HLEN, overwriting the VLAN header. */
	memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);

	eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);

	/* The encapsulated ethertype becomes the packet's protocol again. */
	skb->protocol = eh->h_proto;
	skb->mac_header += VLAN_HLEN;

	return skb;
}
  68 +
  69 +
  70 +static struct sk_buff *
  71 +modify_vlan_tci(struct sk_buff *skb, struct sw_flow_key *key,
  72 + uint16_t tci, uint16_t mask)
  73 +{
  74 + struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
  75 +
  76 + if (key->dl_vlan != htons(OFP_VLAN_NONE)) {
  77 + /* Modify vlan id, but maintain other TCI values */
  78 + vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci);
  79 + } else {
  80 + /* Add vlan header */
  81 +
  82 + /* xxx The vlan_put_tag function, doesn't seem to work
  83 + * xxx reliably when it attempts to use the hardware-accelerated
  84 + * xxx version. We'll directly use the software version
  85 + * xxx until the problem can be diagnosed.
  86 + */
  87 + skb = __vlan_put_tag(skb, tci);
  88 + vh = vlan_eth_hdr(skb);
  89 + }
  90 + key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
  91 + key->dl_vlan_pcp = (uint8_t)((ntohs(vh->h_vlan_TCI) >> VLAN_PCP_SHIFT)
  92 + & VLAN_PCP_BITMASK);
  93 +
  94 + return skb;
  95 +}
  96 +
  97 +static struct sk_buff *
  98 +set_vlan_vid(struct sk_buff *skb, struct sw_flow_key *key,
  99 + const struct ofp_action_header *ah)
  100 +{
  101 + struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah;
  102 + uint16_t tci = ntohs(va->vlan_vid);
  103 +
  104 + return modify_vlan_tci(skb, key, tci, VLAN_VID_MASK);
  105 +}
  106 +
  107 +/* Mask for the priority bits in a vlan header. The kernel doesn't
  108 + * define this like it does for VID. */
  109 +#define VLAN_PCP_MASK 0xe000
  110 +
  111 +static struct sk_buff *
  112 +set_vlan_pcp(struct sk_buff *skb, struct sw_flow_key *key,
  113 + const struct ofp_action_header *ah)
  114 +{
  115 + struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah;
  116 + uint16_t tci = (uint16_t)va->vlan_pcp << 13;
  117 +
  118 + return modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK);
  119 +}
  120 +
  121 +static struct sk_buff *
  122 +strip_vlan(struct sk_buff *skb, struct sw_flow_key *key,
  123 + const struct ofp_action_header *ah)
  124 +{
  125 + vlan_pull_tag(skb);
  126 + key->dl_vlan = htons(OFP_VLAN_NONE);
  127 +
  128 + return skb;
  129 +}
  130 +
  131 +static struct sk_buff *
  132 +set_dl_addr(struct sk_buff *skb, struct sw_flow_key *key,
  133 + const struct ofp_action_header *ah)
  134 +{
  135 + struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah;
  136 + struct ethhdr *eh = eth_hdr(skb);
  137 +
  138 + if (da->type == htons(OFPAT_SET_DL_SRC))
  139 + memcpy(eh->h_source, da->dl_addr, sizeof eh->h_source);
  140 + else
  141 + memcpy(eh->h_dest, da->dl_addr, sizeof eh->h_dest);
  142 +
  143 + return skb;
  144 +}
  145 +
/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
 * covered by the sum has been changed from 'from' to 'to'.  If set,
 * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
 * Based on nf_proto_csum_replace4. */
static void update_csum(__sum16 *sum, struct sk_buff *skb,
			__be32 from, __be32 to, int pseudohdr)
{
	/* Adding ~from and to to the checksum is equivalent to replacing
	 * 'from' with 'to' in the summed data (one's-complement math). */
	__be32 diff[] = { ~from, to };
	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		*sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
				~csum_unfold(*sum)));
		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
						~skb->csum);
	} else if (pseudohdr)
		/* Hardware will finish the checksum; only the pseudo-header
		 * seed needs correcting. */
		*sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
				csum_unfold(*sum)));
}
  164 +
/* OFPAT_SET_NW_SRC / OFPAT_SET_NW_DST: rewrites the IPv4 source or
 * destination address (chosen by the action type) and fixes up the IP
 * and, where present, TCP/UDP checksums.  Non-IPv4 packets are
 * returned unchanged. */
static struct sk_buff *
set_nw_addr(struct sk_buff *skb, struct sw_flow_key *key,
	    const struct ofp_action_header *ah)
{
	struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah;
	uint16_t eth_proto = ntohs(key->dl_type);

	if (eth_proto == ETH_P_IP) {
		struct iphdr *nh = ip_hdr(skb);
		uint32_t new, *field;

		new = na->nw_addr;

		if (ah->type == htons(OFPAT_SET_NW_SRC))
			field = &nh->saddr;
		else
			field = &nh->daddr;

		/* The address is part of the TCP/UDP pseudo-header, so the
		 * transport checksum must be adjusted too.  Checksums are
		 * updated BEFORE '*field' is overwritten, since update_csum()
		 * needs the old value. */
		if (key->nw_proto == IPPROTO_TCP) {
			struct tcphdr *th = tcp_hdr(skb);
			update_csum(&th->check, skb, *field, new, 1);
		} else if (key->nw_proto == IPPROTO_UDP) {
			struct udphdr *th = udp_hdr(skb);
			update_csum(&th->check, skb, *field, new, 1);
		}
		update_csum(&nh->check, skb, *field, new, 0);
		*field = new;
	}

	return skb;
}
  196 +
/* OFPAT_SET_NW_TOS: rewrites the DSCP bits of the IPv4 TOS byte,
 * preserving the two ECN bits, and fixes up the IP header checksum.
 * Non-IPv4 packets are returned unchanged. */
static struct sk_buff *
set_nw_tos(struct sk_buff *skb, struct sw_flow_key *key,
	   const struct ofp_action_header *ah)
{
	struct ofp_action_nw_tos *nt = (struct ofp_action_nw_tos *)ah;
	uint16_t eth_proto = ntohs(key->dl_type);

	if (eth_proto == ETH_P_IP) {
		struct iphdr *nh = ip_hdr(skb);
		uint8_t new, *field;

		/* JeanII : Set only 6 bits, don't clobber ECN */
		new = (nt->nw_tos & 0xFC) | (nh->tos & 0x03);

		/* Get address of field */
		field = &nh->tos;
		/* jklee : ip tos field is not included in TCP pseudo header.
		 * Need magic as update_csum() don't work with 8 bits. */
		update_csum(&nh->check, skb, htons((uint16_t)*field),
			    htons((uint16_t)new), 0);

		/* Update in packet */
		*field = new;
	}

	return skb;
}
  224 +
/* OFPAT_SET_TP_SRC / OFPAT_SET_TP_DST: rewrites the TCP or UDP source
 * or destination port (chosen by the action type) and fixes up the
 * transport checksum.  Only IPv4 TCP/UDP packets are modified; all
 * others are returned unchanged. */
static struct sk_buff *
set_tp_port(struct sk_buff *skb, struct sw_flow_key *key,
	    const struct ofp_action_header *ah)
{
	struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah;
	uint16_t eth_proto = ntohs(key->dl_type);

	if (eth_proto == ETH_P_IP) {
		uint16_t new, *field;

		/* Already in network byte order on the wire. */
		new = ta->tp_port;

		if (key->nw_proto == IPPROTO_TCP) {
			struct tcphdr *th = tcp_hdr(skb);

			if (ah->type == htons(OFPAT_SET_TP_SRC))
				field = &th->source;
			else
				field = &th->dest;

			/* Checksum must be updated before the field, since
			 * update_csum() needs the old value. */
			update_csum(&th->check, skb, *field, new, 1);
			*field = new;
		} else if (key->nw_proto == IPPROTO_UDP) {
			struct udphdr *th = udp_hdr(skb);

			if (ah->type == htons(OFPAT_SET_TP_SRC))
				field = &th->source;
			else
				field = &th->dest;

			update_csum(&th->check, skb, *field, new, 1);
			*field = new;
		}
	}

	return skb;
}
  262 +
/* Descriptor for one built-in OpenFlow action type (see of_actions[]). */
struct openflow_action {
	size_t min_size;	/* Minimum valid wire length of the action. */
	size_t max_size;	/* Maximum valid wire length of the action. */
	/* Optional extra validation beyond the length check; returns
	 * ACT_VALIDATION_OK or an OFPBAC_* code.  NULL if the length
	 * check suffices. */
	uint16_t (*validate)(struct datapath *dp,
			     const struct sw_flow_key *key,
			     const struct ofp_action_header *ah);
	/* Applies the action to 'skb', possibly returning a replacement
	 * skb.  NULL for OFPAT_OUTPUT, which is handled specially. */
	struct sk_buff *(*execute)(struct sk_buff *skb,
				   struct sw_flow_key *key,
				   const struct ofp_action_header *ah);
};
  273 +
  274 +static const struct openflow_action of_actions[] = {
  275 + [OFPAT_OUTPUT] = {
  276 + sizeof(struct ofp_action_output),
  277 + sizeof(struct ofp_action_output),
  278 + validate_output,
  279 + NULL /* This is optimized into execute_actions */
  280 + },
  281 + [OFPAT_SET_VLAN_VID] = {
  282 + sizeof(struct ofp_action_vlan_vid),
  283 + sizeof(struct ofp_action_vlan_vid),
  284 + NULL,
  285 + set_vlan_vid
  286 + },
  287 + [OFPAT_SET_VLAN_PCP] = {
  288 + sizeof(struct ofp_action_vlan_pcp),
  289 + sizeof(struct ofp_action_vlan_pcp),
  290 + NULL,
  291 + set_vlan_pcp
  292 + },
  293 + [OFPAT_STRIP_VLAN] = {
  294 + sizeof(struct ofp_action_header),
  295 + sizeof(struct ofp_action_header),
  296 + NULL,
  297 + strip_vlan
  298 + },
  299 + [OFPAT_SET_DL_SRC] = {
  300 + sizeof(struct ofp_action_dl_addr),
  301 + sizeof(struct ofp_action_dl_addr),
  302 + NULL,
  303 + set_dl_addr
  304 + },
  305 + [OFPAT_SET_DL_DST] = {
  306 + sizeof(struct ofp_action_dl_addr),
  307 + sizeof(struct ofp_action_dl_addr),
  308 + NULL,
  309 + set_dl_addr
  310 + },
  311 + [OFPAT_SET_NW_SRC] = {
  312 + sizeof(struct ofp_action_nw_addr),
  313 + sizeof(struct ofp_action_nw_addr),
  314 + NULL,
  315 + set_nw_addr
  316 + },
  317 + [OFPAT_SET_NW_DST] = {
  318 + sizeof(struct ofp_action_nw_addr),
  319 + sizeof(struct ofp_action_nw_addr),
  320 + NULL,
  321 + set_nw_addr
  322 + },
  323 + [OFPAT_SET_NW_TOS] = {
  324 + sizeof(struct ofp_action_nw_tos),
  325 + sizeof(struct ofp_action_nw_tos),
  326 + NULL,
  327 + set_nw_tos
  328 + },
  329 + [OFPAT_SET_TP_SRC] = {
  330 + sizeof(struct ofp_action_tp_port),
  331 + sizeof(struct ofp_action_tp_port),
  332 + NULL,
  333 + set_tp_port
  334 + },
  335 + [OFPAT_SET_TP_DST] = {
  336 + sizeof(struct ofp_action_tp_port),
  337 + sizeof(struct ofp_action_tp_port),
  338 + NULL,
  339 + set_tp_port
  340 + }
  341 + /* OFPAT_VENDOR is not here, since it would blow up the array size. */
  342 +};
  343 +
  344 +/* Validate built-in OpenFlow actions. Either returns ACT_VALIDATION_OK
  345 + * or an OFPET_BAD_ACTION error code. */
  346 +static uint16_t
  347 +validate_ofpat(struct datapath *dp, const struct sw_flow_key *key,
  348 + const struct ofp_action_header *ah, uint16_t type, uint16_t len)
  349 +{
  350 + uint16_t ret = ACT_VALIDATION_OK;
  351 + const struct openflow_action *act = &of_actions[type];
  352 +
  353 + if ((len < act->min_size) || (len > act->max_size))
  354 + return OFPBAC_BAD_LEN;
  355 +
  356 + if (act->validate)
  357 + ret = act->validate(dp, key, ah);
  358 +
  359 + return ret;
  360 +}
  361 +
  362 +/* Validate vendor-defined actions. Either returns ACT_VALIDATION_OK
  363 + * or an OFPET_BAD_ACTION error code. */
  364 +static uint16_t
  365 +validate_vendor(struct datapath *dp, const struct sw_flow_key *key,
  366 + const struct ofp_action_header *ah, uint16_t len)
  367 +{
  368 + struct ofp_action_vendor_header *avh;
  369 + int ret = ACT_VALIDATION_OK;
  370 +
  371 + if (len < sizeof(struct ofp_action_vendor_header))
  372 + return OFPBAC_BAD_LEN;
  373 +
  374 + avh = (struct ofp_action_vendor_header *)ah;
  375 +
  376 + switch(ntohl(avh->vendor)) {
  377 + default:
  378 + return OFPBAC_BAD_VENDOR;
  379 + }
  380 +
  381 + return ret;
  382 +}
  383 +
/* Validates a list of actions. If a problem is found, a code for the
 * OFPET_BAD_ACTION error type is returned. If the action list validates,
 * ACT_VALIDATION_OK is returned. */
uint16_t
validate_actions(struct datapath *dp, const struct sw_flow_key *key,
		const struct ofp_action_header *actions, size_t actions_len)
{
	uint8_t *p = (uint8_t *)actions;
	int err;

	while (actions_len >= sizeof(struct ofp_action_header)) {
		struct ofp_action_header *ah = (struct ofp_action_header *)p;
		size_t len = ntohs(ah->len);
		uint16_t type;

		/* Make sure there's enough remaining data for the specified
		 * length and that the action length is a multiple of 64 bits. */
		if ((actions_len < len) || (len % 8) != 0)
			return OFPBAC_BAD_LEN;

		type = ntohs(ah->type);
		if (type < ARRAY_SIZE(of_actions)) {
			/* Built-in action: size bounds + per-type hook. */
			err = validate_ofpat(dp, key, ah, type, len);
			if (err != ACT_VALIDATION_OK)
				return err;
		} else if (type == OFPAT_VENDOR) {
			err = validate_vendor(dp, key, ah, len);
			if (err != ACT_VALIDATION_OK)
				return err;
		} else
			return OFPBAC_BAD_TYPE;

		/* NOTE(review): len == 0 passes the checks above; loop
		 * termination relies on the per-type validators rejecting
		 * it via their minimum-size checks — confirm every entry
		 * of of_actions has min_size > 0. */
		p += len;
		actions_len -= len;
	}

	/* Check if there's any trailing garbage. */
	if (actions_len != 0)
		return OFPBAC_BAD_LEN;

	return ACT_VALIDATION_OK;
}
  426 +
  427 +/* Execute a built-in OpenFlow action against 'skb'. */
  428 +static struct sk_buff *
  429 +execute_ofpat(struct sk_buff *skb, struct sw_flow_key *key,
  430 + const struct ofp_action_header *ah, uint16_t type)
  431 +{
  432 + const struct openflow_action *act = &of_actions[type];
  433 + if (act->execute && make_writable(&skb))
  434 + skb = act->execute(skb, key, ah);
  435 + return skb;
  436 +}
  437 +
/* Execute a vendor-defined action against 'skb'. */
static struct sk_buff *
execute_vendor(struct sk_buff *skb, const struct sw_flow_key *key,
		const struct ofp_action_header *ah)
{
	struct ofp_action_vendor_header *avh
			= (struct ofp_action_vendor_header *)ah;
	/* 'dp' is only needed for the device name in the warning below. */
	struct datapath *dp = skb->dev->br_port->dp;

	/* NB: If changes need to be made to the packet, a call should be
	 * made to make_writable or its equivalent first. */

	switch(ntohl(avh->vendor)) {
	default:
		/* This should not be possible due to prior validation. */
		if (net_ratelimit())
			printk(KERN_WARNING "%s: attempt to execute action "
			       "with unknown vendor: %#x\n",
			       dp->netdev->name, ntohl(avh->vendor));
		break;
	}

	return skb;
}
  462 +
  463 +/* Execute a list of actions against 'skb'. */
  464 +void execute_actions(struct datapath *dp, struct sk_buff *skb,
  465 + struct sw_flow_key *key,
  466 + const struct ofp_action_header *actions, size_t actions_len,
  467 + int ignore_no_fwd)
  468 +{
  469 + /* Every output action needs a separate clone of 'skb', but the common
  470 + * case is just a single output action, so that doing a clone and
  471 + * then freeing the original skbuff is wasteful. So the following code
  472 + * is slightly obscure just to avoid that. */
  473 + int prev_port;
  474 + size_t max_len = UINT16_MAX;
  475 + uint8_t *p = (uint8_t *)actions;
  476 +
  477 + prev_port = -1;
  478 +
  479 + /* The action list was already validated, so we can be a bit looser
  480 + * in our sanity-checking. */
  481 + while (actions_len > 0) {
  482 + struct ofp_action_header *ah = (struct ofp_action_header *)p;
  483 + size_t len = htons(ah->len);
  484 +
  485 + WARN_ON_ONCE(skb_shared(skb));
  486 + if (prev_port != -1) {
  487 + do_output(dp, skb_clone(skb, GFP_ATOMIC),
  488 + max_len, prev_port, ignore_no_fwd);
  489 + prev_port = -1;
  490 + }
  491 +
  492 + if (likely(ah->type == htons(OFPAT_OUTPUT))) {
  493 + struct ofp_action_output *oa = (struct ofp_action_output *)p;
  494 + prev_port = ntohs(oa->port);
  495 + max_len = ntohs(oa->max_len);
  496 + } else {
  497 + uint16_t type = ntohs(ah->type);
  498 +
  499 + if (type < ARRAY_SIZE(of_actions))
  500 + skb = execute_ofpat(skb, key, ah, type);
  501 + else if (type == OFPAT_VENDOR)
  502 + skb = execute_vendor(skb, key, ah);
  503 +
  504 + if (!skb) {
  505 + if (net_ratelimit())
  506 + printk(KERN_WARNING "%s: "
  507 + "execute_actions lost skb\n",
  508 + dp->netdev->name);
  509 + return;
  510 + }
  511 + }
  512 +
  513 + p += len;
  514 + actions_len -= len;
  515 + }
  516 + if (prev_port != -1)
  517 + do_output(dp, skb, max_len, prev_port, ignore_no_fwd);
  518 + else
  519 + kfree_skb(skb);
  520 +}
  521 +
  522 +/* Utility functions. */
  523 +
/* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to
 * the copy.
 * Returns 1 if successful, 0 on failure. */
int
make_writable(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	if (skb_shared(skb) || skb_cloned(skb)) {
		/* Data is shared with another holder: take a private copy
		 * and release the original only after the copy succeeds,
		 * so '*pskb' stays valid on failure. */
		struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
		if (!nskb)
			return 0;
		kfree_skb(skb);
		*pskb = nskb;
		return 1;
	} else {
		/* Exclusive owner: just ensure the headers up to and
		 * including a TCP header are in the linear data area so the
		 * action handlers can modify them in place. */
		unsigned int hdr_len = (skb_transport_offset(skb)
					+ sizeof(struct tcphdr));
		return pskb_may_pull(skb, min(hdr_len, skb->len));
	}
}
... ...
datapath/dp_act.h 0 → 100755
  1 +#ifndef DP_ACT_H
  2 +#define DP_ACT_H 1
  3 +
  4 +#include "datapath.h"
  5 +
  6 +#define ACT_VALIDATION_OK ((uint16_t)-1)
  7 +
  8 +uint16_t validate_actions(struct datapath *, const struct sw_flow_key *,
  9 + const struct ofp_action_header *, size_t);
  10 +void execute_actions(struct datapath *, struct sk_buff *,
  11 + struct sw_flow_key *, const struct ofp_action_header *,
  12 + size_t action_len, int ignore_no_fwd);
  13 +int make_writable(struct sk_buff **pskb);
  14 +
  15 +#endif /* dp_act.h */
... ...
datapath/dp_dev.c 0 → 100755
  1 +#include <linux/kernel.h>
  2 +#include <linux/netdevice.h>
  3 +#include <linux/etherdevice.h>
  4 +#include <linux/ethtool.h>
  5 +#include <linux/rcupdate.h>
  6 +#include <linux/skbuff.h>
  7 +#include <linux/workqueue.h>
  8 +
  9 +#include "datapath.h"
  10 +#include "dp_dev.h"
  11 +#include "forward.h"
  12 +
  13 +
/* Fetch our per-device private state from 'netdev'. */
static struct dp_dev *dp_dev_priv(struct net_device *netdev)
{
	struct dp_dev *priv = netdev_priv(netdev);

	return priv;
}
  18 +
  19 +struct datapath *dp_dev_get_dp(struct net_device *netdev)
  20 +{
  21 + return dp_dev_priv(netdev)->dp;
  22 +}
  23 +EXPORT_SYMBOL(dp_dev_get_dp);
  24 +
  25 +static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
  26 +{
  27 + struct dp_dev *dp_dev = dp_dev_priv(netdev);
  28 + return &dp_dev->stats;
  29 +}
  30 +
/* Inject 'skb' into the network stack as if received on 'netdev'.
 * Returns the number of bytes handed up. */
int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb)
{
	/* Capture the length first: after netif_rx() the stack owns the
	 * skb and it must not be touched. */
	int len = skb->len;
	struct dp_dev *dp_dev = dp_dev_priv(netdev);
	skb->dev = netdev;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, netdev);
	/* netif_rx() from interrupt context; netif_rx_ni() from process
	 * context (it also kicks softirq processing). */
	if (in_interrupt())
		netif_rx(skb);
	else
		netif_rx_ni(skb);
	netdev->last_rx = jiffies;
	dp_dev->stats.rx_packets++;
	dp_dev->stats.rx_bytes += len;
	return len;
}
  47 +
/* net_device callback to change the device MAC address.  'p' points to
 * a struct sockaddr carrying the new address.  Fails with -EBUSY while
 * the device is up and -EADDRNOTAVAIL for invalid (multicast/zero)
 * addresses. */
static int dp_dev_mac_addr(struct net_device *dev, void *p)
{
	struct sockaddr *addr = p;

	if (netif_running(dev))
		return -EBUSY;
	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;
	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	return 0;
}
  59 +
/* Transmit callback for the datapath device: queue 'skb' for deferred
 * processing by dp_dev_do_xmit() in process context.  Always returns 0
 * (the packet is accepted or silently dropped). */
static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct dp_dev *dp_dev = dp_dev_priv(netdev);
	struct datapath *dp = dp_dev->dp;

	/* By orphaning 'skb' we will screw up socket accounting slightly, but
	 * the effect is limited to the device queue length. If we don't
	 * do this, then the sk_buff will be destructed eventually, but it is
	 * harder to predict when. */
	skb_orphan(skb);

	/* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue,
	 * so we need to have our own clone. (At any rate, fwd_port_input()
	 * will need its own clone, so there's no benefit to queuing any other
	 * way.) */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return 0;

	dp_dev->stats.tx_packets++;
	dp_dev->stats.tx_bytes += skb->len;

	if (skb_queue_len(&dp_dev->xmit_queue) >= dp->netdev->tx_queue_len) {
		/* Queue overflow. Stop transmitter. */
		netif_stop_queue(dp->netdev);

		/* We won't see all dropped packets individually, so overrun
		 * error is appropriate. */
		dp_dev->stats.tx_fifo_errors++;
	}
	/* NOTE(review): the skb is queued even when the overflow branch
	 * above fired, so the queue can exceed tx_queue_len by one. */
	skb_queue_tail(&dp_dev->xmit_queue, skb);
	dp->netdev->trans_start = jiffies;

	schedule_work(&dp_dev->xmit_work);

	return 0;
}
  97 +
/* Work-queue handler: drain the xmit queue, feeding each packet into
 * the datapath's forwarding chain, then restart the transmitter. */
static void dp_dev_do_xmit(struct work_struct *work)
{
	struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
	struct datapath *dp = dp_dev->dp;
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
		skb_reset_mac_header(skb);
		/* fwd_port_input() consumes the skb; RCU protects the
		 * chain's flow tables during the lookup. */
		rcu_read_lock();
		fwd_port_input(dp->chain, skb, dp->local_port);
		rcu_read_unlock();
	}
	netif_wake_queue(dp->netdev);
}
  112 +
/* net_device open callback: enable transmission.  Also used by
 * is_dp_dev() as the signature of a datapath device. */
static int dp_dev_open(struct net_device *dev)
{
	netif_start_queue(dev);

	return 0;
}
  118 +
/* net_device stop callback: halt transmission. */
static int dp_dev_stop(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
  124 +
  125 +static void dp_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
  126 +{
  127 + strcpy(info->driver, "openflow");
  128 + sprintf(info->version, "0x%d", OFP_VERSION);
  129 + strcpy(info->fw_version, "N/A");
  130 + strcpy(info->bus_info, "N/A");
  131 +}
  132 +
/* ethtool callbacks: custom drvinfo, generic ethtool_op_* helpers for
 * link and offload state. */
static struct ethtool_ops dp_ethtool_ops = {
	.get_drvinfo = dp_getinfo,
	.get_link = ethtool_op_get_link,
	.get_sg = ethtool_op_get_sg,
	.get_tx_csum = ethtool_op_get_tx_csum,
	.get_tso = ethtool_op_get_tso,
};
  140 +
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
/* net_device_ops table for kernels >= 2.6.31; older kernels assign the
 * callbacks directly on the net_device in do_setup().  Unused hooks are
 * left explicitly NULL. */
static const struct net_device_ops dp_netdev_ops = {
	.ndo_init = NULL,
	.ndo_uninit = NULL,
	.ndo_open = dp_dev_open,
	.ndo_stop = dp_dev_stop,
	.ndo_start_xmit = dp_dev_xmit,
	.ndo_select_queue = NULL,
	.ndo_change_rx_flags = NULL,
	.ndo_set_rx_mode = NULL,
	.ndo_set_multicast_list = NULL,
	.ndo_set_mac_address = dp_dev_mac_addr,
	.ndo_validate_addr = NULL,
	.ndo_do_ioctl = NULL,
	.ndo_set_config = NULL,
	.ndo_change_mtu = NULL,
	.ndo_tx_timeout = NULL,
	.ndo_get_stats = dp_dev_get_stats,
	.ndo_vlan_rx_register = NULL,
	.ndo_vlan_rx_add_vid = NULL,
	.ndo_vlan_rx_kill_vid = NULL,
	.ndo_poll_controller = NULL
};
#endif
  165 +
/* alloc_netdev() setup hook: initialize 'netdev' as an Ethernet device,
 * install our callbacks, and assign a random MAC in the Nicira OUI. */
static void
do_setup(struct net_device *netdev)
{
	ether_setup(netdev);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
	netdev->netdev_ops = &dp_netdev_ops;
#else
	/* Pre-2.6.31 kernels take the callbacks directly. */
	netdev->do_ioctl = dp_ioctl_hook;
	netdev->get_stats = dp_dev_get_stats;
	netdev->hard_start_xmit = dp_dev_xmit;
	netdev->open = dp_dev_open;
	netdev->stop = dp_dev_stop;
	netdev->set_mac_address = dp_dev_mac_addr;
#endif
	SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops);
	netdev->tx_queue_len = 100;

	netdev->flags = IFF_BROADCAST | IFF_MULTICAST;

	random_ether_addr(netdev->dev_addr);

	/* Set the OUI to the Nicira one. */
	netdev->dev_addr[0] = 0x00;
	netdev->dev_addr[1] = 0x23;
	netdev->dev_addr[2] = 0x20;

	/* Set the top bits to indicate random Nicira address. */
	netdev->dev_addr[3] |= 0xc0;
}
  196 +
/* Create a datapath device associated with 'dp'. If 'dp_name' is null,
 * the device name will be of the form 'of<dp_idx>'.
 *
 * Returns 0 on success or a negative errno.
 *
 * Called with RTNL lock and dp_mutex.*/
int dp_dev_setup(struct datapath *dp, const char *dp_name)
{
	struct dp_dev *dp_dev;
	struct net_device *netdev;
	char dev_name[IFNAMSIZ];
	int err;

	if (dp_name) {
		if (strlen(dp_name) >= IFNAMSIZ)
			return -EINVAL;
		/* Safe despite strncpy's no-terminator pitfall: the length
		 * check above guarantees room for the NUL. */
		strncpy(dev_name, dp_name, sizeof(dev_name));
	} else
		snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx);

	/* do_setup() initializes callbacks and the MAC address. */
	netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup);
	if (!netdev)
		return -ENOMEM;

	err = register_netdevice(netdev);
	if (err) {
		free_netdev(netdev);
		return err;
	}

	dp_dev = dp_dev_priv(netdev);
	dp_dev->dp = dp;
	skb_queue_head_init(&dp_dev->xmit_queue);
	INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
	dp->netdev = netdev;
	return 0;
}
  232 +
/* Tear down the datapath device created by dp_dev_setup().
 *
 * Called with RTNL lock and dp_mutex.*/
void dp_dev_destroy(struct datapath *dp)
{
	struct dp_dev *dp_dev = dp_dev_priv(dp->netdev);

	/* Stop new transmissions, wait for in-flight receivers, then drop
	 * anything still queued before unregistering the device. */
	netif_tx_disable(dp->netdev);
	synchronize_net();
	skb_queue_purge(&dp_dev->xmit_queue);
	unregister_netdevice(dp->netdev);
}
  243 +
/* Returns nonzero if 'netdev' is a datapath device created by this
 * module, identified by its open callback being dp_dev_open. */
int is_dp_dev(struct net_device *netdev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
	return netdev->netdev_ops->ndo_open == dp_dev_open;

#else
	return netdev->open == dp_dev_open;
#endif
}
... ...
datapath/dp_dev.h 0 → 100755
  1 +#ifndef DP_DEV_H
  2 +#define DP_DEV_H 1
  3 +
/* Private state of a datapath network device ("of<N>"). */
struct dp_dev {
	struct net_device_stats stats;	/* Interface counters. */
	struct datapath *dp;		/* Owning datapath. */
	struct sk_buff_head xmit_queue;	/* Packets awaiting xmit_work. */
	struct work_struct xmit_work;	/* Defers transmit to process context. */
};
  10 +
  11 +int dp_dev_setup(struct datapath *, const char *);
  12 +void dp_dev_destroy(struct datapath *);
  13 +int dp_dev_recv(struct net_device *, struct sk_buff *);
  14 +int is_dp_dev(struct net_device *);
  15 +struct datapath *dp_dev_get_dp(struct net_device *);
  16 +
  17 +#endif /* dp_dev.h */
... ...
datapath/dp_notify.c 0 → 100755
  1 +/*
  2 + * Distributed under the terms of the GNU GPL version 2.
  3 + * Copyright (c) 2007, 2008, 2009 The Board of Trustees of The Leland
  4 + * Stanford Junior University
  5 + */
  6 +
  7 +/* Handle changes to managed devices */
  8 +
  9 +#include <linux/netdevice.h>
  10 +
  11 +#include "datapath.h"
  12 +
  13 +
/* Netdevice notifier callback: when a device attached to a datapath
 * (dev->br_port non-null) is unregistered, remove the corresponding
 * switch port.  Always returns NOTIFY_DONE. */
static int dp_device_event(struct notifier_block *unused, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = ptr;
	struct net_bridge_port *p = dev->br_port;
	unsigned long int flags;


	/* Check if monitored port */
	if (!p)
		return NOTIFY_DONE;

	spin_lock_irqsave(&p->lock, flags);
	switch (event) {
	case NETDEV_UNREGISTER:
		/* The port lock must be dropped before taking dp_mutex:
		 * dp_del_switch_port() sleeps. */
		spin_unlock_irqrestore(&p->lock, flags);
		mutex_lock(&dp_mutex);
		dp_del_switch_port(p);
		mutex_unlock(&dp_mutex);
		return NOTIFY_DONE;
		break;	/* not reached: the return above exits the case */
	}
	spin_unlock_irqrestore(&p->lock, flags);

	return NOTIFY_DONE;
}
  40 +
/* Netdevice notifier block registered by the datapath module so it can
 * react to device events (currently only NETDEV_UNREGISTER). */
struct notifier_block dp_device_notifier = {
	.notifier_call = dp_device_event
};
... ...
datapath/flow.c 0 → 100755
  1 +
  2 +/*
  3 + * Distributed under the terms of the GNU GPL version 2.
  4 + * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
  5 + * Stanford Junior University
  6 + */
  7 +
  8 +#include "flow.h"
  9 +#include <linux/netdevice.h>
  10 +#include <linux/etherdevice.h>
  11 +#include <linux/if_ether.h>
  12 +#include <linux/if_vlan.h>
  13 +#include <net/llc_pdu.h>
  14 +#include <linux/jiffies.h>
  15 +#include <linux/kernel.h>
  16 +#include <linux/llc.h>
  17 +#include <linux/module.h>
  18 +#include <linux/in.h>
  19 +#include <linux/rcupdate.h>
  20 +
  21 +#include "openflow/openflow.h"
  22 +#include "openflow/nicira-ext.h"
  23 +#include "compat.h"
  24 +
  25 +struct kmem_cache *flow_cache;
  26 +
/* Internal function used to compare fields in flow. */
/* Each clause follows the pattern "(w & FLAG || a->f == b->f)": the
 * field matches when it is wildcarded in 'w' OR compares equal.  The IP
 * addresses use explicit bitmasks (all-zero mask == fully wildcarded)
 * instead of single wildcard flags. */
static inline
int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b,
		uint32_t w, uint32_t src_mask, uint32_t dst_mask)
{
	return ((w & OFPFW_IN_PORT || a->in_port == b->in_port)
		&& (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan)
		&& (w & OFPFW_DL_VLAN_PCP || a->dl_vlan_pcp == b->dl_vlan_pcp)
		&& (w & OFPFW_DL_SRC || !memcmp(a->dl_src, b->dl_src, ETH_ALEN))
		&& (w & OFPFW_DL_DST || !memcmp(a->dl_dst, b->dl_dst, ETH_ALEN))
		&& (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type)
		&& (w & OFPFW_NW_TOS || a->nw_tos == b->nw_tos)
		&& (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto)
		&& !((a->nw_src ^ b->nw_src) & src_mask)
		&& !((a->nw_dst ^ b->nw_dst) & dst_mask)
		&& (w & OFPFW_TP_SRC || a->tp_src == b->tp_src)
		&& (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst));
}
  45 +
  46 +/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
  47 + * modulo wildcards in 'b', zero otherwise. */
  48 +int flow_matches_1wild(const struct sw_flow_key *a,
  49 + const struct sw_flow_key *b)
  50 +{
  51 + return flow_fields_match(a, b, b->wildcards,
  52 + b->nw_src_mask, b->nw_dst_mask);
  53 +}
  54 +EXPORT_SYMBOL(flow_matches_1wild);
  55 +
  56 +/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
  57 + * modulo wildcards in 'a' or 'b', zero otherwise. */
  58 +int flow_matches_2wild(const struct sw_flow_key *a,
  59 + const struct sw_flow_key *b)
  60 +{
  61 + return flow_fields_match(a, b,
  62 + a->wildcards | b->wildcards,
  63 + a->nw_src_mask & b->nw_src_mask,
  64 + a->nw_dst_mask & b->nw_dst_mask);
  65 +}
  66 +EXPORT_SYMBOL(flow_matches_2wild);
  67 +
  68 +/* Returns nonzero if 't' (the table entry's key) and 'd' (the key
  69 + * describing the match) match, that is, if their fields are
  70 + * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the
  71 + * wildcards must match in both 't_key' and 'd_key'. Note that the
  72 + * table's wildcards are ignored unless 'strict' is set. */
  73 +int flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d,
  74 + int strict)
  75 +{
  76 + if (strict && d->wildcards != t->wildcards)
  77 + return 0;
  78 + return flow_matches_1wild(t, d);
  79 +}
  80 +EXPORT_SYMBOL(flow_matches_desc);
  81 +
  82 +/* Returns nonzero if 't' (the table entry's key) and 'd' (the key
  83 + * describing the match) match, that is, if their fields are
  84 + * equal modulo 't' or 'd' wildcards, zero otherwise. If 'strict' is nonzero, the
  85 + * wildcards must match in both 't_key' and 'd_key'. Note that the
  86 + * table's wildcards are ignored unless 'strict' is set. */
  87 +int
  88 +flow_matches_2desc(const struct sw_flow_key *t, const struct sw_flow_key *d,
  89 + int strict)
  90 +{
  91 + if (strict && d->wildcards != t->wildcards) {
  92 + return 0;
  93 + }
  94 + return flow_matches_2wild(t, d);
  95 +}
  96 +EXPORT_SYMBOL(flow_matches_2desc);
  97 +
  98 +static uint32_t make_nw_mask(int n_wild_bits)
  99 +{
  100 + n_wild_bits &= (1u << OFPFW_NW_SRC_BITS) - 1;
  101 + return n_wild_bits < 32 ? htonl(~((1u << n_wild_bits) - 1)) : 0;
  102 +}
  103 +
/* Convert the wire-format match 'from' into the internal flow key 'to',
 * normalizing the wildcard bits so that undefined layers are marked
 * either fully wildcarded or exact-match as appropriate. */
void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from)
{
	to->wildcards = ntohl(from->wildcards) & OFPFW_ALL;
	to->dl_vlan_pcp = from->dl_vlan_pcp;
	to->in_port = from->in_port;
	to->dl_vlan = from->dl_vlan;
	memcpy(to->dl_src, from->dl_src, ETH_ALEN);
	memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
	to->dl_type = from->dl_type;

	to->nw_tos = to->nw_proto = to->nw_src = to->nw_dst = 0;
	to->tp_src = to->tp_dst = 0;
	memset(to->pad, 0, sizeof(to->pad));

#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST)
#define OFPFW_NW (OFPFW_NW_TOS | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK)
	if (to->wildcards & OFPFW_DL_TYPE) {
		/* Can't sensibly match on network or transport headers if the
		 * data link type is unknown. */
		to->wildcards |= OFPFW_NW | OFPFW_TP;
	} else if (from->dl_type == htons(ETH_P_IP)) {
		/* Mask off the two ECN bits of the TOS byte. */
		to->nw_tos = from->nw_tos & 0xfc;
		to->nw_proto = from->nw_proto;
		to->nw_src = from->nw_src;
		to->nw_dst = from->nw_dst;

		if (to->wildcards & OFPFW_NW_PROTO) {
			/* Can't sensibly match on transport headers if the
			 * network protocol is unknown. */
			to->wildcards |= OFPFW_TP;
		} else if (from->nw_proto == IPPROTO_TCP
				|| from->nw_proto == IPPROTO_UDP
				|| from->nw_proto == IPPROTO_ICMP) {
			to->tp_src = from->tp_src;
			to->tp_dst = from->tp_dst;
		} else {
			/* Transport layer fields are undefined. Mark them as
			 * exact-match to allow such flows to reside in
			 * table-hash, instead of falling into table-linear. */
			to->wildcards &= ~OFPFW_TP;
		}
	} else {
		/* Network and transport layer fields are undefined. Mark them
		 * as exact-match to allow such flows to reside in table-hash,
		 * instead of falling into table-linear. */
		to->wildcards &= ~(OFPFW_NW | OFPFW_TP);
	}

	/* We set these late because code above adjusts to->wildcards. */
	to->nw_src_mask = make_nw_mask(to->wildcards >> OFPFW_NW_SRC_SHIFT);
	to->nw_dst_mask = make_nw_mask(to->wildcards >> OFPFW_NW_DST_SHIFT);
}
  156 +
  157 +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from)
  158 +{
  159 + to->wildcards = htonl(from->wildcards);
  160 + to->in_port = from->in_port;
  161 + to->dl_vlan = from->dl_vlan;
  162 + memcpy(to->dl_src, from->dl_src, ETH_ALEN);
  163 + memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
  164 + to->dl_type = from->dl_type;
  165 + to->nw_tos = from->nw_tos;
  166 + to->nw_proto = from->nw_proto;
  167 + to->nw_src = from->nw_src;
  168 + to->nw_dst = from->nw_dst;
  169 + to->tp_src = from->tp_src;
  170 + to->tp_dst = from->tp_dst;
  171 + to->dl_vlan_pcp = from->dl_vlan_pcp;
  172 +}
  173 +
  174 +int flow_timeout(struct sw_flow *flow)
  175 +{
  176 + if (flow->idle_timeout != OFP_FLOW_PERMANENT
  177 + && time_after64(get_jiffies_64(), flow->used + flow->idle_timeout * HZ))
  178 + return OFPRR_IDLE_TIMEOUT;
  179 + else if (flow->hard_timeout != OFP_FLOW_PERMANENT
  180 + && time_after64(get_jiffies_64(),
  181 + flow->created + flow->hard_timeout * HZ))
  182 + return OFPRR_HARD_TIMEOUT;
  183 + else
  184 + return -1;
  185 +}
  186 +EXPORT_SYMBOL(flow_timeout);
  187 +
/* Returns nonzero if 'flow' contains an output action to 'out_port' or
 * has the value OFPP_NONE. 'out_port' is in network-byte order. */
int flow_has_out_port(struct sw_flow *flow, uint16_t out_port)
{
	struct sw_flow_actions *sf_acts;
	size_t actions_len;
	uint8_t *p;

	/* OFPP_NONE means "match any output port". */
	if (out_port == htons(OFPP_NONE))
		return 1;

	/* Actions may be replaced concurrently; read the pointer under RCU. */
	sf_acts = rcu_dereference(flow->sf_acts);

	actions_len = sf_acts->actions_len;
	p = (uint8_t *)sf_acts->actions;

	while (actions_len > 0) {
		struct ofp_action_header *ah = (struct ofp_action_header *)p;
		size_t len = ntohs(ah->len);

		if (ah->type == htons(OFPAT_OUTPUT)) {
			struct ofp_action_output *oa = (struct ofp_action_output *)p;
			/* Both ports are network byte order; compare raw. */
			if (oa->port == out_port)
				return 1;
		}

		/* Termination relies on every stored action having a nonzero
		 * length, which validate_actions() established on insert. */
		p += len;
		actions_len -= len;
	}

	return 0;
}
EXPORT_SYMBOL(flow_has_out_port);
  221 +
/* Allocates and returns a new flow with room for 'actions_len' actions,
 * using allocation flags 'flags'. Returns the new flow or a null pointer
 * on failure.  The caller owns the flow and must release it with
 * flow_free() or flow_deferred_free(). */
struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags)
{
	struct sw_flow_actions *sfa;
	size_t size = sizeof *sfa + actions_len;
	struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags);
	if (unlikely(!flow))
		return NULL;

	sfa = kmalloc(size, flags);
	if (unlikely(!sfa)) {
		/* Unwind the cache allocation on partial failure. */
		kmem_cache_free(flow_cache, flow);
		return NULL;
	}
	/* Records the reserved size; the bytes themselves are filled in
	 * later by flow_setup_actions(). */
	sfa->actions_len = actions_len;
	flow->sf_acts = sfa;

	return flow;
}
  243 +
  244 +/* Frees 'flow' immediately. */
  245 +void flow_free(struct sw_flow *flow)
  246 +{
  247 + if (unlikely(!flow))
  248 + return;
  249 + kfree(flow->sf_acts);
  250 + kmem_cache_free(flow_cache, flow);
  251 +}
  252 +EXPORT_SYMBOL(flow_free);
  253 +
  254 +/* RCU callback used by flow_deferred_free. */
  255 +static void rcu_free_flow_callback(struct rcu_head *rcu)
  256 +{
  257 + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
  258 + flow_free(flow);
  259 +}
  260 +
/* Schedules 'flow' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free(struct sw_flow *flow)
{
	call_rcu(&flow->rcu, rcu_free_flow_callback);
}
EXPORT_SYMBOL(flow_deferred_free);
  268 +
  269 +/* RCU callback used by flow_deferred_free_acts. */
  270 +static void rcu_free_acts_callback(struct rcu_head *rcu)
  271 +{
  272 + struct sw_flow_actions *sf_acts = container_of(rcu,
  273 + struct sw_flow_actions, rcu);
  274 + kfree(sf_acts);
  275 +}
  276 +
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
}
EXPORT_SYMBOL(flow_deferred_free_acts);
  284 +
  285 +/* Setup the action on the flow, just after it was created with flow_alloc().
  286 + * Jean II */
  287 +void flow_setup_actions(struct sw_flow * flow,
  288 + const struct ofp_action_header * actions,
  289 + int actions_len)
  290 +{
  291 + /* Basic init of the flow stucture */
  292 + flow->used = flow->created = get_jiffies_64();
  293 + flow->byte_count = 0;
  294 + flow->packet_count = 0;
  295 + spin_lock_init(&flow->lock);
  296 +
  297 + /* Make sure we don't blow the allocation done earlier */
  298 + if(actions_len > flow->sf_acts->actions_len) {
  299 + printk(KERN_ERR "flow_setup_actions: actions_len is too big (%d > %d)",
  300 + actions_len, flow->sf_acts->actions_len);
  301 + return;
  302 + }
  303 +
  304 + /* Setup the actions - No need for RCU at this point ;-) */
  305 + memcpy(flow->sf_acts->actions, actions, actions_len);
  306 +}
  307 +
/* Copies 'actions' into a newly allocated structure for use by 'flow'
 * and safely frees the structure that defined the previous actions. */
void flow_replace_acts(struct sw_flow *flow,
		const struct ofp_action_header *actions, size_t actions_len)
{
	struct sw_flow_actions *sfa;
	struct sw_flow_actions *orig_sfa = flow->sf_acts;
	size_t size = sizeof *sfa + actions_len;

	sfa = kmalloc(size, GFP_ATOMIC);
	/* Best-effort: on allocation failure the flow silently keeps its
	 * previous actions. */
	if (unlikely(!sfa))
		return;

	sfa->actions_len = actions_len;
	memcpy(sfa->actions, actions, actions_len);

	/* Publish the new actions before scheduling the old ones for
	 * freeing after the RCU grace period. */
	rcu_assign_pointer(flow->sf_acts, sfa);
	flow_deferred_free_acts(orig_sfa);

	return;
}
EXPORT_SYMBOL(flow_replace_acts);
  330 +
/* Prints a representation of 'key' to the kernel log.  Debug helper;
 * NOTE(review): printk is used without a KERN_ level prefix here. */
void print_flow(const struct sw_flow_key *key)
{
	printk("wild %08x port %04x vlan-vid %04x vlan-pcp %02x "
	       "src-mac %02x:%02x:%02x:%02x:%02x:%02x "
	       "dst-mac %02x:%02x:%02x:%02x:%02x:%02x "
	       "frm-type %04x ip-tos %02x ip-proto %02x "
	       "src-ip %u.%u.%u.%u dst-ip %u.%u.%u.%u tp-src %d tp-dst %d\n",
	       key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan),
	       key->dl_vlan_pcp,
	       key->dl_src[0], key->dl_src[1], key->dl_src[2],
	       key->dl_src[3], key->dl_src[4], key->dl_src[5],
	       key->dl_dst[0], key->dl_dst[1], key->dl_dst[2],
	       key->dl_dst[3], key->dl_dst[4], key->dl_dst[5],
	       ntohs(key->dl_type),
	       key->nw_tos, key->nw_proto,
	       ((unsigned char *)&key->nw_src)[0],
	       ((unsigned char *)&key->nw_src)[1],
	       ((unsigned char *)&key->nw_src)[2],
	       ((unsigned char *)&key->nw_src)[3],
	       ((unsigned char *)&key->nw_dst)[0],
	       ((unsigned char *)&key->nw_dst)[1],
	       ((unsigned char *)&key->nw_dst)[2],
	       ((unsigned char *)&key->nw_dst)[3],
	       ntohs(key->tp_src), ntohs(key->tp_dst));
}
EXPORT_SYMBOL(print_flow);
  358 +
#define SNAP_OUI_LEN 3

/* An Ethernet frame carrying an 802.2 LLC/SNAP header, used by
 * flow_extract() to recover the encapsulated ethertype from
 * non-Ethernet-II frames. */
struct eth_snap_hdr
{
	struct ethhdr eth;
	uint8_t dsap;  /* Always 0xAA */
	uint8_t ssap;  /* Always 0xAA */
	uint8_t ctrl;
	uint8_t oui[SNAP_OUI_LEN];
	uint16_t ethertype;
} __attribute__ ((packed));
  370 +
  371 +static int is_snap(const struct eth_snap_hdr *esh)
  372 +{
  373 + return (esh->dsap == LLC_SAP_SNAP
  374 + && esh->ssap == LLC_SAP_SNAP
  375 + && !memcmp(esh->oui, "\0\0\0", 3));
  376 +}
  377 +
/* Parses the Ethernet frame in 'skb', which was received on 'in_port',
 * and initializes 'key' to match. Returns 1 if 'skb' contains an IP
 * fragment, 0 otherwise. */
int flow_extract(struct sk_buff *skb, uint16_t in_port,
		 struct sw_flow_key *key)
{
	struct ethhdr *eth;
	struct eth_snap_hdr *esh;
	int retval = 0;
	int nh_ofs;

	memset(key, 0, sizeof *key);
	key->dl_vlan = htons(OFP_VLAN_NONE);
	key->in_port = htons(in_port);

	if (skb->len < sizeof *eth)
		return 0;
	/* Linearize the first 64 bytes so the header parsing below can use
	 * direct pointer access. */
	if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
		return 0;
	}

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
	esh = (struct eth_snap_hdr *) eth;
	nh_ofs = sizeof *eth;
	if (likely(ntohs(eth->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF))
		/* Ethernet II: h_proto is the ethertype itself. */
		key->dl_type = eth->h_proto;
	else if (skb->len >= sizeof *esh && is_snap(esh)) {
		/* 802.3 + LLC/SNAP: the ethertype follows the SNAP header. */
		key->dl_type = esh->ethertype;
		nh_ofs = sizeof *esh;
	} else {
		/* Raw 802.3/LLC frame without a recoverable ethertype. */
		key->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE);
		if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
			nh_ofs += sizeof(struct llc_pdu_un);
		}
	}

	/* Check for a VLAN tag */
	if (key->dl_type == htons(ETH_P_8021Q) &&
	    skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
		struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
		key->dl_type = vh->h_vlan_encapsulated_proto;
		key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
		key->dl_vlan_pcp = (uint8_t)((ntohs(vh->h_vlan_TCI) >> VLAN_PCP_SHIFT)
					     & VLAN_PCP_BITMASK);
		nh_ofs += sizeof(struct vlan_hdr);
	}
	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
	skb_set_network_header(skb, nh_ofs);

	/* Network layer. */
	if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		int th_ofs = nh_ofs + nh->ihl * 4;
		/* Mask off the two ECN bits of the TOS byte. */
		key->nw_tos = nh->tos & 0xfc;
		key->nw_proto = nh->protocol;
		key->nw_src = nh->saddr;
		key->nw_dst = nh->daddr;
		skb_set_transport_header(skb, th_ofs);

		/* Transport layer. Only the first fragment carries L4
		 * headers, so skip them for any fragmented packet. */
		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
			if (key->nw_proto == IPPROTO_TCP) {
				if (tcphdr_ok(skb)) {
					struct tcphdr *tcp = tcp_hdr(skb);
					key->tp_src = tcp->source;
					key->tp_dst = tcp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_UDP) {
				if (udphdr_ok(skb)) {
					struct udphdr *udp = udp_hdr(skb);
					key->tp_src = udp->source;
					key->tp_dst = udp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_ICMP) {
				if (icmphdr_ok(skb)) {
					struct icmphdr *icmp = icmp_hdr(skb);
					/* The ICMP type and code fields use the 16-bit
					 * transport port fields, so we need to store them
					 * in 16-bit network byte order. */
					key->icmp_type = htons(icmp->type);
					key->icmp_code = htons(icmp->code);
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			}
		} else {
			retval = 1;
		}
	} else {
		skb_reset_transport_header(skb);
	}
	return retval;
}
  486 +
  487 +/* Initializes the flow module.
  488 + * Returns zero if successful or a negative error code. */
  489 +int flow_init(void)
  490 +{
  491 + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
  492 + 0, NULL);
  493 + if (flow_cache == NULL)
  494 + return -ENOMEM;
  495 +
  496 + return 0;
  497 +}
  498 +
/* Uninitializes the flow module, destroying the slab cache created by
 * flow_init().  All flows must have been freed before this is called. */
void flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}
  504 +
... ...