From b120272f86773d3c6d0b39e78cd6eae7b3a9b655 Mon Sep 17 00:00:00 2001
From: Pau Espin Pedrol <pespin@sysmocom.de>
Date: Wed, 3 May 2017 12:38:05 +0200
Subject: Add osmux-reference document

Change-Id: I4d19df98af84560c147a637bc42ebe570bb280aa
---
 doc/manuals/Makefile                    |   4 +-
 doc/manuals/osmux-reference-docinfo.xml |  89 +++++++
 doc/manuals/osmux-reference.adoc        | 396 ++++++++++++++++++++++++++++++++
 3 files changed, 488 insertions(+), 1 deletion(-)
 create mode 100644 doc/manuals/osmux-reference-docinfo.xml
 create mode 100644 doc/manuals/osmux-reference.adoc
diff --git a/doc/manuals/Makefile b/doc/manuals/Makefile
index 284ac3bba..e12d3e283 100644
--- a/doc/manuals/Makefile
+++ b/doc/manuals/Makefile
@@ -15,18 +15,20 @@ docbooktotypes = pdf
 # htmlcss =
 
 TOPDIR := ..
-ASCIIDOCS := osmobsc-usermanual
+ASCIIDOCS := osmobsc-usermanual osmux-reference
 
 include $(TOPDIR)/build/Makefile.asciidoc.inc
 include $(TOPDIR)/build/Makefile.inc
 
 osmobsc-usermanual.pdf: chapters/*.adoc
+osmux-reference.pdf: osmux-reference.adoc
 
 clean:
 	-rm -rf $(cleanfiles)
 	-rm osmobsc-usermanual__*.png
 	-rm osmobsc-usermanual__*.svg
 	-rm osmobsc-usermanual*.check
+	-rm osmux-reference*.check
 
 gen-bsc-vty-docbook: FORCE
 	$(call command,xsltproc -o generated/combined1.xml \
diff --git a/doc/manuals/osmux-reference-docinfo.xml b/doc/manuals/osmux-reference-docinfo.xml
new file mode 100644
index 000000000..06b2d7edd
--- /dev/null
+++ b/doc/manuals/osmux-reference-docinfo.xml
@@ -0,0 +1,89 @@
+<revhistory>
+  <revision>
+    <revnumber>0.1</revnumber>
+    <date>11 June 2012</date>
+    <authorinitials>Pablo Neira Ayuso</authorinitials>
+    <revremark>
+       Initial version of the proposal for internal discussion.
+    </revremark>
+  </revision>
+  <revision>
+    <revnumber>0.2</revnumber>
+    <date>11 June 2012</date>
+    <authorinitials>Pablo Neira Ayuso</authorinitials>
+    <revremark>
+            Second version after comments from Holger and Harald:
+            Include figures that provide expected traffic savings (in %).
+            Change licensing terms (owned by OnWaves and consultants).
+            Adjust work from 200 to 150 hours, remove details on how the implementation
+    </revremark>
+  </revision>
+  <revision>
+    <revnumber>0.3</revnumber>
+    <date>20 June 2017</date>
+    <authorinitials>Pau Espin Pedrol</authorinitials>
+    <revremark>
+            Improve and extend for osmo-gsm-manuals inclusion from Pau Espin:
+            Convert to asciidoc.
+            Update frame bits according to implementation.
+    </revremark>
+  </revision>
+</revhistory>
+
+<authorgroup>
+  <author>
+    <firstname>Holger</firstname>
+    <surname>Freyther</surname>
+    <email>hfreyther@sysmocom.de</email>
+    <authorinitials>HF</authorinitials>
+    <affiliation>
+      <shortaffil>sysmocom</shortaffil>
+      <orgname>sysmocom - s.f.m.c. GmbH</orgname>
+      <jobtitle>Managing Director</jobtitle>
+    </affiliation>
+  </author>
+  <author>
+    <firstname>Harald</firstname>
+    <surname>Welte</surname>
+    <email>hwelte@sysmocom.de</email>
+    <authorinitials>HW</authorinitials>
+    <affiliation>
+      <shortaffil>sysmocom</shortaffil>
+      <orgname>sysmocom - s.f.m.c. GmbH</orgname>
+      <jobtitle>Managing Director</jobtitle>
+    </affiliation>
+  </author>
+  <author>
+    <firstname>Pablo</firstname>
+    <surname>Neira Ayuso</surname>
+    <email>pneira@sysmocom.de</email>
+    <authorinitials>PN</authorinitials>
+    <affiliation>
+      <shortaffil>sysmocom</shortaffil>
+      <orgname>sysmocom - s.f.m.c. GmbH</orgname>
+    </affiliation>
+  </author>
+</authorgroup>
+
+<copyright>
+  <year>2012-2017</year>
+  <holder>sysmocom - s.f.m.c. GmbH</holder>
+</copyright>
+
+<legalnotice>
+  <para>
+	Permission is granted to copy, distribute and/or modify this
+	document under the terms of the GNU Free Documentation License,
+	Version 1.3 or any later version published by the Free Software
+	Foundation; with the Invariant Sections being just 'Foreword',
+	'Acknowledgements' and 'Preface', with no Front-Cover Texts,
+	and no Back-Cover Texts.  A copy of the license is included in
+	the section entitled "GNU Free Documentation License".
+  </para>
+  <para>
+	The Asciidoc source code of this manual can be found at
+	<ulink url="http://git.osmocom.org/osmo-gsm-manuals/">
+		http://git.osmocom.org/osmo-gsm-manuals/
+	</ulink>
+  </para>
+</legalnotice>
diff --git a/doc/manuals/osmux-reference.adoc b/doc/manuals/osmux-reference.adoc
new file mode 100644
index 000000000..068bc19e0
--- /dev/null
+++ b/doc/manuals/osmux-reference.adoc
@@ -0,0 +1,396 @@
+[[osmux]]
+= OSmux: reduction of SAT uplink costs by protocol optimizations
+
+== Problem
+
+In case of satellite based GSM systems, the transmission cost on the back-haul
+is relatively expensive. The billing for such SAT uplink is usually done on a
+pay-per-byte basis. Thus, reducing the amount of bytes transferred would
+significantly reduce the cost of such uplinks. In such an environment, even
+seemingly small protocol optimizations, e.g. message batching and trunking, can
+result in significant cost reduction.
+
+This is true not only for speech codec frames, but also for the constant
+background load caused by the signalling link (A protocol). Optimizations in
+this protocol are applicable to both VSAT back-haul (best-effort background IP)
+as well as Inmarsat based links (QoS with guaranteed bandwidth).
+
+== Proposed solution
+
+In order to reduce the bandwidth consumption, this document proposes to develop
+a multiplex protocol that will be used to proxy voice and signalling traffic
+through the SAT links.
+
+=== Voice
+
+For the voice case, we propose a protocol that provides:
+
+* Batching: that consists of putting multiple codec frames on the sender side
+  into one single packet to reduce the protocol header overhead. This batch
+  is then sent as one RTP/UDP/IP packet at the same time. Currently, AMR 5.9
+  codec frames are transported in a RTP/UDP/IP protocol stacking. This means
+  there are 15 bytes of speech codec frame, plus a 2 byte RTP payload header,
+  plus the RTP (12 bytes), UDP (8 bytes) and IP (20 bytes) overhead. This means
+  we have 40 byte overhead for 17 byte payload.
+
+* Trunking: in case of multiple concurrent voice calls, each of them will
+  generate one speech codec frame every 20ms. Instead of sending only codec
+  frames of one voice call in a given IP packet, we can 'interleave' or trunk
+  the codec frames of multiple calls into one IP. This further increases the
+  IP packet size and thus improves the payload/overhead ratio.
+
+Both techniques should be applied without noticeable impact in terms of user
+experience. As the satellite back-haul has very high round trip time (several
+hundred milliseconds), adding some more delay is not going to make things
+significantly worse.
+
+For the batching, the idea consists of batching multiple codec frames on the
+sender side. A batching factor (B) of '4' means that we will send 4 codec
+frames in one underlying protocol packet. The additional delay of the batching
+can be computed as (B-1)*20ms as 20ms is the duration of one codec frame.
+Existing experimentation has shown that a batching factor of 4 to 8 (causing a
+delay of 60ms to 140ms) is acceptable and does not cause significant quality
+degradation.
+
+The main requirements for such voice RTP proxy are:
+
+* Always batch codec frames of multiple simultaneous calls into single UDP
+  message.
+
+* Batch a configurable number of codec frames of the same call into one UDP
+  message.
+
+* Make sure to properly reconstruct timing at receiver (non-bursty but
+  one codec frame every 20ms).
+
+* Implementation in libosmo-netif to make sure it can be used
+  in osmo-bts (towards osmo-bsc), osmo-bsc (towards osmo-bts and
+  osmo-bsc_nat) and osmo-bsc_nat (towards osmo-bsc)
+
+* Primary application will be with osmo-bsc connected via satellite link to
+  osmo-bsc_nat.
+
+* Make sure to properly deal with SID (silence detection) frames in case
+  of DTX.
+
+* Make sure to transmit and properly re-construct the M (marker) bit of
+  the RTP header, as it is used in AMR.
+
+* Primary use case for AMR codec, probably not worth to waste extra
+  payload byte on indicating codec type (amr/hr/fr/efr). If we can add
+  the codec type somewhere without growing the packet, we'll do it.
+  Otherwise, we'll skip this.
+
+=== Signalling
+
+Signalling uses SCCP/IPA/TCP/IP stacking. Considering SCCP as payload, this
+adds 3 (IPA) + 20 (TCP) + 20 (IP) = 43 bytes overhead for every signalling
+message, plus of course the 40-byte-sized TCP ACK sent in the opposite
+direction.
+
+While trying to look for alternatives, we consider that none of the standard IP
+layer 4 protocols are suitable for this application. We detail the reasons
+why:
+
+* TCP is a streaming protocol aimed at maximizing the throughput of a stream
+  within the constraints of the underlying transport layer.  This feature is
+  not really required for the low-bandwidth and low-pps GSM signalling.
+  Moreover, TCP is stream oriented and does not conserve message boundaries.
+  As such, the IPA header has to serve as a boundary between messages in the
+  stream. Moreover, assuming a generally quite idle signalling link, the
+  assumption of a pure TCP ACK (without any data segment) is very likely to
+  happen.
+
+* Raw IP or UDP as alternative is not a real option, as it does not recover
+  lost packets.
+
+* SCTP preserves message boundaries and allows for multiple streams
+  (multiplexing) within one connection, but it has too much overhead.
+
+For that reason, we propose the use of LAPD for this task. This protocol was
+originally specified to be used on top of E1 links for the A interface, which
+do not exhibit any kind of noticeable latency. LAPD resolves (albeit not as
+well as TCP does) packet loss and copes with packet re-ordering.
+
+LAPD has a very small header (3-5 octets) compared to TCP's 20 bytes.  Even if
+LAPD is put inside UDP, the combination of 11 to 13 octets still saves a
+noticeable number of bytes per packet. Moreover, LAPD has been modified for less
+reliable interfaces such as the GSM Um interface (LAPDm), as well as for the
+use in satellite systems (LAPsat in ETSI GMR).
+
+== OSmux protocol
+
+The OSmux protocol is the core of our proposed solution. This protocol operates
+over UDP or, alternatively, over raw IP. The designated default UDP port number
+and IP protocol type have not yet been decided.
+
+Every OSmux message starts with a control octet. The control octet contains a
+2-bit Field Type (FT) and its location starts on the 2nd bit for backward
+compatibility with older versions (used to be 3 bits). The FT defines the
+structure of the remaining header as well as the payload.
+
+The following FT values are assigned:
+
+* FT == 0: LAPD Signalling
+* FT == 1: AMR Codec
+* FT == 2: Dummy
+* FT == 3: Reserved for Future Use
+
+There can be any number of OSmux messages batched up in one underlying packet.
+In this case, the multiple OSmux messages are simply concatenated, i.e. the
+OSmux header control octet directly follows the last octet of the payload of the
+previous OSmux message.
+
+
+=== LAPD Signalling (0)
+
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|X|FT |X X X X X|   PL-LENGTH   | LAPD header + payload         |
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+Field Type (FT): 2 bits::
+The Field Type allocated for LAPD Signalling is "0".
+
+This frame type is not yet supported inside OsmoCom and may be subject to
+change in future versions of the protocol.
+
+
+=== AMR Codec (1)
+
+This OSmux packet header is used to transport one or more RTP-AMR packets for a
+specific RTP stream identified by the Circuit ID field.
+
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|M|FT | CTR |F|Q| Red. TS/SeqNR |  Circuit ID   |AMR FT |AMR CMR|
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+Marker (M): 1 bit::
+This is a 1:1 mapping from the RTP Marker (M) bit as specified in RFC3550
+Section 5.1 (RTP) as well as RFC3267 Section 4.1 (RTP-AMR). In AMR, the Marker
+is used to indicate the beginning of a talk-spurt, i.e. the end of a silence
+period. In case more than one AMR frame from the specific stream is batched into
+this OSmux header, it is guaranteed that the first AMR frame is the first in the
+talkspurt.
+
+Field Type (FT): 2 bits::
+The Field Type allocated for AMR codec is "1".
+
+Frame Counter (CTR): 3 bits::
+Provides the number of batched AMR payloads (starting at 0) after the header. For
+instance, if there are 2 AMR payloads batched, CTR will be "1".
+
+AMR-F (F): 1 bit::
+This is a 1:1 mapping from the AMR F field in RFC3267 Section 4.3.2. In case
+there are multiple AMR codec frames with different F bit batched together, we
+only use the last F and ignore any previous F.
+
+AMR-Q (Q): 1 bit::
+This is a 1:1 mapping from the AMR Q field (Frame quality indicator) in RFC3267
+Section 4.3.2. In case there are multiple AMR codec frames with different Q bit
+batched together, we only use the last Q and ignore any previous Q.
+
+Circuit ID Code (CIC): 8 bits::
+Identifies the Circuit (Voice call), which in RTP is identified by {srcip,
+srcport, dstip, dstport, ssrc}.
+
+Reduced/Combined Timestamp and Sequence Number (RCTS): 8 bits::
+Resembles a combination of the RTP timestamp and sequence number. In the GSM
+system, speech codec frames are generated at a rate of 20ms.  Thus, there is no
+need to have independent timestamp and sequence numbers (related to a 8kHz
+clock) as specified in AMR-RTP.
+
+AMR Frame Type (AMR-FT): 4 bits::
+This is a mapping from the AMR FT field (Frame type index) in RFC3267 Section
+4.3.2. The length of each codec frame needs to be determined from this field. It
+is thus guaranteed that all frames for a specific stream in an OSmux batch are
+of the same AMR type.
+
+AMR Codec Mode Request (AMR-CMR): 4 bits::
+The RTP AMR payload header as specified in RFC3267 contains a 4-bit CMR field.
+Rather than transporting it in a separate octet, we squeeze it in the lower four
+bits of the last octet.  In case there are multiple AMR codec frames with
+different CMR, we only use the last CMR and ignore any previous CMR.
+
+==== Additional considerations
+
+* It can be assumed that all OSmux frames of type AMR Codec contain at least 1
+  AMR frame.
+* Given a batch factor of N frames (N>1), it can not be assumed that the amount
+  of AMR frames in any OSmux frame will always be N, due to some restrictions
+  mentioned above. For instance, a sender can decide to send before queueing the
+  expected N frames due to timing issues, or to conform with the restriction
+  that the first AMR frame in the batch must be the first in the talkspurt
+  (Marker M bit).
+
+
+=== Dummy (2)
+
+This kind of frame is used for NAT traversal. If a peer is behind a NAT, its
+source port specified in SDP will be a private port not accessible from the
+outside. Before other peers are able to send any packet to it, they require the
+mapping between the private and the public port to be set by the firewall,
+otherwise the firewall will most probably drop the incoming messages or send it
+to a wrong destination. The firewall in most cases won't create a mapping until
+the peer behind the NAT sends a packet to the peer residing outside.
+
+In this scenario, if the peer behind the NAT is expecting to receive but never
+transmit audio, no packets will ever reach it. To solve this, the peer sends
+dummy packets to let the firewall create the port mapping. When the other peers
+receive this dummy packet, they can infer the relation between the original
+private port and the public port and start sending packets to it.
+
+When opening a connection, the peer is expected to send dummy packets until it
+starts sending real audio, at which point dummy packets are not needed anymore.
+
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|X|FT | CTR |X X|X X X X X X X X|  Circuit ID   |AMR FT |X X X X|
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+Field Type (FT): 2 bits::
+The Field Type allocated for Dummy is "2".
+
+Frame Counter (CTR): 3 bits::
+Provides the number of dummy batched AMR payloads (starting at 0) after the header.
+For instance, if there are 2 AMR payloads batched, CTR will be "1".
+
+Circuit ID Code (CIC): 8 bits::
+Identifies the Circuit (Voice call), which in RTP is identified by {srcip,
+srcport, dstip, dstport, ssrc}.
+
+AMR Frame Type (AMR-FT): 4 bits::
+This field must contain any valid value described in the AMR FT field (Frame
+type index) in RFC3267 Section 4.3.2.
+
+==== Additional considerations
+
+* After the header, additional padding needs to be allocated to conform with CTR
+and AMR FT fields. For instance, if CTR is 0 and AMR FT is AMR 6.7, a padding
+of 17 bytes is to be allocated after the header.
+
+* On reception of this kind of OSmux frame, it's usually enough for the reader to
+  discard the header plus the calculated padding and keep operating.
+
+
+== Evaluation: Expected traffic savings
+
+The following figure shows the traffic saving (in %) depending on the number
+of concurrent calls (assuming trunking but no batching at all):
+----
+  Traffic savings (%)
+  100 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      +        +       +        +        +       +   batch factor 1 **E*** +
+      |                                                                    |
+   80 ++                                                                  ++
+      |                                                                    |
+      |                                                                    |
+      |                                                       ****E********E
+   60 ++                             ****E*******E********E***            ++
+      |                       **E****                                      |
+      |                   ****                                             |
+   40 ++              *E**                                                ++
+      |             **                                                     |
+      |           **                                                       |
+      |         **                                                         |
+   20 ++       E                                                          ++
+      |                                                                    |
+      +        +       +        +        +       +        +       +        +
+    0 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      0        1       2        3        4       5        6       7        8
+                                Concurrent calls
+----
+
+The results show a saving of 15.79% with only one concurrent call, that
+quickly improves with more concurrent calls (due to trunking).
+
+We also provide the expected results by batching 4 messages for a single call:
+----
+  Traffic savings (%)
+  100 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      +        +       +        +        +       +   batch factor 4 **E*** +
+      |                                                                    |
+   80 ++                                                                  ++
+      |                                                                    |
+      |                                                                    |
+      |                     ****E********E*******E********E*******E********E
+   60 ++           ****E****                                              ++
+      |        E***                                                        |
+      |                                                                    |
+   40 ++                                                                  ++
+      |                                                                    |
+      |                                                                    |
+      |                                                                    |
+   20 ++                                                                  ++
+      |                                                                    |
+      +        +       +        +        +       +        +       +        +
+    0 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      0        1       2        3        4       5        6       7        8
+                                Concurrent calls
+----
+
+The results show a saving of 56.68% with only one concurrent call. Trunking
+slightly improves the situation with more concurrent calls.
+
+We also provide the figure with batching factor of 8:
+----
+  Traffic savings (%)
+  100 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      +        +       +        +        +       +   batch factor 8 **E*** +
+      |                                                                    |
+   80 ++                                                                  ++
+      |                                                                    |
+      |                                               ****E*******E********E
+      |            ****E********E********E*******E****                     |
+   60 ++       E***                                                       ++
+      |                                                                    |
+      |                                                                    |
+   40 ++                                                                  ++
+      |                                                                    |
+      |                                                                    |
+      |                                                                    |
+   20 ++                                                                  ++
+      |                                                                    |
+      +        +       +        +        +       +        +       +        +
+    0 ++-------+-------+--------+--------+-------+--------+-------+-------++
+      0        1       2        3        4       5        6       7        8
+                                Concurrent calls
+----
+
+That shows very little improvement compared to batching 4 messages.
+Still, we risk degrading the user experience. Thus, we consider a batching
+factor of 3 or 4 to be adequate.
+
+== Other proposed follow-up works
+
+The following sections describe features that can be considered in the mid-run
+to be included in the OSmux infrastructure. They will be considered for future
+proposals as extensions to this work. Therefore, they are NOT included in
+this proposal.
+
+=== Encryption
+
+Voice streams within OSmux can be encrypted in a similar manner to SRTP
+(RFC3711). The only potential problem is the use of a reduced sequence number,
+as it wraps in (20ms * 2^8 * B), i.e. 5.12s to 40.96s. However, as the
+receiver knows at which rate the codec frames are generated at the sender, he
+should be able to compute how much time has passed using his own timebase.
+
+Another alternative can be the use of DTLS (RFC 6347) that can be used to
+secure datagram traffic using TLS facilities (libraries like openssl and
+gnutls already support this).
+
+=== Multiple OSmux messages in one packet
+
+In case there is already at least one active voice call, there will be
+regular transmissions of voice codec frames.  Depending on the batching
+factor, they will be sent every 70ms to 140ms.  The size even of a
+batched (and/or trunked) codec message is still much lower than the MTU.
+
+Thus, any signalling (related or unrelated to the call causing the codec
+stream) can just be piggy-backed to the packets containing the voice
+codec frames.
-- 
cgit v1.2.3