Ready-to-use SRT / WebRTC / RTSP / RTMP / LL-HLS media server and media proxy that allows to read, publish, proxy, record and playback video and audio streams.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

329 lines
6.5 KiB

package formatprocessor
import (
"bytes"
"time"
"github.com/bluenviron/gortsplib/v3/pkg/formats"
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtph264"
"github.com/bluenviron/mediacommon/pkg/codecs/h264"
"github.com/pion/rtp"
"github.com/bluenviron/mediamtx/internal/logger"
)
// extract SPS and PPS without decoding RTP packets
func rtpH264ExtractSPSPPS(pkt *rtp.Packet) ([]byte, []byte) {
if len(pkt.Payload) < 1 {
return nil, nil
}
typ := h264.NALUType(pkt.Payload[0] & 0x1F)
switch typ {
case h264.NALUTypeSPS:
return pkt.Payload, nil
case h264.NALUTypePPS:
return nil, pkt.Payload
case h264.NALUTypeSTAPA:
payload := pkt.Payload[1:]
var sps []byte
var pps []byte
for len(payload) > 0 {
if len(payload) < 2 {
break
}
size := uint16(payload[0])<<8 | uint16(payload[1])
payload = payload[2:]
if size == 0 {
break
}
if int(size) > len(payload) {
return nil, nil
}
nalu := payload[:size]
payload = payload[size:]
typ = h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeSPS:
sps = nalu
case h264.NALUTypePPS:
pps = nalu
}
}
return sps, pps
default:
return nil, nil
}
}
// UnitH264 is a H264 data unit.
type UnitH264 struct {
RTPPackets []*rtp.Packet
NTP time.Time
PTS time.Duration
AU [][]byte
}
// GetRTPPackets implements Unit.
func (d *UnitH264) GetRTPPackets() []*rtp.Packet {
return d.RTPPackets
}
// GetNTP implements Unit.
func (d *UnitH264) GetNTP() time.Time {
return d.NTP
}
type formatProcessorH264 struct {
udpMaxPayloadSize int
format *formats.H264
log logger.Writer
encoder *rtph264.Encoder
decoder *rtph264.Decoder
lastKeyFrameReceived time.Time
}
func newH264(
udpMaxPayloadSize int,
forma *formats.H264,
generateRTPPackets bool,
log logger.Writer,
) (*formatProcessorH264, error) {
t := &formatProcessorH264{
udpMaxPayloadSize: udpMaxPayloadSize,
format: forma,
log: log,
}
if generateRTPPackets {
t.encoder = &rtph264.Encoder{
PayloadMaxSize: udpMaxPayloadSize - 12,
PayloadType: forma.PayloadTyp,
PacketizationMode: forma.PacketizationMode,
}
t.encoder.Init()
t.lastKeyFrameReceived = time.Now()
}
return t, nil
}
func (t *formatProcessorH264) updateTrackParametersFromRTPPacket(pkt *rtp.Packet) {
sps, pps := rtpH264ExtractSPSPPS(pkt)
update := false
if sps != nil && !bytes.Equal(sps, t.format.SPS) {
update = true
}
if pps != nil && !bytes.Equal(pps, t.format.PPS) {
update = true
}
if update {
if sps == nil {
sps = t.format.SPS
}
if pps == nil {
pps = t.format.PPS
}
t.format.SafeSetParams(sps, pps)
}
}
func (t *formatProcessorH264) updateTrackParametersFromNALUs(nalus [][]byte) {
sps := t.format.SPS
pps := t.format.PPS
update := false
for _, nalu := range nalus {
typ := h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeSPS:
if !bytes.Equal(nalu, sps) {
sps = nalu
update = true
}
case h264.NALUTypePPS:
if !bytes.Equal(nalu, pps) {
pps = nalu
update = true
}
}
}
if update {
t.format.SafeSetParams(sps, pps)
}
}
func (t *formatProcessorH264) checkKeyFrameInterval(isKeyFrame bool) {
if isKeyFrame {
t.lastKeyFrameReceived = time.Now()
} else {
now := time.Now()
if now.Sub(t.lastKeyFrameReceived) >= maxKeyFrameInterval {
t.lastKeyFrameReceived = now
t.log.Log(logger.Warn, "no H264 key frames received in %v, stream can't be decoded")
}
}
}
func (t *formatProcessorH264) remuxAccessUnit(nalus [][]byte) [][]byte {
isKeyFrame := false
n := 0
for _, nalu := range nalus {
typ := h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeSPS, h264.NALUTypePPS: // parameters: remove
continue
case h264.NALUTypeAccessUnitDelimiter: // AUD: remove
continue
case h264.NALUTypeIDR: // key frame
if !isKeyFrame {
isKeyFrame = true
// prepend parameters
if t.format.SPS != nil && t.format.PPS != nil {
n += 2
}
}
}
n++
}
t.checkKeyFrameInterval(isKeyFrame)
if n == 0 {
return nil
}
filteredNALUs := make([][]byte, n)
i := 0
if isKeyFrame && t.format.SPS != nil && t.format.PPS != nil {
filteredNALUs[0] = t.format.SPS
filteredNALUs[1] = t.format.PPS
i = 2
}
for _, nalu := range nalus {
typ := h264.NALUType(nalu[0] & 0x1F)
switch typ {
case h264.NALUTypeSPS, h264.NALUTypePPS:
continue
case h264.NALUTypeAccessUnitDelimiter:
continue
}
filteredNALUs[i] = nalu
i++
}
return filteredNALUs
}
func (t *formatProcessorH264) Process(unit Unit, hasNonRTSPReaders bool) error { //nolint:dupl
tunit := unit.(*UnitH264)
if tunit.RTPPackets != nil {
pkt := tunit.RTPPackets[0]
t.updateTrackParametersFromRTPPacket(pkt)
if t.encoder == nil {
// remove padding
pkt.Header.Padding = false
pkt.PaddingSize = 0
// RTP packets exceed maximum size: start re-encoding them
if pkt.MarshalSize() > t.udpMaxPayloadSize {
v1 := pkt.SSRC
v2 := pkt.SequenceNumber
v3 := pkt.Timestamp
t.encoder = &rtph264.Encoder{
PayloadMaxSize: t.udpMaxPayloadSize - 12,
PayloadType: pkt.PayloadType,
SSRC: &v1,
InitialSequenceNumber: &v2,
InitialTimestamp: &v3,
PacketizationMode: t.format.PacketizationMode,
}
t.encoder.Init()
}
}
// decode from RTP
if hasNonRTSPReaders || t.decoder != nil || t.encoder != nil {
if t.decoder == nil {
t.decoder = t.format.CreateDecoder()
t.lastKeyFrameReceived = time.Now()
}
if t.encoder != nil {
tunit.RTPPackets = nil
}
// DecodeUntilMarker() is necessary, otherwise Encode() generates partial groups
au, pts, err := t.decoder.DecodeUntilMarker(pkt)
if err != nil {
if err == rtph264.ErrNonStartingPacketAndNoPrevious || err == rtph264.ErrMorePacketsNeeded {
return nil
}
return err
}
tunit.AU = t.remuxAccessUnit(au)
tunit.PTS = pts
}
// route packet as is
if t.encoder == nil {
return nil
}
} else {
t.updateTrackParametersFromNALUs(tunit.AU)
tunit.AU = t.remuxAccessUnit(tunit.AU)
}
// encode into RTP
if len(tunit.AU) != 0 {
pkts, err := t.encoder.Encode(tunit.AU, tunit.PTS)
if err != nil {
return err
}
tunit.RTPPackets = pkts
} else {
tunit.RTPPackets = nil
}
return nil
}
func (t *formatProcessorH264) UnitForRTPPacket(pkt *rtp.Packet, ntp time.Time) Unit {
return &UnitH264{
RTPPackets: []*rtp.Packet{pkt},
NTP: ntp,
}
}