Browse Source
* Add CPU and RAM usage alerting
* Create basic troubleshooting document to point alerts at
* Limit max number of hardware values collected
* Save metric value with the point in time it was taken

pull/146/head
7 changed files with 137 additions and 15 deletions
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
## CPU and RAM usage alerts |
||||
|
||||
If your hardware is being maxed out then your video may not be processed and delivered fast enough to keep up with the real-time requirements of live video. |
||||
|
||||
Here are some steps you can try taking to resolve this. |
||||
|
||||
1. You may have too many bitrates defined as separate video quality variants. Try removing one. |
||||
1. Change to a [faster encoder preset](https://github.com/gabek/owncast/blob/master/doc/encoding.md#encoder-preset) in your configuration. If you're currently using `veryfast`, try `superfast`, for example. |
||||
1. Try reducing [the quality of the video you're sending to Owncast from your broadcasting software](https://github.com/gabek/owncast/blob/master/doc/encoding.md#how-you-configure-your-broadcasting-software-matters). |
||||
1. If you've gone down to a single bitrate, changed the encoder preset to the fastest, and experimented with different qualities in your broadcasting software, it's possible the server you're running Owncast on is just not powerful enough for the task, and you might need to try running it in a different environment.
@ -0,0 +1,41 @@
@@ -0,0 +1,41 @@
|
||||
package metrics |
||||
|
||||
import ( |
||||
log "github.com/sirupsen/logrus" |
||||
) |
||||
|
||||
const (
	// maxCPUAlertingThresholdPCT is the CPU utilization percentage that the
	// recent average must exceed before an alert is logged.
	maxCPUAlertingThresholdPCT = 95

	// maxRAMAlertingThresholdPCT is the memory utilization percentage that the
	// recent average must exceed before an alert is logged.
	maxRAMAlertingThresholdPCT = 95

	// alertingError is the format string for an alert. Its verbs are, in
	// order: the resource name (%s), the measured utilization (%d) and the
	// threshold that was exceeded (%d).
	alertingError = "The %s utilization of %d%% is higher than the alerting threshold of %d%%. This can cause issues with video generation and delivery. Please visit the documentation at https://github.com/gabek/owncast/blob/master/doc/troubleshooting.md to help troubleshoot this issue."
)
||||
|
||||
func handleAlerting() { |
||||
handleCPUAlerting() |
||||
handleRAMAlerting() |
||||
} |
||||
|
||||
func handleCPUAlerting() { |
||||
if len(Metrics.CPUUtilizations) < 2 { |
||||
return |
||||
} |
||||
|
||||
avg := recentAverage(Metrics.CPUUtilizations) |
||||
if avg > maxCPUAlertingThresholdPCT { |
||||
log.Errorf(alertingError, "CPU", avg, maxCPUAlertingThresholdPCT) |
||||
} |
||||
} |
||||
|
||||
func handleRAMAlerting() { |
||||
if len(Metrics.RAMUtilizations) < 2 { |
||||
return |
||||
} |
||||
|
||||
avg := recentAverage(Metrics.RAMUtilizations) |
||||
if avg > maxRAMAlertingThresholdPCT { |
||||
log.Errorf(alertingError, "memory", avg, maxRAMAlertingThresholdPCT) |
||||
} |
||||
} |
||||
|
||||
func recentAverage(values []value) int { |
||||
return int((values[len(values)-1].Value + values[len(values)-2].Value) / 2) |
||||
} |
@ -0,0 +1,35 @@
@@ -0,0 +1,35 @@
|
||||
package metrics |
||||
|
||||
import (
	"time"

	"github.com/shirou/gopsutil/cpu"
	"github.com/shirou/gopsutil/mem"
	log "github.com/sirupsen/logrus"
)
||||
|
||||
// maxCollectionValues is the maximum number of samples we want to keep for
// each collected metric.
const maxCollectionValues = 500
||||
|
||||
func collectCPUUtilization() { |
||||
if len(Metrics.CPUUtilizations) > maxCollectionValues { |
||||
Metrics.CPUUtilizations = Metrics.CPUUtilizations[1:] |
||||
} |
||||
|
||||
v, err := cpu.Percent(0, false) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
|
||||
metricValue := value{time.Now(), int(v[0])} |
||||
Metrics.CPUUtilizations = append(Metrics.CPUUtilizations, metricValue) |
||||
} |
||||
|
||||
func collectRAMUtilization() { |
||||
if len(Metrics.RAMUtilizations) > maxCollectionValues { |
||||
Metrics.RAMUtilizations = Metrics.RAMUtilizations[1:] |
||||
} |
||||
|
||||
memoryUsage, _ := mem.VirtualMemory() |
||||
metricValue := value{time.Now(), int(memoryUsage.UsedPercent)} |
||||
Metrics.RAMUtilizations = append(Metrics.RAMUtilizations, metricValue) |
||||
} |
@ -0,0 +1,39 @@
@@ -0,0 +1,39 @@
|
||||
package metrics |
||||
|
||||
import ( |
||||
"time" |
||||
) |
||||
|
||||
// metricsPollingInterval is the delay between two consecutive polling cycles
// started by Start.
const metricsPollingInterval = 15 * time.Second
||||
|
||||
// value is a single metric sample together with the moment it was taken.
// Field order matters: the collectors build values with positional literals.
type value struct {
	// Time is when the sample was recorded.
	Time time.Time
	// Value is the sampled measurement, stored as a whole number.
	Value int
}
||||
|
||||
type metrics struct { |
||||
CPUUtilizations []value |
||||
RAMUtilizations []value |
||||
} |
||||
|
||||
// Metrics is the shared Metrics instance
|
||||
var Metrics *metrics |
||||
|
||||
// Start will begin the metrics collection and alerting
|
||||
func Start() { |
||||
Metrics = new(metrics) |
||||
|
||||
for range time.Tick(metricsPollingInterval) { |
||||
handlePolling() |
||||
} |
||||
} |
||||
|
||||
func handlePolling() { |
||||
// Collect hardware stats
|
||||
collectCPUUtilization() |
||||
collectRAMUtilization() |
||||
|
||||
// Alerting
|
||||
handleAlerting() |
||||
} |
Loading…
Reference in new issue