Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/mysql.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ These params apply in general to all MySQL clusters, unless specified differentl
- `User`, `Password`: these can be specified as plaintext, or in a `${some_env_variable}` format, in which case `freno` will look up its environment for specified variable. (e.g. to match the above config, a `shell` script invoking `freno` can `export mysql_password_env_variable=flyingcircus`)
- `MetricQuery`:
- Note: returned value is expected to be `[0..)` (`0` or more), where lower values are "better" and higher values are "worse".
- if not provided, `freno` will assume you're interested in replication lag, and will issue a `SHOW SLAVE STATUS` to extract `Seconds_behind_master`
- if not provided, `freno` will assume you're interested in replication lag, and will issue `SHOW REPLICA STATUS` (MySQL 8.0.22+), falling back to `SHOW SLAVE STATUS` on older versions, and extract `Seconds_Behind_Source` (or `Seconds_Behind_Master` from the legacy command)
- We strongly recommend using a custom heartbeat mechanism such as `pt-heartbeat`, with subsecond resolution. The sample query above works well with `pt-heartbeat` subsecond timestamps.
- Strictly speaking, you don't have to provide a replication-lag metric. This could be any query that reports any metric. However, you're likely interested in replication lag to start with.
- Note: the default time unit for replication lag is _seconds_
Expand Down
49 changes: 40 additions & 9 deletions pkg/mysql/mysql_throttle_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
package mysql

import (
"errors"
"fmt"
"strings"
"time"

"github.com/go-sql-driver/mysql"
"github.com/outbrain/golib/sqlutils"
"github.com/patrickmn/go-cache"
metrics "github.com/rcrowley/go-metrics"
Expand Down Expand Up @@ -66,7 +68,7 @@ func (metric *MySQLThrottleMetric) Get() (float64, error) {
}

// ReadThrottleMetric returns replication lag for a given connection config; either by explicit query
// or via SHOW SLAVE STATUS
// or via SHOW REPLICA STATUS / SHOW SLAVE STATUS
func ReadThrottleMetric(probe *Probe, clusterName string) (mySQLThrottleMetric *MySQLThrottleMetric) {
if mySQLThrottleMetric := getCachedMySQLThrottleMetric(probe); mySQLThrottleMetric != nil {
return mySQLThrottleMetric
Expand Down Expand Up @@ -115,17 +117,46 @@ func ReadThrottleMetric(probe *Probe, clusterName string) (mySQLThrottleMetric *
return mySQLThrottleMetric
}

// No metric query? By default we look at replication lag as output of SHOW SLAVE STATUS
// No metric query? By default we look at replication lag.
// Try SHOW REPLICA STATUS first (MySQL 8.0.22+, required in 8.4+), fall back to
// SHOW SLAVE STATUS for older MySQL versions that don't recognise the new syntax.

mySQLThrottleMetric.Err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
slaveIORunning := m.GetString("Slave_IO_Running")
slaveSQLRunning := m.GetString("Slave_SQL_Running")
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
if !secondsBehindMaster.Valid {
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
mySQLThrottleMetric.Err = sqlutils.QueryRowsMap(db, `show replica status`, func(m sqlutils.RowMap) error {
replicaIORunning := m.GetString("Replica_IO_Running")
replicaSQLRunning := m.GetString("Replica_SQL_Running")
secondsBehindSource := m.GetNullInt64("Seconds_Behind_Source")
if !secondsBehindSource.Valid {
return fmt.Errorf("replication not running; Replica_IO_Running=%+v, Replica_SQL_Running=%+v", replicaIORunning, replicaSQLRunning)
}
mySQLThrottleMetric.Value = float64(secondsBehindMaster.Int64)
mySQLThrottleMetric.Value = float64(secondsBehindSource.Int64)
return nil
})

// MySQL error 1064 means syntax error — the server doesn't understand SHOW REPLICA STATUS
// (MySQL < 8.0.22). Fall back to the legacy SHOW SLAVE STATUS command.
if mySQLThrottleMetric.Err != nil {
var mysqlErr *mysql.MySQLError
if errors.As(mySQLThrottleMetric.Err, &mysqlErr) && mysqlErr.Number == 1064 {
originalErr := mySQLThrottleMetric.Err
fallbackErr := sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
slaveIORunning := m.GetString("Slave_IO_Running")
slaveSQLRunning := m.GetString("Slave_SQL_Running")
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
if !secondsBehindMaster.Valid {
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
}
mySQLThrottleMetric.Value = float64(secondsBehindMaster.Int64)
return nil
})
if fallbackErr == nil {
mySQLThrottleMetric.Err = nil
} else {
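// Both commands failed; surface the original SHOW REPLICA STATUS error (intended as the more informative one). NOTE(review): that is always the 1064 syntax error here, so the fallback's error (e.g. "replication not running") is discarded — confirm this is intended.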
mySQLThrottleMetric.Err = originalErr
}
}
// Non-syntax errors (permissions, connectivity, replication issues) are kept as-is.
}

return cacheMySQLThrottleMetric(probe, mySQLThrottleMetric)
}