目录

Prometheus rate irate increase笔记


rate

// extrapolatedRate computes the change of a single series over the range
// selected by args[0], extrapolated towards the boundaries of that range.
//
// vals[0] must be a Matrix whose first element is the series to evaluate and
// args[0] must be a *parser.MatrixSelector. When isCounter is true, drops in
// the sample values are treated as counter resets and compensated for. When
// isRate is true, the extrapolated change is divided by the range length in
// seconds. The result is appended to enh.Out; if the series has fewer than
// two points, enh.Out is returned unchanged.
func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper, isCounter bool, isRate bool) Vector {
	ms := args[0].(*parser.MatrixSelector)
	vs := ms.VectorSelector.(*parser.VectorSelector)

	// series is the range-vector data of one metric for the selected window.
	series := vals[0].(Matrix)[0]

	// enh.Ts is the evaluation time of the query, ms.Range is the duration
	// written inside the square brackets of the range selector and vs.Offset
	// is the duration of its offset modifier. Both bounds are derived via
	// durationMilliseconds, i.e. they are expressed in milliseconds.
	windowStart := enh.Ts - durationMilliseconds(ms.Range+vs.Offset)
	windowEnd := enh.Ts - durationMilliseconds(vs.Offset)

	// A rate needs at least two points; otherwise drop this Vector element.
	points := series.Points
	count := len(points)
	if count < 2 {
		return enh.Out
	}
	first, last := points[0], points[count-1]

	delta := last.V - first.V
	if isCounter {
		// If the exporter restarted, the counter starts over and no longer
		// lines up with the earlier samples. Every time a value is lower
		// than its predecessor, add the predecessor back to compensate.
		var prev float64
		for i := range points {
			cur := points[i].V
			if cur < prev {
				delta += prev
			}
			prev = cur
		}
	}

	// Gaps between the first/last sample and the window boundaries. Dividing
	// the millisecond differences by 1000 yields seconds.
	toStart := float64(first.T-windowStart) / 1000
	toEnd := float64(windowEnd-last.T) / 1000

	// Time covered by the samples themselves, in seconds, and the average
	// spacing between two consecutive samples.
	span := float64(last.T-first.T) / 1000
	avgGap := span / float64(count-1)

	if isCounter && delta > 0 && first.V >= 0 {
		// Counters cannot be negative. With a positive slope the point at
		// which the counter would have been zero can be extrapolated. If
		// that point is closer than the window start, use it as the start
		// of the series so the extrapolation never produces a negative
		// counter value.
		toZero := span * (first.V / delta)
		if toZero < toStart {
			toStart = toZero
		}
	}

	// If the first/last sample is close to a window boundary, extrapolate
	// all the way to that boundary: given the spacing seen so far (plus a
	// 10% allowance for noise) another sample would be expected just outside
	// of it. Otherwise extrapolate by only half an average sample interval.
	threshold := avgGap * 1.1
	startExtension := avgGap / 2
	if toStart < threshold {
		startExtension = toStart
	}
	endExtension := avgGap / 2
	if toEnd < threshold {
		endExtension = toEnd
	}
	extrapolated := span + startExtension + endExtension

	delta = delta * (extrapolated / span)
	if isRate {
		// Convert the total increase into a per-second value.
		delta = delta / ms.Range.Seconds()
	}

	return append(enh.Out, Sample{
		Point: Point{V: delta},
	})
}


// === rate(node parser.ValueTypeMatrix) Vector ===
//
// funcRate evaluates rate() by delegating to extrapolatedRate with both the
// counter handling and the per-second conversion switched on.
func funcRate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	const (
		isCounter = true
		isRate    = true
	)
	return extrapolatedRate(vals, args, enh, isCounter, isRate)
}

说明: 44行开始,推算就可以算出来具体的数值了,但是有些细节可以补充一下:

durationToStart 和 durationToEnd 受查询时间和 metric 的 scrape 时间影响;如果它们不小于 extrapolationThreshold(即 averageDurationBetweenSamples * 1.1),则对应一侧外推的时长不再取 durationToStart / durationToEnd 本身,而是取 averageDurationBetweenSamples / 2。

例如:假设当前的指标 A(counter 类型)每秒以 10 的速度增长,我们需要采集 60s 的指标,该指标设置的采集间隔为 5s,第一次采集的时间为 00:01 00,最后一次采集的时间为 00:02 00,当前查询时间为 00:02 03,查询语句为 A[1m],那么一般情况下的计算逻辑(伪代码)如下:

rangeStart = "00:02 03" - (1m + 0) // 没有offset rangeStart == 00:01 03
rangeEnd = "00:02 03" - 0          // 没有offset rangeEnd == 00:02 03
durationToStart = (窗口内第一个metric的时间戳(00:01 05) - rangeStart) / 1000 // 00:01 00 的样本早于 rangeStart,不在窗口内;durationToStart = 2s
durationToEnd = (rangeEnd - 最后一个metric的时间戳(00:02 00)) / 1000 // durationToEnd = 3s

sampledInterval = (最后一个metric的时间戳(00:02 00) - 窗口内第一个metric的时间戳(00:01 05)) / 1000 // 时间戳单位为毫秒,除以1000后 sampledInterval = 55s

averageDurationBetweenSamples = sampledInterval / (区间向量内样本的数量(12个) - 1) // averageDurationBetweenSamples = 55 / 11 = 5s

剩下的去套上面的程序(从第44行开始),至于resultValue可以随便编一个,但是要合理。

increase

// === increase(node parser.ValueTypeMatrix) Vector ===
//
// funcIncrease evaluates increase(). It shares extrapolatedRate with rate():
// counter handling stays enabled (isCounter=true), but isRate is false so the
// extrapolated increase is returned as-is instead of being divided by the
// range length in seconds.
func funcIncrease(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	return extrapolatedRate(vals, args, enh, true, false)
}

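increase 和 rate 共用一个函数 extrapolatedRate,只是 increase 调用时传入 isRate=false,因此结果不需要执行 extrapolatedRate 函数末尾 `if isRate { resultValue = resultValue / ms.Range.Seconds() }` 的内容(即原文所指的第76行)。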

irate

源代码(2021/4/14):


// === irate(node parser.ValueTypeMatrix) Vector ===
//
// funcIrate evaluates irate() by delegating to instantValue with the
// per-second conversion (and counter-reset handling) switched on.
func funcIrate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	const isRate = true
	return instantValue(vals, enh.Out, isRate)
}
....

// instantValue computes a value from the last two points of the series in
// vals[0] (a Matrix; only its first element is used) and appends it to out.
//
// With isRate set, the difference between the two points is divided by the
// time between them in seconds, and a decrease is treated as a counter reset:
// the last value alone is used as the increase. Without isRate, the plain
// difference is returned. out is returned unchanged when the series has fewer
// than two points or when both points carry the same timestamp.
func instantValue(vals []parser.Value, out Vector, isRate bool) Vector {
	points := vals[0].(Matrix)[0].Points
	count := len(points)
	// Two points are the minimum for a rate; otherwise drop this element.
	if count < 2 {
		return out
	}
	prev, last := points[count-2], points[count-1]

	delta := last.V - prev.V
	if isRate && last.V < prev.V {
		// Counter reset: the counter restarted, so the last value itself is
		// taken as the increase.
		delta = last.V
	}

	elapsed := last.T - prev.T
	if elapsed == 0 {
		// Identical timestamps would cause a division by zero below.
		return out
	}

	if isRate {
		// Dividing the timestamp difference by 1000 turns the result into a
		// per-second value.
		delta /= float64(elapsed) / 1000
	}

	return append(out, Sample{
		Point: Point{V: delta},
	})
}

irate最终的计算规则:

(倒数第一个 metric 的 value - 倒数第二个 metric 的 value) / (倒数第一个 metric 的抓取时间(秒) - 倒数第二个 metric 的抓取时间(秒));如果发生 counter 重置(倒数第一个 value 小于倒数第二个 value),分子直接取倒数第一个 metric 的 value。