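// [C++] 纯文本查看 复制代码 -- forum code-listing header ("view as plain text / copy code"); kept as a comment so the file compiles.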
#include <iostream>
#include <vector>
#include <cmath>
#include <numeric>
#include <algorithm>
#include <stdexcept>
// Look up the d2 control-chart constant for a given sample (window) length.
//
// d2(n) is the expected range of n independent standard-normal observations;
// dividing an average range by d2(n) yields an estimate of sigma.
//
// @param length  Number of observations per range, 2..50 inclusive.
// @return        d2(length).
// @throws std::invalid_argument if length is outside 2..50. d2 is undefined
//         for a single observation (its range is always zero), so length == 1
//         is rejected rather than silently mapped to d2(2).
double getD2(int length) {
    // Published d2 values for n = 2..50; element k holds d2(k + 2).
    static constexpr double kD2[] = {
        1.128, 1.693, 2.059, 2.326, 2.534, 2.704, 2.847, 2.970, 3.078, 3.173,  // n =  2..11
        3.258, 3.336, 3.407, 3.472, 3.532, 3.588, 3.640, 3.689, 3.735, 3.778,  // n = 12..21
        3.819, 3.858, 3.895, 3.931, 3.964, 3.997, 4.027, 4.057, 4.086, 4.113,  // n = 22..31
        4.139, 4.165, 4.189, 4.213, 4.236, 4.259, 4.280, 4.301, 4.322, 4.341,  // n = 32..41
        4.361, 4.379, 4.398, 4.415, 4.433, 4.450, 4.466, 4.482, 4.498          // n = 42..50
    };
    constexpr int kMinLength = 2;
    constexpr int kMaxLength =
        kMinLength + static_cast<int>(sizeof(kD2) / sizeof(kD2[0])) - 1;
    // Guard against the table and the accepted range drifting apart again.
    static_assert(kMaxLength == 50, "d2 table must cover n = 2..50");

    if (length < kMinLength || length > kMaxLength) {
        throw std::invalid_argument("d2值只支持样本长度2到50的范围。");
    }
    return kD2[length - kMinLength];
}
// Look up the c4 unbiasing constant for a sample of n observations.
//
// @param n  Sample size, 1..100 inclusive.
// @return   The table entry for n (the entry for n == 1 is 1.0).
// @throws std::invalid_argument if n is outside 1..100.
double getC4(int n) {
    // Entry k holds the constant for n == k + 1.
    static const double kC4Table[] = {
        1.0, 0.797885, 0.886227, 0.921318, 0.939986, 0.951533, 0.959369, 0.965030,
        0.969311, 0.972659, 0.975350, 0.977559, 0.979406, 0.980971, 0.982316, 0.983484,
        0.984506, 0.985410, 0.986214, 0.986934, 0.987583, 0.988170, 0.988705, 0.989193,
        0.989640, 0.990052, 0.990433, 0.990786, 0.991113, 0.991418, 0.991703, 0.991969,
        0.992219, 0.992454, 0.992675, 0.992884, 0.993080, 0.993267, 0.993443, 0.993611,
        0.993770, 0.993922, 0.994066, 0.994203, 0.994335, 0.994460, 0.994580, 0.994695,
        0.994806, 0.994911, 0.995013, 0.995110, 0.995204, 0.995294, 0.995381, 0.995465,
        0.995546, 0.995624, 0.995699, 0.995772, 0.995842, 0.995910, 0.995976, 0.996040,
        0.996102, 0.996161, 0.996219, 0.996276, 0.996330, 0.996383, 0.996435, 0.996485,
        0.996534, 0.996581, 0.996627, 0.996672, 0.996716, 0.996759, 0.996800, 0.996841,
        0.996880, 0.996918, 0.996956, 0.996993, 0.997028, 0.997063, 0.997097, 0.997131,
        0.997163, 0.997195, 0.997226, 0.997257, 0.997286, 0.997315, 0.997344, 0.997372,
        0.997399, 0.997426, 0.997452, 0.997478
    };

    const bool supported = (n >= 1) && (n <= 100);
    if (!supported) {
        throw std::invalid_argument("C4值只支持样本长度1到100的范围。");
    }
    return kC4Table[n - 1];
}
// Compute the median of a list of range values.
//
// Side effect: `ranges` is sorted in ascending order in place.
//
// @param ranges  Non-empty list of values; reordered by this call.
// @return        The middle element for an odd count, or the mean of the two
//                middle elements for an even count.
// @throws std::invalid_argument if `ranges` is empty.
double calculateRangeMedian(std::vector<double>& ranges) {
    if (ranges.empty()) {
        throw std::invalid_argument("极差值列表不能为空。");
    }

    std::sort(ranges.begin(), ranges.end());

    const size_t upperMiddle = ranges.size() / 2;
    const bool oddCount = (ranges.size() % 2 != 0);
    return oddCount ? ranges[upperMiddle]
                    : (ranges[upperMiddle - 1] + ranges[upperMiddle]) / 2.0;
}
// Combine per-subgroup means and standard deviations into one size-weighted
// standard deviation.
//
// With weights w_i = sizes[i] and W = sum(w_i):
//   mean     = sum(w_i * means[i]) / W
//   variance = sum(w_i * (stddevs[i]^2 + means[i]^2)) / W - mean^2
// so the result includes both the within-subgroup spread and the spread of
// the subgroup means around the combined mean.
//
// @param means    Mean of each subgroup.
// @param stddevs  Standard deviation of each subgroup.
// @param sizes    Number of observations in each subgroup (used as weight).
// @return         Square root of the combined variance.
// @throws std::invalid_argument if the inputs are empty, differ in length, or
//         the total weight is not positive.
double weightedStandardDeviation(const std::vector<double>& means, const std::vector<double>& stddevs, const std::vector<int>& sizes) {
    if (means.empty() || means.size() != stddevs.size() || means.size() != sizes.size()) {
        throw std::invalid_argument("均值、标准差和子组大小列表必须非空且长度一致。");
    }

    double totalWeight = 0.0;
    double weightedMeanSum = 0.0;
    double weightedSecondMoment = 0.0;
    for (size_t i = 0; i < means.size(); ++i) {
        const double weight = sizes[i];  // subgroup size acts as the weight
        totalWeight += weight;
        weightedMeanSum += weight * means[i];
        weightedSecondMoment += weight * (stddevs[i] * stddevs[i] + means[i] * means[i]);
    }
    if (totalWeight <= 0.0) {
        throw std::invalid_argument("子组大小之和必须为正数。");
    }

    const double combinedMean = weightedMeanSum / totalWeight;
    const double combinedVariance =
        weightedSecondMoment / totalWeight - combinedMean * combinedMean;

    // E[x^2] - mean^2 can come out a hair below zero through rounding when the
    // true variance is ~0; clamp so sqrt does not return NaN in that case.
    return std::sqrt(std::max(combinedVariance, 0.0));
}
// Estimate the standard deviation from moving ranges of a chosen window length.
//
// Every window of `length` consecutive values in `data` contributes one range
// (max - min). The ranges are summarised by their mean, or by their median
// when `is_median` is true, and the summary is divided by getD2(length).
// Both summaries use the same getD2 divisor.
//
// @param data       Observations in order.
// @param length     Window length; must satisfy 1 <= length <= data.size().
// @param is_median  Use the median of the ranges instead of their mean.
// @return           Range summary divided by getD2(length).
// @throws std::invalid_argument if `length` is out of range for `data`;
//         exceptions raised by getD2(length) propagate unchanged.
double calculateMovingRange(const std::vector<double>& data, int length, bool is_median = false) {
    const bool lengthFits = (length >= 1) && (static_cast<size_t>(length) <= data.size());
    if (!lengthFits) {
        throw std::invalid_argument("长度必须介于1和数据大小之间。");
    }

    const size_t window = static_cast<size_t>(length);
    const size_t windowCount = data.size() - window + 1;

    std::vector<double> spans;
    spans.reserve(windowCount);
    for (size_t start = 0; start < windowCount; ++start) {
        const auto first = data.begin() + start;
        const auto last = first + window;
        const double highest = *std::max_element(first, last);
        const double lowest = *std::min_element(first, last);
        spans.push_back(highest - lowest);
    }

    // Median or arithmetic mean of the window ranges, as requested.
    const double centralRange = is_median
        ? calculateRangeMedian(spans)
        : std::accumulate(spans.begin(), spans.end(), 0.0) / spans.size();

    return centralRange / getD2(length);
}
// Estimate the process standard deviation from `data` split into consecutive
// subgroups of `subgroupSize` observations.
//
// - subgroupSize == 0: returns 0.0.
// - subgroupSize == 1: delegates to calculateMovingRange(data, 2), i.e. the
//   moving range of adjacent observations.
// - subgroupSize >= 2: computes each subgroup's mean and sample standard
//   deviation (n - 1 divisor), combines them with weightedStandardDeviation,
//   and divides the result by getC4(subgroupSize). The last subgroup may be
//   shorter than `subgroupSize` when data.size() is not a multiple of it.
//
// @param data          Observations in order.
// @param subgroupSize  Observations per subgroup; must not be negative.
// @return              The estimated standard deviation.
// @throws std::invalid_argument if subgroupSize is negative, or if `data` is
//         empty with subgroupSize >= 2; exceptions from the helper functions
//         propagate unchanged.
double calculateStandardDeviation(const std::vector<double>& data, int subgroupSize) {
    if (subgroupSize < 0) {
        // A negative step would wrap the unsigned loop index and form an
        // invalid iterator range.
        throw std::invalid_argument("子组大小不能为负数。");
    }
    if (subgroupSize == 0) {
        return 0.0;
    }
    if (subgroupSize == 1) {
        // Individual observations: use the moving range of adjacent points.
        return calculateMovingRange(data, 2);
    }
    if (data.empty()) {
        throw std::invalid_argument("数据不能为空。");
    }

    // Look the constant up first so an unsupported subgroup size fails before
    // any per-subgroup work is done.
    const double unbiasedConstant = getC4(subgroupSize);

    const size_t step = static_cast<size_t>(subgroupSize);
    const size_t subgroupCount = (data.size() + step - 1) / step;

    std::vector<double> means;
    std::vector<double> stddevs;
    std::vector<int> sizes;
    means.reserve(subgroupCount);
    stddevs.reserve(subgroupCount);
    sizes.reserve(subgroupCount);

    for (size_t start = 0; start < data.size(); start += step) {
        const size_t count = std::min(step, data.size() - start);  // last subgroup may be short
        const auto first = data.begin() + start;
        const auto last = first + count;

        const double mean = std::accumulate(first, last, 0.0) / count;

        double squaredDeviations = 0.0;
        for (auto it = first; it != last; ++it) {
            const double deviation = *it - mean;
            squaredDeviations += deviation * deviation;
        }
        // Sample variance (n - 1 divisor). A one-element subgroup has no
        // within-group spread; use 0 instead of dividing by zero, which would
        // turn the whole result into NaN.
        const double variance = (count > 1) ? squaredDeviations / (count - 1) : 0.0;

        means.push_back(mean);
        stddevs.push_back(std::sqrt(variance));
        sizes.push_back(static_cast<int>(count));
    }

    const double combinedStdDev = weightedStandardDeviation(means, stddevs, sizes);
    return combinedStdDev / unbiasedConstant;
}
// Demo entry point: estimates the standard deviation of a fixed sample and
// prints it. Returns 0 on success, 1 if the calculation reports an error.
int main() {
    // Example data: 32 observations.
    const std::vector<double> data = {
        3.08, 3.09, 3.1, 3.09, 3.15, 3.11, 3.1, 3.09, 3.11, 3.15,
        3.12, 3.09, 3.11, 3.11, 3.1, 3.09, 3.11, 3.08, 3.09, 3.06,
        3.09, 3.1, 3.12, 3.08, 3.13, 3.1, 3.1, 3.09, 3.11, 3.08,
        3.06, 3.09
    };

    try {
        // Subgroup size 32: all observations form a single subgroup.
        const double stddev = calculateStandardDeviation(data, 32);
        std::cout << "估计标准偏差: " << stddev << '\n';
    } catch (const std::exception& error) {
        // The helpers signal bad arguments with std::invalid_argument; report
        // the message instead of letting the program terminate abnormally.
        std::cerr << "错误: " << error.what() << '\n';
        return 1;
    }
    return 0;
}