// This post was last edited by shituo on 2024-07-19 22:26.
#include <cstddef>
#include <fstream>
#include <iostream>
#include <istream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
/// Guesses the likely gender behind a name from per-character gender
/// frequencies loaded from a CSV table.
///
/// The table is expected to start with a header row, followed by rows of the
/// form `character,maleCount,femaleCount`. Text is treated as UTF-8: every
/// lookup key is one whole UTF-8 code point, so multi-byte characters (such
/// as Chinese characters) are matched as characters rather than as single
/// bytes. Malformed UTF-8 is split on a best-effort basis.
class GenderUtils {
public:
    /// Loads the frequency table from the default location
    /// `data/ngender/charfreq.csv`.
    /// @throws std::runtime_error if the file cannot be opened.
    GenderUtils() : GenderUtils(std::string("data/ngender/charfreq.csv")) {}

    /// Loads the frequency table from the CSV file at `csvPath`.
    /// @throws std::runtime_error if the file cannot be opened.
    explicit GenderUtils(const std::string& csvPath) {
        std::ifstream file(csvPath);
        if (!file.is_open()) {
            throw std::runtime_error("Failed to open file.");
        }
        load(file);
    }

    /// Loads the frequency table from an already opened stream.
    explicit GenderUtils(std::istream& in) {
        load(in);
    }

    /// Scores a name as male or female.
    ///
    /// The first character of `nameString` is dropped before scoring (it is
    /// treated as the surname); the remaining characters are scored. An empty
    /// or one-character name therefore has nothing to score and is decided by
    /// the overall male/female share of the table alone.
    ///
    /// @param nameString UTF-8 encoded name.
    /// @return A map with exactly one entry: `"male"` or `"female"` mapped to
    ///         the normalised score of the winning side, or `"unknown"`
    ///         mapped to 0 when both sides score the same.
    std::map<std::string, double> guessGenderByName(const std::string& nameString) const {
        std::vector<std::string> nameChars = splitUtf8(nameString);
        if (!nameChars.empty()) {
            // Drop the whole first character, not just its first byte.
            nameChars.erase(nameChars.begin());
        }

        const double maleProb = getGenderProb(nameChars, 1);
        const double femaleProb = getGenderProb(nameChars, 0);
        if (maleProb > femaleProb) {
            return {{"male", maleProb / (maleProb + femaleProb)}};
        } else if (femaleProb > maleProb) {
            return {{"female", femaleProb / (maleProb + femaleProb)}};
        } else {
            return {{"unknown", 0}};
        }
    }

private:
    /// Character -> {female likelihood, male likelihood}. The pair order
    /// matches the gender flag used by getGenderProb (0 = female, 1 = male).
    std::map<std::string, std::pair<double, double>> genderMap;
    long long maleTotal = 0;
    long long femaleTotal = 0;
    long long genderTotal = 0;

    /// Returns the byte length of the UTF-8 sequence starting at `pos`,
    /// derived from its lead byte and clamped to the bytes that remain.
    /// A byte that is not a valid lead byte counts as a one-byte sequence.
    static std::size_t utf8SequenceLength(const std::string& text, std::size_t pos) {
        const unsigned char lead = static_cast<unsigned char>(text[pos]);
        std::size_t length = 1;
        if ((lead & 0xE0) == 0xC0) {
            length = 2;
        } else if ((lead & 0xF0) == 0xE0) {
            length = 3;
        } else if ((lead & 0xF8) == 0xF0) {
            length = 4;
        }
        const std::size_t remaining = text.size() - pos;
        return length < remaining ? length : remaining;
    }

    /// Splits `text` into one string per UTF-8 code point.
    static std::vector<std::string> splitUtf8(const std::string& text) {
        std::vector<std::string> codePoints;
        std::size_t pos = 0;
        while (pos < text.size()) {
            const std::size_t length = utf8SequenceLength(text, pos);
            codePoints.push_back(text.substr(pos, length));
            pos += length;
        }
        return codePoints;
    }

    /// Reads the CSV table from `in` and fills genderMap and the totals.
    ///
    /// The first line is skipped as a header. Blank lines are ignored and a
    /// trailing carriage return is stripped, so CRLF files and a trailing
    /// empty line do not break loading. A count cell that is not a number
    /// makes std::stoll throw (std::invalid_argument / std::out_of_range).
    void load(std::istream& in) {
        genderMap.clear();
        maleTotal = 0;
        femaleTotal = 0;

        std::string line;
        std::getline(in, line); // Skip header
        while (std::getline(in, line)) {
            if (!line.empty() && line.back() == '\r') {
                line.pop_back();
            }
            if (line.empty()) {
                continue;
            }

            std::istringstream row(line);
            std::string charCell;
            std::string maleCell;
            std::string femaleCell;
            std::getline(row, charCell, ',');
            std::getline(row, maleCell, ',');
            std::getline(row, femaleCell, ',');

            // Key on the first whole character of the cell.
            const std::string key =
                charCell.empty() ? std::string()
                                 : charCell.substr(0, utf8SequenceLength(charCell, 0));
            const long long maleNum = std::stoll(maleCell);
            const long long femaleNum = std::stoll(femaleCell);

            // Raw counts are stored first and normalised once the totals are known.
            genderMap[key] = {static_cast<double>(femaleNum), static_cast<double>(maleNum)};
            maleTotal += maleNum;
            femaleTotal += femaleNum;
        }
        genderTotal = maleTotal + femaleTotal;

        // Convert raw counts to per-gender likelihoods; a zero total yields 0
        // instead of dividing by zero.
        for (auto& entry : genderMap) {
            entry.second.first = femaleTotal > 0 ? entry.second.first / femaleTotal : 0.0;
            entry.second.second = maleTotal > 0 ? entry.second.second / maleTotal : 0.0;
        }
    }

    /// Computes the unnormalised score of one gender for the given characters.
    ///
    /// Starts from that gender's share of all counts and multiplies in the
    /// likelihood of every character found in the table; characters missing
    /// from the table are skipped.
    ///
    /// @param nameChars  Characters to score, one UTF-8 code point each.
    /// @param genderFlag 0 for female, anything else for male.
    /// @return The score, or 0 when the table holds no counts at all.
    double getGenderProb(const std::vector<std::string>& nameChars, int genderFlag) const {
        if (genderTotal == 0) {
            return 0.0;
        }
        const bool female = (genderFlag == 0);
        double prob = static_cast<double>(female ? femaleTotal : maleTotal) / genderTotal;
        for (const std::string& nameChar : nameChars) {
            const auto it = genderMap.find(nameChar);
            if (it != genderMap.end()) {
                prob *= female ? it->second.first : it->second.second;
            }
        }
        return prob;
    }
};
int main() {
try {
GenderUtils genderUtils;
auto result = genderUtils.guessGenderByName("张三");
for (const auto& pair : result) {
std::cout << pair.first << ": " << pair.second << std::endl;
}
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
}
return 0;
}
// End of the code listing copied from the forum post.