Skip to content

Commit

Permalink
Merge pull request #1438 from roumcha/fix-language-name-normalization
Browse files Browse the repository at this point in the history
Fix language name normalization
  • Loading branch information
kenkoooo authored Oct 15, 2023
2 parents dc92d7b + 09b6617 commit bfcd349
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 12 deletions.
56 changes: 48 additions & 8 deletions atcoder-problems-backend/sql-client/src/language_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,33 @@ impl LanguageCountClient for PgPool {
}
}

/// Label prefixes that `simplify_language` resolves by table lookup instead of
/// by its generic "cut at the first digit, `(` or `-`" pattern.
///
/// Each entry is `(prefix, reported name)`. Labels are tested with
/// `starts_with` in the order listed, and the first hit is returned.
const MAPPING: [(&str, &str); 9] = [
    // Alternative implementations reported under the language they implement.
    ("PyPy", "Python"),
    ("Python (Cython", "Cython"),
    // Contains a digit, so the generic pattern would truncate it.
    ("Assembly x64", "Assembly x64"),
    // Casing is normalized.
    ("Awk", "AWK"),
    // The language name follows the prefix and would be lost at the `-`.
    ("IOI-Style", "C++"),
    ("LuaJIT", "Lua"),
    // Contains a digit, so the generic pattern would truncate it.
    ("Seed7", "Seed7"),
    // Reported under a different name.
    ("Perl6", "Raku"),
    // Contains `-`, so the generic pattern would truncate it.
    ("Objective-C", "Objective-C"),
];

/// Reduces a raw language label such as "C++ 20 (gcc 12.2)" to a bare language name.
///
/// A label that starts with a prefix listed in `MAPPING` yields the mapped name.
/// Otherwise the label is cut at the first digit, `(` or `-` (together with any
/// whitespace directly before it), and an empty remainder becomes "Unknown".
///
/// NOTE(review): this span comes from a diff rendered without +/- markers. It
/// appears to interleave the removed implementation with the added one, and the
/// braces do not balance as shown. Confirm against the repository before reuse.
fn simplify_language(lang: &str) -> String {
// NOTE(review): the next five lines look like the pre-change body removed by this
// commit: "Perl6*" was special-cased to "Raku", and otherwise the first
// "<digits><whitespace>(...)" group was deleted from the label.
let re = Regex::new(r"\d*\s*\(.*\)").unwrap();
if lang.starts_with("Perl6") {
"Raku".to_string()
} else {
re.replace(lang, "").to_string()
// Post-change body: table lookup first; the first matching prefix wins.
for (beginning, simplified) in MAPPING {
if lang.starts_with(beginning) {
return simplified.to_string();
}
}

// Generic rule: drop optional whitespace, then the first digit, `(` or `-`,
// and the rest of the line. The pattern is compiled on every call.
let simplified = Regex::new(r"\s*[\d(\-].*")
.unwrap()
.replace(lang, "")
.to_string();

// Nothing left (e.g. the label started with a digit) is reported as "Unknown".
match simplified.len() {
0 => String::from("Unknown"),
_ => simplified,
}
}

Expand All @@ -207,12 +228,31 @@ mod tests {

// Checks `simplify_language` against sample language labels.
// NOTE(review): this span comes from a diff rendered without +/- markers, so a few
// assertions below appear to be removed (pre-change) expectations that contradict
// the added ones for the same input; they are flagged individually.
#[test]
fn test_simplify_language() {
// NOTE(review): appears to be the removed expectation; the next line asserts
// "language" for the same input.
assert_eq!(simplify_language("language1"), "language1");
assert_eq!(simplify_language("language1"), "language");
assert_eq!(simplify_language("Perl (5)"), "Perl");
assert_eq!(simplify_language("Perl6"), "Raku");
assert_eq!(simplify_language("Fortran(GNU Fortran 9.2.1)"), "Fortran");
assert_eq!(simplify_language("Ada2012 (GNAT 9.2.1)"), "Ada");
// NOTE(review): appears to be the removed expectation; the "mapped individually"
// group below asserts "Python" for the same input.
assert_eq!(simplify_language("PyPy2 (7.3.0)"), "PyPy");
// NOTE(review): appears to be the removed expectation; the next line asserts
// "Haxe" for the same input.
assert_eq!(simplify_language("Haxe (4.0.3); js"), "Haxe; js");
assert_eq!(simplify_language("Haxe (4.0.3); js"), "Haxe");
// Version numbers, with or without a separating space, and everything after them
// are dropped.
assert_eq!(simplify_language("C++11 (Clang++ 3.4)"), "C++");
assert_eq!(simplify_language("C++ 20 (gcc 12.2)"), "C++");
assert_eq!(simplify_language("C# 11.0 (.NET 7.0.7)"), "C#");
assert_eq!(simplify_language("C# 11.0 AOT (.NET 7.0.7)"), "C#");
assert_eq!(simplify_language("Visual Basic 16.9 (...)"), "Visual Basic");
// Symbol-only and non-ASCII names are kept up to the parenthesised suffix.
assert_eq!(simplify_language("><> (fishr 0.1.0)"), "><>");
assert_eq!(simplify_language("プロデル (...)"), "プロデル");

// mapped individually
assert_eq!(simplify_language("Assembly x64"), "Assembly x64");
assert_eq!(simplify_language("Awk (GNU Awk 4.1.4)"), "AWK");
assert_eq!(simplify_language("IOI-Style C++ (GCC 5.4.1)"), "C++");
assert_eq!(simplify_language("LuaJIT (2.0.4)"), "Lua");
assert_eq!(simplify_language("Objective-C (Clang3.8.0)"), "Objective-C");
assert_eq!(simplify_language("PyPy2 (7.3.0)"), "Python");
assert_eq!(simplify_language("Python (Cython 0.29.34)"), "Cython");
assert_eq!(simplify_language("Cython (0.29.16)"), "Cython");
assert_eq!(simplify_language("Seed7 (Seed7 3.2.1)"), "Seed7");

// A label with nothing before its first digit falls back to "Unknown".
assert_eq!(simplify_language("1234"), "Unknown");
}
}
22 changes: 22 additions & 0 deletions atcoder-problems-frontend/src/utils/LanguageNormalizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,26 @@ test("normalize language", () => {
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
expect(normalizeLanguage("Scala (2.11.7)")).toBe("Scala");
expect(normalizeLanguage("Fortran(GNU Fortran 9.2.1)")).toBe("Fortran");
expect(normalizeLanguage("Ada2012 (GNAT 9.2.1)")).toBe("Ada");
expect(normalizeLanguage("Haxe (4.0.3); js")).toBe("Haxe");
expect(normalizeLanguage("C++11 (Clang++ 3.4)")).toBe("C++");
expect(normalizeLanguage("C++ 20 (gcc 12.2)")).toBe("C++");
expect(normalizeLanguage("C# 11.0 (.NET 7.0.7)")).toBe("C#");
expect(normalizeLanguage("C# 11.0 AOT (.NET 7.0.7)")).toBe("C#");
expect(normalizeLanguage("Visual Basic 16.9 (...)")).toBe("Visual Basic");
expect(normalizeLanguage("><> (fishr 0.1.0)")).toBe("><>");
expect(normalizeLanguage("プロデル (...)")).toBe("プロデル");

// mapped individually
expect(normalizeLanguage("Assembly x64")).toBe("Assembly x64");
expect(normalizeLanguage("Awk (GNU Awk 4.1.4)")).toBe("AWK");
expect(normalizeLanguage("IOI-Style C++ (GCC 5.4.1)")).toBe("C++");
expect(normalizeLanguage("LuaJIT (2.0.4)")).toBe("Lua");
expect(normalizeLanguage("Objective-C (Clang3.8.0)")).toBe("Objective-C");
expect(normalizeLanguage("PyPy2 (7.3.0)")).toBe("Python");
expect(normalizeLanguage("Python (Cython 0.29.34)")).toBe("Cython");
expect(normalizeLanguage("Cython (0.29.16)")).toBe("Cython");
expect(normalizeLanguage("Seed7 (Seed7 3.2.1)")).toBe("Seed7");

expect(normalizeLanguage("1234")).toBe("Unknown");
});
22 changes: 18 additions & 4 deletions atcoder-problems-frontend/src/utils/LanguageNormalizer.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
/**
 * Label prefixes that `normalizeLanguage` resolves by table lookup instead of
 * by its generic "cut at the first digit, `(` or `-`" pattern.
 *
 * Each entry is `[prefix, reported name]`. Labels are tested with `startsWith`
 * in the order listed, and the first hit is returned.
 */
const mapping: Array<[beginning: string, normalized: string]> = [
  // Alternative implementations reported under the language they implement.
  ["PyPy", "Python"],
  ["Python (Cython", "Cython"],
  // Contains a digit, so the generic pattern would truncate it.
  ["Assembly x64", "Assembly x64"],
  // Casing is normalized.
  ["Awk", "AWK"],
  // The language name follows the prefix and would be lost at the `-`.
  ["IOI-Style", "C++"],
  ["LuaJIT", "Lua"],
  // Contains a digit, so the generic pattern would truncate it.
  ["Seed7", "Seed7"],
  // Reported under a different name.
  ["Perl6", "Raku"],
  // Contains `-`, so the generic pattern would truncate it.
  ["Objective-C", "Objective-C"],
];

/**
 * Reduces a raw language label such as "C++ 20 (gcc 12.2)" to a bare language name.
 *
 * A label that starts with a prefix listed in `mapping` yields the mapped name.
 * Otherwise the label is cut at the first digit, `(` or `-` (together with any
 * whitespace directly before it), and an empty remainder becomes "Unknown".
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers. It
 * appears to interleave the removed implementation with the added one, and the
 * braces do not balance as shown. Confirm against the repository before reuse.
 */
export const normalizeLanguage = (language: string): string => {
// NOTE(review): the next four lines look like the pre-change body removed by this
// commit: "Perl6*" was special-cased to "Raku", and otherwise a trailing
// "<digits><whitespace>(...)" group was deleted from the label.
if (language.startsWith("Perl6")) {
return "Raku";
} else {
return language.replace(/\d*\s*\(.*\)$/, "");
// Post-change body: table lookup first; the first matching prefix wins.
for (const [beginning, normalized] of mapping) {
if (language.startsWith(beginning)) {
return normalized;
}
}

// Generic rule: drop optional whitespace, then the first digit, `(` or `-`,
// and the rest of the line. An empty result is falsy, so `||` yields "Unknown".
return language.replace(/\s*[\d(\-].*/, "") || "Unknown";
};

0 comments on commit bfcd349

Please sign in to comment.