Skip to content

Commit 9928151

Browse files
[Bugfix:Plagiarism] Bump base Docker image to Ubuntu 22.04 (#104)
### What is the current behavior?
Our Docker image is currently based on Ubuntu 20.04. This is problematic because the main Lichen binary is compiled on the host machine and copied into the Lichen container at runtime, which requires both operating systems to be compatible. Eventually, we should do all of the compilation at image build time instead.

### What is the new behavior?
The base Docker image has been bumped to Ubuntu 22.04. Due to a handful of version incompatibilities and other miscellaneous Lichen bugs, I also bumped clang to version 14 (the C++ tokenizer was broken), bumped Python to 3.9, and fixed a regression introduced in Submitty/Submitty#9630. Further Python version increases will break the Java tokenizer. I will make a separate PR to migrate away from `javac-parser`, which has not been updated in 5 years and appears to be a dead project.
1 parent 7e7d229 commit 9928151

File tree

12 files changed

+24
-26
lines changed

12 files changed

+24
-26
lines changed

.github/workflows/lichen_ci.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ name: Lichen CI
33
on: [push, pull_request]
44

55
env:
6-
PYTHON_VERSION: 3.8
6+
PYTHON_VERSION: '3.9'
77

88
jobs:
99
python-unit-tests:
10-
runs-on: ubuntu-20.04
10+
runs-on: ubuntu-22.04
1111
steps:
1212
- uses: actions/checkout@v2
1313
- uses: actions/setup-python@v2
@@ -19,14 +19,14 @@ jobs:
1919
- name: Install Tokenizer Dependencies
2020
run: |
2121
sudo apt-get update
22-
sudo apt-get install -y clang-6.0
22+
sudo apt-get install -y clang-14
2323
- name: Run Unit Tests
2424
run: |
2525
cd tests/unittest
2626
python3 -m unittest discover
2727
2828
test-lichen-integration:
29-
runs-on: ubuntu-20.04
29+
runs-on: ubuntu-22.04
3030
steps:
3131
- uses: actions/checkout@v2
3232
- name: Install Lichen

.github/workflows/pylint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on: [push]
44

55
jobs:
66
python-lint:
7-
runs-on: ubuntu-20.04
7+
runs-on: ubuntu-22.04
88
steps:
99
- uses: actions/checkout@v2
1010
- uses: actions/setup-python@v2

Dockerfile

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
FROM ubuntu:20.04
1+
FROM ubuntu:22.04
22

33
ARG DEBIAN_FRONTEND=noninteractive
44

55
# C++ and Python
66
RUN apt-get update \
77
&& apt-get install -y \
88
libboost-all-dev \
9-
python3.8 \
9+
python3.9 \
1010
python3-pip \
11-
clang-6.0 \
11+
clang-14 \
1212
default-jdk
1313

1414
# Python Dependencies

bin/concatenate_all.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,10 @@ def validate(config, args):
176176
# check permissions to make sure we have access to the other gradeables
177177
my_course_group_perms = Path(args.basepath).group()
178178
for gradeable in other_gradeables:
179-
if Path(args.datapath, gradeable["other_semester"], gradeable["other_course"]).group()\
179+
if Path(args.datapath, gradeable["other_term"], gradeable["other_course"]).group()\
180180
!= my_course_group_perms:
181181
raise SystemExit("ERROR: Invalid permissions to access course "
182-
f"{gradeable['other_semester']}/{gradeable['other_course']}")
182+
f"{gradeable['other_term']}/{gradeable['other_course']}")
183183

184184
# check permissions for each path we are given (if any are provided)
185185
if config.get("other_gradeable_paths") is not None:
@@ -211,7 +211,7 @@ def main():
211211
validate(config, args)
212212

213213
# parameters to be used in this file
214-
semester = config["semester"]
214+
term = config["term"]
215215
course = config["course"]
216216
gradeable = config["gradeable"]
217217
regex_patterns = config["regex"]
@@ -225,7 +225,7 @@ def main():
225225
total_concat = 0
226226

227227
for dir in regex_dirs:
228-
input_path = os.path.join(args.datapath, semester, course, dir, gradeable)
228+
input_path = os.path.join(args.datapath, term, course, dir, gradeable)
229229
output_path = os.path.join(args.basepath, "users")
230230
total_concat = processGradeable(args.basepath, config,
231231
input_path, output_path, total_concat)
@@ -235,13 +235,13 @@ def main():
235235
for other_gradeable in other_gradeables:
236236
for dir in regex_dirs:
237237
input_path = os.path.join(args.datapath,
238-
other_gradeable["other_semester"],
238+
other_gradeable["other_term"],
239239
other_gradeable["other_course"],
240240
dir,
241241
other_gradeable["other_gradeable"])
242242

243243
output_path = os.path.join(args.basepath, "other_gradeables",
244-
f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}") # noqa: E501
244+
f"{other_gradeable['other_term']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}") # noqa: E501
245245
total_concat = processGradeable(args.basepath, config,
246246
input_path, output_path, total_concat)
247247

compare_hashes/compare_hashes.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ int main(int argc, char* argv[]) {
103103
assert(istr.good());
104104
nlohmann::json config_file_json = nlohmann::json::parse(istr);
105105

106-
config.semester = config_file_json.value("semester", "ERROR");
106+
config.term = config_file_json.value("term", "ERROR");
107107
config.course = config_file_json.value("course", "ERROR");
108108
config.gradeable = config_file_json.value("gradeable", "ERROR");
109109
config.hash_size = config_file_json.value("hash_size", 1);
@@ -218,7 +218,7 @@ int main(int argc, char* argv[]) {
218218
while (istr >> input_hash_str) {
219219
hash input_hash = (unsigned int)(stoul(input_hash_str, 0, 16));
220220
location++;
221-
all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.semester + "__" + config.course + "__" + config.gradeable));
221+
all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.term + "__" + config.course + "__" + config.gradeable));
222222
curr_submission->addHash(input_hash, location);
223223
}
224224

compare_hashes/lichen_config.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#define LICHEN_CONFIG_H
33

44
struct LichenConfig {
5-
std::string semester;
5+
std::string term;
66
std::string course;
77
std::string gradeable;
88
int hash_size;

install_lichen.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ cp -r "$lichen_repository_dir"/* "$lichen_installation_dir"
2020
# install C++ dependencies
2121

2222
apt-get update
23-
apt-get install -y clang-6.0 libboost-all-dev
23+
apt-get install -y clang-14 libboost-all-dev
2424

2525
####################################################################################################
2626
# Install Python Dependencies locally (for concatenation)

tests/data/test_lichen/multiple_versions/expected_output/config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"semester": "f21",
2+
"term": "f21",
33
"course": "plagiarism",
44
"gradeable": "multiple_versions",
55
"config_id": 1,

tests/data/test_lichen/multiple_versions/input/config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"semester": "f21",
2+
"term": "f21",
33
"course": "plagiarism",
44
"gradeable": "multiple_versions",
55
"config_id": 1,

tests/data/test_lichen/repeated_sequences/expected_output/config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"semester": "f21",
2+
"term": "f21",
33
"course": "plagiarism",
44
"gradeable": "repeated_sequences",
55
"config_id": "1",

tests/data/test_lichen/repeated_sequences/input/config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"semester": "f21",
2+
"term": "f21",
33
"course": "plagiarism",
44
"gradeable": "repeated_sequences",
55
"config_id": "1",

tokenizer/c/c_tokenizer.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@ def main():
2424
# copy the concatenated file to the temporary file location
2525
shutil.copy(args.input_file, tmp_cpp_file_name)
2626

27-
if (os.path.isfile('/usr/lib/llvm-6.0/lib/libclang.so.1')):
28-
clang.cindex.Config.set_library_file('/usr/lib/llvm-6.0/lib/libclang.so.1')
29-
elif (os.path.isfile('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')):
30-
clang.cindex.Config.set_library_file('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')
27+
if (os.path.isfile('/usr/lib/llvm-14/lib/libclang.so.1')):
28+
clang.cindex.Config.set_library_file('/usr/lib/llvm-14/lib/libclang.so.1')
3129
idx = clang.cindex.Index.create()
3230

3331
# parse the input file

0 commit comments

Comments
 (0)