Skip to content

Commit

Permalink
[Bugfix:Plagiarism] Bump base Docker image to Ubuntu 22.04 (#104)
Browse files Browse the repository at this point in the history
### What is the current behavior?
Our Docker image is currently based on Ubuntu 20.04. This is problematic
because the main Lichen binary is compiled on the host machine and
copied into the Lichen container at runtime, which requires both
operating systems to be compatible. Eventually, we should do all of the
compilation at image build time instead...

### What is the new behavior?
The base Docker image has been bumped to Ubuntu 22.04. Due to a handful
of version incompatibilities and other miscellaneous Lichen bugs, I also
bumped Clang to version 14 (the C++ tokenizer was broken), bumped Python
to 3.9, and fixed a regression introduced in
Submitty/Submitty#9630 by renaming the `semester` config key to `term`
throughout Lichen (Python, C++, and the test configs).

Upgrading Python beyond 3.9 will break the Java tokenizer. I will
make a separate PR to migrate away from `javac-parser`, which has not
been updated in 5 years and appears to be a dead project.
  • Loading branch information
williamjallen authored Sep 11, 2023
1 parent 7e7d229 commit 9928151
Show file tree
Hide file tree
Showing 12 changed files with 24 additions and 26 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/lichen_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ name: Lichen CI
on: [push, pull_request]

env:
PYTHON_VERSION: 3.8
PYTHON_VERSION: '3.9'

jobs:
python-unit-tests:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
Expand All @@ -19,14 +19,14 @@ jobs:
- name: Install Tokenizer Dependencies
run: |
sudo apt-get update
sudo apt-get install -y clang-6.0
sudo apt-get install -y clang-14
- name: Run Unit Tests
run: |
cd tests/unittest
python3 -m unittest discover
test-lichen-integration:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- name: Install Lichen
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on: [push]

jobs:
python-lint:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
FROM ubuntu:20.04
FROM ubuntu:22.04

ARG DEBIAN_FRONTEND=noninteractive

# C++ and Python
RUN apt-get update \
&& apt-get install -y \
libboost-all-dev \
python3.8 \
python3.9 \
python3-pip \
clang-6.0 \
clang-14 \
default-jdk

# Python Dependencies
Expand Down
12 changes: 6 additions & 6 deletions bin/concatenate_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ def validate(config, args):
# check permissions to make sure we have access to the other gradeables
my_course_group_perms = Path(args.basepath).group()
for gradeable in other_gradeables:
if Path(args.datapath, gradeable["other_semester"], gradeable["other_course"]).group()\
if Path(args.datapath, gradeable["other_term"], gradeable["other_course"]).group()\
!= my_course_group_perms:
raise SystemExit("ERROR: Invalid permissions to access course "
f"{gradeable['other_semester']}/{gradeable['other_course']}")
f"{gradeable['other_term']}/{gradeable['other_course']}")

# check permissions for each path we are given (if any are provided)
if config.get("other_gradeable_paths") is not None:
Expand Down Expand Up @@ -211,7 +211,7 @@ def main():
validate(config, args)

# parameters to be used in this file
semester = config["semester"]
term = config["term"]
course = config["course"]
gradeable = config["gradeable"]
regex_patterns = config["regex"]
Expand All @@ -225,7 +225,7 @@ def main():
total_concat = 0

for dir in regex_dirs:
input_path = os.path.join(args.datapath, semester, course, dir, gradeable)
input_path = os.path.join(args.datapath, term, course, dir, gradeable)
output_path = os.path.join(args.basepath, "users")
total_concat = processGradeable(args.basepath, config,
input_path, output_path, total_concat)
Expand All @@ -235,13 +235,13 @@ def main():
for other_gradeable in other_gradeables:
for dir in regex_dirs:
input_path = os.path.join(args.datapath,
other_gradeable["other_semester"],
other_gradeable["other_term"],
other_gradeable["other_course"],
dir,
other_gradeable["other_gradeable"])

output_path = os.path.join(args.basepath, "other_gradeables",
f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}") # noqa: E501
f"{other_gradeable['other_term']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}") # noqa: E501
total_concat = processGradeable(args.basepath, config,
input_path, output_path, total_concat)

Expand Down
4 changes: 2 additions & 2 deletions compare_hashes/compare_hashes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ int main(int argc, char* argv[]) {
assert(istr.good());
nlohmann::json config_file_json = nlohmann::json::parse(istr);

config.semester = config_file_json.value("semester", "ERROR");
config.term = config_file_json.value("term", "ERROR");
config.course = config_file_json.value("course", "ERROR");
config.gradeable = config_file_json.value("gradeable", "ERROR");
config.hash_size = config_file_json.value("hash_size", 1);
Expand Down Expand Up @@ -218,7 +218,7 @@ int main(int argc, char* argv[]) {
while (istr >> input_hash_str) {
hash input_hash = (unsigned int)(stoul(input_hash_str, 0, 16));
location++;
all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.semester + "__" + config.course + "__" + config.gradeable));
all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.term + "__" + config.course + "__" + config.gradeable));
curr_submission->addHash(input_hash, location);
}

Expand Down
2 changes: 1 addition & 1 deletion compare_hashes/lichen_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define LICHEN_CONFIG_H

struct LichenConfig {
std::string semester;
std::string term;
std::string course;
std::string gradeable;
int hash_size;
Expand Down
2 changes: 1 addition & 1 deletion install_lichen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ cp -r "$lichen_repository_dir"/* "$lichen_installation_dir"
# install C++ dependencies

apt-get update
apt-get install -y clang-6.0 libboost-all-dev
apt-get install -y clang-14 libboost-all-dev

####################################################################################################
# Install Python Dependencies locally (for concatenation)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"semester": "f21",
"term": "f21",
"course": "plagiarism",
"gradeable": "multiple_versions",
"config_id": 1,
Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_lichen/multiple_versions/input/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"semester": "f21",
"term": "f21",
"course": "plagiarism",
"gradeable": "multiple_versions",
"config_id": 1,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"semester": "f21",
"term": "f21",
"course": "plagiarism",
"gradeable": "repeated_sequences",
"config_id": "1",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"semester": "f21",
"term": "f21",
"course": "plagiarism",
"gradeable": "repeated_sequences",
"config_id": "1",
Expand Down
6 changes: 2 additions & 4 deletions tokenizer/c/c_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ def main():
# copy the concatenated file to the temporary file location
shutil.copy(args.input_file, tmp_cpp_file_name)

if (os.path.isfile('/usr/lib/llvm-6.0/lib/libclang.so.1')):
clang.cindex.Config.set_library_file('/usr/lib/llvm-6.0/lib/libclang.so.1')
elif (os.path.isfile('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')):
clang.cindex.Config.set_library_file('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')
if (os.path.isfile('/usr/lib/llvm-14/lib/libclang.so.1')):
clang.cindex.Config.set_library_file('/usr/lib/llvm-14/lib/libclang.so.1')
idx = clang.cindex.Index.create()

# parse the input file
Expand Down

0 comments on commit 9928151

Please sign in to comment.