Skip to content

Commit

Permalink
fixed crc computation bug, and added crc per file
Browse files Browse the repository at this point in the history
  • Loading branch information
Cornul11 committed Jul 1, 2023
1 parent d491a19 commit 91d9ce1
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ private void createLibrariesTable() {
private void createSignaturesTable() {
String createTableQuery = "CREATE TABLE IF NOT EXISTS signatures (" +
"id INT PRIMARY KEY AUTO_INCREMENT, " +
"hash BIGINT NOT NULL)";
"hash BIGINT NOT NULL," +
"crc BIGINT NOT NULL)";

try (Connection connection = ds.getConnection();
Statement statement = connection.createStatement()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public int insertLibrary(JarInfoExtractor jarInfoExtractor, long jarHash, long j
@Override
public int insertSignatures(List<Signature> signatures, long jarHash, long jarCrc) {
String insertLibraryQuery = "INSERT INTO libraries (groupId, artifactId, version, hash, crc, isUberJar) VALUES (?, ?, ?, ?, ?, ?)";
String insertSignatureQuery = "INSERT INTO signatures (hash) VALUES (?)";
String insertSignatureQuery = "INSERT INTO signatures (hash, crc) VALUES (?, ?)";
String insertLibrarySignatureQuery = "INSERT INTO library_signature (library_id, signature_id) VALUES (?, ?)";


Expand All @@ -75,7 +75,8 @@ public int insertSignatures(List<Signature> signatures, long jarHash, long jarCr
PreparedStatement librarySignatureStatement = connection.prepareStatement(insertLibrarySignatureQuery);

for (Signature signature : signatures) {
insertStatement.setString(1, signature.getHash());
insertStatement.setLong(1, signature.getHash());
insertStatement.setLong(2, signature.getCrc());
insertStatement.executeUpdate();

ResultSet signatureGeneratedKeys = insertStatement.getGeneratedKeys();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
public class ClassFileInfo {
private final String fileName;
private final long hashCode;
private final long crc;

public ClassFileInfo(String fileName, long hashCode) {
public ClassFileInfo(String fileName, long hashCode, long crc) {
this.fileName = fileName;
this.hashCode = hashCode;
this.crc = crc;
}

public String getFileName() {
Expand All @@ -16,4 +18,8 @@ public String getFileName() {
/**
 * Returns the 64-bit hash value this instance was constructed with.
 *
 * <p>This is a plain accessor for the {@code hashCode} field and is distinct from
 * {@link Object#hashCode()}, which returns an {@code int}.
 *
 * @return the stored hash value
 */
public long getHashCode() {
    return this.hashCode;
}

/**
 * Returns the CRC value that was passed to the constructor for this class file.
 *
 * @return the stored CRC value
 */
public long getCrc() {
    return this.crc;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public int processJarFile(Path jarFilePath) {
}
LongHashFunction xx = LongHashFunction.xx();
long jarHash = xx.hashChars(sb.toString());
long jarCrc = jarHandler.getCrc();
long jarCrc = jarHandler.getJarCrc();

JarInfoExtractor jarInfoExtractor = new JarInfoExtractor(jarFilePath.toString());
if (signatures.isEmpty()) { // it's probably an uber-JAR, let's still add it to the db
Expand Down Expand Up @@ -110,6 +110,6 @@ public void printStats() {
}

private Signature createSignature(ClassFileInfo signature, JarInfoExtractor jarInfoExtractor) {
return new Signature(0, signature.getFileName(), Long.toString(signature.getHashCode()), jarInfoExtractor.getGroupId(), jarInfoExtractor.getArtifactId(), jarInfoExtractor.getVersion());
return new Signature(0, signature.getFileName(), signature.getHashCode(), signature.getCrc(), jarInfoExtractor.getGroupId(), jarInfoExtractor.getArtifactId(), jarInfoExtractor.getVersion());
}
}
22 changes: 13 additions & 9 deletions src/main/java/nl/tudelft/cornul11/thesis/jarfile/JarHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ public class JarHandler {
private final Set<String> mavenSubmodules = new HashSet<>();
private final Logger logger = LoggerFactory.getLogger(JarHandler.class);
private final boolean ignoreUberJars;
private final CRC32 crc = new CRC32();
private long crcValue = -1;
private final CRC32 jarCrc = new CRC32();
private final long crcValue;

public JarHandler(Path jarFilePath, List<String> ignoredUberJars, List<String> insertedLibraries, ConfigurationLoader config) {
this.jarFilePath = jarFilePath;
this.ignoredUberJars = ignoredUberJars;
Expand All @@ -41,20 +42,22 @@ public JarHandler(Path jarFilePath, List<String> ignoredUberJars, List<String> i
}

private long generateCrc() {
jarCrc.reset();

try (FileChannel channel = FileChannel.open(jarFilePath)) {
MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
byte[] bytes = new byte[8192];

while (buffer.hasRemaining()) {
int length = Math.min(buffer.remaining(), bytes.length);
buffer.get(bytes, 0, length);
crc.update(bytes, 0, length);
jarCrc.update(bytes, 0, length);
}
} catch (IOException e) {
logger.error("Failed to generate CRC for " + jarFilePath, e);
}

return crc.getValue();
return jarCrc.getValue();
}

public List<ClassFileInfo> extractSignatures() {
Expand Down Expand Up @@ -172,16 +175,17 @@ private boolean hasMultiplePackages(Path jarFilePath, JarEntry entry, String ini

private ClassFileInfo processClassFile(JarEntry entry, JarFile jarFile) throws IOException {
try (InputStream classFileInputStream = jarFile.getInputStream(entry)) {
byte[] bytecode = classFileInputStream.readAllBytes();
byte[] bytecode = BytecodeUtils.readBytecodeAndCalculateCRCWhenNotAvailable(entry, classFileInputStream);

BytecodeDetails bytecodeDetails = BytecodeParser.extractSignature(bytecode);
return new ClassFileInfo(entry.getName(), BytecodeUtils.getSignatureHash(bytecodeDetails));
} catch (Exception e) {
return new ClassFileInfo(entry.getName(), BytecodeUtils.getSignatureHash(bytecodeDetails), entry.getCrc());
} catch ( Exception e ) {
logger.error("Error while processing class file: " + entry.getName(), e);
return null;
throw e;
}
}

public long getCrc() {
public long getJarCrc() {
return crcValue;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class JarSignatureMapper {
private final SignatureDAO signatureDao;
private final Logger logger = LoggerFactory.getLogger(JarSignatureMapper.class);
private static final Set<String> FILENAME_EXCEPTIONS = Set.of("module-info.class", "package-info.class");

public JarSignatureMapper(SignatureDAO signatureDao) {
this.signatureDao = signatureDao;
}
Expand Down Expand Up @@ -54,7 +55,7 @@ public Map<String, Map<String, Object>> inferJarFile(Path jarFilePath) {
public Map<String, Map<String, Object>> getTopMatches(List<ClassFileInfo> signatures, SignatureDAO signatureDao) {
logger.info("Getting top matches for " + signatures.size() + " signatures");
List<Long> hashes = signatures.stream()
.map(signature -> signature.getHashCode())
.map(ClassFileInfo::getHashCode)
.collect(Collectors.toList());

// get the top library matches based on hashes
Expand All @@ -78,17 +79,18 @@ public Map<String, Map<String, Object>> getTopMatches(List<ClassFileInfo> signat
entry -> entry.getKey() + ":" + entry.getValue().getVersion(),
entry -> Map.of("count", (long) entry.getValue().getClassFileCount(),
"total", (long) entry.getValue().getTotalCount(),
"ratio", ((double)entry.getValue().getClassFileCount())/entry.getValue().getTotalCount())));
"ratio", ((double) entry.getValue().getClassFileCount()) / entry.getValue().getTotalCount())));

return libraryVersionCountMap;
}

private ClassFileInfo processClassFile(JarEntry entry, JarFile jarFile) throws IOException {
logger.info("Processing class file: " + entry.getName());
try (InputStream classFileInputStream = jarFile.getInputStream(entry)) {
byte[] bytecode = classFileInputStream.readAllBytes();
byte[] bytecode = BytecodeUtils.readBytecodeAndCalculateCRCWhenNotAvailable(entry, classFileInputStream);

BytecodeDetails bytecodeDetails = BytecodeParser.extractSignature(bytecode);
return new ClassFileInfo(entry.getName(), BytecodeUtils.getSignatureHash(bytecodeDetails));
return new ClassFileInfo(entry.getName(), BytecodeUtils.getSignatureHash(bytecodeDetails), entry.getCrc());
} catch (Exception e) {
logger.error("Error while processing class file: " + entry.getName(), e);
return null;
Expand Down
12 changes: 9 additions & 3 deletions src/main/java/nl/tudelft/cornul11/thesis/model/Signature.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
public class Signature {
private final int id;
private final String fileName;
private final String hash;
private final long hash;
private final long crc;
private final String groupID;
private final String artifactId;
private final String version;

public Signature(int id, String fileName, String hash, String groupID, String artifactId, String version) {
public Signature(int id, String fileName, long hash, long crc, String groupID, String artifactId, String version) {
this.id = id;
this.fileName = fileName;
this.hash = hash;
this.crc = crc;
this.groupID = groupID;
this.artifactId = artifactId;
this.version = version;
Expand All @@ -25,10 +27,14 @@ public String getFileName() {
return fileName;
}

public String getHash() {
public long getHash() {
return hash;
}

/**
 * Returns the CRC value that was passed to the constructor for this signature.
 *
 * @return the stored CRC value
 */
public long getCrc() {
    return this.crc;
}

public String getGroupID() {
return groupID;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,22 @@
import net.openhft.hashing.LongHashFunction;
import nl.tudelft.cornul11.thesis.signature.extractor.bytecode.members.*;

import java.io.IOException;
import java.io.InputStream;
import java.util.jar.JarEntry;
import java.util.zip.CRC32;

public class BytecodeUtils {
/**
 * Reads the complete content of a class file and makes sure the jar entry carries a CRC.
 *
 * <p>The stream is drained first. Afterwards, only when {@code entry.getCrc()} reports
 * {@code -1}, a CRC-32 over the bytes just read is computed and stored on the entry via
 * {@code entry.setCrc(...)}. An entry whose CRC is anything other than {@code -1} is left
 * untouched.
 *
 * @param entry                the jar entry being read; its CRC is set as a side effect
 *                             when it is currently {@code -1}
 * @param classFileInputStream stream positioned at the entry's content; it is read to the
 *                             end but not closed by this method
 * @return all bytes read from {@code classFileInputStream}
 * @throws IOException if reading from the stream fails
 */
public static byte[] readBytecodeAndCalculateCRCWhenNotAvailable(JarEntry entry, InputStream classFileInputStream) throws IOException {
    final byte[] classBytes = classFileInputStream.readAllBytes();

    // Guard clause: nothing to compute when the entry already has a CRC.
    final boolean crcAlreadyKnown = entry.getCrc() != -1;
    if (crcAlreadyKnown) {
        return classBytes;
    }

    final CRC32 checksum = new CRC32();
    checksum.update(classBytes, 0, classBytes.length);
    entry.setCrc(checksum.getValue());
    return classBytes;
}

public static long getSignatureHash(BytecodeDetails bytecodeDetails) {
LongHashFunction cityHashFunction = LongHashFunction.xx3();
StringBuilder classSignature = new StringBuilder();
Expand Down

0 comments on commit 91d9ce1

Please sign in to comment.