From ac371566f6b2da8a57c72dd9cec6ed9a5ac147ae Mon Sep 17 00:00:00 2001 From: Allan Shoup Date: Thu, 26 Aug 2021 21:40:56 -0500 Subject: [PATCH] Converted System.exit to throw exceptions, cleaned up leaked resources, removed unnecessary casts. --- .../blockbuilding/AbstractBlockBuilding.java | 2 +- .../AbstractBlockProcessing.java | 2 +- .../blockcleaning/BlockFiltering.java | 6 +- .../blockcleaning/SizeBasedBlockPurging.java | 4 +- .../comparisoncleaning/CanopyClustering.java | 5 +- .../ExtendedCanopyClustering.java | 6 +- .../AbstractRandomSearchConfiguration.java | 4 +- .../org/scify/jedai/datamodel/Comparison.java | 1 - .../scify/jedai/datamodel/GomoryHuTree.java | 2 +- .../jedai/datamodel/SimilarityPairs.java | 9 +- .../scify/jedai/datamodel/joins/Category.java | 4 +- .../entityreader/EntityDBReader.java | 78 ++++--------- .../entityreader/EntityJSONRDFReader.java | 8 +- .../entityreader/EntityRDFReader.java | 5 +- .../entityreader/EntitySPARQLReader.java | 5 +- .../entityreader/EntityXMLreader.java | 2 +- .../groundtruthreader/AbstractGtReader.java | 8 +- .../groundtruthreader/GtCSVReader.java | 8 +- .../groundtruthreader/GtRDFReader.java | 4 +- .../datawriter/BlocksPerformanceWriter.java | 109 +++++------------- .../datawriter/ClustersPerformanceWriter.java | 71 +++--------- .../jedai/datawriter/PrintStatsToFile.java | 75 +++--------- .../jedai/entityclustering/CutClustering.java | 14 +-- .../RicochetSRClusteringCCER.java | 2 +- .../jedai/entitymatching/GroupLinkage.java | 5 +- .../jedai/entitymatching/ProfileMatcher.java | 5 +- .../GlobalProgressiveSortedNeighborhood.java | 2 +- .../ProgressiveBlockScheduling.java | 4 +- .../ProgressiveEntityScheduling.java | 4 +- .../ProgressiveGlobalTopComparisons.java | 5 +- .../ProgressiveLocalTopComparisons.java | 3 +- .../utilities/BlockcentricEntityIndex.java | 2 +- .../ProgressiveCNPDecomponsedBlocks.java | 2 +- .../characterbased/AllPairs.java | 2 +- .../characterbased/EdJoin.java | 2 +- 
.../fuzzysets/FuzzySetSimJoin.java | 9 +- .../tokenbased/PartEnumJoin.java | 10 +- .../org/scify/jedai/textmodels/BagModel.java | 12 +- .../CharacterNGramsWithGlobalWeights.java | 16 ++- .../scify/jedai/textmodels/GraphModel.java | 10 +- .../TokenNGramsWithGlobalWeights.java | 16 ++- .../embeddings/PretrainedVectors.java | 61 ++++------ .../embeddings/VectorSpaceModel.java | 6 +- .../org/scify/jedai/utilities/DBUtils.java | 52 +++++++++ .../datastructures/GroundTruthIndex.java | 4 - .../jedai/datareader/TestHDTRdfReader.java | 4 +- .../jedai/datareader/TestJSONrdfReader.java | 4 +- .../demoworkflows/CompareXmlRdfProfiles.java | 3 +- .../scify/jedai/demoworkflows/CsvDblpAcm.java | 3 +- .../groundtruth/GtDblpRdfAcmCsvReader.java | 8 +- .../groundtruth/GtRdfCsvReader.java | 8 +- .../entityclustering/TestAllMethods.java | 3 +- .../entityclustering/TestAllMethodsCcer.java | 2 - .../entityclustering/TestAllMethodsDer.java | 3 +- .../TestCorrelationClustering.java | 3 +- .../jedai/generalexamples/PrintDatasets.java | 2 - .../jedai/similarityjoins/TestSimJoins.java | 4 +- .../TestSimJoinsWithDirtyERdatasets.java | 3 +- .../TestCleanCleanERBaseline.java | 2 +- .../prioritization/TestDirtyERBaseline.java | 2 +- 60 files changed, 267 insertions(+), 453 deletions(-) create mode 100644 src/main/java/org/scify/jedai/utilities/DBUtils.java diff --git a/src/main/java/org/scify/jedai/blockbuilding/AbstractBlockBuilding.java b/src/main/java/org/scify/jedai/blockbuilding/AbstractBlockBuilding.java index 12c77bba..2257bde0 100644 --- a/src/main/java/org/scify/jedai/blockbuilding/AbstractBlockBuilding.java +++ b/src/main/java/org/scify/jedai/blockbuilding/AbstractBlockBuilding.java @@ -107,7 +107,7 @@ public int getTotalNoOfEntities() { if (entityProfilesD2 == null) { return noOfEntitiesD1; } - return noOfEntitiesD1 + noOfEntitiesD2; + return Math.addExact(noOfEntitiesD1, noOfEntitiesD2); } protected void indexEntities(Map index, List entities) { diff --git 
a/src/main/java/org/scify/jedai/blockprocessing/AbstractBlockProcessing.java b/src/main/java/org/scify/jedai/blockprocessing/AbstractBlockProcessing.java index 871b6867..5d3f78b4 100644 --- a/src/main/java/org/scify/jedai/blockprocessing/AbstractBlockProcessing.java +++ b/src/main/java/org/scify/jedai/blockprocessing/AbstractBlockProcessing.java @@ -36,6 +36,6 @@ protected void printOriginalStatistics(List inputBlocks) { } Log.info("Original blocks\t:\t" + inputBlocks.size()); - Log.info("Original comparisons\t:\t" + ((long)comparisons)); + Log.info("Original comparisons\t:\t" + comparisons); } } diff --git a/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/BlockFiltering.java b/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/BlockFiltering.java index 7ab78755..9235691c 100644 --- a/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/BlockFiltering.java +++ b/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/BlockFiltering.java @@ -105,10 +105,10 @@ protected void getBilateralLimits(List blocks) { }); for (int i = 0; i < limitsD1.length; i++) { - limitsD1[i] = (int) Math.round(ratio * limitsD1[i]); + limitsD1[i] = Math.round(ratio * limitsD1[i]); } for (int i = 0; i < limitsD2.length; i++) { - limitsD2[i] = (int) Math.round(ratio * limitsD2[i]); + limitsD2[i] = Math.round(ratio * limitsD2[i]); } } @@ -193,7 +193,7 @@ protected void getUnilateralLimits(List blocks) { }); for (int i = 0; i < limitsD1.length; i++) { - limitsD1[i] = (int) Math.round(ratio * limitsD1[i]); + limitsD1[i] = Math.round(ratio * limitsD1[i]); } } diff --git a/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/SizeBasedBlockPurging.java b/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/SizeBasedBlockPurging.java index c610ca0d..cd05b4ec 100644 --- a/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/SizeBasedBlockPurging.java +++ b/src/main/java/org/scify/jedai/blockprocessing/blockcleaning/SizeBasedBlockPurging.java @@ 
-63,7 +63,7 @@ private int getMaxBlockSize(List blocks) { entities.addAll(uBlock.getEntities()); }); - return (int) Math.round(entities.size()*purgingFactor); + return Math.round(entities.size()*purgingFactor); } private int getMaxInnerBlockSize(List blocks) { @@ -76,7 +76,7 @@ private int getMaxInnerBlockSize(List blocks) { d2Entities.addAll(bBlock.getIndex2Entities()); }); - return (int) Math.round(Math.min(d1Entities.size(), d2Entities.size())*purgingFactor); + return Math.round(Math.min(d1Entities.size(), d2Entities.size())*purgingFactor); } @Override diff --git a/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/CanopyClustering.java b/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/CanopyClustering.java index 6b27b97d..694b5392 100644 --- a/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/CanopyClustering.java +++ b/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/CanopyClustering.java @@ -15,7 +15,6 @@ */ package org.scify.jedai.blockprocessing.comparisoncleaning; -import com.esotericsoftware.minlog.Log; import org.scify.jedai.datamodel.AbstractBlock; import org.scify.jedai.utilities.enumerations.WeightingScheme; import gnu.trove.iterator.TIntIterator; @@ -56,8 +55,8 @@ public CanopyClustering(float inThr, float outThr, WeightingScheme scheme) { exclusiveThreshold = outThr; inclusiveThreshold = inThr; if (exclusiveThreshold < inclusiveThreshold) { - Log.error(getMethodName(), "The " + getParameterName(1) + " cannot be smaller than the " + getParameterName(0)); - System.exit(-1); + throw new IllegalStateException( + "The " + getParameterName(1) + " cannot be smaller than the " + getParameterName(0)); } } diff --git a/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/ExtendedCanopyClustering.java b/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/ExtendedCanopyClustering.java index e22b9c08..2ec75305 100644 --- 
a/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/ExtendedCanopyClustering.java +++ b/src/main/java/org/scify/jedai/blockprocessing/comparisoncleaning/ExtendedCanopyClustering.java @@ -15,7 +15,6 @@ */ package org.scify.jedai.blockprocessing.comparisoncleaning; -import com.esotericsoftware.minlog.Log; import org.scify.jedai.datamodel.AbstractBlock; import org.scify.jedai.datamodel.Comparison; import org.scify.jedai.utilities.enumerations.WeightingScheme; @@ -60,8 +59,7 @@ public ExtendedCanopyClustering(int inThr, int outThr, WeightingScheme scheme) { exclusiveThreshold = outThr; inclusiveThreshold = inThr; if (inclusiveThreshold < exclusiveThreshold) { - Log.error(getMethodName(), "The Exclusive Threshold cannot be larger than the Inclusive one."); - System.exit(-1); + throw new IllegalStateException("The Exclusive Threshold cannot be larger than the Inclusive one."); } } @@ -133,7 +131,7 @@ protected List pruneEdges() { excludedEntities = new TIntHashSet(); nearestEntities = new HashSet[noOfEntities]; - topKEdges = new PriorityQueue<>((int) (2 * inclusiveThreshold), new IncComparisonWeightComparator()); + topKEdges = new PriorityQueue<>(2 * inclusiveThreshold, new IncComparisonWeightComparator()); if (weightingScheme.equals(WeightingScheme.ARCS)) { while (iterator.hasNext()) { int currentId = iterator.next(); diff --git a/src/main/java/org/scify/jedai/configuration/randomsearch/AbstractRandomSearchConfiguration.java b/src/main/java/org/scify/jedai/configuration/randomsearch/AbstractRandomSearchConfiguration.java index 899fa56d..2cc12c48 100644 --- a/src/main/java/org/scify/jedai/configuration/randomsearch/AbstractRandomSearchConfiguration.java +++ b/src/main/java/org/scify/jedai/configuration/randomsearch/AbstractRandomSearchConfiguration.java @@ -27,10 +27,10 @@ public abstract class AbstractRandomSearchConfiguration implements IRandomSearch protected final static Random RANDOM_GEN = new Random(); - protected final List selectedRandomValues; + 
protected final List selectedRandomValues; public AbstractRandomSearchConfiguration() { - selectedRandomValues = new ArrayList(); + selectedRandomValues = new ArrayList<>(); } @Override diff --git a/src/main/java/org/scify/jedai/datamodel/Comparison.java b/src/main/java/org/scify/jedai/datamodel/Comparison.java index 39dd4801..1c7e4ac1 100644 --- a/src/main/java/org/scify/jedai/datamodel/Comparison.java +++ b/src/main/java/org/scify/jedai/datamodel/Comparison.java @@ -65,7 +65,6 @@ public int getEntityId2() { /** * Returns the measure of the weight or similarity between two entities. * Higher utility measures correspond to greater weight or stronger similarity. - * @return */ public float getUtilityMeasure() { return utilityMeasure; diff --git a/src/main/java/org/scify/jedai/datamodel/GomoryHuTree.java b/src/main/java/org/scify/jedai/datamodel/GomoryHuTree.java index 3717e7f5..42a5d682 100644 --- a/src/main/java/org/scify/jedai/datamodel/GomoryHuTree.java +++ b/src/main/java/org/scify/jedai/datamodel/GomoryHuTree.java @@ -90,7 +90,7 @@ public SimpleGraph MinCutTree() { final Set sourcePartition = minSourceSinkCut.getSourcePartition(); // float flowValue = minSourceSinkCut.getCutWeight(); - DefaultWeightedEdge e = (DefaultWeightedEdge) returnGraphClone.addEdge(vertex, predecessor); + DefaultWeightedEdge e = returnGraphClone.addEdge(vertex, predecessor); returnGraph.addEdge(Integer.parseInt(vertex + ""), Integer.parseInt(predecessor + "")); returnGraphClone.setEdgeWeight(e, flowValue); diff --git a/src/main/java/org/scify/jedai/datamodel/SimilarityPairs.java b/src/main/java/org/scify/jedai/datamodel/SimilarityPairs.java index 55e59c7c..b084d057 100644 --- a/src/main/java/org/scify/jedai/datamodel/SimilarityPairs.java +++ b/src/main/java/org/scify/jedai/datamodel/SimilarityPairs.java @@ -15,9 +15,7 @@ */ package org.scify.jedai.datamodel; -import com.esotericsoftware.minlog.Log; import java.io.Serializable; - import java.util.List; import 
org.scify.jedai.utilities.IConstants; @@ -64,10 +62,9 @@ private long countComparisons(List blocks) { comparisons += block.getNoOfComparisons(); } - if (MAX_COMPARISONS < comparisons) { - Log.error("Very high number of comparisons to be executed! " - + "Maximum allowed number is : " + MAX_COMPARISONS); - System.exit(-1); + if (comparisons > MAX_COMPARISONS) { + throw new IllegalStateException("Very high number of comparisons to be executed. " + + "Maximum allowed number is : " + MAX_COMPARISONS); } return comparisons; } diff --git a/src/main/java/org/scify/jedai/datamodel/joins/Category.java b/src/main/java/org/scify/jedai/datamodel/joins/Category.java index 2d101149..2f91bab4 100644 --- a/src/main/java/org/scify/jedai/datamodel/joins/Category.java +++ b/src/main/java/org/scify/jedai/datamodel/joins/Category.java @@ -50,8 +50,8 @@ public Category(int len, float threshold, int categoryN) { Category.THRESHOLD = threshold; Category.N = categoryN; s_len = len; - e_len = (int) ((float) (s_len / THRESHOLD)); - K = (int) (2 * (1 - THRESHOLD) / (1 + THRESHOLD) * (float) e_len); + e_len = (int) (s_len / THRESHOLD); + K = (int) (2 * (1 - THRESHOLD) / (1 + THRESHOLD) * e_len); N1 = K + 1; N2 = 2; diff --git a/src/main/java/org/scify/jedai/datareader/entityreader/EntityDBReader.java b/src/main/java/org/scify/jedai/datareader/entityreader/EntityDBReader.java index 64ae7efe..b4950642 100644 --- a/src/main/java/org/scify/jedai/datareader/entityreader/EntityDBReader.java +++ b/src/main/java/org/scify/jedai/datareader/entityreader/EntityDBReader.java @@ -19,8 +19,7 @@ import org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; import org.scify.jedai.datamodel.EntityProfile; - -import java.io.IOException; +import org.scify.jedai.utilities.DBUtils; import java.sql.*; import java.util.*; @@ -60,33 +59,25 @@ public List getEntityProfiles() { return null; } - //inputFilePath is assigned the Database URL - try { - if (user == null) { - Log.error("Database 
user has not been set!"); - return null; - } - if (password == null) { - Log.error("Database password has not been set!"); - return null; - } - if (table == null) { - Log.error("Database table has not been set!"); - return null; - } - - Connection conn; - if (inputFilePath.startsWith("mysql")) { - conn = getMySQLconnection(inputFilePath); - } else if (inputFilePath.startsWith("postgresql")) { - conn = getPostgreSQLconnection(inputFilePath); - } else { - Log.error("Only MySQL and PostgreSQL are supported for the time being!"); - return null; - } + if (user == null) { + Log.error("Database user has not been set!"); + return null; + } + if (password == null) { + Log.error("Database password has not been set!"); + return null; + } + if (table == null) { + Log.error("Database table has not been set!"); + return null; + } - final Statement stmt = conn.createStatement(); - final ResultSet rs = stmt.executeQuery("SELECT * FROM " + table);//retrieve the appropriate table + //inputFilePath is assigned the Database URL + try (Connection conn = DBUtils.getDBConnection(inputFilePath, user, password, ssl); + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SELECT * FROM " + table);//retrieve the appropriate table + ) { + final ResultSetMetaData rsmd = rs.getMetaData(); int columnsNum = rsmd.getColumnCount(); String[] columns = new String[columnsNum]; @@ -113,7 +104,7 @@ public List getEntityProfiles() { } } rs.close(); - } catch (IOException | SQLException ex) { + } catch (SQLException ex) { Log.error("Error in entities reading!", ex); return null; } @@ -157,16 +148,6 @@ public String getMethodParameters() { + "6)" + getParameterDescription(5) + "."; } - private Connection getMySQLconnection(String dbURL) throws IOException { - try { - Class.forName("com.mysql.jdbc.Driver"); - return DriverManager.getConnection("jdbc:" + dbURL + "?user=" + user + "&password=" + password); - } catch (Exception ex) { - Log.error("Error with database connection!", ex); - 
return null; - } - } - @Override public JsonArray getParameterConfiguration() { final JsonObject obj1 = new JsonObject(); @@ -273,25 +254,6 @@ public String getParameterName(int parameterId) { } } - private Connection getPostgreSQLconnection(String dbURL) throws IOException { - try { - final Properties props = new Properties(); - if (!(user == null)) { - props.setProperty("user", user); - } - if (!(password == null)) { - props.setProperty("password", password); - } - if (ssl) { - props.setProperty("ssl", "true"); - } - return DriverManager.getConnection("jdbc:" + dbURL, props); - } catch (SQLException ex) { - Log.error("Error with database connection!", ex); - return null; - } - } - public void setAttributesToExclude(String[] attributesNamesToExclude) { attributesToExclude.addAll(Arrays.asList(attributesNamesToExclude)); } diff --git a/src/main/java/org/scify/jedai/datareader/entityreader/EntityJSONRDFReader.java b/src/main/java/org/scify/jedai/datareader/entityreader/EntityJSONRDFReader.java index 9ae3baaa..aeacb98e 100644 --- a/src/main/java/org/scify/jedai/datareader/entityreader/EntityJSONRDFReader.java +++ b/src/main/java/org/scify/jedai/datareader/entityreader/EntityJSONRDFReader.java @@ -19,10 +19,8 @@ import org.apache.jena.atlas.json.JSON; import org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; -import org.rdfhdt.hdt.exceptions.NotFoundException; import org.scify.jedai.datamodel.EntityProfile; -import java.io.IOException; import java.util.*; /** @@ -57,11 +55,9 @@ public List getEntityProfiles() { //load the rdf model from the input file try { readModel(inputFilePath); - } catch (IOException ex) { + } catch (Exception ex) { Log.error("Error in entities reading!", ex); return null; - } catch (NotFoundException e) { - Log.error(e.getMessage()); } return entityProfiles; @@ -145,7 +141,7 @@ public String getParameterName(int parameterId) { } } - private void readModel(String inpFIle) throws IOException, NotFoundException { + 
private void readModel(String inpFIle) { //read each ntriples JsonObject jsonObject = JSON.read(inpFIle); String key = jsonObject.keys().toArray()[0].toString(); diff --git a/src/main/java/org/scify/jedai/datareader/entityreader/EntityRDFReader.java b/src/main/java/org/scify/jedai/datareader/entityreader/EntityRDFReader.java index 2d202074..9a2ed90f 100644 --- a/src/main/java/org/scify/jedai/datareader/entityreader/EntityRDFReader.java +++ b/src/main/java/org/scify/jedai/datareader/entityreader/EntityRDFReader.java @@ -22,7 +22,6 @@ import org.apache.jena.riot.RDFDataMgr; import org.scify.jedai.datamodel.EntityProfile; -import java.io.IOException; import java.util.*; /** @@ -58,7 +57,7 @@ public List getEntityProfiles() { try { final Model model = RDFDataMgr.loadModel(inputFilePath); readModel(model); - } catch (IOException ex) { + } catch (Exception ex) { Log.error("Error in entities reading!", ex); return null; } @@ -144,7 +143,7 @@ public String getParameterName(int parameterId) { } } - private void readModel(Model m) throws IOException { + private void readModel(Model m) { //read each ntriples //get spo, create a separate profile for each separate subject, //with Attribute=predicate and Value=object diff --git a/src/main/java/org/scify/jedai/datareader/entityreader/EntitySPARQLReader.java b/src/main/java/org/scify/jedai/datareader/entityreader/EntitySPARQLReader.java index ba5730fb..a9a9084c 100644 --- a/src/main/java/org/scify/jedai/datareader/entityreader/EntitySPARQLReader.java +++ b/src/main/java/org/scify/jedai/datareader/entityreader/EntitySPARQLReader.java @@ -19,7 +19,6 @@ import com.esotericsoftware.minlog.Log; -import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -69,7 +68,7 @@ public List getEntityProfiles() { //load the rdf model from the input file try { readEndpoint(inputFilePath); - } catch (IOException ex) { + } catch (Exception ex) { Log.error("Error in data reading", ex); return null; } @@ 
-189,7 +188,7 @@ public String getParameterName(int parameterId) { } } - private void readEndpoint(String endpointUrl) throws IOException { + private void readEndpoint(String endpointUrl) { //read each ntriples //get spo, create a separate profile for each separate subject, //with Attribute=predicate and Value=object diff --git a/src/main/java/org/scify/jedai/datareader/entityreader/EntityXMLreader.java b/src/main/java/org/scify/jedai/datareader/entityreader/EntityXMLreader.java index a3fbf9fe..c8c450f3 100644 --- a/src/main/java/org/scify/jedai/datareader/entityreader/EntityXMLreader.java +++ b/src/main/java/org/scify/jedai/datareader/entityreader/EntityXMLreader.java @@ -142,7 +142,7 @@ public String getParameterName(int parameterId) { } } - private void readXMLdoc(Document document) throws IOException { + private void readXMLdoc(Document document) { final Element classElement = document.getRootElement(); final List dblpRoot = classElement.getChildren(); diff --git a/src/main/java/org/scify/jedai/datareader/groundtruthreader/AbstractGtReader.java b/src/main/java/org/scify/jedai/datareader/groundtruthreader/AbstractGtReader.java index 3d736142..fd74fc5d 100644 --- a/src/main/java/org/scify/jedai/datareader/groundtruthreader/AbstractGtReader.java +++ b/src/main/java/org/scify/jedai/datareader/groundtruthreader/AbstractGtReader.java @@ -43,14 +43,14 @@ public abstract class AbstractGtReader extends AbstractReader implements IGround protected final Set idDuplicates; protected final TObjectIntMap urlToEntityId1; protected final TObjectIntMap urlToEntityId2; - protected final SimpleGraph duplicatesGraph; + protected final SimpleGraph duplicatesGraph; public AbstractGtReader (String filePath) { super(filePath); idDuplicates = new HashSet<>(); - duplicatesGraph = new SimpleGraph(DefaultEdge.class); - urlToEntityId1 = new TObjectIntHashMap(); - urlToEntityId2 = new TObjectIntHashMap(); + duplicatesGraph = new SimpleGraph<>(DefaultEdge.class); + urlToEntityId1 = new 
TObjectIntHashMap<>(); + urlToEntityId2 = new TObjectIntHashMap<>(); } @Override diff --git a/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtCSVReader.java b/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtCSVReader.java index fbfbc5fa..86c3740f 100644 --- a/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtCSVReader.java +++ b/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtCSVReader.java @@ -31,6 +31,7 @@ import org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; import org.jgrapht.alg.connectivity.ConnectivityInspector; +import org.jgrapht.graph.DefaultEdge; /** * @@ -179,10 +180,7 @@ public Set getDuplicatePairs(List profilesD1, } initializeDataStructures(profilesD1, profilesD2); - try { - // creating reader - final BufferedReader br = new BufferedReader(new FileReader(inputFilePath)); - + try (BufferedReader br = new BufferedReader(new FileReader(inputFilePath));) { String line; if (ignoreFirstRow) { line = br.readLine(); @@ -214,7 +212,7 @@ public Set getDuplicatePairs(List profilesD1, Log.info("Total edges in duplicates graph\t:\t" + duplicatesGraph.edgeSet().size()); // get connected components - final ConnectivityInspector ci = new ConnectivityInspector(duplicatesGraph); + final ConnectivityInspector ci = new ConnectivityInspector<>(duplicatesGraph); final List> connectedComponents = ci.connectedSets(); Log.info("Total connected components in duplicate graph\t:\t" + connectedComponents.size()); diff --git a/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtRDFReader.java b/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtRDFReader.java index 4f91743c..f1ddba50 100644 --- a/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtRDFReader.java +++ b/src/main/java/org/scify/jedai/datareader/groundtruthreader/GtRDFReader.java @@ -29,11 +29,11 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.Set; - import 
org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; import org.jgrapht.alg.connectivity.ConnectivityInspector; +import org.jgrapht.graph.DefaultEdge; /** * @@ -97,7 +97,7 @@ public Set getDuplicatePairs(List profilesD1, Log.info("Total edges in duplicates graph\t:\t" + duplicatesGraph.edgeSet().size()); // get connected components - final ConnectivityInspector ci = new ConnectivityInspector(duplicatesGraph); + final ConnectivityInspector ci = new ConnectivityInspector<>(duplicatesGraph); final List> connectedComponents = ci.connectedSets(); Log.info("Total connected components in duplicate graph\t:\t" + connectedComponents.size()); diff --git a/src/main/java/org/scify/jedai/datawriter/BlocksPerformanceWriter.java b/src/main/java/org/scify/jedai/datawriter/BlocksPerformanceWriter.java index 761acf96..fd401a71 100644 --- a/src/main/java/org/scify/jedai/datawriter/BlocksPerformanceWriter.java +++ b/src/main/java/org/scify/jedai/datawriter/BlocksPerformanceWriter.java @@ -29,6 +29,7 @@ import org.rdfhdt.hdt.options.HDTSpecification; import org.scify.jedai.blockprocessing.comparisoncleaning.ComparisonPropagation; import org.scify.jedai.datamodel.*; +import org.scify.jedai.utilities.DBUtils; import org.scify.jedai.utilities.datastructures.AbstractDuplicatePropagation; import org.scify.jedai.utilities.datastructures.GroundTruthIndex; @@ -37,10 +38,8 @@ import java.io.IOException; import java.io.PrintWriter; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.Statement; import java.util.List; -import java.util.Properties; /** * @@ -103,35 +102,6 @@ public void setEndpointGraph(String endpointGraph) { this.endpointGraph = endpointGraph; } - private Connection getMySQLconnection(String dbURL) throws IOException { - try { - Class.forName("com.mysql.jdbc.Driver"); - return DriverManager.getConnection("jdbc:" + dbURL + "?user=" + dbuser + "&password=" + dbpassword); - } catch (Exception ex) { - Log.error("Error with 
database connection!", ex); - return null; - } - } - - private Connection getPostgreSQLconnection(String dbURL) throws IOException { - try { - final Properties props = new Properties(); - if (!(dbuser == null)) { - props.setProperty("user", dbuser); - } - if (!(dbpassword == null)) { - props.setProperty("password", dbpassword); - } - if (ssl) { - props.setProperty("ssl", "true"); - } - return DriverManager.getConnection("jdbc:" + dbURL, props); - } catch (Exception ex) { - Log.error("Error with database connection!", ex); - return null; - } - } - private boolean areCooccurring(boolean cleanCleanER, IdDuplicates pairOfDuplicates) { final int[] blocks1 = entityIndex.getEntityBlocks(pairOfDuplicates.getEntityId1(), 0); if (blocks1 == null) { @@ -143,8 +113,6 @@ private boolean areCooccurring(boolean cleanCleanER, IdDuplicates pairOfDuplicat return false; } - int noOfBlocks1 = blocks1.length; - int noOfBlocks2 = blocks2.length; for (int item : blocks1) { for (int value : blocks2) { if (value < item) { @@ -963,7 +931,7 @@ public void printDetailedResultsToXML(List profilesD1, List profilesD1, List profilesD2, String endpointURL, String GraphName) throws FileNotFoundException { + public void printDetailedResultsToSPARQL(List profilesD1, List profilesD2, String endpointURL, String GraphName) { if (blocks.isEmpty()) { Log.warn("Empty set of blocks was given as input!"); return; @@ -1132,7 +1100,7 @@ public void printDetailedResultsToSPARQL(List profilesD1, List profilesD1, List profilesD2, String dbURL) throws FileNotFoundException { + public void printDetailedResultsToDB(List profilesD1, List profilesD2, String dbURL) { if (blocks.isEmpty()) { Log.warn("Empty set of blocks was given as input!"); return; @@ -1202,28 +1170,20 @@ public void printDetailedResultsToDB(List profilesD1, List profilesD1, List profi pw.close(); } - public void debugToDB(List profilesD1, List profilesD2, String dbURL) throws FileNotFoundException { + public void debugToDB(List profilesD1, List 
profilesD2, String dbURL) { if (blocks.isEmpty()) { Log.warn("Empty set of blocks was given as input!"); return; @@ -1308,28 +1268,19 @@ public void debugToDB(List profilesD1, List profil String dbquery = sb.toString(); - try { - if (dbuser == null) { - Log.error("Database user has not been set!"); - } - if (dbpassword == null) { - Log.error("Database password has not been set!"); - } - if (dbtable == null) { - Log.error("Database table has not been set!"); - } - - Connection conn = null; - if (dbURL.startsWith("mysql")) { - conn = getMySQLconnection(dbURL); - } else if (dbURL.startsWith("postgresql")) { - conn = getPostgreSQLconnection(dbURL); - } else { - Log.error("Only MySQL and PostgreSQL are supported for the time being!"); - } + if (dbuser == null) { + Log.error("Database user has not been set!"); + } + if (dbpassword == null) { + Log.error("Database password has not been set!"); + } + if (dbtable == null) { + Log.error("Database table has not been set!"); + } - final Statement stmt = conn.createStatement(); - stmt.executeQuery(dbquery);//retrieve the appropriate table + try (Connection conn = DBUtils.getDBConnection(dbURL, dbuser, dbpassword, ssl); + Statement stmt = conn.createStatement();) { + stmt.execute(dbquery);//retrieve the appropriate table } catch (Exception ex) { Log.error("Error in db writing!", ex); } @@ -1622,7 +1573,7 @@ public void debugToXML(List profilesD1, List profi printWriter.close(); } - public void debugToSPARQL(List profilesD1, List profilesD2, String endpointURL, String GraphName) throws FileNotFoundException { + public void debugToSPARQL(List profilesD1, List profilesD2, String endpointURL, String GraphName) { if (blocks.isEmpty()) { Log.warn("Empty set of blocks was given as input!"); return; diff --git a/src/main/java/org/scify/jedai/datawriter/ClustersPerformanceWriter.java b/src/main/java/org/scify/jedai/datawriter/ClustersPerformanceWriter.java index 64f8816e..d454d265 100644 --- 
a/src/main/java/org/scify/jedai/datawriter/ClustersPerformanceWriter.java +++ b/src/main/java/org/scify/jedai/datawriter/ClustersPerformanceWriter.java @@ -21,6 +21,7 @@ import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdt.options.HDTSpecification; import org.scify.jedai.utilities.datastructures.BilateralDuplicatePropagation; +import org.scify.jedai.utilities.DBUtils; import org.scify.jedai.utilities.datastructures.AbstractDuplicatePropagation; import org.apache.jena.update.UpdateExecutionFactory; import org.apache.jena.update.UpdateFactory; @@ -36,10 +37,8 @@ import java.io.IOException; import java.io.PrintWriter; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.Statement; import java.util.List; -import java.util.Properties; import org.scify.jedai.datamodel.EntityProfile; import org.scify.jedai.datamodel.IdDuplicates; @@ -95,35 +94,6 @@ public void setEndpointGraph(String endpointGraph) { this.endpointGraph = endpointGraph; } - private Connection getMySQLconnection(String dbURL) throws IOException { - try { - Class.forName("com.mysql.jdbc.Driver"); - return DriverManager.getConnection("jdbc:" + dbURL + "?user=" + dbuser + "&password=" + dbpassword); - } catch (Exception ex) { - Log.error("Error with database connection!", ex); - return null; - } - } - - private Connection getPostgreSQLconnection(String dbURL) throws IOException { - try { - final Properties props = new Properties(); - if (!(dbuser == null)) { - props.setProperty("user", dbuser); - } - if (!(dbpassword == null)) { - props.setProperty("password", dbpassword); - } - if (ssl) { - props.setProperty("ssl", "true"); - } - return DriverManager.getConnection("jdbc:" + dbURL, props); - } catch (Exception ex) { - Log.error("Error with database connection!", ex); - return null; - } - } - public int getDetectedDuplicates() { return abstractDP.getNoOfDuplicates(); } @@ -939,7 +909,7 @@ public void printDetailedResultsToHDTrdf(List profilesD1, List profilesD1, List profilesD2, 
String endpointURL, String GraphName) throws FileNotFoundException { + public void printDetailedResultsToSPARQL(List profilesD1, List profilesD2, String endpointURL, String GraphName) { if (entityClusters.length == 0) { Log.warn("Empty set of equivalence clusters given as input!"); return; @@ -1442,7 +1412,7 @@ public void printDetailedResultsToXML(List profilesD1, List profilesD1, List profilesD2, String dbURL) throws FileNotFoundException { + public void printDetailedResultsToDB(List profilesD1, List profilesD2, String dbURL) { if (entityClusters.length == 0) { Log.warn("Empty set of equivalence clusters given as input!"); return; @@ -1556,28 +1526,19 @@ public void printDetailedResultsToDB(List profilesD1, List duplicatesGraph; + protected SimpleWeightedGraph weightedGraph; public CutClustering() { this(0.3f, 0.5f); @@ -64,7 +64,7 @@ public CutClustering(float ac, float simTh) { @Override protected EquivalenceCluster[] getConnectedComponents() { // get connected components - final ConnectivityInspector ci = new ConnectivityInspector(duplicatesGraph); + final ConnectivityInspector ci = new ConnectivityInspector<>(duplicatesGraph); final List> connectedComponents = ci.connectedSets(); // prepare output @@ -88,12 +88,12 @@ public EquivalenceCluster[] getDuplicates(SimilarityPairs simPairs) { while (iterator.hasNext()) { // add an edge for every pair of entities with a weight higher than the threshold Comparison comparison = iterator.next(); if (threshold < comparison.getUtilityMeasure()) { - DefaultWeightedEdge e = (DefaultWeightedEdge) weightedGraph.addEdge(comparison.getEntityId1() + "", (comparison.getEntityId2() + datasetLimit) + ""); + DefaultWeightedEdge e = weightedGraph.addEdge(comparison.getEntityId1() + "", (comparison.getEntityId2() + datasetLimit) + ""); weightedGraph.setEdgeWeight(e, comparison.getUtilityMeasure()); } } - GomoryHuTree ght = new GomoryHuTree(weightedGraph); //take the minimum cut (Gomory-Hu) tree from the similarity graph + GomoryHuTree 
ght = new GomoryHuTree<>(weightedGraph); //take the minimum cut (Gomory-Hu) tree from the similarity graph duplicatesGraph = ght.MinCutTree(); duplicatesGraph.removeVertex(noOfEntities); //remove the artificial sink @@ -186,7 +186,7 @@ protected void initializeGraph() { for (int i = 0; i < noOfEntities; i++) { String edgeLabel = i + ""; weightedGraph.addVertex(edgeLabel); - DefaultWeightedEdge e = (DefaultWeightedEdge) weightedGraph.addEdge(sinkLabel, edgeLabel); // add the capacity edges "a" + DefaultWeightedEdge e = weightedGraph.addEdge(sinkLabel, edgeLabel); // add the capacity edges "a" weightedGraph.setEdgeWeight(e, Acap); //connecting the artificial sink with all vertices } diff --git a/src/main/java/org/scify/jedai/entityclustering/RicochetSRClusteringCCER.java b/src/main/java/org/scify/jedai/entityclustering/RicochetSRClusteringCCER.java index f0cc7315..017f1176 100644 --- a/src/main/java/org/scify/jedai/entityclustering/RicochetSRClusteringCCER.java +++ b/src/main/java/org/scify/jedai/entityclustering/RicochetSRClusteringCCER.java @@ -83,7 +83,7 @@ public EquivalenceCluster[] getDuplicates(SimilarityPairs simPairs) { final TIntSet Center = new TIntHashSet(); final TIntSet NonCenter = new TIntHashSet(); - final TIntObjectHashMap Clusters = new TIntObjectHashMap(); + final TIntObjectHashMap Clusters = new TIntObjectHashMap<>(); final int[] clusterCenter = new int[noOfEntities]; final float[] simWithCenter = new float[noOfEntities]; // similarity with center diff --git a/src/main/java/org/scify/jedai/entitymatching/GroupLinkage.java b/src/main/java/org/scify/jedai/entitymatching/GroupLinkage.java index bf961de9..46ab5308 100644 --- a/src/main/java/org/scify/jedai/entitymatching/GroupLinkage.java +++ b/src/main/java/org/scify/jedai/entitymatching/GroupLinkage.java @@ -63,9 +63,8 @@ public GroupLinkage(float simThr, List profilesD1, List profilesD1, List profil @Override protected final void buildModels() { if (profilesD1 == null) { - Log.error("First list of 
entity profiles is null! " - + "The first argument should always contain entities."); - System.exit(-1); + throw new IllegalArgumentException( + "First list of entity profiles is null. The first argument should always contain entities."); } Log.info("Applying " + getMethodName() + " with the following configuration : " + getMethodConfiguration()); diff --git a/src/main/java/org/scify/jedai/prioritization/GlobalProgressiveSortedNeighborhood.java b/src/main/java/org/scify/jedai/prioritization/GlobalProgressiveSortedNeighborhood.java index 2a870bbd..efea510f 100644 --- a/src/main/java/org/scify/jedai/prioritization/GlobalProgressiveSortedNeighborhood.java +++ b/src/main/java/org/scify/jedai/prioritization/GlobalProgressiveSortedNeighborhood.java @@ -47,7 +47,7 @@ public class GlobalProgressiveSortedNeighborhood extends AbstractSimilarityBased public GlobalProgressiveSortedNeighborhood(int budget, ProgressiveWeightingScheme pwScheme) { super(budget, pwScheme); - topComps = new PriorityQueue<>((int) (2 * comparisonsBudget), new IncComparisonWeightComparator()); + topComps = new PriorityQueue<>(2 * comparisonsBudget, new IncComparisonWeightComparator()); gridMaxWindow = new IntGridSearchConfiguration(10, 1, 1); randomMaxWindow = new IntRandomSearchConfiguration(10, 1); diff --git a/src/main/java/org/scify/jedai/prioritization/ProgressiveBlockScheduling.java b/src/main/java/org/scify/jedai/prioritization/ProgressiveBlockScheduling.java index 6e8cb32f..e69f76eb 100644 --- a/src/main/java/org/scify/jedai/prioritization/ProgressiveBlockScheduling.java +++ b/src/main/java/org/scify/jedai/prioritization/ProgressiveBlockScheduling.java @@ -15,7 +15,6 @@ */ package org.scify.jedai.prioritization; -import com.esotericsoftware.minlog.Log; import org.scify.jedai.datamodel.AbstractBlock; import org.scify.jedai.datamodel.Comparison; import org.scify.jedai.datamodel.ComparisonIterator; @@ -49,8 +48,7 @@ public ProgressiveBlockScheduling(int budget, WeightingScheme wScheme) { @Override 
public void developBlockBasedSchedule(List blocks) { if (blocks == null || blocks.isEmpty()) { - Log.error("No blocks were given as input!"); - System.exit(-1); + throw new IllegalArgumentException("No blocks were given as input."); } blocks.sort(new IncBlockCardinalityComparator()); diff --git a/src/main/java/org/scify/jedai/prioritization/ProgressiveEntityScheduling.java b/src/main/java/org/scify/jedai/prioritization/ProgressiveEntityScheduling.java index 4312a602..7abde20a 100644 --- a/src/main/java/org/scify/jedai/prioritization/ProgressiveEntityScheduling.java +++ b/src/main/java/org/scify/jedai/prioritization/ProgressiveEntityScheduling.java @@ -15,7 +15,6 @@ */ package org.scify.jedai.prioritization; -import com.esotericsoftware.minlog.Log; import java.util.Iterator; import java.util.List; import org.scify.jedai.datamodel.AbstractBlock; @@ -44,8 +43,7 @@ public ProgressiveEntityScheduling(int budget, WeightingScheme wScheme) { @Override public void developBlockBasedSchedule(List blocks) { if (blocks == null || blocks.isEmpty()) { - Log.error("No blocks were given as input!"); - System.exit(-1); + throw new IllegalArgumentException("No blocks were given as input."); } final ProgressiveWNP pwnp = new ProgressiveWNP(wScheme); diff --git a/src/main/java/org/scify/jedai/prioritization/ProgressiveGlobalTopComparisons.java b/src/main/java/org/scify/jedai/prioritization/ProgressiveGlobalTopComparisons.java index b1b86230..887c702a 100644 --- a/src/main/java/org/scify/jedai/prioritization/ProgressiveGlobalTopComparisons.java +++ b/src/main/java/org/scify/jedai/prioritization/ProgressiveGlobalTopComparisons.java @@ -47,8 +47,7 @@ public ProgressiveGlobalTopComparisons(int budget, WeightingScheme wScheme) { @Override public void developBlockBasedSchedule(List blocks) { if (blocks == null || blocks.isEmpty()) { - Log.error("No blocks were given as input!"); - System.exit(-1); + throw new IllegalArgumentException("No blocks were given as input."); } if (blocks.get(0) 
instanceof DecomposedBlock) { @@ -85,7 +84,7 @@ public Comparison next() { protected Iterator processDecomposedBlocks(List blocks) { float minimumWeight = -1; - final Queue topComparisons = new PriorityQueue<>((int) (2 * comparisonsBudget), new IncComparisonWeightComparator()); + final Queue topComparisons = new PriorityQueue<>(2 * comparisonsBudget, new IncComparisonWeightComparator()); for (AbstractBlock block : blocks) { final ComparisonIterator cIterator = block.getComparisonIterator(); while (cIterator.hasNext()) { diff --git a/src/main/java/org/scify/jedai/prioritization/ProgressiveLocalTopComparisons.java b/src/main/java/org/scify/jedai/prioritization/ProgressiveLocalTopComparisons.java index e124a60e..316b35bc 100644 --- a/src/main/java/org/scify/jedai/prioritization/ProgressiveLocalTopComparisons.java +++ b/src/main/java/org/scify/jedai/prioritization/ProgressiveLocalTopComparisons.java @@ -41,8 +41,7 @@ public ProgressiveLocalTopComparisons(int budget, WeightingScheme wScheme) { @Override public void developBlockBasedSchedule(List blocks) { if (blocks == null || blocks.isEmpty()) { - Log.error("No blocks were given as input!"); - System.exit(-1); + throw new IllegalArgumentException("No blocks were given as input."); } if (blocks.get(0) instanceof DecomposedBlock) { diff --git a/src/main/java/org/scify/jedai/prioritization/utilities/BlockcentricEntityIndex.java b/src/main/java/org/scify/jedai/prioritization/utilities/BlockcentricEntityIndex.java index 6d905073..65f5071b 100644 --- a/src/main/java/org/scify/jedai/prioritization/utilities/BlockcentricEntityIndex.java +++ b/src/main/java/org/scify/jedai/prioritization/utilities/BlockcentricEntityIndex.java @@ -314,7 +314,7 @@ public float getWeight(int blockIndex, Comparison comparison) { return commonBlocksPX; } long[] v = new long[2]; - v[0] = (long) commonBlocksPX; + v[0] = commonBlocksPX; v[1] = getNoOfEntityBlocks(comparison.getEntityId1(), 0) - v[0]; long[] v_ = new long[2]; diff --git 
a/src/main/java/org/scify/jedai/prioritization/utilities/ProgressiveCNPDecomponsedBlocks.java b/src/main/java/org/scify/jedai/prioritization/utilities/ProgressiveCNPDecomponsedBlocks.java index 4b3381f7..af7edfdb 100644 --- a/src/main/java/org/scify/jedai/prioritization/utilities/ProgressiveCNPDecomponsedBlocks.java +++ b/src/main/java/org/scify/jedai/prioritization/utilities/ProgressiveCNPDecomponsedBlocks.java @@ -91,7 +91,7 @@ private void setComparisonIterator() { private void processEntityComparison(int entityId, Comparison c) { if (minimumWeight[entityId] < c.getUtilityMeasure()) { if (topComsPerEntity[entityId] == null) { - topComsPerEntity[entityId] = new PriorityQueue<>((int) (2 * maxComsPerEntity), new IncComparisonWeightComparator()); + topComsPerEntity[entityId] = new PriorityQueue<>(2 * maxComsPerEntity, new IncComparisonWeightComparator()); } topComsPerEntity[entityId].add(c); diff --git a/src/main/java/org/scify/jedai/similarityjoins/characterbased/AllPairs.java b/src/main/java/org/scify/jedai/similarityjoins/characterbased/AllPairs.java index 0a33645b..e07ead1e 100644 --- a/src/main/java/org/scify/jedai/similarityjoins/characterbased/AllPairs.java +++ b/src/main/java/org/scify/jedai/similarityjoins/characterbased/AllPairs.java @@ -257,7 +257,7 @@ private List performJoin(int rangeBound) { } } } - final TIntObjectMap index = new TIntObjectHashMap(); + final TIntObjectMap index = new TIntObjectHashMap<>(); for (int k = rangeBound; k < noOfEntities; k++) { int count = 0; int lastToken = -1; diff --git a/src/main/java/org/scify/jedai/similarityjoins/characterbased/EdJoin.java b/src/main/java/org/scify/jedai/similarityjoins/characterbased/EdJoin.java index eedac4bc..b54dbc91 100644 --- a/src/main/java/org/scify/jedai/similarityjoins/characterbased/EdJoin.java +++ b/src/main/java/org/scify/jedai/similarityjoins/characterbased/EdJoin.java @@ -229,7 +229,7 @@ private List performJoin(int rangeBound) { } } } - final TIntObjectMap index = new 
TIntObjectHashMap(); + final TIntObjectMap index = new TIntObjectHashMap<>(); for (int k = rangeBound; k < noOfEntities; k++) { int count = 0; int prefix_length = get_prefix_length(tokens[k]); diff --git a/src/main/java/org/scify/jedai/similarityjoins/fuzzysets/FuzzySetSimJoin.java b/src/main/java/org/scify/jedai/similarityjoins/fuzzysets/FuzzySetSimJoin.java index 05e8097c..97d2a31a 100644 --- a/src/main/java/org/scify/jedai/similarityjoins/fuzzysets/FuzzySetSimJoin.java +++ b/src/main/java/org/scify/jedai/similarityjoins/fuzzysets/FuzzySetSimJoin.java @@ -41,12 +41,7 @@ public class FuzzySetSimJoin { float[] elementBounds; /** - * Computes the join between two collections - * - * @param input1 - * @param input2 - * @param simThreshold - * @return + * Computes the join between two collections. */ public HashMap join(Map>> input1, Map>> input2, float simThreshold) { @@ -178,7 +173,7 @@ private TIntSet[] computeUnflattenedSignature(int[][] querySet, float simThresho for (int j = 0; j < querySet[i].length; j++) { if (querySet[i][j] == bestToken) { unflattenedSignature[i].add(bestToken); - simUpperBound -= (1.0 / (float) querySet[i].length); + simUpperBound -= (1.0f / querySet[i].length); } } } diff --git a/src/main/java/org/scify/jedai/similarityjoins/tokenbased/PartEnumJoin.java b/src/main/java/org/scify/jedai/similarityjoins/tokenbased/PartEnumJoin.java index 43f0eb08..41a8e777 100644 --- a/src/main/java/org/scify/jedai/similarityjoins/tokenbased/PartEnumJoin.java +++ b/src/main/java/org/scify/jedai/similarityjoins/tokenbased/PartEnumJoin.java @@ -131,8 +131,8 @@ private List performJoin() { private int check_overlap(TIntList a, TIntList b, int overlap) { int posa = 0, posb = 0, count = 0; - while (posa < (int) a.size() && posb < (int) b.size()) { - if (count + Math.min((int) a.size() - posa, (int) b.size() - posb) < overlap) { + while (posa < a.size() && posb < b.size()) { + if (count + Math.min(a.size() - posa, b.size() - posb) < overlap) { return -1; } if 
(a.get(posa) == b.get(posb)) { @@ -236,11 +236,11 @@ void convert_to_signature() { for (int i = 0; i < checked_flag.length; i++) { checked_flag[i] = true; } - for (int id = 0; id < (int) records.length; id++) { + for (int id = 0; id < records.length; id++) { int k; for (k = 0; k < MAX_CATEGORY; k++) { - if (helper[k].s_len <= (int) records[id].size() - && helper[k].e_len >= (int) records[id].size()) { + if (helper[k].s_len <= records[id].size() + && helper[k].e_len >= records[id].size()) { break; } } diff --git a/src/main/java/org/scify/jedai/textmodels/BagModel.java b/src/main/java/org/scify/jedai/textmodels/BagModel.java index 033bd2dd..962685d9 100644 --- a/src/main/java/org/scify/jedai/textmodels/BagModel.java +++ b/src/main/java/org/scify/jedai/textmodels/BagModel.java @@ -17,7 +17,6 @@ import org.scify.jedai.utilities.enumerations.RepresentationModel; import org.scify.jedai.utilities.enumerations.SimilarityMetric; -import com.esotericsoftware.minlog.Log; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.TObjectIntMap; import gnu.trove.map.hash.TObjectIntHashMap; @@ -58,7 +57,7 @@ protected float getEnhancedJaccardSimilarity(BagModel oModel) { } float denominator = noOfTotalTerms + oModel.getNoOfTotalTerms() - numerator; - return numerator / (float)denominator; + return numerator / denominator; } @Override @@ -112,9 +111,8 @@ public float getSimilarity(ITextModel oModel) { case JACCARD_SIMILARITY: return getJaccardSimilarity((BagModel) oModel); default: - Log.error("The given similarity metric is incompatible with the bag representation model!"); - System.exit(-1); - return -1; + throw new IllegalStateException( + "The given similarity metric is incompatible with the bag representation model."); } } @@ -135,7 +133,7 @@ protected float getTfCosineSimilarity(BagModel oModel) { } float denominator = getVectorMagnitude() * oModel.getVectorMagnitude(); - return (float)(numerator / denominator); + return numerator / denominator; } protected float 
getTfGeneralizedJaccardSimilarity(BagModel oModel) { @@ -164,7 +162,7 @@ protected float getTfGeneralizedJaccardSimilarity(BagModel oModel) { denominator += Math.max(itemVector1.get(key) / totalTerms1, itemVector2.get(key) / totalTerms2); } - return (float)(numerator / denominator); + return numerator / denominator; } protected float getVectorMagnitude() { diff --git a/src/main/java/org/scify/jedai/textmodels/CharacterNGramsWithGlobalWeights.java b/src/main/java/org/scify/jedai/textmodels/CharacterNGramsWithGlobalWeights.java index 29ea5789..98581609 100644 --- a/src/main/java/org/scify/jedai/textmodels/CharacterNGramsWithGlobalWeights.java +++ b/src/main/java/org/scify/jedai/textmodels/CharacterNGramsWithGlobalWeights.java @@ -48,7 +48,7 @@ public void finalizeModel() { } protected float getARCSSimilarity(CharacterNGramsWithGlobalWeights oModel) { - final Set commonKeys = new HashSet(itemsFrequency.keySet()); + final Set commonKeys = new HashSet<>(itemsFrequency.keySet()); commonKeys.retainAll(oModel.getItemsFrequency().keySet()); float similarity = 0; @@ -57,8 +57,7 @@ protected float getARCSSimilarity(CharacterNGramsWithGlobalWeights oModel) { } else if (datasetId != oModel.getDatasetId()) { // Clean-Clean ER similarity = commonKeys.stream().map((key) -> 1.0f / ((float) Math.log1p(((float) DOC_FREQ[DATASET_1].get(key)) * DOC_FREQ[DATASET_2].get(key)) / (float) Math.log(2))).reduce(similarity, (accumulator, _item) -> accumulator + _item); } else { - Log.error("Both models come from dataset 1!"); - System.exit(-1); + throw new IllegalStateException("Both models come from dataset 1."); } return similarity; @@ -98,7 +97,7 @@ protected float getSigmaSimilarity(CharacterNGramsWithGlobalWeights oModel) { denominator = allKeys.stream().map((key) -> itemsFrequency.get(key) / noOfTotalTerms * getIdfWeight(key) + itemVector2.get(key) / totalTerms2 * oModel.getIdfWeight(key)).reduce(denominator, (accumulator, _item) -> accumulator + _item); - return (float)(numerator / 
denominator); + return numerator / denominator; } @Override @@ -113,9 +112,8 @@ public float getSimilarity(ITextModel oModel) { case SIGMA_SIMILARITY: return getSigmaSimilarity((CharacterNGramsWithGlobalWeights) oModel); default: - Log.error("The given similarity metric is incompatible with the bag representation model!"); - System.exit(-1); - return -1; + throw new IllegalStateException( + "The given similarity metric is incompatible with the bag representation model."); } } @@ -134,7 +132,7 @@ protected float getTfIdfCosineSimilarity(CharacterNGramsWithGlobalWeights oModel } float denominator = getVectorMagnitude() * oModel.getVectorMagnitude(); - return (float)(numerator / denominator); + return numerator / denominator; } protected float getTfIdfGeneralizedJaccardSimilarity(CharacterNGramsWithGlobalWeights oModel) { @@ -157,7 +155,7 @@ protected float getTfIdfGeneralizedJaccardSimilarity(CharacterNGramsWithGlobalWe denominator = allKeys.stream().map((key) -> Math.max(itemsFrequency.get(key) / noOfTotalTerms * getIdfWeight(key), itemVector2.get(key) / totalTerms2 * oModel.getIdfWeight(key))).reduce(denominator, (accumulator, _item) -> accumulator + _item); - return (float)(numerator / denominator); + return numerator / denominator; } @Override diff --git a/src/main/java/org/scify/jedai/textmodels/GraphModel.java b/src/main/java/org/scify/jedai/textmodels/GraphModel.java index 09e28ff5..6bd709a5 100644 --- a/src/main/java/org/scify/jedai/textmodels/GraphModel.java +++ b/src/main/java/org/scify/jedai/textmodels/GraphModel.java @@ -17,7 +17,6 @@ import org.scify.jedai.utilities.enumerations.RepresentationModel; import org.scify.jedai.utilities.enumerations.SimilarityMetric; -import com.esotericsoftware.minlog.Log; import gr.demokritos.iit.jinsect.documentModel.comparators.NGramCachedGraphComparator; import gr.demokritos.iit.jinsect.documentModel.representations.DocumentNGramGraph; import gr.demokritos.iit.jinsect.structs.GraphSimilarity; @@ -70,13 +69,12 @@ public 
float getSimilarity(ITextModel oModel) { overallSimilarity += graphSimilarity.ValueSimilarity; if (0 < graphSimilarity.SizeSimilarity) { overallSimilarity += graphSimilarity.ValueSimilarity / graphSimilarity.SizeSimilarity; - return (float)(overallSimilarity / 3); + return overallSimilarity / 3; } - return (float)(overallSimilarity / 2); + return overallSimilarity / 2; default: - Log.error("The given similarity metric is incompatible with the n-gram graphs representation model!"); - System.exit(-1); - return -1; + throw new IllegalStateException( + "The given similarity metric is incompatible with the n-gram graphs representation model."); } } } diff --git a/src/main/java/org/scify/jedai/textmodels/TokenNGramsWithGlobalWeights.java b/src/main/java/org/scify/jedai/textmodels/TokenNGramsWithGlobalWeights.java index eea427f3..52b4608a 100644 --- a/src/main/java/org/scify/jedai/textmodels/TokenNGramsWithGlobalWeights.java +++ b/src/main/java/org/scify/jedai/textmodels/TokenNGramsWithGlobalWeights.java @@ -48,7 +48,7 @@ public void finalizeModel() { } protected float getARCSSimilarity(TokenNGramsWithGlobalWeights oModel) { - final Set commonKeys = new HashSet(itemsFrequency.keySet()); + final Set commonKeys = new HashSet<>(itemsFrequency.keySet()); commonKeys.retainAll(oModel.getItemsFrequency().keySet()); float similarity = 0; @@ -62,8 +62,7 @@ protected float getARCSSimilarity(TokenNGramsWithGlobalWeights oModel) { similarity += 1.0f / (Math.log1p(((float) DOC_FREQ[DATASET_1].get(key)) * DOC_FREQ[DATASET_2].get(key)) / Math.log(2)); } } else { - Log.error("Both models come from dataset 1!"); - System.exit(-1); + throw new IllegalStateException("Both models come from dataset 1."); } return similarity; @@ -103,7 +102,7 @@ protected float getSigmaSimilarity(TokenNGramsWithGlobalWeights oModel) { denominator = allKeys.stream().map((key) -> itemsFrequency.get(key) / noOfTotalTerms * getIdfWeight(key) + itemVector2.get(key) / totalTerms2 * 
oModel.getIdfWeight(key)).reduce(denominator, (accumulator, _item) -> accumulator + _item); - return (float)(numerator / denominator); + return numerator / denominator; } @Override @@ -118,9 +117,8 @@ public float getSimilarity(ITextModel oModel) { case SIGMA_SIMILARITY: return getSigmaSimilarity((TokenNGramsWithGlobalWeights) oModel); default: - Log.error("The given similarity metric is incompatible with the bag representation model!"); - System.exit(-1); - return -1; + throw new IllegalStateException( + "The given similarity metric is incompatible with the bag representation model."); } } @@ -139,7 +137,7 @@ protected float getTfIdfCosineSimilarity(TokenNGramsWithGlobalWeights oModel) { } float denominator = getVectorMagnitude() * oModel.getVectorMagnitude(); - return (float)(numerator / denominator); + return numerator / denominator; } protected float getTfIdfGeneralizedJaccardSimilarity(TokenNGramsWithGlobalWeights oModel) { @@ -162,7 +160,7 @@ protected float getTfIdfGeneralizedJaccardSimilarity(TokenNGramsWithGlobalWeight denominator = allKeys.stream().map((key) -> Math.max(itemsFrequency.get(key) / noOfTotalTerms * getIdfWeight(key), itemVector2.get(key) / totalTerms2 * oModel.getIdfWeight(key))).reduce(denominator, (accumulator, _item) -> accumulator + _item); - return (float)(numerator / denominator); + return numerator / denominator; } @Override diff --git a/src/main/java/org/scify/jedai/textmodels/embeddings/PretrainedVectors.java b/src/main/java/org/scify/jedai/textmodels/embeddings/PretrainedVectors.java index 380b993e..244d176d 100644 --- a/src/main/java/org/scify/jedai/textmodels/embeddings/PretrainedVectors.java +++ b/src/main/java/org/scify/jedai/textmodels/embeddings/PretrainedVectors.java @@ -6,7 +6,6 @@ import org.scify.jedai.utilities.enumerations.SimilarityMetric; import java.io.BufferedReader; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.*; @@ -69,11 +68,8 @@ private void 
loadWeights() { String fileName = Objects.requireNonNull(classLoader.getResource("embeddings/weights.txt")).getFile(); elementMap = new HashMap<>(); - try { - BufferedReader br = new BufferedReader(new FileReader(fileName)); - - CSVReader reader = new CSVReader(new FileReader(fileName), dataSeparator, CSVParser.NULL_CHARACTER, 0); - + try (BufferedReader br = new BufferedReader(new FileReader(fileName)); + CSVReader reader = new CSVReader(new FileReader(fileName), dataSeparator, CSVParser.NULL_CHARACTER, 0);) { String[] components; int counter = 0; while ((components = reader.readNext()) != null) { @@ -92,8 +88,7 @@ private void loadWeights() { elementMap.put(components[0], value); } } catch (IOException e) { - Log.error("Problem loading embedding weights", e); - System.exit(-1); + throw new RuntimeException("Problem loading embedding weights.", e); } } @@ -108,44 +103,38 @@ private void loadWeightsWithHeader(){ Log.info("Loading weights from " + fileName); elementMap = new HashMap<>(); - try { - BufferedReader br = new BufferedReader(new FileReader(fileName)); + try (BufferedReader br = new BufferedReader(new FileReader(fileName));) { // first read parsing metadata, split by commas String [] header = br.readLine().split(","); try { dimension = Integer.parseInt(header[0]); dataSeparator = header[1].charAt(0); - }catch (NumberFormatException ex){ - Log.error("Pretrained header malformed -- expected:"); - System.exit(-1); + } catch (NumberFormatException ex) { + throw new RuntimeException("Pretrained header malformed -- expected:", ex); } Log.info(String.format("Read dimension: [%d], delimiter: [%c]", dimension, dataSeparator)); Log.info(String.format("Reading embedding mapping file. {%s}", Calendar.getInstance().getTime().toString())); - CSVReader reader = new CSVReader(new FileReader(fileName), dataSeparator, CSVParser.NULL_CHARACTER, 1); - // List vectors = reader.readAll(); - Log.info(String.format("Done reading embedding mapping file. 
{%s}", Calendar.getInstance().getTime().toString())); - - String[] components; - int counter=0; - while((components = reader.readNext()) != null){ - Log.debug(String.format("Read csv entry # %d: %s", counter, Arrays.toString(components))); - counter++; - if (components.length != dimension + 1) - throw new IOException(String.format("Mismatch in embedding vector #%d length : %d.", - counter, components.length)); - float [] value = new float[dimension]; - for (int i=1; i<=dimension; ++i){ - value[i-1] = Float.parseFloat(components[i]); - } - elementMap.put(components[0], value); + try (CSVReader reader = new CSVReader(new FileReader(fileName), dataSeparator, CSVParser.NULL_CHARACTER, 1);) { + Log.info(String.format("Done reading embedding mapping file. {%s}", Calendar.getInstance().getTime().toString())); + + String[] components; + int counter=0; + while((components = reader.readNext()) != null){ + Log.debug(String.format("Read csv entry # %d: %s", counter, Arrays.toString(components))); + counter++; + if (components.length != dimension + 1) + throw new IOException(String.format("Mismatch in embedding vector #%d length : %d.", + counter, components.length)); + float [] value = new float[dimension]; + for (int i=1; i<=dimension; ++i){ + value[i-1] = Float.parseFloat(components[i]); + } + elementMap.put(components[0], value); + } + Log.info(String.format("Done processing %d-line embedding mapping. {%s}", counter, Calendar.getInstance().getTime().toString())); } - Log.info(String.format("Done processing %d-line embedding mapping. 
{%s}", counter, Calendar.getInstance().getTime().toString())); - } catch (FileNotFoundException e) { - Log.error("No resource file found:" + fileName, e); - System.exit(-1); } catch (IOException e) { - Log.error("IO exception when reading:" + fileName, e); - System.exit(-1); + throw new RuntimeException("Exception when reading: " + fileName, e); } unkownVector = getZeroVector(); diff --git a/src/main/java/org/scify/jedai/textmodels/embeddings/VectorSpaceModel.java b/src/main/java/org/scify/jedai/textmodels/embeddings/VectorSpaceModel.java index 0ce76794..55c815df 100644 --- a/src/main/java/org/scify/jedai/textmodels/embeddings/VectorSpaceModel.java +++ b/src/main/java/org/scify/jedai/textmodels/embeddings/VectorSpaceModel.java @@ -1,6 +1,5 @@ package org.scify.jedai.textmodels.embeddings; -import com.esotericsoftware.minlog.Log; import org.scify.jedai.textmodels.AbstractModel; import org.scify.jedai.textmodels.ITextModel; import org.scify.jedai.utilities.enumerations.RepresentationModel; @@ -29,9 +28,8 @@ public float getSimilarity(ITextModel oModel) { case COSINE_SIMILARITY: return getCosineSimilarity((VectorSpaceModel) oModel); default: - Log.error("The given similarity metric is incompatible with the bag representation model!"); - System.exit(-1); - return -1; + throw new IllegalStateException( + "The given similarity metric is incompatible with the bag representation model."); } } diff --git a/src/main/java/org/scify/jedai/utilities/DBUtils.java b/src/main/java/org/scify/jedai/utilities/DBUtils.java new file mode 100644 index 00000000..7dd07ee1 --- /dev/null +++ b/src/main/java/org/scify/jedai/utilities/DBUtils.java @@ -0,0 +1,52 @@ +package org.scify.jedai.utilities; + +import static java.util.Objects.requireNonNull; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.util.Properties; + +/** + * Database static utility methods. + */ +public final class DBUtils { + /** + * Creates and returns a database connection. 
This method parses the specified DB URL and + * attempts to infer a database dialect from that URL. + *
<p>
The only supported databases are MySQL and PostgreSQL. + *
<p>
All checked exceptions are translated into runtime exceptions. + * + * @param dbURL portion of the JDBC connection string + * @return the non-null database connection. This must be closed by the caller. + * @throws IllegalStateException if the specified {@code dbURL} string does not identify a + * supported database dialect. + * @throws RuntimeException if any other exception occurs + */ + public static Connection getDBConnection(String dbURL, String dbUser, String dbPassword, boolean ssl) + throws IllegalStateException, RuntimeException { + + requireNonNull(dbURL, "dbURL cannot be null"); + requireNonNull(dbUser, "dbUser cannot be null"); + requireNonNull(dbPassword, "dbPassword cannot be null"); + try { + if (dbURL.startsWith("mysql")) { + Class.forName("com.mysql.jdbc.Driver"); + return DriverManager.getConnection("jdbc:" + dbURL + "?user=" + dbUser + "&password=" + dbPassword); + } else if (dbURL.startsWith("postgresql")) { + final Properties props = new Properties(); + props.setProperty("user", dbUser); + props.setProperty("password", dbPassword); + if (ssl) { + props.setProperty("ssl", "true"); + } + return DriverManager.getConnection("jdbc:" + dbURL, props); + } else { + throw new IllegalStateException("Only MySQL and PostgreSQL are supported for the time being."); + } + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/org/scify/jedai/utilities/datastructures/GroundTruthIndex.java b/src/main/java/org/scify/jedai/utilities/datastructures/GroundTruthIndex.java index b88e5234..63820d46 100644 --- a/src/main/java/org/scify/jedai/utilities/datastructures/GroundTruthIndex.java +++ b/src/main/java/org/scify/jedai/utilities/datastructures/GroundTruthIndex.java @@ -71,8 +71,6 @@ public TIntList getCommonBlockIndices(int blockIndex, Comparison comparison) { final int[] blocks2 = entityBlocks[comparison.getEntityId2() + datasetLimit]; boolean firstCommonIndex = false; - int 
noOfBlocks1 = blocks1.length; - int noOfBlocks2 = blocks2.length; final TIntList indices = new TIntArrayList(); for (int item : blocks1) { for (int value : blocks2) { @@ -116,8 +114,6 @@ public int getNoOfCommonBlocks(int blockIndex, Comparison comparison) { final int[] blocks2 = entityBlocks[comparison.getEntityId2() + datasetLimit]; int commonBlocks = 0; - int noOfBlocks1 = blocks1.length; - int noOfBlocks2 = blocks2.length; boolean firstCommonIndex = false; for (int item : blocks1) { for (int value : blocks2) { diff --git a/src/test/java/org/scify/jedai/datareader/TestHDTRdfReader.java b/src/test/java/org/scify/jedai/datareader/TestHDTRdfReader.java index 3fda7a3f..d1172ba6 100644 --- a/src/test/java/org/scify/jedai/datareader/TestHDTRdfReader.java +++ b/src/test/java/org/scify/jedai/datareader/TestHDTRdfReader.java @@ -15,12 +15,10 @@ */ package org.scify.jedai.datareader; -import org.rdfhdt.hdt.exceptions.ParserException; import org.scify.jedai.datamodel.Attribute; import org.scify.jedai.datamodel.EntityProfile; import org.scify.jedai.datareader.entityreader.EntityHDTRDFReader; -import java.io.IOException; import java.util.List; /** @@ -29,7 +27,7 @@ */ public class TestHDTRdfReader { - public static void main(String[] args) throws IOException, ParserException { + public static void main(String[] args) { String filePath = "data/dataset.hdt"; EntityHDTRDFReader n3reader = new EntityHDTRDFReader(filePath); diff --git a/src/test/java/org/scify/jedai/datareader/TestJSONrdfReader.java b/src/test/java/org/scify/jedai/datareader/TestJSONrdfReader.java index 09e6fbd1..0d14736b 100644 --- a/src/test/java/org/scify/jedai/datareader/TestJSONrdfReader.java +++ b/src/test/java/org/scify/jedai/datareader/TestJSONrdfReader.java @@ -15,12 +15,10 @@ */ package org.scify.jedai.datareader; -import org.rdfhdt.hdt.exceptions.ParserException; import org.scify.jedai.datamodel.Attribute; import org.scify.jedai.datamodel.EntityProfile; import 
org.scify.jedai.datareader.entityreader.EntityJSONRDFReader; -import java.io.IOException; import java.util.List; /** @@ -29,7 +27,7 @@ */ public class TestJSONrdfReader { - public static void main(String[] args) throws IOException, ParserException { + public static void main(String[] args) { String filePath = "data/statsToFile.json"; EntityJSONRDFReader n3reader = new EntityJSONRDFReader(filePath); diff --git a/src/test/java/org/scify/jedai/demoworkflows/CompareXmlRdfProfiles.java b/src/test/java/org/scify/jedai/demoworkflows/CompareXmlRdfProfiles.java index 5cefadcd..90558549 100644 --- a/src/test/java/org/scify/jedai/demoworkflows/CompareXmlRdfProfiles.java +++ b/src/test/java/org/scify/jedai/demoworkflows/CompareXmlRdfProfiles.java @@ -22,7 +22,6 @@ import org.scify.jedai.datareader.entityreader.EntityRDFReader; import java.io.File; -import java.io.FileNotFoundException; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -35,7 +34,7 @@ public class CompareXmlRdfProfiles { private final static String prefix = "cleanCleanErDatasets/"; - public static void main(String[] args) throws FileNotFoundException { + public static void main(String[] args) { BasicConfigurator.configure(); String mainDirectory = "data" + File.separator + "cleanCleanErDatasets" + File.separator + "DBLP-ACM" + File.separator; diff --git a/src/test/java/org/scify/jedai/demoworkflows/CsvDblpAcm.java b/src/test/java/org/scify/jedai/demoworkflows/CsvDblpAcm.java index 9bf1feef..136a50ef 100644 --- a/src/test/java/org/scify/jedai/demoworkflows/CsvDblpAcm.java +++ b/src/test/java/org/scify/jedai/demoworkflows/CsvDblpAcm.java @@ -16,7 +16,6 @@ package org.scify.jedai.demoworkflows; import org.apache.log4j.BasicConfigurator; -import org.rdfhdt.hdt.exceptions.ParserException; import org.scify.jedai.blockbuilding.IBlockBuilding; import org.scify.jedai.blockbuilding.StandardBlocking; import org.scify.jedai.blockprocessing.IBlockProcessing; @@ -49,7 +48,7 @@ */ public class 
CsvDblpAcm { - public static void main(String[] args) throws IOException, ParserException { + public static void main(String[] args) throws IOException { BasicConfigurator.configure(); String mainDirectory = "data" + File.separator + "cleanCleanErDatasets" + File.separator + "DBLP-ACM" + File.separator; diff --git a/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtDblpRdfAcmCsvReader.java b/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtDblpRdfAcmCsvReader.java index 78f53923..f06ad23d 100644 --- a/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtDblpRdfAcmCsvReader.java +++ b/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtDblpRdfAcmCsvReader.java @@ -32,6 +32,7 @@ import org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; import org.jgrapht.alg.connectivity.ConnectivityInspector; +import org.jgrapht.graph.DefaultEdge; /** * @@ -180,10 +181,7 @@ public Set getDuplicatePairs(List profilesD1, } initializeDataStructures(profilesD1, profilesD2); - try { - // creating reader - final BufferedReader br = new BufferedReader(new FileReader(inputFilePath)); - + try (BufferedReader br = new BufferedReader(new FileReader(inputFilePath));) { String line; if (ignoreFirstRow) { line = br.readLine(); @@ -210,7 +208,7 @@ public Set getDuplicatePairs(List profilesD1, Log.info("Total edges in duplicates graph\t:\t" + duplicatesGraph.edgeSet().size()); // get connected components - final ConnectivityInspector ci = new ConnectivityInspector(duplicatesGraph); + final ConnectivityInspector ci = new ConnectivityInspector<>(duplicatesGraph); final List> connectedComponents = ci.connectedSets(); Log.info("Total connected components in duplicate graph\t:\t" + connectedComponents.size()); diff --git a/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtRdfCsvReader.java b/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtRdfCsvReader.java index a13e44d2..761d6449 100644 --- 
a/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtRdfCsvReader.java +++ b/src/test/java/org/scify/jedai/demoworkflows/groundtruth/GtRdfCsvReader.java @@ -32,6 +32,7 @@ import org.apache.jena.atlas.json.JsonArray; import org.apache.jena.atlas.json.JsonObject; import org.jgrapht.alg.connectivity.ConnectivityInspector; +import org.jgrapht.graph.DefaultEdge; /** * @@ -180,10 +181,7 @@ public Set getDuplicatePairs(List profilesD1, } initializeDataStructures(profilesD1, profilesD2); - try { - // creating reader - final BufferedReader br = new BufferedReader(new FileReader(inputFilePath)); - + try (BufferedReader br = new BufferedReader(new FileReader(inputFilePath));) { String line; if (ignoreFirstRow) { line = br.readLine(); @@ -211,7 +209,7 @@ public Set getDuplicatePairs(List profilesD1, Log.info("Total edges in duplicates graph\t:\t" + duplicatesGraph.edgeSet().size()); // get connected components - final ConnectivityInspector ci = new ConnectivityInspector(duplicatesGraph); + final ConnectivityInspector ci = new ConnectivityInspector<>(duplicatesGraph); final List> connectedComponents = ci.connectedSets(); Log.info("Total connected components in duplicate graph\t:\t" + connectedComponents.size()); diff --git a/src/test/java/org/scify/jedai/entityclustering/TestAllMethods.java b/src/test/java/org/scify/jedai/entityclustering/TestAllMethods.java index eab14978..3b900099 100644 --- a/src/test/java/org/scify/jedai/entityclustering/TestAllMethods.java +++ b/src/test/java/org/scify/jedai/entityclustering/TestAllMethods.java @@ -16,7 +16,6 @@ package org.scify.jedai.entityclustering; import java.io.File; -import java.io.FileNotFoundException; import org.scify.jedai.blockbuilding.IBlockBuilding; import org.scify.jedai.utilities.datastructures.AbstractDuplicatePropagation; import org.scify.jedai.blockprocessing.IBlockProcessing; @@ -44,7 +43,7 @@ */ public class TestAllMethods { - public static void main(String[] args) throws FileNotFoundException { + public static 
void main(String[] args) { BasicConfigurator.configure(); String entitiesFilePath = "data" + File.separator + "dirtyErDatasets" + File.separator + "coraProfiles"; diff --git a/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsCcer.java b/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsCcer.java index 08d34120..48f8984a 100644 --- a/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsCcer.java +++ b/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsCcer.java @@ -148,8 +148,6 @@ public static void main(String[] args) throws FileNotFoundException { clp.setStatistics(); clp.printStatistics(time6 - time5 + time4 - time3, matchingWorkflowName.toString(), matchingWorkflowConf.toString()); clp.printDetailedResults(profilesD1, profilesD2, "data" + File.separator + "test.csv"); - - //System.exit(-1); } } } diff --git a/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsDer.java b/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsDer.java index 874d46ae..ef8fc5aa 100644 --- a/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsDer.java +++ b/src/test/java/org/scify/jedai/entityclustering/TestAllMethodsDer.java @@ -36,7 +36,6 @@ import org.scify.jedai.utilities.enumerations.EntityMatchingMethod; import java.io.File; -import java.io.FileNotFoundException; import java.util.List; /** @@ -45,7 +44,7 @@ */ public class TestAllMethodsDer { - public static void main(String[] args) throws FileNotFoundException { + public static void main(String[] args) { BasicConfigurator.configure(); String mainDirectory = "data" + File.separator + "cleanCleanErDatasets" + File.separator; diff --git a/src/test/java/org/scify/jedai/entityclustering/TestCorrelationClustering.java b/src/test/java/org/scify/jedai/entityclustering/TestCorrelationClustering.java index a85ab5b7..8a885f0d 100644 --- a/src/test/java/org/scify/jedai/entityclustering/TestCorrelationClustering.java +++ 
b/src/test/java/org/scify/jedai/entityclustering/TestCorrelationClustering.java @@ -16,7 +16,6 @@ package org.scify.jedai.entityclustering; import java.io.File; -import java.io.FileNotFoundException; import org.scify.jedai.blockbuilding.IBlockBuilding; import org.scify.jedai.utilities.datastructures.AbstractDuplicatePropagation; import org.scify.jedai.blockprocessing.IBlockProcessing; @@ -40,7 +39,7 @@ public class TestCorrelationClustering { - public static void main(String[] args) throws FileNotFoundException { + public static void main(String[] args) { BasicConfigurator.configure(); String entitiesFilePath = "data" + File.separator + "dirtyErDatasets" + File.separator + "coraProfiles"; diff --git a/src/test/java/org/scify/jedai/generalexamples/PrintDatasets.java b/src/test/java/org/scify/jedai/generalexamples/PrintDatasets.java index 74aad65d..761ec0c9 100644 --- a/src/test/java/org/scify/jedai/generalexamples/PrintDatasets.java +++ b/src/test/java/org/scify/jedai/generalexamples/PrintDatasets.java @@ -23,8 +23,6 @@ import org.scify.jedai.datareader.entityreader.IEntityReader; import org.scify.jedai.datareader.groundtruthreader.GtSerializationReader; import org.scify.jedai.datareader.groundtruthreader.IGroundTruthReader; -import org.scify.jedai.utilities.datastructures.AbstractDuplicatePropagation; -import org.scify.jedai.utilities.datastructures.BilateralDuplicatePropagation; import java.util.List; import java.util.Set; diff --git a/src/test/java/org/scify/jedai/similarityjoins/TestSimJoins.java b/src/test/java/org/scify/jedai/similarityjoins/TestSimJoins.java index 70aa21c6..004355fd 100644 --- a/src/test/java/org/scify/jedai/similarityjoins/TestSimJoins.java +++ b/src/test/java/org/scify/jedai/similarityjoins/TestSimJoins.java @@ -88,9 +88,7 @@ public static void main(String[] args) { maxLines = Integer.parseInt(prop.getProperty("max_lines")); simThreshold = Double.parseDouble(prop.getProperty("sim_threshold")); } catch (Exception e) { - 
System.out.println("ERROR: Wrong input parameters!"); - e.printStackTrace(); - System.exit(-1); + throw new RuntimeException("ERROR: Wrong input parameters.", e); } *//* READ THE INPUT FILES *//* diff --git a/src/test/java/org/scify/jedai/similarityjoins/TestSimJoinsWithDirtyERdatasets.java b/src/test/java/org/scify/jedai/similarityjoins/TestSimJoinsWithDirtyERdatasets.java index a2847a70..d2cc9b7e 100644 --- a/src/test/java/org/scify/jedai/similarityjoins/TestSimJoinsWithDirtyERdatasets.java +++ b/src/test/java/org/scify/jedai/similarityjoins/TestSimJoinsWithDirtyERdatasets.java @@ -86,7 +86,7 @@ static String getAggregateValues(EntityProfile profile) { return sb.toString().trim(); } - public static void main(String[] args) throws FileNotFoundException { + public static void main(String[] args) { BasicConfigurator.configure(); float jaccardThreshold = 0.45f; @@ -119,7 +119,6 @@ public static void main(String[] args) throws FileNotFoundException { PPJoin join = new PPJoin(jaccardThreshold); // SilkMoth join = new SilkMoth(jaccardThreshold); SimilarityPairs simPairs = join.executeFiltering("all", profiles); - float time1 = System.currentTimeMillis(); final IEntityClustering ec = new ConnectedComponentsClustering(jaccardThreshold); final EquivalenceCluster[] clusters = ec.getDuplicates(simPairs); diff --git a/src/test/java/org/scify/jedai/version3/prioritization/TestCleanCleanERBaseline.java b/src/test/java/org/scify/jedai/version3/prioritization/TestCleanCleanERBaseline.java index 3da94061..a351351e 100644 --- a/src/test/java/org/scify/jedai/version3/prioritization/TestCleanCleanERBaseline.java +++ b/src/test/java/org/scify/jedai/version3/prioritization/TestCleanCleanERBaseline.java @@ -112,7 +112,7 @@ public static void main(String[] args) { float originalRecall = clp.getRecall(); final IEntityMatching em = new ProfileMatcher(profiles1, profiles2, repModel[i], simMetric[i]); - SimilarityPairs sims = new SimilarityPairs(true, (int) allComparisons.size()); + 
SimilarityPairs sims = new SimilarityPairs(true, allComparisons.size()); Collections.shuffle(allComparisons); for (Comparison comparison : allComparisons) { diff --git a/src/test/java/org/scify/jedai/version3/prioritization/TestDirtyERBaseline.java b/src/test/java/org/scify/jedai/version3/prioritization/TestDirtyERBaseline.java index a26811b6..ca4a67bb 100644 --- a/src/test/java/org/scify/jedai/version3/prioritization/TestDirtyERBaseline.java +++ b/src/test/java/org/scify/jedai/version3/prioritization/TestDirtyERBaseline.java @@ -113,7 +113,7 @@ public static void main(String[] args) { float originalRecall = clp.getRecall(); final IEntityMatching em = new ProfileMatcher(profiles, bestModels[i], bestMetrics[i]); - SimilarityPairs sims = new SimilarityPairs(false, (int) allComparisons.size()); + SimilarityPairs sims = new SimilarityPairs(false, allComparisons.size()); int counter = 0; // int missingComparisons = 0;