Skip to content

Commit

Permalink
Changed comparison counters to long/int (again).
Browse files Browse the repository at this point in the history
  • Loading branch information
gpapadis committed Aug 26, 2021
1 parent 3a80152 commit 3eb67fc
Show file tree
Hide file tree
Showing 50 changed files with 184 additions and 185 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ public abstract class AbstractBlockBuilding implements IBlockBuilding {

protected boolean isUsingEntropy;

protected float noOfEntitiesD1;
protected float noOfEntitiesD2;
protected int noOfEntitiesD1;
protected int noOfEntitiesD2;

protected List<AbstractBlock> blocks;
protected List<EntityProfile> entityProfilesD1;
Expand Down Expand Up @@ -96,14 +96,14 @@ public List<AbstractBlock> getBlocks(List<EntityProfile> profilesD1, List<Entity
return readBlocks();
}

public float getBruteForceComparisons() {
/**
 * Returns the number of pairwise comparisons that an exhaustive
 * (brute-force) approach would execute.
 * <p>
 * When {@code entityProfilesD2} is {@code null}, only the first collection
 * is present and every unordered pair inside it is counted:
 * {@code n * (n - 1) / 2}. Otherwise every entity of the first collection
 * is paired with every entity of the second: {@code n1 * n2}.
 *
 * @return the brute-force comparison count, computed in 64-bit arithmetic
 */
public long getBruteForceComparisons() {
    // The entity counters are ints. Widen to long BEFORE multiplying;
    // otherwise the product is evaluated in 32-bit arithmetic and silently
    // wraps (already at ~65,536 entities for the single-collection case),
    // and only the wrapped value would be widened to the long return type.
    final long entitiesD1 = noOfEntitiesD1;
    if (entityProfilesD2 == null) {
        return entitiesD1 * (entitiesD1 - 1) / 2;
    }
    return entitiesD1 * noOfEntitiesD2;
}

public float getTotalNoOfEntities() {
public int getTotalNoOfEntities() {
if (entityProfilesD2 == null) {
return noOfEntitiesD1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
public abstract class AbstractBlockProcessing implements IBlockProcessing {

protected void printOriginalStatistics(List<AbstractBlock> inputBlocks) {
float comparisons = 0;
comparisons = inputBlocks.stream().map((block) -> block.getNoOfComparisons()).reduce(comparisons, (accumulator, _item) -> accumulator + _item);
long comparisons = 0;
for (AbstractBlock block : inputBlocks) {
comparisons += block.getNoOfComparisons();
}

Log.info("Original blocks\t:\t" + inputBlocks.size());
Log.info("Original comparisons\t:\t" + comparisons);
Log.info("Original comparisons\t:\t" + ((long)comparisons));
}
}
6 changes: 3 additions & 3 deletions src/main/java/org/scify/jedai/datamodel/AbstractBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public abstract class AbstractBlock implements Serializable {

protected int blockIndex;

protected float comparisons;
protected long comparisons;
protected float entropy;
protected float utilityMeasure;

Expand All @@ -53,7 +53,7 @@ public float getEntropy() {
return entropy;
}

public float getNoOfComparisons() {
public long getNoOfComparisons() {
return comparisons;
}

Expand All @@ -77,6 +77,6 @@ public List<Comparison> getComparisons() {
return comparisonsList;
}

public abstract float getTotalBlockAssignments();
public abstract int getTotalBlockAssignments();
public abstract void setUtilityMeasure();
}
4 changes: 2 additions & 2 deletions src/main/java/org/scify/jedai/datamodel/BilateralBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public BilateralBlock(float entropy, int[] entities1, int[] entities2) {
super(entropy);
index1Entities = entities1;
index2Entities = entities2;
comparisons = ((float) index1Entities.length) * ((float) index2Entities.length);
comparisons = ((long) index1Entities.length) * index2Entities.length;
}

@Override
Expand All @@ -66,7 +66,7 @@ public int[] getIndex2Entities() {
}

@Override
public float getTotalBlockAssignments() {
public int getTotalBlockAssignments() {
return index1Entities.length+index2Entities.length;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
*/
public class ComparisonIterator implements IConstants, Iterator<Comparison> {

private float executedComparisons;
private final float totalComparisons;
private long executedComparisons;
private final long totalComparisons;

private int innerLoop;
private int innerLimit;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/scify/jedai/datamodel/DecomposedBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ public int[] getEntities2() {
}

@Override
public float getNoOfComparisons() {
public long getNoOfComparisons() {
return entities1.length;
}

@Override
public float getTotalBlockAssignments() {
public int getTotalBlockAssignments() {
return 2*entities1.length;
}

Expand Down
14 changes: 9 additions & 5 deletions src/main/java/org/scify/jedai/datamodel/SimilarityPairs.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
public class SimilarityPairs implements IConstants, Serializable {

private final boolean isCleanCleanER;

private int currentIndex;

private final float[] similarities;
private final int[] entityIds1;
private final int[] entityIds2;
Expand All @@ -44,7 +46,7 @@ public SimilarityPairs(boolean ccer, int comparisons) {
public SimilarityPairs(boolean ccer, List<AbstractBlock> blocks) {
currentIndex = 0;
isCleanCleanER = ccer;
float totalComparisons = countComparisons(blocks);
long totalComparisons = countComparisons(blocks);
entityIds1 = new int[(int) totalComparisons];
entityIds2 = new int[(int) totalComparisons];
similarities = new float[(int) totalComparisons];
Expand All @@ -53,12 +55,14 @@ public SimilarityPairs(boolean ccer, List<AbstractBlock> blocks) {
public void addComparison(Comparison comparison) {
entityIds1[currentIndex] = comparison.getEntityId1();
entityIds2[currentIndex] = comparison.getEntityId2();
similarities[currentIndex++] = (float) comparison.getUtilityMeasure();
similarities[currentIndex++] = comparison.getUtilityMeasure();
}

private float countComparisons(List<AbstractBlock> blocks) {
float comparisons = 0;
comparisons = blocks.stream().map((block) -> block.getNoOfComparisons()).reduce(comparisons, (accumulator, _item) -> accumulator + _item);
private long countComparisons(List<AbstractBlock> blocks) {
long comparisons = 0;
for (AbstractBlock block : blocks) {
comparisons += block.getNoOfComparisons();
}

if (MAX_COMPARISONS < comparisons) {
Log.error("Very high number of comparisons to be executed! "
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/scify/jedai/datamodel/UnilateralBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public UnilateralBlock(int[] entities) {
public UnilateralBlock(float entropy, int[] entities) {
super(entropy);
this.entities = entities;
comparisons = entities.length*(entities.length-1.0f)/2.0f;
comparisons = ((long)entities.length)*(entities.length-1)/2;
}

@Override
Expand All @@ -57,7 +57,7 @@ public int[] getEntities() {
}

@Override
public float getTotalBlockAssignments() {
public int getTotalBlockAssignments() {
return entities.length;
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/scify/jedai/textmodels/BagModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ protected float getJaccardSimilarity(BagModel oModel) {
final Set<String> commonKeys = new HashSet<>(itemsFrequency.keySet());
commonKeys.retainAll(oModel.getItemsFrequency().keySet());

int numerator = commonKeys.size();
int denominator = itemsFrequency.size() + oModel.getItemsFrequency().size() - numerator;
float numerator = commonKeys.size();
float denominator = itemsFrequency.size() + oModel.getItemsFrequency().size() - numerator;
return numerator / denominator;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ public void printFalseNegatives(List<EntityProfile> profilesD1, List<EntityProfi
pw.close();
}

public void printStatistics(float overheadTime, String methodConfiguration, String methodName) {
public void printStatistics(long overheadTime, String methodConfiguration, String methodName) {
if (blocks.isEmpty()) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ private boolean isCleanCleanER() {
return false;
}

public void printStatistics(float overheadTime, String methodName, String methodConfiguration) {
public void printStatistics(long overheadTime, String methodName, String methodConfiguration) {
System.out.println("\n\n\n**************************************************");
System.out.println("Performance of : " + methodName);
System.out.println("Configuration : " + methodConfiguration);
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/scify/jedai/utilities/SimPairsPerformance.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class SimPairsPerformance {
private int noOfD2Entities;
private int detectedDuplicates;

private final float aggregateCardinality;
private final long aggregateCardinality;
private float fMeasure;
private float pc;
private float pq;
Expand All @@ -59,7 +59,7 @@ public SimPairsPerformance(SimilarityPairs simPairs, AbstractDuplicatePropagatio
aggregateCardinality = simPairs.getNoOfComparisons();
}

public float getAggregateCardinality() {
public long getAggregateCardinality() {
return aggregateCardinality;
}

Expand All @@ -85,7 +85,7 @@ private void getDuplicates() {

detectedDuplicates = abstractDP.getNoOfDuplicates();
pc = ((float) abstractDP.getNoOfDuplicates()) / abstractDP.getExistingDuplicates();
pq = abstractDP.getNoOfDuplicates() / aggregateCardinality;
pq = ((float) abstractDP.getNoOfDuplicates()) / aggregateCardinality;

if (0 < pc && 0 < pq) {
fMeasure = 2 * pc * pq / (pc + pq);
Expand Down Expand Up @@ -175,7 +175,7 @@ public void printDetailedResults(List<EntityProfile> profilesD1, List<EntityProf

detectedDuplicates = abstractDP.getNoOfDuplicates();
pc = ((float) abstractDP.getNoOfDuplicates()) / abstractDP.getExistingDuplicates();
pq = abstractDP.getNoOfDuplicates() / aggregateCardinality;
pq = ((float) abstractDP.getNoOfDuplicates()) / aggregateCardinality;
if (0 < pc && 0 < pq) {
fMeasure = 2 * pc * pq / (pc + pq);
} else {
Expand Down Expand Up @@ -223,7 +223,7 @@ public void printFalseNegatives(List<EntityProfile> profilesD1, List<EntityProfi
pw.close();
}

public void printStatistics(float overheadTime, String methodConfiguration, String methodName) {
public void printStatistics(long overheadTime, String methodConfiguration, String methodName) {
if (similarityPairs.getNoOfComparisons() == 0) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@
*/
public enum TokenBasedSimilarityJoinMethod {
ALL_PAIRS,
// ED_JOIN,
// FAST_SS,
// PASS_JOIN,
PART_ENUM_JOIN,
PP_JOIN;

Expand Down
18 changes: 9 additions & 9 deletions src/main/java/org/scify/jedai/workflowbuilder/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -273,14 +273,14 @@ public static void main(String[] args) {
final TIntList bbMethodIds = getBlockBuildingMethod();
List<AbstractBlock> blocks = new ArrayList<>();

float totalTime = 0;
long totalTime = 0;
for (TIntIterator bbIterator = bbMethodIds.iterator(); bbIterator.hasNext();) {
float time1 = System.currentTimeMillis();
long time1 = System.currentTimeMillis();

final IBlockBuilding blockBuildingMethod = BlockBuildingMethod.getDefaultConfiguration(BlockBuildingMethod.values()[bbIterator.next() - 1]);
blocks.addAll(blockBuildingMethod.getBlocks(profilesD1, profilesD2));

float time2 = System.currentTimeMillis();
long time2 = System.currentTimeMillis();

totalTime += time2 - time1;
workflowConf.append(blockBuildingMethod.getMethodConfiguration()).append("\n");
Expand All @@ -297,12 +297,12 @@ public static void main(String[] args) {
bcMethodIds.sort();
bcMethodIds.reverse();
for (TIntIterator bcIterator = bcMethodIds.iterator(); bcIterator.hasNext();) {
float time3 = System.currentTimeMillis();
long time3 = System.currentTimeMillis();

final IBlockProcessing blockCleaningMethod = BlockCleaningMethod.getDefaultConfiguration(BlockCleaningMethod.values()[bcIterator.next() - 1]);
blocks = blockCleaningMethod.refineBlocks(blocks);

float time4 = System.currentTimeMillis();
long time4 = System.currentTimeMillis();

totalTime += time4- time3;
workflowConf.append(blockCleaningMethod.getMethodConfiguration()).append("\n");
Expand All @@ -317,12 +317,12 @@ public static void main(String[] args) {
// Comparison Cleaning
int ccMethodId = getComparisonCleaningMethod();
if (0 <= ccMethodId) {
float time5 = System.currentTimeMillis();
long time5 = System.currentTimeMillis();

IBlockProcessing comparisonCleaningMethod = ComparisonCleaningMethod.getDefaultConfiguration(ComparisonCleaningMethod.values()[ccMethodId - 1]);
blocks = comparisonCleaningMethod.refineBlocks(blocks);

float time6 = System.currentTimeMillis();
long time6 = System.currentTimeMillis();

totalTime += time6 - time5;
workflowConf.append(comparisonCleaningMethod.getMethodConfiguration()).append("\n");
Expand All @@ -335,12 +335,12 @@ public static void main(String[] args) {

// Entity Matching
int emMethodId = getEntityMatchingMethod();
float time7 = System.currentTimeMillis();
long time7 = System.currentTimeMillis();

final IEntityMatching entityMatchingMethod = EntityMatchingMethod.getDefaultConfiguration(profilesD1, profilesD2, EntityMatchingMethod.values()[emMethodId - 1]);
final SimilarityPairs simPairs = entityMatchingMethod.executeComparisons(blocks);

float time8 = System.currentTimeMillis();
long time8 = System.currentTimeMillis();

totalTime += time8- time7;
workflowConf.append(entityMatchingMethod.getMethodConfiguration()).append("\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ public static void main(String[] args) {
blbuMethods.add(BlockBuildingMethod.SUFFIX_ARRAYS);
blbuMethods.add(BlockBuildingMethod.Q_GRAMS_BLOCKING);

float totalTime = 0;
long totalTime = 0;
final List<AbstractBlock> blocks = new ArrayList<>();
for (BlockBuildingMethod blbuMethod : blbuMethods) {
IBlockBuilding blockBuildingMethod = BlockBuildingMethod.getDefaultConfiguration(blbuMethod);

float time1 = System.currentTimeMillis();
long time1 = System.currentTimeMillis();
blocks.addAll(blockBuildingMethod.getBlocks(profiles1, profiles2));
float time2 = System.currentTimeMillis();
long time2 = System.currentTimeMillis();
totalTime += time2 - time1;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,14 @@ public static void main(String[] args) {
System.out.println("Existing Duplicates\t:\t" + duplicatePropagation.getDuplicates().size());

for (BlockBuildingMethod blbuMethod : BlockBuildingMethod.values()) {
float time1 = System.currentTimeMillis();
long time1 = System.currentTimeMillis();

System.out.println("\n\nCurrent blocking metohd\t:\t" + blbuMethod);
IBlockBuilding blockBuildingMethod = BlockBuildingMethod.getDefaultConfiguration(blbuMethod);

System.out.println("Block Building...");
List<AbstractBlock> blocks = blockBuildingMethod.getBlocks(profiles1, profiles2);
float time2 = System.currentTimeMillis();
long time2 = System.currentTimeMillis();

BlocksPerformance blStats = new BlocksPerformance(blocks, duplicatePropagation);
blStats.setStatistics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ public static void main(String[] args) {
System.out.println("Existing Duplicates\t:\t" + duplicatePropagation.getDuplicates().size());

for (BlockBuildingMethod blbuMethod : BlockBuildingMethod.values()) {
float time1 = System.currentTimeMillis();
long time1 = System.currentTimeMillis();

System.out.println("\n\nCurrent blocking metohd\t:\t" + blbuMethod);
IBlockBuilding blockBuildingMethod = BlockBuildingMethod.getDefaultConfiguration(blbuMethod);

System.out.println("Block Building...");
List<AbstractBlock> blocks = blockBuildingMethod.getBlocks(profiles, null);
float time2 = System.currentTimeMillis();
long time2 = System.currentTimeMillis();

BlocksPerformance blStats = new BlocksPerformance(blocks, duplicatePropagation);
blStats.setStatistics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public static void main(String[] args) {
System.out.println("Existing Duplicates\t:\t" + duplicatePropagation.getDuplicates().size());

for (BlockBuildingMethod blbuMethod : BlockBuildingMethod.values()) {
float time1 = System.currentTimeMillis();
long time1 = System.currentTimeMillis();

StringBuilder workflowConf = new StringBuilder();
StringBuilder workflowName = new StringBuilder();
Expand All @@ -74,7 +74,7 @@ public static void main(String[] args) {
workflowName.append("->").append(blockCleaningMethod.getMethodName());
}

float time2 = System.currentTimeMillis();
long time2 = System.currentTimeMillis();

BlocksPerformance blStats = new BlocksPerformance(blocks, duplicatePropagation);
blStats.setStatistics();
Expand Down
Loading

0 comments on commit 3eb67fc

Please sign in to comment.