Implement a string-similarity algorithm for sorting category search results

This commit is contained in:
pszklarska 2017-09-05 18:02:44 +02:00
parent d4a89afafd
commit 5094cbc58a
4 changed files with 85 additions and 0 deletions

View file

@ -25,6 +25,7 @@ dependencies {
compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){
transitive=true
}
compile 'info.debatty:java-string-similarity:0.24'
compile 'io.reactivex.rxjava2:rxandroid:2.0.1'
// Because RxAndroid releases are few and far between, it is recommended you also

View file

@ -25,6 +25,7 @@ import com.pedrogomez.renderers.RVRendererAdapter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
@ -36,6 +37,7 @@ import fr.free.nrw.commons.CommonsApplication;
import fr.free.nrw.commons.R;
import fr.free.nrw.commons.data.Category;
import fr.free.nrw.commons.upload.MwVolleyApi;
import fr.free.nrw.commons.utils.StringSortingUtils;
import io.reactivex.Observable;
import io.reactivex.android.schedulers.AndroidSchedulers;
import io.reactivex.schedulers.Schedulers;
@ -199,6 +201,7 @@ public class CategorizationFragment extends Fragment {
)
.filter(categoryItem -> !containsYear(categoryItem.getName()))
.distinct()
.sorted(sortByMatches(filter))
.observeOn(AndroidSchedulers.mainThread())
.subscribe(
s -> categoriesAdapter.add(s),
@ -222,6 +225,11 @@ public class CategorizationFragment extends Fragment {
);
}
/**
 * Builds a comparator for category items that delegates to the string comparator
 * returned by {@link StringSortingUtils#sortBySimilarity(String)} for the given filter,
 * applied to each item's name.
 *
 * @param filter the text the item names are compared against
 * @return a comparator over {@code CategoryItem}s based on their names
 */
private Comparator<CategoryItem> sortByMatches(final String filter) {
    final Comparator<String> byNameSimilarity = StringSortingUtils.sortBySimilarity(filter);
    return (left, right) -> {
        String leftName = left.getName();
        String rightName = right.getName();
        return byNameSimilarity.compare(leftName, rightName);
    };
}
private List<String> getStringList(List<CategoryItem> input) {
List<String> output = new ArrayList<>();
for (CategoryItem item : input) {

View file

@ -0,0 +1,36 @@
package fr.free.nrw.commons.utils;
import info.debatty.java.stringsimilarity.Levenshtein;
import java.util.Comparator;
import java.util.Locale;
/**
 * Static helpers for ordering strings by how closely they resemble a given text.
 * Similarity is derived from the Levenshtein edit distance and is case-insensitive.
 */
public class StringSortingUtils {

    private StringSortingUtils() {
        //no-op
    }

    /**
     * Returns a comparator that orders strings from most similar to least similar
     * to {@code filter}.
     *
     * @param filter the text every compared string is measured against
     * @return a comparator placing the closest matches first; strings with equal
     *         similarity compare as equal
     */
    public static Comparator<String> sortBySimilarity(final String filter) {
        return (firstItem, secondItem) -> {
            double firstItemSimilarity = calculateSimilarity(firstItem, filter);
            double secondItemSimilarity = calculateSimilarity(secondItem, filter);
            // Arguments are swapped on purpose: the higher similarity must sort first.
            return Double.compare(secondItemSimilarity, firstItemSimilarity);
        };
    }

    /**
     * Computes a normalized similarity between two strings, ignoring case.
     *
     * @param firstString  one of the strings to compare
     * @param secondString the other string to compare
     * @return {@code (longerLength - editDistance) / longerLength}; {@code 1.0} when
     *         both strings are empty
     */
    private static double calculateSimilarity(String firstString, String secondString) {
        // Lower-case once with a fixed locale and use only the lower-cased values from
        // here on, so the comparison is case-insensitive regardless of which string is
        // longer and does not depend on the device's default locale.
        String first = firstString.toLowerCase(Locale.ROOT);
        String second = secondString.toLowerCase(Locale.ROOT);

        int longerLength = Math.max(first.length(), second.length());
        if (longerLength == 0) {
            // Two empty strings are identical.
            return 1.0;
        }

        // Edit distance is symmetric, so the argument order does not matter.
        double distanceBetweenStrings = new Levenshtein().distance(first, second);
        return (longerLength - distanceBetweenStrings) / (double) longerLength;
    }
}

View file

@ -0,0 +1,40 @@
package fr.free.nrw.commons.utils;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link StringSortingUtils#sortBySimilarity(String)}.
 */
public class StringSortingUtilsTest {

    /**
     * Sorting digit strings against the filter "1234" must place the exact match first,
     * followed by the remaining strings in order of decreasing similarity.
     */
    @Test
    public void testSortingNumbersBySimilarity() throws Exception {
        List<String> actualList = Arrays.asList("1234567", "4567", "12345", "123", "1234");
        List<String> expectedList = Arrays.asList("1234", "12345", "123", "1234567", "4567");

        // The filter must share characters with the inputs; an unrelated filter gives
        // every element the same similarity and the sort would leave the list untouched.
        Collections.sort(actualList, StringSortingUtils.sortBySimilarity("1234"));

        Assert.assertEquals(expectedList, actualList);
    }

    /**
     * Sorting phrases against the filter "The" must place the exact match first and
     * keep the original relative order of the phrases that are equally dissimilar.
     */
    @Test
    public void testSortingTextBySimilarity() throws Exception {
        List<String> actualList = Arrays.asList("The quick brown fox",
                "quick brown fox",
                "The",
                "The quick ",
                "The fox",
                "brown fox",
                "fox");
        List<String> expectedList = Arrays.asList("The",
                "The fox",
                "The quick ",
                "The quick brown fox",
                "quick brown fox",
                "brown fox",
                "fox");

        Collections.sort(actualList, StringSortingUtils.sortBySimilarity("The"));

        Assert.assertEquals(expectedList, actualList);
    }
}