From 5094cbc58a629d8f425c265adc9c95f7121b6027 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:02:44 +0200 Subject: [PATCH 1/8] Implementing String similarity algorithm for searching category functionality --- app/build.gradle | 1 + .../category/CategorizationFragment.java | 8 ++++ .../nrw/commons/utils/StringSortingUtils.java | 36 +++++++++++++++++ .../commons/utils/StringSortingUtilsTest.java | 40 +++++++++++++++++++ 4 files changed, 85 insertions(+) create mode 100644 app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java create mode 100644 app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java diff --git a/app/build.gradle b/app/build.gradle index 51d6f56b0..e2d6d2b43 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -25,6 +25,7 @@ dependencies { compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){ transitive=true } + compile 'info.debatty:java-string-similarity:0.24' compile 'io.reactivex.rxjava2:rxandroid:2.0.1' // Because RxAndroid releases are few and far between, it is recommended you also diff --git a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java index b281a6c1b..0a96e3ca2 100644 --- a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java +++ b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java @@ -25,6 +25,7 @@ import com.pedrogomez.renderers.RVRendererAdapter; import java.util.ArrayList; import java.util.Calendar; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -36,6 +37,7 @@ import fr.free.nrw.commons.CommonsApplication; import fr.free.nrw.commons.R; import fr.free.nrw.commons.data.Category; import fr.free.nrw.commons.upload.MwVolleyApi; +import fr.free.nrw.commons.utils.StringSortingUtils; import io.reactivex.Observable; import io.reactivex.android.schedulers.AndroidSchedulers; import io.reactivex.schedulers.Schedulers; @@ -199,6 +201,7 @@ public class CategorizationFragment extends Fragment { ) .filter(categoryItem -> !containsYear(categoryItem.getName())) .distinct() + .sorted(sortByMatches(filter)) .observeOn(AndroidSchedulers.mainThread()) .subscribe( s -> categoriesAdapter.add(s), @@ -222,6 +225,11 @@ public class CategorizationFragment extends Fragment { ); } + private Comparator sortByMatches(final String filter) { + Comparator stringSimilarityComparator = StringSortingUtils.sortBySimilarity(filter); + return (firstItem, secondItem) -> stringSimilarityComparator.compare(firstItem.getName(), secondItem.getName()); + } + private List getStringList(List input) { List output = new ArrayList<>(); for (CategoryItem item : input) { diff --git a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java new file mode 100644 index 000000000..95bd711f5 --- /dev/null +++ b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java @@ -0,0 +1,36 @@ +package fr.free.nrw.commons.utils; + +import info.debatty.java.stringsimilarity.Levenshtein; +import java.util.Comparator; + +public class StringSortingUtils { + + private StringSortingUtils() { + //no-op + } + + public static Comparator sortBySimilarity(final String filter) { + return (firstItem, secondItem) -> { + double firstItemSimilarity = StringSortingUtils.calculateSimilarity(firstItem, filter); + double secondItemSimilarity = StringSortingUtils.calculateSimilarity(secondItem, filter); + return (int) Math.signum(secondItemSimilarity - firstItemSimilarity); + }; + } + + private static double calculateSimilarity(String firstString, String secondString) { + String longer = firstString.toLowerCase(); + String shorter = secondString.toLowerCase(); + + if (firstString.length() < secondString.length()) { + longer = secondString; + shorter = firstString; + } + int longerLength = longer.length(); + if (longerLength == 0) { + return 1.0; + } + + double distanceBetweenStrings = new Levenshtein().distance(longer, shorter); + return (longerLength - distanceBetweenStrings) / (double) longerLength; + } +} \ No newline at end of file diff --git a/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java b/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java new file mode 100644 index 000000000..661ddfc8e --- /dev/null +++ b/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java @@ -0,0 +1,40 @@ +package fr.free.nrw.commons.utils; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.junit.Assert; +import org.junit.Test; + +public class StringSortingUtilsTest { + + @Test + public void testSortingNumbersBySimilarity() throws Exception { + List actualList = Arrays.asList("1234567", "4567", "12345", "123", "1234"); + List expectedList = Arrays.asList("1234", "12345", "123", "1234567", "4567"); + + Collections.sort(actualList, StringSortingUtils.sortBySimilarity("tes")); + Assert.assertEquals(expectedList, actualList); + } + + @Test + public void testSortingTextBySimilarity() throws Exception { + List actualList = Arrays.asList("The quick brown fox", + "quick brown fox", + "The", + "The quick ", + "The fox", + "brown fox", + "fox"); + List expectedList = Arrays.asList("The", + "The fox", + "The quick ", + "The quick brown fox", + "quick brown fox", + "brown fox", + "fox"); + + Collections.sort(actualList, StringSortingUtils.sortBySimilarity("The")); + Assert.assertEquals(expectedList, actualList); + } +} \ No newline at end of file From ad664f3529d957760bcd5f74c2ee05dd66cb0818 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:03:08 +0200 Subject: [PATCH 2/8] Changing debounce time for search filter --- .../fr/free/nrw/commons/category/CategorizationFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java index 0a96e3ca2..f4092a4c9 100644 --- a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java +++ b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java @@ -108,7 +108,7 @@ public class CategorizationFragment extends Fragment { RxTextView.textChanges(categoriesFilter) .takeUntil(RxView.detaches(categoriesFilter)) - .debounce(300, TimeUnit.MILLISECONDS) + .debounce(500, TimeUnit.MILLISECONDS) .observeOn(AndroidSchedulers.mainThread()) .subscribe(filter -> updateCategoryList(filter.toString())); return rootView; From 228a497125055ddc91e713e8611853679f63ed7c Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:03:59 +0200 Subject: [PATCH 3/8] Code cleanup in build.gradle --- app/build.gradle | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/app/build.gradle b/app/build.gradle index e2d6d2b43..a304b7674 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -12,20 +12,23 @@ dependencies { compile 'ch.acra:acra:4.7.0' compile 'org.mediawiki:api:1.3' compile 'commons-codec:commons-codec:1.10' - compile "com.android.support:support-v4:${project.supportLibVersion}" - compile "com.android.support:appcompat-v7:${project.supportLibVersion}" - compile "com.android.support:design:${project.supportLibVersion}" - compile 'com.google.code.gson:gson:2.8.0' - compile "com.jakewharton:butterknife:$BUTTERKNIFE_VERSION" compile 'com.github.pedrovgs:renderers:3.3.3' - annotationProcessor "com.jakewharton:butterknife-compiler:$BUTTERKNIFE_VERSION" + compile 'com.google.code.gson:gson:2.8.0' compile 'com.jakewharton.timber:timber:4.5.1' - compile 'com.squareup.okhttp3:okhttp:3.8.1' - compile 'com.squareup.okio:okio:1.13.0' + compile 'info.debatty:java-string-similarity:0.24' compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){ transitive=true } - compile 'info.debatty:java-string-similarity:0.24' + + compile "com.android.support:support-v4:${project.supportLibVersion}" + compile "com.android.support:appcompat-v7:${project.supportLibVersion}" + compile "com.android.support:design:${project.supportLibVersion}" + + compile "com.jakewharton:butterknife:$BUTTERKNIFE_VERSION" + annotationProcessor "com.jakewharton:butterknife-compiler:$BUTTERKNIFE_VERSION" + + compile 'com.squareup.okhttp3:okhttp:3.8.1' + compile 'com.squareup.okio:okio:1.13.0' compile 'io.reactivex.rxjava2:rxandroid:2.0.1' // Because RxAndroid releases are few and far between, it is recommended you also From c9ed77f69608845cdc9fa83fe336129068c5745e Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:13:36 +0200 Subject: [PATCH 4/8] Changing method names --- .../fr/free/nrw/commons/category/CategorizationFragment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java index f4092a4c9..811afab76 100644 --- a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java +++ b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java @@ -201,7 +201,7 @@ public class CategorizationFragment extends Fragment { ) .filter(categoryItem -> !containsYear(categoryItem.getName())) .distinct() - .sorted(sortByMatches(filter)) + .sorted(sortBySimilarity(filter)) .observeOn(AndroidSchedulers.mainThread()) .subscribe( s -> categoriesAdapter.add(s), @@ -225,7 +225,7 @@ public class CategorizationFragment extends Fragment { ); } - private Comparator sortByMatches(final String filter) { + private Comparator sortBySimilarity(final String filter) { Comparator stringSimilarityComparator = StringSortingUtils.sortBySimilarity(filter); return (firstItem, secondItem) -> stringSimilarityComparator.compare(firstItem.getName(), secondItem.getName()); } From 8edd90ef506b967457028556b0fc38917bf1b898 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:27:16 +0200 Subject: [PATCH 5/8] Fixing Codacy issues --- .../commons/category/CategorizationFragment.java | 3 ++- .../free/nrw/commons/utils/StringSortingUtils.java | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java index 811afab76..791777bdc 100644 --- a/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java +++ b/app/src/main/java/fr/free/nrw/commons/category/CategorizationFragment.java @@ -227,7 +227,8 @@ public class CategorizationFragment extends Fragment { private Comparator sortBySimilarity(final String filter) { Comparator stringSimilarityComparator = StringSortingUtils.sortBySimilarity(filter); - return (firstItem, secondItem) -> stringSimilarityComparator.compare(firstItem.getName(), secondItem.getName()); + return (firstItem, secondItem) -> stringSimilarityComparator + .compare(firstItem.getName(), secondItem.getName()); } private List getStringList(List input) { diff --git a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java index 95bd711f5..c3b54abd4 100644 --- a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java +++ b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java @@ -9,10 +9,19 @@ public class StringSortingUtils { //no-op } + /** + * Returns Comparator for sorting strings by its similarity with Levenshtein + * algorithm. By using this Comparator we get results from the highest to + * the lowest match. + * + * @param filter pattern to compare similarity + * @return Comparator with string similarity + */ + public static Comparator sortBySimilarity(final String filter) { return (firstItem, secondItem) -> { - double firstItemSimilarity = StringSortingUtils.calculateSimilarity(firstItem, filter); - double secondItemSimilarity = StringSortingUtils.calculateSimilarity(secondItem, filter); + double firstItemSimilarity = calculateSimilarity(firstItem, filter); + double secondItemSimilarity = calculateSimilarity(secondItem, filter); return (int) Math.signum(secondItemSimilarity - firstItemSimilarity); }; } From 0446335bd82e55b63ef2b2b03b922bbd647b4d04 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 18:29:37 +0200 Subject: [PATCH 6/8] Fixing test not working --- .../java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java b/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java index 661ddfc8e..2fc2fac88 100644 --- a/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java +++ b/app/src/test/java/fr/free/nrw/commons/utils/StringSortingUtilsTest.java @@ -13,7 +13,7 @@ public class StringSortingUtilsTest { List actualList = Arrays.asList("1234567", "4567", "12345", "123", "1234"); List expectedList = Arrays.asList("1234", "12345", "123", "1234567", "4567"); - Collections.sort(actualList, StringSortingUtils.sortBySimilarity("tes")); + Collections.sort(actualList, StringSortingUtils.sortBySimilarity("1234")); Assert.assertEquals(expectedList, actualList); } From c61414d76e480704ee8072acb131e67aa12cd065 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 19:08:17 +0200 Subject: [PATCH 7/8] Fixing Codacy issues --- .../main/java/fr/free/nrw/commons/utils/StringSortingUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java index c3b54abd4..b6bfbb699 100644 --- a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java +++ b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java @@ -1,7 +1,7 @@ package fr.free.nrw.commons.utils; -import info.debatty.java.stringsimilarity.Levenshtein; import java.util.Comparator; +import info.debatty.java.stringsimilarity.Levenshtein; public class StringSortingUtils { From 0e2d2c1fcf1c2d65b0b7e7ace90fb50e0a4060b0 Mon Sep 17 00:00:00 2001 From: pszklarska Date: Tue, 5 Sep 2017 19:14:58 +0200 Subject: [PATCH 8/8] Fixing Codacy issues --- .../main/java/fr/free/nrw/commons/utils/StringSortingUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java index b6bfbb699..03b1469e0 100644 --- a/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java +++ b/app/src/main/java/fr/free/nrw/commons/utils/StringSortingUtils.java @@ -1,6 +1,7 @@ package fr.free.nrw.commons.utils; import java.util.Comparator; + import info.debatty.java.stringsimilarity.Levenshtein; public class StringSortingUtils {