mirror of
https://github.com/commons-app/apps-android-commons.git
synced 2025-10-26 12:23:58 +01:00
Implementing String similarity algorithm for searching category functionality
This commit is contained in:
parent
d4a89afafd
commit
5094cbc58a
4 changed files with 85 additions and 0 deletions
|
|
@ -25,6 +25,7 @@ dependencies {
|
|||
compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){
|
||||
transitive=true
|
||||
}
|
||||
compile 'info.debatty:java-string-similarity:0.24'
|
||||
|
||||
compile 'io.reactivex.rxjava2:rxandroid:2.0.1'
|
||||
// Because RxAndroid releases are few and far between, it is recommended you also
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import com.pedrogomez.renderers.RVRendererAdapter;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
|
@ -36,6 +37,7 @@ import fr.free.nrw.commons.CommonsApplication;
|
|||
import fr.free.nrw.commons.R;
|
||||
import fr.free.nrw.commons.data.Category;
|
||||
import fr.free.nrw.commons.upload.MwVolleyApi;
|
||||
import fr.free.nrw.commons.utils.StringSortingUtils;
|
||||
import io.reactivex.Observable;
|
||||
import io.reactivex.android.schedulers.AndroidSchedulers;
|
||||
import io.reactivex.schedulers.Schedulers;
|
||||
|
|
@ -199,6 +201,7 @@ public class CategorizationFragment extends Fragment {
|
|||
)
|
||||
.filter(categoryItem -> !containsYear(categoryItem.getName()))
|
||||
.distinct()
|
||||
.sorted(sortByMatches(filter))
|
||||
.observeOn(AndroidSchedulers.mainThread())
|
||||
.subscribe(
|
||||
s -> categoriesAdapter.add(s),
|
||||
|
|
@ -222,6 +225,11 @@ public class CategorizationFragment extends Fragment {
|
|||
);
|
||||
}
|
||||
|
||||
private Comparator<CategoryItem> sortByMatches(final String filter) {
|
||||
Comparator<String> stringSimilarityComparator = StringSortingUtils.sortBySimilarity(filter);
|
||||
return (firstItem, secondItem) -> stringSimilarityComparator.compare(firstItem.getName(), secondItem.getName());
|
||||
}
|
||||
|
||||
private List<String> getStringList(List<CategoryItem> input) {
|
||||
List<String> output = new ArrayList<>();
|
||||
for (CategoryItem item : input) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
package fr.free.nrw.commons.utils;
|
||||
|
||||
import info.debatty.java.stringsimilarity.Levenshtein;
|
||||
import java.util.Comparator;
|
||||
|
||||
public class StringSortingUtils {
|
||||
|
||||
private StringSortingUtils() {
|
||||
//no-op
|
||||
}
|
||||
|
||||
public static Comparator<String> sortBySimilarity(final String filter) {
|
||||
return (firstItem, secondItem) -> {
|
||||
double firstItemSimilarity = StringSortingUtils.calculateSimilarity(firstItem, filter);
|
||||
double secondItemSimilarity = StringSortingUtils.calculateSimilarity(secondItem, filter);
|
||||
return (int) Math.signum(secondItemSimilarity - firstItemSimilarity);
|
||||
};
|
||||
}
|
||||
|
||||
private static double calculateSimilarity(String firstString, String secondString) {
|
||||
String longer = firstString.toLowerCase();
|
||||
String shorter = secondString.toLowerCase();
|
||||
|
||||
if (firstString.length() < secondString.length()) {
|
||||
longer = secondString;
|
||||
shorter = firstString;
|
||||
}
|
||||
int longerLength = longer.length();
|
||||
if (longerLength == 0) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
double distanceBetweenStrings = new Levenshtein().distance(longer, shorter);
|
||||
return (longerLength - distanceBetweenStrings) / (double) longerLength;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
package fr.free.nrw.commons.utils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class StringSortingUtilsTest {
|
||||
|
||||
@Test
|
||||
public void testSortingNumbersBySimilarity() throws Exception {
|
||||
List<String> actualList = Arrays.asList("1234567", "4567", "12345", "123", "1234");
|
||||
List<String> expectedList = Arrays.asList("1234", "12345", "123", "1234567", "4567");
|
||||
|
||||
Collections.sort(actualList, StringSortingUtils.sortBySimilarity("tes"));
|
||||
Assert.assertEquals(expectedList, actualList);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSortingTextBySimilarity() throws Exception {
|
||||
List<String> actualList = Arrays.asList("The quick brown fox",
|
||||
"quick brown fox",
|
||||
"The",
|
||||
"The quick ",
|
||||
"The fox",
|
||||
"brown fox",
|
||||
"fox");
|
||||
List<String> expectedList = Arrays.asList("The",
|
||||
"The fox",
|
||||
"The quick ",
|
||||
"The quick brown fox",
|
||||
"quick brown fox",
|
||||
"brown fox",
|
||||
"fox");
|
||||
|
||||
Collections.sort(actualList, StringSortingUtils.sortBySimilarity("The"));
|
||||
Assert.assertEquals(expectedList, actualList);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue