mirror of
https://github.com/commons-app/apps-android-commons.git
synced 2025-10-26 20:33:53 +01:00
Implementing String similarity algorithm for searching category functionality
This commit is contained in:
parent
d4a89afafd
commit
5094cbc58a
4 changed files with 85 additions and 0 deletions
|
|
@ -25,6 +25,7 @@ dependencies {
|
||||||
compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){
|
compile ('com.mapbox.mapboxsdk:mapbox-android-sdk:5.1.0@aar'){
|
||||||
transitive=true
|
transitive=true
|
||||||
}
|
}
|
||||||
|
compile 'info.debatty:java-string-similarity:0.24'
|
||||||
|
|
||||||
compile 'io.reactivex.rxjava2:rxandroid:2.0.1'
|
compile 'io.reactivex.rxjava2:rxandroid:2.0.1'
|
||||||
// Because RxAndroid releases are few and far between, it is recommended you also
|
// Because RxAndroid releases are few and far between, it is recommended you also
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import com.pedrogomez.renderers.RVRendererAdapter;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -36,6 +37,7 @@ import fr.free.nrw.commons.CommonsApplication;
|
||||||
import fr.free.nrw.commons.R;
|
import fr.free.nrw.commons.R;
|
||||||
import fr.free.nrw.commons.data.Category;
|
import fr.free.nrw.commons.data.Category;
|
||||||
import fr.free.nrw.commons.upload.MwVolleyApi;
|
import fr.free.nrw.commons.upload.MwVolleyApi;
|
||||||
|
import fr.free.nrw.commons.utils.StringSortingUtils;
|
||||||
import io.reactivex.Observable;
|
import io.reactivex.Observable;
|
||||||
import io.reactivex.android.schedulers.AndroidSchedulers;
|
import io.reactivex.android.schedulers.AndroidSchedulers;
|
||||||
import io.reactivex.schedulers.Schedulers;
|
import io.reactivex.schedulers.Schedulers;
|
||||||
|
|
@ -199,6 +201,7 @@ public class CategorizationFragment extends Fragment {
|
||||||
)
|
)
|
||||||
.filter(categoryItem -> !containsYear(categoryItem.getName()))
|
.filter(categoryItem -> !containsYear(categoryItem.getName()))
|
||||||
.distinct()
|
.distinct()
|
||||||
|
.sorted(sortByMatches(filter))
|
||||||
.observeOn(AndroidSchedulers.mainThread())
|
.observeOn(AndroidSchedulers.mainThread())
|
||||||
.subscribe(
|
.subscribe(
|
||||||
s -> categoriesAdapter.add(s),
|
s -> categoriesAdapter.add(s),
|
||||||
|
|
@ -222,6 +225,11 @@ public class CategorizationFragment extends Fragment {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Comparator<CategoryItem> sortByMatches(final String filter) {
|
||||||
|
Comparator<String> stringSimilarityComparator = StringSortingUtils.sortBySimilarity(filter);
|
||||||
|
return (firstItem, secondItem) -> stringSimilarityComparator.compare(firstItem.getName(), secondItem.getName());
|
||||||
|
}
|
||||||
|
|
||||||
private List<String> getStringList(List<CategoryItem> input) {
|
private List<String> getStringList(List<CategoryItem> input) {
|
||||||
List<String> output = new ArrayList<>();
|
List<String> output = new ArrayList<>();
|
||||||
for (CategoryItem item : input) {
|
for (CategoryItem item : input) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package fr.free.nrw.commons.utils;
|
||||||
|
|
||||||
|
import info.debatty.java.stringsimilarity.Levenshtein;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
public class StringSortingUtils {
|
||||||
|
|
||||||
|
private StringSortingUtils() {
|
||||||
|
//no-op
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Comparator<String> sortBySimilarity(final String filter) {
|
||||||
|
return (firstItem, secondItem) -> {
|
||||||
|
double firstItemSimilarity = StringSortingUtils.calculateSimilarity(firstItem, filter);
|
||||||
|
double secondItemSimilarity = StringSortingUtils.calculateSimilarity(secondItem, filter);
|
||||||
|
return (int) Math.signum(secondItemSimilarity - firstItemSimilarity);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double calculateSimilarity(String firstString, String secondString) {
|
||||||
|
String longer = firstString.toLowerCase();
|
||||||
|
String shorter = secondString.toLowerCase();
|
||||||
|
|
||||||
|
if (firstString.length() < secondString.length()) {
|
||||||
|
longer = secondString;
|
||||||
|
shorter = firstString;
|
||||||
|
}
|
||||||
|
int longerLength = longer.length();
|
||||||
|
if (longerLength == 0) {
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double distanceBetweenStrings = new Levenshtein().distance(longer, shorter);
|
||||||
|
return (longerLength - distanceBetweenStrings) / (double) longerLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
package fr.free.nrw.commons.utils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class StringSortingUtilsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSortingNumbersBySimilarity() throws Exception {
|
||||||
|
List<String> actualList = Arrays.asList("1234567", "4567", "12345", "123", "1234");
|
||||||
|
List<String> expectedList = Arrays.asList("1234", "12345", "123", "1234567", "4567");
|
||||||
|
|
||||||
|
Collections.sort(actualList, StringSortingUtils.sortBySimilarity("tes"));
|
||||||
|
Assert.assertEquals(expectedList, actualList);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSortingTextBySimilarity() throws Exception {
|
||||||
|
List<String> actualList = Arrays.asList("The quick brown fox",
|
||||||
|
"quick brown fox",
|
||||||
|
"The",
|
||||||
|
"The quick ",
|
||||||
|
"The fox",
|
||||||
|
"brown fox",
|
||||||
|
"fox");
|
||||||
|
List<String> expectedList = Arrays.asList("The",
|
||||||
|
"The fox",
|
||||||
|
"The quick ",
|
||||||
|
"The quick brown fox",
|
||||||
|
"quick brown fox",
|
||||||
|
"brown fox",
|
||||||
|
"fox");
|
||||||
|
|
||||||
|
Collections.sort(actualList, StringSortingUtils.sortBySimilarity("The"));
|
||||||
|
Assert.assertEquals(expectedList, actualList);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue