mirror of
https://github.com/commons-app/apps-android-commons.git
synced 2025-10-27 04:43:54 +01:00
change description extraction algorithm (#4938)
The original algorithm split the descriptions on commas, which caused bugs whenever a description itself contained a comma.
The algorithm now repeatedly matches string patterns of the form `{{languageCode|description}}`.
This commit is contained in:
parent
36510a4301
commit
bf82fcc8d3
2 changed files with 142 additions and 14 deletions
|
|
@ -86,7 +86,9 @@ import io.reactivex.Single;
|
||||||
import io.reactivex.android.schedulers.AndroidSchedulers;
|
import io.reactivex.android.schedulers.AndroidSchedulers;
|
||||||
import io.reactivex.schedulers.Schedulers;
|
import io.reactivex.schedulers.Schedulers;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
@ -912,6 +914,8 @@ public class MediaDetailFragment extends CommonsDaggerSupportFragment implements
|
||||||
* @return LinkedHashMap<LanguageCode,Description>
|
* @return LinkedHashMap<LanguageCode,Description>
|
||||||
*/
|
*/
|
||||||
private LinkedHashMap<String,String> getDescriptions(String s) {
|
private LinkedHashMap<String,String> getDescriptions(String s) {
|
||||||
|
// trim spaces next to "=" and "|"
|
||||||
|
s = s.replace(" =", "=").replace(" |", "|").replace("= ","=").replace("| ","|");
|
||||||
int descriptionIndex = s.indexOf("description=");
|
int descriptionIndex = s.indexOf("description=");
|
||||||
if(descriptionIndex == -1){
|
if(descriptionIndex == -1){
|
||||||
descriptionIndex = s.indexOf("Description=");
|
descriptionIndex = s.indexOf("Description=");
|
||||||
|
|
@ -921,25 +925,43 @@ public class MediaDetailFragment extends CommonsDaggerSupportFragment implements
|
||||||
return new LinkedHashMap<>();
|
return new LinkedHashMap<>();
|
||||||
}
|
}
|
||||||
final String descriptionToEnd = s.substring(descriptionIndex+12);
|
final String descriptionToEnd = s.substring(descriptionIndex+12);
|
||||||
final int descriptionEndIndex = descriptionToEnd.indexOf("\n");
|
final int descriptionEndIndex = descriptionToEnd.indexOf("\n|");
|
||||||
final String description = s.substring(descriptionIndex+12, descriptionIndex+12+descriptionEndIndex);
|
final String description = s.substring(descriptionIndex+12, descriptionIndex+12+descriptionEndIndex);
|
||||||
|
|
||||||
final String[] arr = description.trim().split(",");
|
|
||||||
final LinkedHashMap<String,String> descriptionList = new LinkedHashMap<>();
|
final LinkedHashMap<String,String> descriptionList = new LinkedHashMap<>();
|
||||||
|
|
||||||
if (!description.equals("")) {
|
int count = 0; // number of "{{"
|
||||||
for (final String string :
|
int startCode = 0;
|
||||||
arr) {
|
int endCode = 0;
|
||||||
final int startCode = string.indexOf("{{");
|
int startDescription = 0;
|
||||||
final int endCode = string.indexOf("|");
|
int endDescription = 0;
|
||||||
final String languageCode = string.substring(startCode + 2, endCode).trim();
|
final HashSet<String> allLanguageCodes = new HashSet<>(Arrays.asList("en","es","de","ja","fr","ru","pt","it","zh-hans","zh-hant","ar","ko","id","pl","nl","fa","hi","th","vi","sv","uk","cs","simple","hu","ro","fi","el","he","nb","da","sr","hr","ms","bg","ca","tr","sk","sh","bn","tl","mr","ta","kk","lt","az","bs","sl","sq","arz","zh-yue","ka","te","et","lv","ml","hy","uz","kn","af","nn","mk","gl","sw","eu","ur","ky","gu","bh","sco","ast","is","mn","be","an","km","si","ceb","jv","eo","als","ig","su","be-x-old","la","my","cy","ne","bar","azb","mzn","as","am","so","pa","map-bms","scn","tg","ckb","ga","lb","war","zh-min-nan","nds","fy","vec","pnb","zh-classical","lmo","tt","io","ia","br","hif","mg","wuu","gan","ang","or","oc","yi","ps","tk","ba","sah","fo","nap","vls","sa","ce","qu","ku","min","bcl","ilo","ht","li","wa","vo","nds-nl","pam","new","mai","sn","pms","eml","yo","ha","gn","frr","gd","hsb","cv","lo","os","se","cdo","sd","ksh","bat-smg","bo","nah","xmf","ace","roa-tara","hak","bjn","gv","mt","pfl","szl","bpy","rue","co","diq","sc","rw","vep","lij","kw","fur","pcd","lad","tpi","ext","csb","rm","kab","gom","udm","mhr","glk","za","pdc","om","iu","nv","mi","nrm","tcy","frp","myv","kbp","dsb","zu","ln","mwl","fiu-vro","tum","tet","tn","pnt","stq","nov","ny","xh","crh","lfn","st","pap","ay","zea","bxr","kl","sm","ak","ve","pag","nso","kaa","lez","gag","kv","bm","to","lbe","krc","jam","ss","roa-rup","dv","ie","av","cbk-zam","chy","inh","ug","ch","arc","pih","mrj","kg","rmy","dty","na","ts","xal","wo","fj","tyv","olo","ltg","ff","jbo","haw","ki","chr","sg","atj","sat","ady","ty","lrc","ti","din","gor","lg","rn","bi","cu","kbd","pi","cr","koi","ik","mdf","bug","ee","shn","tw","dz","srn","ks","test","en-x-piglatin","ab"));
|
||||||
final int startDescription = string.indexOf("=");
|
for (int i = 0; i < description.length() - 1; i++) {
|
||||||
final int endDescription = string.indexOf("}}");
|
if (description.startsWith("{{", i)) {
|
||||||
final String languageDescription = string
|
if (count == 0) {
|
||||||
.substring(startDescription + 1, endDescription);
|
startCode = i;
|
||||||
|
endCode = description.indexOf("|", i);
|
||||||
|
startDescription = endCode + 1;
|
||||||
|
if (description.startsWith("1=", endCode + 1)) {
|
||||||
|
startDescription += 2;
|
||||||
|
i += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
count++;
|
||||||
|
} else if (description.startsWith("}}", i)) {
|
||||||
|
count--;
|
||||||
|
if (count == 0) {
|
||||||
|
endDescription = i;
|
||||||
|
final String languageCode = description.substring(startCode + 2, endCode);
|
||||||
|
final String languageDescription = description.substring(startDescription, endDescription);
|
||||||
|
if (allLanguageCodes.contains(languageCode)) {
|
||||||
descriptionList.put(languageCode, languageDescription);
|
descriptionList.put(languageCode, languageDescription);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
return descriptionList;
|
return descriptionList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -360,6 +360,112 @@ class MediaDetailFragmentUnitTests {
|
||||||
method.invoke(fragment)
|
method.invoke(fragment)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Throws(Exception::class)
|
||||||
|
fun testGetDescriptionsWithComma() {
|
||||||
|
`when`(media.filename).thenReturn("")
|
||||||
|
val method: Method = MediaDetailFragment::class.java.getDeclaredMethod("getDescriptions", String::class.java)
|
||||||
|
method.isAccessible = true
|
||||||
|
val s = "=={{int:filedesc}}==\n" +
|
||||||
|
"{{Information\n" +
|
||||||
|
"|description={{en|1=Antique cash register in a cafe, Darjeeling}}\n" +
|
||||||
|
"|date=2017-05-17 17:07:26\n" +
|
||||||
|
"|source={{own}}\n" +
|
||||||
|
"|author=[[User:Subhrajyoti07|Subhrajyoti07]]\n" +
|
||||||
|
"|permission=\n" +
|
||||||
|
"|other versions=\n" +
|
||||||
|
"}}\n" +
|
||||||
|
"{{Location|27.043186|88.267003}}\n" +
|
||||||
|
"{{Assessments|featured=1}}"
|
||||||
|
val map = linkedMapOf("en" to "Antique cash register in a cafe, Darjeeling")
|
||||||
|
Assert.assertEquals(map, method.invoke(fragment, s))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Throws(Exception::class)
|
||||||
|
fun testGetDescriptionsWithNestedBrackets() {
|
||||||
|
`when`(media.filename).thenReturn("")
|
||||||
|
val method: Method = MediaDetailFragment::class.java.getDeclaredMethod("getDescriptions", String::class.java)
|
||||||
|
method.isAccessible = true
|
||||||
|
val s = "=={{int:filedesc}}==\n" +
|
||||||
|
"{{Information\n" +
|
||||||
|
"|description={{en|1=[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n" +
|
||||||
|
"{{On Wikidata|Q17549757}}}}\n" +
|
||||||
|
"|date=2017-09-17 13:09:39\n" +
|
||||||
|
"|source={{own}}\n" +
|
||||||
|
"|author=[[User:Colin|Colin]]\n" +
|
||||||
|
"|permission=\n" +
|
||||||
|
"|other versions=\n" +
|
||||||
|
"|Other fields = {{Credit line |Author = © [[User:Colin]] | Other = Wikimedia Commons |License = CC-BY-SA-4.0}}\n" +
|
||||||
|
"}}\n" +
|
||||||
|
"{{Location|51.519003|-0.138353}}\n" +
|
||||||
|
"{{Assessments|featured=1}}"
|
||||||
|
val map = linkedMapOf("en" to "[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n{{On Wikidata|Q17549757}}")
|
||||||
|
Assert.assertEquals(map, method.invoke(fragment, s))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Throws(Exception::class)
|
||||||
|
fun testGetDescriptionsWithInvalidLanguageCode() {
|
||||||
|
`when`(media.filename).thenReturn("")
|
||||||
|
val method: Method = MediaDetailFragment::class.java.getDeclaredMethod("getDescriptions", String::class.java)
|
||||||
|
method.isAccessible = true
|
||||||
|
val s = "=={{int:filedesc}}==\n" +
|
||||||
|
"{{Information\n" +
|
||||||
|
"|description={{en|1=[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n" +
|
||||||
|
"}}{{Listed building England|1223496}}\n" +
|
||||||
|
"|date=2017-09-17 13:09:39\n" +
|
||||||
|
"|source={{own}}\n" +
|
||||||
|
"|author=[[User:Colin|Colin]]\n" +
|
||||||
|
"|permission=\n" +
|
||||||
|
"|other versions=\n" +
|
||||||
|
"|Other fields = {{Credit line |Author = © [[User:Colin]] | Other = Wikimedia Commons |License = CC-BY-SA-4.0}}\n" +
|
||||||
|
"}}\n" +
|
||||||
|
"{{Location|51.519003|-0.138353}}\n" +
|
||||||
|
"{{Assessments|featured=1}}"
|
||||||
|
val map = linkedMapOf("en" to "[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n")
|
||||||
|
Assert.assertEquals(map, method.invoke(fragment, s))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Throws(Exception::class)
|
||||||
|
fun testGetDescriptionsWithSpaces() {
|
||||||
|
`when`(media.filename).thenReturn("")
|
||||||
|
val method: Method = MediaDetailFragment::class.java.getDeclaredMethod("getDescriptions", String::class.java)
|
||||||
|
method.isAccessible = true
|
||||||
|
val s = "=={{int:filedesc}}==\n" +
|
||||||
|
"{{Artwork\n" +
|
||||||
|
" |artist = {{Creator:Filippo Peroni}} Restored by {{Creator:Adam Cuerden}}\n" +
|
||||||
|
" |author = \n" +
|
||||||
|
" |title = Ricchi giardini nel Palazzo di Monforte a Palermo\n" +
|
||||||
|
" |description = {{en|''Ricchi giardini nel Palazzo di Monforte a Palermo'', set design for ''I Vespri siciliani'' act 5 (undated).}} {{it|''Ricchi giardini nel Palazzo di Monforte a Palermo'', bozzetto per ''I Vespri siciliani'' atto 5 (s.d.).}}\n" +
|
||||||
|
" |date = {{between|1855|1878}} (Premiére of the opera and death of the artist, respectively)\n" +
|
||||||
|
" |medium = {{technique|watercolor|and=tempera|and2=|over=paper}}\n" +
|
||||||
|
" |dimensions = {{Size|unit=mm|height=210|width=270}}\n" +
|
||||||
|
" |institution = {{Institution:Archivio Storico Ricordi}}\n" +
|
||||||
|
" |department = \n" +
|
||||||
|
" |place of discovery = \n" +
|
||||||
|
" |object history = \n" +
|
||||||
|
" |exhibition history = \n" +
|
||||||
|
" |credit line = \n" +
|
||||||
|
" |inscriptions = \n" +
|
||||||
|
" |notes = \n" +
|
||||||
|
" |accession number = ICON000132\n" +
|
||||||
|
" |place of creation = \n" +
|
||||||
|
" |source = [https://www.archivioricordi.com/chi-siamo/glam-archivio-ricordi/#/ Archivio Storico Ricordi], [https://www.digitalarchivioricordi.com/it/works/display/108/Vespri_Siciliani__I Collezione Digitale Ricordi]\n" +
|
||||||
|
" |permission={{PermissionTicket|id=2022031410007974|user=Ruthven}} \n" +
|
||||||
|
" |other_versions = \n" +
|
||||||
|
"* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132 - Restoration.jpg]] - Restoration (JPEG)\n" +
|
||||||
|
"* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132 - Restoration.png]] - Restoration (PNG)\n" +
|
||||||
|
"* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132.jpg]] - Original (JPEG)\n" +
|
||||||
|
" |references = \n" +
|
||||||
|
" |wikidata = \n" +
|
||||||
|
"}}"
|
||||||
|
val map = linkedMapOf("en" to "''Ricchi giardini nel Palazzo di Monforte a Palermo'', set design for ''I Vespri siciliani'' act 5 (undated).",
|
||||||
|
"it" to "''Ricchi giardini nel Palazzo di Monforte a Palermo'', bozzetto per ''I Vespri siciliani'' atto 5 (s.d.).")
|
||||||
|
Assert.assertEquals(map, method.invoke(fragment, s))
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Throws(Exception::class)
|
@Throws(Exception::class)
|
||||||
fun testGetDescriptionList() {
|
fun testGetDescriptionList() {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue