change description extraction algorithm (#4938)

The original algorithm used commas to separate descriptions, which caused bugs when a description itself contained a comma.
The algorithm now repeatedly matches string patterns of the form `{{languageCode|description}}`.
This commit is contained in:
2022-04-28 15:34:02 +08:00 committed by GitHub
parent 36510a4301
commit bf82fcc8d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 142 additions and 14 deletions

View file

@ -360,6 +360,112 @@ class MediaDetailFragmentUnitTests {
method.invoke(fragment)
}
/**
 * Feeds `getDescriptions` (looked up reflectively on [MediaDetailFragment]) a wikitext page
 * whose single English description contains a comma, and expects the whole sentence,
 * comma included, to come back under the `en` key.
 */
@Test
@Throws(Exception::class)
fun testGetDescriptionsWithComma() {
    `when`(media.filename).thenReturn("")

    // Wikitext fixture; fragments are concatenated without any separator.
    val wikiText = listOf(
        "=={{int:filedesc}}==\n",
        "{{Information\n",
        "|description={{en|1=Antique cash register in a cafe, Darjeeling}}\n",
        "|date=2017-05-17 17:07:26\n",
        "|source={{own}}\n",
        "|author=[[User:Subhrajyoti07|Subhrajyoti07]]\n",
        "|permission=\n",
        "|other versions=\n",
        "}}\n",
        "{{Location|27.043186|88.267003}}\n",
        "{{Assessments|featured=1}}"
    ).joinToString(separator = "")

    // The method is obtained via getDeclaredMethod and made accessible before invocation.
    val getDescriptions = MediaDetailFragment::class.java
        .getDeclaredMethod("getDescriptions", String::class.java)
        .apply { isAccessible = true }

    val expected = linkedMapOf("en" to "Antique cash register in a cafe, Darjeeling")
    val actual = getDescriptions.invoke(fragment, wikiText)
    Assert.assertEquals(expected, actual)
}
/**
 * Feeds `getDescriptions` (looked up reflectively on [MediaDetailFragment]) a wikitext page
 * whose English description spans two lines and embeds both a `[[...]]` link and a nested
 * `{{On Wikidata|...}}` template. The expected `en` value keeps the link, the line break and
 * the nested template intact.
 */
@Test
@Throws(Exception::class)
fun testGetDescriptionsWithNestedBrackets() {
    `when`(media.filename).thenReturn("")

    // Wikitext fixture; fragments are concatenated without any separator.
    val wikiText = listOf(
        "=={{int:filedesc}}==\n",
        "{{Information\n",
        "|description={{en|1=[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n",
        "{{On Wikidata|Q17549757}}}}\n",
        "|date=2017-09-17 13:09:39\n",
        "|source={{own}}\n",
        "|author=[[User:Colin|Colin]]\n",
        "|permission=\n",
        "|other versions=\n",
        "|Other fields = {{Credit line |Author = © [[User:Colin]] | Other = Wikimedia Commons |License = CC-BY-SA-4.0}}\n",
        "}}\n",
        "{{Location|51.519003|-0.138353}}\n",
        "{{Assessments|featured=1}}"
    ).joinToString(separator = "")

    // The method is obtained via getDeclaredMethod and made accessible before invocation.
    val getDescriptions = MediaDetailFragment::class.java
        .getDeclaredMethod("getDescriptions", String::class.java)
        .apply { isAccessible = true }

    val expected = linkedMapOf(
        "en" to "[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n{{On Wikidata|Q17549757}}"
    )
    val actual = getDescriptions.invoke(fragment, wikiText)
    Assert.assertEquals(expected, actual)
}
/**
 * Feeds `getDescriptions` (looked up reflectively on [MediaDetailFragment]) a wikitext page
 * in which the English description is immediately followed by a
 * `{{Listed building England|1223496}}` template. The expected map holds only the `en` entry
 * (including its trailing newline); the trailing template must not contribute an entry.
 */
@Test
@Throws(Exception::class)
fun testGetDescriptionsWithInvalidLanguageCode() {
    `when`(media.filename).thenReturn("")

    // Wikitext fixture; fragments are concatenated without any separator.
    val wikiText = listOf(
        "=={{int:filedesc}}==\n",
        "{{Information\n",
        "|description={{en|1=[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n",
        "}}{{Listed building England|1223496}}\n",
        "|date=2017-09-17 13:09:39\n",
        "|source={{own}}\n",
        "|author=[[User:Colin|Colin]]\n",
        "|permission=\n",
        "|other versions=\n",
        "|Other fields = {{Credit line |Author = © [[User:Colin]] | Other = Wikimedia Commons |License = CC-BY-SA-4.0}}\n",
        "}}\n",
        "{{Location|51.519003|-0.138353}}\n",
        "{{Assessments|featured=1}}"
    ).joinToString(separator = "")

    // The method is obtained via getDeclaredMethod and made accessible before invocation.
    val getDescriptions = MediaDetailFragment::class.java
        .getDeclaredMethod("getDescriptions", String::class.java)
        .apply { isAccessible = true }

    val expected = linkedMapOf("en" to "[[:en:Fitzrovia Chapel|Fitzrovia Chapel]] ceiling<br/>\n")
    val actual = getDescriptions.invoke(fragment, wikiText)
    Assert.assertEquals(expected, actual)
}
/**
 * Feeds `getDescriptions` (looked up reflectively on [MediaDetailFragment]) an `{{Artwork}}`
 * page whose fields are written with padding around `=` and whose description field carries
 * two language templates (`en` and `it`) separated by a space. Both descriptions are expected
 * in the resulting map.
 */
@Test
@Throws(Exception::class)
fun testGetDescriptionsWithSpaces() {
    `when`(media.filename).thenReturn("")

    // Wikitext fixture; fragments are concatenated without any separator.
    val wikiText = listOf(
        "=={{int:filedesc}}==\n",
        "{{Artwork\n",
        " |artist = {{Creator:Filippo Peroni}} Restored by {{Creator:Adam Cuerden}}\n",
        " |author = \n",
        " |title = Ricchi giardini nel Palazzo di Monforte a Palermo\n",
        " |description = {{en|''Ricchi giardini nel Palazzo di Monforte a Palermo'', set design for ''I Vespri siciliani'' act 5 (undated).}} {{it|''Ricchi giardini nel Palazzo di Monforte a Palermo'', bozzetto per ''I Vespri siciliani'' atto 5 (s.d.).}}\n",
        " |date = {{between|1855|1878}} (Premiére of the opera and death of the artist, respectively)\n",
        " |medium = {{technique|watercolor|and=tempera|and2=|over=paper}}\n",
        " |dimensions = {{Size|unit=mm|height=210|width=270}}\n",
        " |institution = {{Institution:Archivio Storico Ricordi}}\n",
        " |department = \n",
        " |place of discovery = \n",
        " |object history = \n",
        " |exhibition history = \n",
        " |credit line = \n",
        " |inscriptions = \n",
        " |notes = \n",
        " |accession number = ICON000132\n",
        " |place of creation = \n",
        " |source = [https://www.archivioricordi.com/chi-siamo/glam-archivio-ricordi/#/ Archivio Storico Ricordi], [https://www.digitalarchivioricordi.com/it/works/display/108/Vespri_Siciliani__I Collezione Digitale Ricordi]\n",
        " |permission={{PermissionTicket|id=2022031410007974|user=Ruthven}} \n",
        " |other_versions = \n",
        "* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132 - Restoration.jpg]] - Restoration (JPEG)\n",
        "* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132 - Restoration.png]] - Restoration (PNG)\n",
        "* [[:File:Ricchi giardini nel Palazzo di Monforte a Palermo, bozzetto di Filippo Peroni per I Vespri siciliani (s.d.) - Archivio Storico Ricordi ICON000132.jpg]] - Original (JPEG)\n",
        " |references = \n",
        " |wikidata = \n",
        "}}"
    ).joinToString(separator = "")

    // The method is obtained via getDeclaredMethod and made accessible before invocation.
    val getDescriptions = MediaDetailFragment::class.java
        .getDeclaredMethod("getDescriptions", String::class.java)
        .apply { isAccessible = true }

    val expected = linkedMapOf(
        "en" to "''Ricchi giardini nel Palazzo di Monforte a Palermo'', set design for ''I Vespri siciliani'' act 5 (undated).",
        "it" to "''Ricchi giardini nel Palazzo di Monforte a Palermo'', bozzetto per ''I Vespri siciliani'' atto 5 (s.d.)."
    )
    val actual = getDescriptions.invoke(fragment, wikiText)
    Assert.assertEquals(expected, actual)
}
@Test
@Throws(Exception::class)
fun testGetDescriptionList() {
@ -687,4 +793,4 @@ class MediaDetailFragmentUnitTests {
method.isAccessible = true
method.invoke(fragment, media)
}
}
}