Convert PageTitle to Kotlin

This commit is contained in:
Paul Hawke 2024-12-05 23:39:45 -06:00
parent 68b3f71b7b
commit 11ade09287
2 changed files with 284 additions and 339 deletions

View file

@ -1,339 +0,0 @@
package fr.free.nrw.commons.wikidata.model.page;
import android.os.Parcel;
import android.os.Parcelable;
import android.text.TextUtils;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.google.gson.annotations.SerializedName;
import fr.free.nrw.commons.wikidata.model.WikiSite;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.Normalizer;
import java.util.Arrays;
import java.util.Locale;
import timber.log.Timber;
/**
* Represents certain vital information about a page, including the title, namespace,
* and fragment (section anchor target). It can also contain a thumbnail URL for the
* page, and a short description retrieved from Wikidata.
*
* WARNING: This class is not immutable! Specifically, the thumbnail URL and the Wikidata
* description can be altered after construction. Therefore do NOT rely on all the fields
* of a PageTitle to remain constant for the lifetime of the object.
*/
public class PageTitle implements Parcelable {
    /** Rebuilds {@link PageTitle} instances from a {@link Parcel} via the private Parcel constructor. */
    public static final Parcelable.Creator<PageTitle> CREATOR
            = new Parcelable.Creator<PageTitle>() {
        @Override
        public PageTitle createFromParcel(Parcel in) {
            return new PageTitle(in);
        }

        @Override
        public PageTitle[] newArray(int size) {
            return new PageTitle[size];
        }
    };

    /**
     * The localised namespace of the page as a string, or null if the page is in mainspace.
     *
     * This field contains the prefix of the page's title, as opposed to the namespace ID used by
     * MediaWiki. Therefore, mainspace pages always have a null namespace, as they have no prefix,
     * and the namespace of a page will depend on the language of the wiki the user is currently
     * looking at.
     *
     * Examples:
     * * [[Manchester]] on enwiki will have a namespace of null
     * * [[Deutschland]] on dewiki will have a namespace of null
     * * [[User:Deskana]] on enwiki will have a namespace of "User"
     * * [[Utilisateur:Deskana]] on frwiki will have a namespace of "Utilisateur", even if you got
     *   to the page by going to [[User:Deskana]] and having MediaWiki automatically redirect you.
     */
    // TODO: remove. This legacy code is the localized namespace name (File, Special, Talk, etc) but
    // isn't consistent across titles. e.g., articles with colons, such as RTÉ News: Six One,
    // are broken.
    @Nullable private final String namespace;

    /** Title text without the namespace prefix and without the fragment. */
    @NonNull private final String text;

    /** Section anchor without the leading '#', or null when the title has no fragment. */
    @Nullable private final String fragment;

    /** Thumbnail URL for the page; mutable after construction. */
    @Nullable private String thumbUrl;

    /** Wiki the page belongs to; serialized by Gson under the name "site". */
    @SerializedName("site") @NonNull private final WikiSite wiki;

    /** Short description of the page; mutable after construction. */
    @Nullable private String description;

    /** Optional extra page properties; null when none were supplied. */
    @Nullable private final PageProperties properties;

    // TODO: remove after the restbase endpoint supports ZH variants.
    @Nullable private String convertedText;

    /**
     * Creates a new PageTitle object.
     * Use this if you want to pass in a fragment portion separately from the title.
     *
     * @param prefixedText title of the page with optional namespace prefix
     * @param fragment optional fragment portion
     * @param wiki the wiki site the page belongs to
     * @return a new PageTitle object matching the given input parameters
     */
    public static PageTitle withSeparateFragment(@NonNull String prefixedText,
            @Nullable String fragment, @NonNull WikiSite wiki) {
        if (TextUtils.isEmpty(fragment)) {
            return new PageTitle(prefixedText, wiki, null, (PageProperties) null);
        } else {
            // TODO: this class needs some refactoring to allow passing in a fragment
            // without having to do string manipulations.
            return new PageTitle(prefixedText + "#" + fragment, wiki, null, (PageProperties) null);
        }
    }

    /**
     * Creates a title from already-separated parts. No parsing is performed: every argument is
     * stored as given and the page properties are left null.
     */
    public PageTitle(@Nullable final String namespace, @NonNull String text, @Nullable String fragment, @Nullable String thumbUrl, @NonNull WikiSite wiki) {
        this.namespace = namespace;
        this.text = text;
        this.fragment = fragment;
        this.wiki = wiki;
        this.thumbUrl = thumbUrl;
        properties = null;
    }

    /** Parses {@code text} (see the private parsing constructor) and also sets the description. */
    public PageTitle(@Nullable String text, @NonNull WikiSite wiki, @Nullable String thumbUrl, @Nullable String description, @Nullable PageProperties properties) {
        this(text, wiki, thumbUrl, properties);
        this.description = description;
    }

    /** Parses {@code text} without page properties and also sets the description. */
    public PageTitle(@Nullable String text, @NonNull WikiSite wiki, @Nullable String thumbUrl, @Nullable String description) {
        this(text, wiki, thumbUrl);
        this.description = description;
    }

    /** Creates a title from an explicit namespace and text, with no fragment and no thumbnail. */
    public PageTitle(@Nullable String namespace, @NonNull String text, @NonNull WikiSite wiki) {
        this(namespace, text, null, null, wiki);
    }

    /** Parses {@code text} without page properties. */
    public PageTitle(@Nullable String text, @NonNull WikiSite wiki, @Nullable String thumbUrl) {
        this(text, wiki, thumbUrl, (PageProperties) null);
    }

    /** Parses {@code text} without thumbnail or page properties. */
    public PageTitle(@Nullable String text, @NonNull WikiSite wiki) {
        this(text, wiki, null);
    }

    /**
     * Parsing constructor: splits a possibly prefixed title of the form
     * {@code [prefix:]text[#fragment]} into its parts.
     *
     * <ul>
     * <li>A null {@code text} is treated as the empty string.</li>
     * <li>The part after the first '#' becomes the fragment (URL-decoded, spaces replaced by
     *     underscores); anything after a second '#' is ignored.</li>
     * <li>If the remaining text contains ':', the part before the first ':' is either a
     *     two-letter ISO 639 language code, in which case a new {@link WikiSite} is built from
     *     the given wiki's authority and that code and the namespace stays null, or it is stored
     *     as the namespace.</li>
     * </ul>
     */
    private PageTitle(@Nullable String text, @NonNull WikiSite wiki, @Nullable String thumbUrl,
            @Nullable PageProperties properties) {
        if (text == null) {
            text = "";
        }
        // FIXME: Does not handle mainspace articles with a colon in the title well at all
        // Limit -1 keeps trailing empty strings, so "Title#" yields an empty (non-null) fragment.
        String[] fragParts = text.split("#", -1);
        text = fragParts[0];
        if (fragParts.length > 1) {
            this.fragment = decodeURL(fragParts[1]).replace(" ", "_");
        } else {
            this.fragment = null;
        }
        String[] parts = text.split(":", -1);
        if (parts.length > 1) {
            String namespaceOrLanguage = parts[0];
            // TODO: find a better way to check whether the prefix is a two-letter ISO 639
            // language code rather than a namespace.
            if (Arrays.asList(Locale.getISOLanguages()).contains(namespaceOrLanguage)) {
                this.namespace = null;
                this.wiki = new WikiSite(wiki.authority(), namespaceOrLanguage);
            } else {
                this.wiki = wiki;
                this.namespace = namespaceOrLanguage;
            }
            // Re-join everything after the prefix so later colons stay part of the title text.
            this.text = TextUtils.join(":", Arrays.copyOfRange(parts, 1, parts.length));
        } else {
            this.wiki = wiki;
            this.namespace = null;
            this.text = parts[0];
        }
        this.thumbUrl = thumbUrl;
        this.properties = properties;
    }

    /**
     * Decodes a URL-encoded string into its UTF-8 equivalent. If the string cannot be decoded, the
     * original string is returned.
     * @param url The URL-encoded string that you wish to decode.
     * @return The decoded string, or the input string if the decoding failed.
     */
    @NonNull private String decodeURL(@NonNull String url) {
        try {
            return URLDecoder.decode(url, "UTF-8");
        } catch (IllegalArgumentException e) {
            // Swallow IllegalArgumentException (can happen with malformed encoding), and just
            // return the original string.
            Timber.d("URL decoding failed. String was: %s", url);
            return url;
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /** @return the wiki this title belongs to */
    @NonNull public WikiSite getWikiSite() {
        return wiki;
    }

    /** @return the title text without namespace, with spaces replaced by underscores */
    @NonNull public String getText() {
        return text.replace(" ", "_");
    }

    /** @return the fragment without the leading '#', or null if there is none */
    @Nullable public String getFragment() {
        return fragment;
    }

    @Nullable public String getThumbUrl() {
        return thumbUrl;
    }

    public void setThumbUrl(@Nullable String thumbUrl) {
        this.thumbUrl = thumbUrl;
    }

    @Nullable public String getDescription() {
        return description;
    }

    public void setDescription(@Nullable String description) {
        this.description = description;
    }

    /** @return the converted text if one was set, otherwise the prefixed text */
    @NonNull
    public String getConvertedText() {
        return convertedText == null ? getPrefixedText() : convertedText;
    }

    public void setConvertedText(@Nullable String convertedText) {
        this.convertedText = convertedText;
    }

    /** @return the prefixed text with underscores replaced by spaces */
    @NonNull public String getDisplayText() {
        return getPrefixedText().replace("_", " ");
    }

    /** @return the title text (no namespace) with underscores replaced by spaces */
    @NonNull public String getDisplayTextWithoutNamespace() {
        return text.replace("_", " ");
    }

    public boolean hasProperties() {
        return properties != null;
    }

    @Nullable public PageProperties getProperties() {
        return properties;
    }

    /** @return true only when properties are present and flag this page as the main page */
    public boolean isMainPage() {
        return properties != null && properties.isMainPage();
    }

    /** @return true only when properties are present and flag this page as a disambiguation page */
    public boolean isDisambiguationPage() {
        return properties != null && properties.isDisambiguationPage();
    }

    /** @return the {@code /wiki/} URI of this page on the wiki's authority */
    public String getCanonicalUri() {
        return getUriForDomain(getWikiSite().authority());
    }

    /** @return the {@code /wiki/} URI of this page on the wiki's mobile authority */
    public String getMobileUri() {
        return getUriForDomain(getWikiSite().mobileAuthority());
    }

    /**
     * Builds an {@code index.php} URI for this page with the given action.
     *
     * @param action value placed verbatim (not URL-encoded) in the {@code action} query parameter
     * @return {@code scheme://authority/w/index.php?title=<encoded prefixed text>&action=<action>}
     */
    public String getUriForAction(String action) {
        try {
            return String.format(
                    "%1$s://%2$s/w/index.php?title=%3$s&action=%4$s",
                    getWikiSite().scheme(),
                    getWikiSite().authority(),
                    URLEncoder.encode(getPrefixedText(), "utf-8"),
                    action
            );
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the title text prefixed with {@code namespace:} when a namespace is set; spaces in
     *         both parts are replaced by underscores
     */
    public String getPrefixedText() {
        return namespace == null ? getText() : addUnderscores(namespace) + ":" + getText();
    }

    private String addUnderscores(@NonNull String text) {
        return text.replace(" ", "_");
    }

    // The write order below must match the read order in the Parcel constructor.
    @Override public void writeToParcel(Parcel parcel, int flags) {
        parcel.writeString(namespace);
        parcel.writeString(text);
        parcel.writeString(fragment);
        parcel.writeParcelable(wiki, flags);
        parcel.writeParcelable(properties, flags);
        parcel.writeString(thumbUrl);
        parcel.writeString(description);
        parcel.writeString(convertedText);
    }

    /**
     * Two titles are equal when their prefixed texts match after NFC normalization and their
     * wikis are equal. Fragment, thumbnail, description and properties are not compared.
     */
    @Override public boolean equals(Object o) {
        if (!(o instanceof PageTitle)) {
            return false;
        }
        PageTitle other = (PageTitle)o;
        // Not using namespace directly since that can be null
        return normalizedEquals(other.getPrefixedText(), getPrefixedText()) && other.wiki.equals(wiki);
    }

    // Compare two strings based on their normalized form, using the Unicode Normalization Form C.
    // This should be used when comparing or verifying strings that will be exchanged between
    // different platforms (iOS, desktop, etc) that may encode strings using inconsistent
    // composition, especially for accents, diacritics, etc.
    private boolean normalizedEquals(@Nullable String str1, @Nullable String str2) {
        if (str1 == null || str2 == null) {
            return (str1 == null && str2 == null);
        }
        return Normalizer.normalize(str1, Normalizer.Form.NFC)
                .equals(Normalizer.normalize(str2, Normalizer.Form.NFC));
    }

    /**
     * Hashes the NFC-normalized prefixed text together with the wiki, so that titles which are
     * equal under {@link #equals(Object)} (which compares NFC forms) also share a hash code.
     */
    @Override public int hashCode() {
        int result = Normalizer.normalize(getPrefixedText(), Normalizer.Form.NFC).hashCode();
        result = 31 * result + wiki.hashCode();
        return result;
    }

    @Override public String toString() {
        return getPrefixedText();
    }

    @Override public int describeContents() {
        return 0;
    }

    /**
     * Builds {@code scheme://domain/wiki/<encoded prefixed text>} and appends {@code #fragment}
     * when the fragment is non-empty. The fragment is appended as stored, without re-encoding.
     */
    private String getUriForDomain(String domain) {
        try {
            return String.format(
                    "%1$s://%2$s/wiki/%3$s%4$s",
                    getWikiSite().scheme(),
                    domain,
                    URLEncoder.encode(getPrefixedText(), "utf-8"),
                    (this.fragment != null && this.fragment.length() > 0) ? ("#" + this.fragment) : ""
            );
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    // Reads the fields in exactly the order writeToParcel() wrote them.
    private PageTitle(Parcel in) {
        namespace = in.readString();
        text = in.readString();
        fragment = in.readString();
        wiki = in.readParcelable(WikiSite.class.getClassLoader());
        properties = in.readParcelable(PageProperties.class.getClassLoader());
        thumbUrl = in.readString();
        description = in.readString();
        convertedText = in.readString();
    }
}

View file

@ -0,0 +1,284 @@
package fr.free.nrw.commons.wikidata.model.page
import android.os.Parcel
import android.os.Parcelable
import android.text.TextUtils
import com.google.gson.annotations.SerializedName
import fr.free.nrw.commons.wikidata.model.WikiSite
import timber.log.Timber
import java.io.UnsupportedEncodingException
import java.net.URLDecoder
import java.net.URLEncoder
import java.text.Normalizer
import java.util.Arrays
import java.util.Locale
/**
* Represents certain vital information about a page, including the title, namespace,
* and fragment (section anchor target). It can also contain a thumbnail URL for the
* page, and a short description retrieved from Wikidata.
*
* WARNING: This class is not immutable! Specifically, the thumbnail URL and the Wikidata
* description can be altered after construction. Therefore do NOT rely on all the fields
* of a PageTitle to remain constant for the lifetime of the object.
*/
class PageTitle : Parcelable {
    /**
     * The localised namespace of the page as a string, or null if the page is in mainspace.
     *
     * This field contains the prefix of the page's title, as opposed to the namespace ID used by
     * MediaWiki. Therefore, mainspace pages always have a null namespace, as they have no prefix,
     * and the namespace of a page will depend on the language of the wiki the user is currently
     * looking at.
     *
     * Examples:
     * * [[Manchester]] on enwiki will have a namespace of null
     * * [[Deutschland]] on dewiki will have a namespace of null
     * * [[User:Deskana]] on enwiki will have a namespace of "User"
     * * [[Utilisateur:Deskana]] on frwiki will have a namespace of "Utilisateur", even if you got
     *   to the page by going to [[User:Deskana]] and having MediaWiki automatically redirect you.
     */
    // TODO: remove. This legacy code is the localized namespace name (File, Special, Talk, etc) but
    // isn't consistent across titles. e.g., articles with colons, such as RTÉ News: Six One,
    // are broken.
    private val namespace: String?

    /** Title text without the namespace prefix and without the fragment. */
    private val text: String

    /** Section anchor without the leading `#`, or null when the title has no fragment. */
    val fragment: String?

    /** Thumbnail URL for the page; mutable after construction. */
    var thumbUrl: String?

    /** Wiki the page belongs to; serialized by Gson under the name "site". */
    @SerializedName("site")
    val wikiSite: WikiSite

    /** Short description of the page; mutable after construction. */
    var description: String? = null

    /** Optional extra page properties; null when none were supplied. */
    private val properties: PageProperties?

    // TODO: remove after the restbase endpoint supports ZH variants.
    private var convertedText: String? = null

    /**
     * Creates a title from already-separated parts. No parsing is performed: every argument is
     * stored as given and the page properties are left null.
     */
    constructor(namespace: String?, text: String, fragment: String?, thumbUrl: String?, wiki: WikiSite) {
        this.namespace = namespace
        this.text = text
        this.fragment = fragment
        this.thumbUrl = thumbUrl
        wikiSite = wiki
        properties = null
    }

    /** Parses [text] (see the private parsing constructor) and also sets the description. */
    constructor(
        text: String?,
        wiki: WikiSite,
        thumbUrl: String?,
        description: String?,
        properties: PageProperties?,
    ) : this(text, wiki, thumbUrl, properties) {
        this.description = description
    }

    /** Parses [text] without page properties and also sets the description. */
    constructor(text: String?, wiki: WikiSite, thumbUrl: String?, description: String?) : this(text, wiki, thumbUrl) {
        this.description = description
    }

    /** Creates a title from an explicit namespace and text, with no fragment and no thumbnail. */
    constructor(namespace: String?, text: String, wiki: WikiSite) : this(namespace, text, null, null, wiki)

    /** Parses [text] without page properties; [thumbUrl] defaults to null. */
    @JvmOverloads
    constructor(text: String?, wiki: WikiSite, thumbUrl: String? = null) :
        this(text, wiki, thumbUrl, null as PageProperties?)

    /**
     * Parsing constructor: splits a possibly prefixed title of the form
     * `[prefix:]text[#fragment]` into its parts.
     *
     * * A null [input] is treated as the empty string.
     * * The part after the first `#` becomes the fragment (URL-decoded, spaces replaced by
     *   underscores); anything after a second `#` is ignored.
     * * If the remaining text contains `:`, the part before the first `:` is either a two-letter
     *   ISO 639 language code, in which case a new [WikiSite] is built from the given wiki's
     *   authority and that code and the namespace stays null, or it is stored as the namespace.
     */
    private constructor(input: String?, wiki: WikiSite, thumbUrl: String?, properties: PageProperties?) {
        // FIXME: Does not handle mainspace articles with a colon in the title well at all
        // split() keeps trailing empty parts, so "Title#" yields an empty (non-null) fragment.
        val fragParts = (input ?: "").split('#')
        fragment = fragParts.getOrNull(1)?.let { decodeURL(it).replace(" ", "_") }

        val parts = fragParts[0].split(':')
        if (parts.size > 1) {
            val namespaceOrLanguage = parts[0]
            // TODO: find a better way to check whether the prefix is a two-letter ISO 639
            // language code rather than a namespace.
            if (namespaceOrLanguage in Locale.getISOLanguages()) {
                namespace = null
                wikiSite = WikiSite(wiki.authority(), namespaceOrLanguage)
            } else {
                namespace = namespaceOrLanguage
                wikiSite = wiki
            }
            // Re-join everything after the prefix so later colons stay part of the title text.
            text = parts.drop(1).joinToString(":")
        } else {
            namespace = null
            wikiSite = wiki
            text = parts[0]
        }
        this.thumbUrl = thumbUrl
        this.properties = properties
    }

    /**
     * Decodes a URL-encoded string into its UTF-8 equivalent. If the string cannot be decoded, the
     * original string is returned.
     * @param url The URL-encoded string that you wish to decode.
     * @return The decoded string, or the input string if the decoding failed.
     */
    private fun decodeURL(url: String): String = try {
        URLDecoder.decode(url, "UTF-8")
    } catch (e: IllegalArgumentException) {
        // Swallow IllegalArgumentException (can happen with malformed encoding), and just
        // return the original string.
        Timber.d("URL decoding failed. String was: %s", url)
        url
    } catch (e: UnsupportedEncodingException) {
        throw RuntimeException(e)
    }

    /** URL-encodes [value] as UTF-8, rethrowing the (unexpected) encoding failure unchecked. */
    private fun urlEncode(value: String): String = try {
        URLEncoder.encode(value, "utf-8")
    } catch (e: UnsupportedEncodingException) {
        throw RuntimeException(e)
    }

    /** Replaces every space in [value] with an underscore. */
    private fun addUnderscores(value: String): String = value.replace(" ", "_")

    /** Returns the converted text if one was set, otherwise [prefixedText]. */
    fun getConvertedText(): String = convertedText ?: prefixedText

    fun setConvertedText(convertedText: String?) {
        this.convertedText = convertedText
    }

    /** [prefixedText] with underscores replaced by spaces. */
    val displayText: String
        get() = prefixedText.replace("_", " ")

    /** The title text (no namespace) with underscores replaced by spaces. */
    val displayTextWithoutNamespace: String
        get() = text.replace("_", " ")

    fun hasProperties(): Boolean = properties != null

    /** True only when properties are present and flag this page as the main page. */
    val isMainPage: Boolean
        get() = properties?.isMainPage == true

    /** True only when properties are present and flag this page as a disambiguation page. */
    val isDisambiguationPage: Boolean
        get() = properties?.isDisambiguationPage == true

    /** The `/wiki/` URI of this page on the wiki's authority. */
    val canonicalUri: String
        get() = getUriForDomain(wikiSite.authority())

    /** The `/wiki/` URI of this page on the wiki's mobile authority. */
    val mobileUri: String
        get() = getUriForDomain(wikiSite.mobileAuthority())

    /**
     * Builds an `index.php` URI for this page with the given action.
     *
     * @param action value placed verbatim (not URL-encoded) in the `action` query parameter
     * @return `scheme://authority/w/index.php?title=<encoded prefixed text>&action=<action>`
     */
    fun getUriForAction(action: String?): String =
        "${wikiSite.scheme()}://${wikiSite.authority()}/w/index.php" +
            "?title=${urlEncode(prefixedText)}&action=$action"

    /**
     * The title text prefixed with `namespace:` when a namespace is set; spaces in both parts
     * are replaced by underscores.
     */
    val prefixedText: String
        get() {
            val title = addUnderscores(text)
            return namespace?.let { "${addUnderscores(it)}:$title" } ?: title
        }

    // The write order below must match the read order in the Parcel constructor.
    override fun writeToParcel(parcel: Parcel, flags: Int) {
        parcel.writeString(namespace)
        parcel.writeString(text)
        parcel.writeString(fragment)
        parcel.writeParcelable(wikiSite, flags)
        parcel.writeParcelable(properties, flags)
        parcel.writeString(thumbUrl)
        parcel.writeString(description)
        parcel.writeString(convertedText)
    }

    /**
     * Two titles are equal when their prefixed texts match after NFC normalization and their
     * wikis are equal. Fragment, thumbnail, description and properties are not compared.
     */
    override fun equals(other: Any?): Boolean =
        other is PageTitle &&
            nfc(other.prefixedText) == nfc(prefixedText) &&
            other.wikiSite == wikiSite

    // Normalizes to Unicode Normalization Form C. This should be used when comparing or verifying
    // strings that will be exchanged between different platforms (iOS, desktop, etc) that may
    // encode strings using inconsistent composition, especially for accents, diacritics, etc.
    private fun nfc(value: String): String = Normalizer.normalize(value, Normalizer.Form.NFC)

    /**
     * Hashes the NFC-normalized prefixed text together with the wiki, so that titles which are
     * equal under [equals] (which compares NFC forms) also share a hash code.
     */
    override fun hashCode(): Int = 31 * nfc(prefixedText).hashCode() + wikiSite.hashCode()

    override fun toString(): String = prefixedText

    override fun describeContents(): Int = 0

    /**
     * Builds `scheme://domain/wiki/<encoded prefixed text>` and appends `#fragment` when the
     * fragment is non-empty. The fragment is appended as stored, without re-encoding.
     */
    private fun getUriForDomain(domain: String): String {
        val anchor = if (fragment.isNullOrEmpty()) "" else "#$fragment"
        return "${wikiSite.scheme()}://$domain/wiki/${urlEncode(prefixedText)}$anchor"
    }

    // Reads the fields in exactly the order writeToParcel() wrote them.
    private constructor(parcel: Parcel) {
        namespace = parcel.readString()
        text = checkNotNull(parcel.readString()) { "PageTitle parcel is missing its title text" }
        fragment = parcel.readString()
        wikiSite = checkNotNull(parcel.readParcelable<WikiSite>(WikiSite::class.java.classLoader)) {
            "PageTitle parcel is missing its WikiSite"
        }
        properties = parcel.readParcelable(PageProperties::class.java.classLoader)
        thumbUrl = parcel.readString()
        description = parcel.readString()
        convertedText = parcel.readString()
    }

    companion object {
        /** Rebuilds [PageTitle] instances from a [Parcel] via the private Parcel constructor. */
        @JvmField
        val CREATOR: Parcelable.Creator<PageTitle> = object : Parcelable.Creator<PageTitle> {
            override fun createFromParcel(parcel: Parcel): PageTitle = PageTitle(parcel)

            override fun newArray(size: Int): Array<PageTitle?> = arrayOfNulls(size)
        }

        /**
         * Creates a new PageTitle object.
         * Use this if you want to pass in a fragment portion separately from the title.
         *
         * Exposed as a static method so Java callers can keep using
         * `PageTitle.withSeparateFragment(...)`.
         *
         * @param prefixedText title of the page with optional namespace prefix
         * @param fragment optional fragment portion
         * @param wiki the wiki site the page belongs to
         * @return a new PageTitle object matching the given input parameters
         */
        @JvmStatic
        fun withSeparateFragment(prefixedText: String, fragment: String?, wiki: WikiSite): PageTitle {
            // TODO: this class needs some refactoring to allow passing in a fragment
            // without having to do string manipulations.
            val fullTitle = if (fragment.isNullOrEmpty()) prefixedText else "$prefixedText#$fragment"
            return PageTitle(fullTitle, wiki, null, null as PageProperties?)
        }
    }
}