Mirror of https://github.com/commons-app/apps-android-commons.git, synced 2025-10-26 20:33:53 +01:00.
With data-client added as library module (#3656)
* With data-client added as library module * Fix build
This commit is contained in:
parent
9ee04f3df4
commit
32ee0b4f9a
258 changed files with 34820 additions and 2 deletions
101
data-client/scripts/generate_wiki_languages.py
Normal file
101
data-client/scripts/generate_wiki_languages.py
Normal file
|
|
@@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import lxml
|
||||
import lxml.builder as lb
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
QUERY_SITEMATRIX = 'https://www.mediawiki.org/w/api.php?action=sitematrix' \
    '&format=json&formatversion=2&smtype=language&smstate=all'

QUERY_ALLUSERS = '/w/api.php?action=query&format=json&formatversion=2&list=allusers' \
    '&aulimit=50&auactiveusers=1&auwitheditsonly=1'

# Parallel arrays, kept sorted by descending rank (see add_lang below).
lang_keys = []
lang_local_names = []
lang_eng_names = []
lang_rank = []


def add_lang(key, local_name, eng_name, rank):
    """Insert a language into the parallel arrays, keeping them sorted
    by descending rank.

    key        -- language code used as the Android preference key
    local_name -- autonym (name of the language in itself)
    eng_name   -- English name of the language
    rank       -- popularity metric (monthly unique devices); higher sorts first
    """
    # Find the first entry whose rank is smaller than the new one; the new
    # entry goes right before it.
    for rank_pos, item in enumerate(lang_rank):
        if rank > item:
            break
    else:
        # New rank is <= every existing rank (or the list is empty): append
        # at the end. (The previous implementation left rank_pos at the last
        # *index* here, inserting before the final element and mis-sorting
        # the tail of the list.)
        rank_pos = len(lang_rank)
    lang_keys.insert(rank_pos, key)
    lang_local_names.insert(rank_pos, local_name)
    lang_eng_names.insert(rank_pos, eng_name)
    lang_rank.insert(rank_pos, rank)
|
||||
|
||||
|
||||
# Fetch the full language sitematrix from mediawiki.org.
data = json.loads(requests.get(QUERY_SITEMATRIX).text)

for key, value in data[u"sitematrix"].items():
    # The sitematrix object also carries scalar bookkeeping entries
    # (e.g. a count); only dict values describe a language.
    if type(value) is not dict:
        continue
    language_code = value[u"code"]
    if language_code == 'got':
        # 'got' is Gothic Runes, which lie outside the Basic Multilingual Plane
        # Android segfaults on these. So let's ignore those.
        continue
    site_list = value[u"site"]
    if type(site_list) is not list:
        continue
    # Remember the (last) open Wikipedia site for this language, if any;
    # entries carrying a "closed" flag are skipped.
    wikipedia_url = ""
    for site in site_list:
        if "wikipedia.org" in site[u"url"] and u"closed" not in site:
            wikipedia_url = site[u"url"]
    if len(wikipedia_url) == 0:
        continue
    # TODO: If we want to remove languages with too few active users:
    # allusers = json.loads(requests.get(wikipedia_url + QUERY_ALLUSERS).text)
    # if len(allusers[u"query"][u"allusers"]) < 10:
    #     print ("Excluding " + language_code + " (too few active users).")
    #     continue
    # Use the AQS API to get total pageviews for this language wiki in the last month:
    date = datetime.today() - timedelta(days=31)
    unique_device_response = json.loads(requests.get('https://wikimedia.org/api/rest_v1/metrics/unique-devices/' +
                                                     wikipedia_url.replace('https://', '') + '/all-sites/monthly/' +
                                                     date.strftime('%Y%m01') + '/' + date.strftime('%Y%m01')).text)
    # Default rank of 0 when the metrics endpoint returns no data.
    rank = 0
    if u"items" in unique_device_response:
        if len(unique_device_response[u"items"]) > 0:
            rank = unique_device_response[u"items"][0][u"devices"]
    print ("Rank for " + language_code + ": " + str(rank))
    if language_code == 'zh':
        # Replace generic Chinese with explicit Simplified/Traditional
        # entries, both sharing the zh wiki's rank.
        add_lang(key='zh-hans', local_name=u'简体中文',
                 eng_name='Simplified Chinese', rank=rank)
        add_lang(key='zh-hant', local_name=u'繁體中文',
                 eng_name='Traditional Chinese', rank=rank)
        continue
    if language_code == 'no': # T114042
        language_code = 'nb'
    # NOTE(review): assumes sitematrix "name" is the autonym and "localname"
    # is the English name — matches add_lang(key, local_name, eng_name, rank);
    # verify against the API response. Single quotes are escaped for the
    # generated Android resource file.
    add_lang(language_code, value[u"name"].replace("'", "\\'"), value[u"localname"].replace("'", "\\'"), rank)
|
||||
|
||||
|
||||
# Test-only wikis get a rank of zero so they sort to the end of the lists.
add_lang(key='test', local_name='Test', eng_name='Test', rank=0)
add_lang(key='en-x-piglatin', local_name='Igpay Atinlay', eng_name='Pig Latin', rank=0)

# Generate the XML, for Android
NAMESPACE = 'http://schemas.android.com/tools'
TOOLS = '{%s}' % NAMESPACE
maker = lb.ElementMaker(nsmap={'tools': NAMESPACE})


def _string_array(values, array_name):
    # "string-array" is not a valid Python identifier, hence getattr.
    items = [maker.item(value) for value in values]
    return getattr(maker, 'string-array')(*items, name=array_name)


resources = maker.resources(
    _string_array(lang_keys, 'preference_language_keys'),
    _string_array(lang_local_names, 'preference_language_local_names'),
    _string_array(lang_eng_names, 'preference_language_canonical_names'))
resources.set(TOOLS + 'ignore', 'MissingTranslation')

with open('../src/main/res/values/languages_list.xml', 'wb') as f:
    f.write(lxml.etree.tostring(resources, pretty_print=True,
                                xml_declaration=True, encoding='utf-8'))
|
||||
183
data-client/scripts/make-templates.py
Normal file
183
data-client/scripts/make-templates.py
Normal file
|
|
@@ -0,0 +1,183 @@
|
|||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
|
||||
import copy
|
||||
import os
|
||||
import json
|
||||
import codecs
|
||||
import requests
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
|
||||
# Language codes for wikis that need special handling below.
CHINESE_WIKI_LANG = "zh"
SIMPLIFIED_CHINESE_LANG = "zh-hans"
TRADITIONAL_CHINESE_LANG = "zh-hant"

# T114042: the "no" wiki is actually Norwegian Bokmal, whose proper code is "nb".
NORWEGIAN_BOKMAL_WIKI_LANG = "no"
NORWEGIAN_BOKMAL_LANG = "nb"


# Wikis that cause problems and hence we pretend
# do not exist.
# - "got" -> Gothic runes wiki. The name of got in got
#   contains characters outside the Unicode BMP. Android
#   hard crashes on these. Let's ignore these fellas
#   for now.
# - "mo" -> Moldovan, which automatically redirects to Romanian (ro),
#   which already exists in our list.
OSTRICH_WIKIS = [u"got", "mo"]
|
||||
|
||||
|
||||
# Represents a single wiki, along with arbitrary properties of that wiki
# Simple data container object
class Wiki(object):
    """Container for one wiki: its language code plus free-form properties
    (english_name, local_name, special_alias, file_alias, main_page_name)."""

    def __init__(self, lang):
        # lang: language code, e.g. "en".
        self.lang = lang
        # props: populated incrementally by the pipeline stages below.
        self.props = {}

    def __repr__(self):
        # Useful when debugging lists of wikis; the default object repr
        # hides both the language and the collected properties.
        return "Wiki(lang=%r, props=%r)" % (self.lang, self.props)
|
||||
|
||||
|
||||
# Represents a list of wikis plus their properties.
# Encapsulates rendering code as well
class WikiList(object):
    """A collection of Wiki objects plus the Jinja environment used to
    render them into generated Java source files."""

    def __init__(self, wikis):
        self.wikis = wikis
        # Templates live next to this script, in ./templates.
        self.template_env = Environment(loader=FileSystemLoader(
            os.path.join(os.path.dirname(os.path.realpath(__file__)), u"templates")
        ))

    def render(self, template, class_name, **kwargs):
        """Render *template* with this wiki list and write the result to
        ../src/main/java/org/wikipedia/staticdata/<class_name>.java.

        Extra kwargs (e.g. key=...) are merged into the template context.
        """
        data = {
            u"class_name": class_name,
            u"wikis": self.wikis
        }
        data.update(kwargs)
        rendered = self.template_env.get_template(template).render(**data)
        # Context manager guarantees the file handle is closed even if
        # write() raises (the original closed it manually and could leak
        # the handle on error).
        with codecs.open(u"../src/main/java/org/wikipedia/staticdata/" + class_name + u".java",
                         u"w", u"utf-8") as out:
            out.write(rendered)
|
||||
|
||||
|
||||
def build_wiki(lang, english_name, local_name):
    """Create a Wiki for *lang* carrying its English and local names."""
    wiki = Wiki(lang)
    wiki.props.update(english_name=english_name, local_name=local_name)
    return wiki
|
||||
|
||||
|
||||
def list_from_sitematrix():
    """Fetch the language sitematrix from mediawiki.org and return a list
    of Wiki objects, one per language that has an open Wikipedia site."""
    QUERY_SITEMATRIX = 'https://www.mediawiki.org/w/api.php?action=sitematrix' \
                       '&format=json&formatversion=2&smtype=language&smstate=all'

    print(u"Fetching languages...")
    data = json.loads(requests.get(QUERY_SITEMATRIX).text)

    wikis = []
    for key, value in data[u"sitematrix"].items():
        # Skip scalar bookkeeping entries; only dicts describe a language.
        if type(value) is not dict:
            continue
        sites = value[u"site"]
        if type(sites) is not list:
            continue
        # Remember the (last) open Wikipedia site for this language, if any.
        url = ""
        for site in sites:
            if "wikipedia.org" in site[u"url"] and u"closed" not in site:
                url = site[u"url"]
        if not url:
            continue
        wikis.append(build_wiki(value[u"code"], value[u"localname"], value[u"name"]))

    return wikis
|
||||
|
||||
|
||||
# Remove unsupported wikis.
def filter_supported_wikis(wikis):
    """Drop wikis whose language code appears in OSTRICH_WIKIS."""
    return [w for w in wikis if w.lang not in OSTRICH_WIKIS]
|
||||
|
||||
|
||||
# Apply manual tweaks to the list of wikis before they're populated.
def preprocess_wikis(wikis):
    """Append hand-maintained entries (currently just TestWiki)."""
    wikis.append(build_wiki(lang="test", english_name="Test", local_name="Test"))
    return wikis
|
||||
|
||||
|
||||
# Apply manual tweaks to the list of wikis after they're populated.
def postprocess_wikis(wiki_list):
    """Clone the Chinese wiki into Simplified/Traditional variants and
    relabel Norwegian (Bokmal) from 'no' to 'nb' (T114042)."""
    # Add Simplified and Traditional Chinese dialects.
    zh_wiki = next((w for w in wiki_list.wikis if w.lang == CHINESE_WIKI_LANG), None)
    zh_index = wiki_list.wikis.index(zh_wiki)

    # Both variants start as deep copies of the plain "zh" wiki and are
    # inserted right after it: Simplified first, then Traditional.
    variants = [
        (SIMPLIFIED_CHINESE_LANG, "Simplified Chinese", "简体中文"),
        (TRADITIONAL_CHINESE_LANG, "Traditional Chinese", "繁體中文"),
    ]
    for offset, (code, english, local) in enumerate(variants, start=1):
        variant = copy.deepcopy(zh_wiki)
        variant.lang = code
        variant.props["english_name"] = english
        variant.props["local_name"] = local
        wiki_list.wikis.insert(zh_index + offset, variant)

    bokmal = next((w for w in wiki_list.wikis if w.lang == NORWEGIAN_BOKMAL_WIKI_LANG), None)
    bokmal.lang = NORWEGIAN_BOKMAL_LANG

    return wiki_list
|
||||
|
||||
|
||||
# Populate the aliases for "Special:" and "File:" in all wikis
def populate_aliases(wikis):
    """Query each wiki's siteinfo and record its localized 'Special:' and
    'File:' namespace names in props."""
    for wiki in wikis.wikis:
        print(u"Fetching Special Page and File alias for %s" % wiki.lang)
        url = u"https://%s.wikipedia.org/w/api.php" % wiki.lang + \
              u"?action=query&meta=siteinfo&format=json&siprop=namespaces"
        namespaces = json.loads(requests.get(url).text)[u"query"][u"namespaces"]
        # according to https://www.mediawiki.org/wiki/Manual:Namespace
        # -1 is the ID for Special pages, 6 for File pages.
        wiki.props[u"special_alias"] = namespaces[u"-1"][u"*"]
        wiki.props[u"file_alias"] = namespaces[u"6"][u"*"]
    return wikis
|
||||
|
||||
|
||||
# Populates data on names of main page in each wiki
def populate_main_pages(wikis):
    """Record each wiki's localized Main Page title in props["main_page_name"]."""
    for wiki in wikis.wikis:
        print(u"Fetching Main Page for %s" % wiki.lang)
        url = u"https://%s.wikipedia.org/w/api.php" % wiki.lang + \
              u"?action=query&meta=siteinfo&format=json&siprop=general"
        general = json.loads(requests.get(url).text)[u"query"][u"general"]
        wiki.props[u"main_page_name"] = general[u"mainpage"]
    return wikis
|
||||
|
||||
|
||||
# Returns a function that renders a particular template when passed
# a WikiList object
def render_template(template, filename, **kwargs):
    """Build a chain() stage that renders *template* into *filename* and
    passes the WikiList through unchanged."""
    def _stage(wiki_list):
        wiki_list.render(template, filename, **kwargs)
        return wiki_list
    return _stage
|
||||
|
||||
|
||||
# Kinda like reduce(), but special cases first function
def chain(*funcs):
    """Call funcs[0]() to produce a seed value, then thread it through the
    remaining functions in order, returning the final result.

    The original computed the final value but silently dropped it; it is
    now returned so callers can use the pipeline's product. (Existing
    callers that ignore the return value are unaffected.)
    """
    res = funcs[0]()
    for func in funcs[1:]:
        res = func(res)
    return res
|
||||
|
||||
|
||||
# Build the wiki list, enrich it with per-wiki data from the live sites,
# then render the three generated Java classes. Each stage receives the
# previous stage's return value (see chain() above); WikiList wraps the
# plain list so later stages and render_template can call .render().
chain(
    list_from_sitematrix,
    filter_supported_wikis,
    preprocess_wikis,
    WikiList,
    populate_aliases,
    populate_main_pages,
    postprocess_wikis,
    render_template(u"basichash.java.jinja", u"SpecialAliasData", key=u"special_alias"),
    render_template(u"basichash.java.jinja", u"FileAliasData", key=u"file_alias"),
    render_template(u"basichash.java.jinja", u"MainPageNameData", key=u"main_page_name"),
)
|
||||
36
data-client/scripts/templates/basichash.java.jinja
Normal file
36
data-client/scripts/templates/basichash.java.jinja
Normal file
|
|
@@ -0,0 +1,36 @@
|
|||
/*
This file is auto-generated from a template (/scripts/templates).
If you need to modify it, make sure to modify the template, not this file.
*/
package org.wikipedia.staticdata;

import android.support.annotation.NonNull;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

{# Template context (supplied by scripts/make-templates.py):
     class_name - name of the generated class,
     wikis      - list of Wiki objects,
     key        - which wiki.props entry to emit as each map value.
   Jinja comments with a right-stripping tag are removed at render time,
   so the generated .java output is unchanged by them. -#}
public final class {{class_name}} {
    @NonNull private static final Map<String, String> DATA_MAP = Collections.unmodifiableMap(newMap());

    {# Falls back to the English entry for unknown keys; presumably "en"
       is always present in the generated data — TODO confirm, otherwise
       this returns null despite @NonNull. -#}
    @NonNull public static String valueFor(String key) {
        if (DATA_MAP.containsKey(key)) {
            return DATA_MAP.get(key);
        }
        return DATA_MAP.get("en");
    }

    @SuppressWarnings({"checkstyle:methodlength", "SpellCheckingInspection"})
    private static Map<String, String> newMap() {
        final int size = {{wikis|length}};
        Map<String, String> map = new HashMap<>(size);

        {%- for wiki in wikis %}
        map.put("{{wiki.lang}}", "{{wiki.props[key]}}");
        {%- endfor %}
        return map;
    }

    private {{class_name}}() { }
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue