From 8c13c015031a0de865d2e767cd8e879754f803e2 Mon Sep 17 00:00:00 2001 From: shedaniel Date: Fri, 5 Aug 2022 01:30:08 +0800 Subject: More work --- .../search/method/unihan/BomopofoInputMethod.java | 142 +++++++++++++++++++++ .../search/method/unihan/JyutpingInputMethod.java | 77 +++++++++++ .../search/method/unihan/PinyinInputMethod.java | 136 ++++++++++++++++++++ .../search/method/unihan/UniHanInputMethod.java | 82 ++++++++++++ .../client/search/method/unihan/UniHanManager.java | 123 ++++++++++++++++++ 5 files changed, 560 insertions(+) create mode 100644 runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/BomopofoInputMethod.java create mode 100644 runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/JyutpingInputMethod.java create mode 100644 runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/PinyinInputMethod.java create mode 100644 runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanInputMethod.java create mode 100644 runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanManager.java (limited to 'runtime-engine/default-runtime-plugin/src/main/java') diff --git a/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/BomopofoInputMethod.java b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/BomopofoInputMethod.java new file mode 100644 index 000000000..0054c2bc7 --- /dev/null +++ b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/BomopofoInputMethod.java @@ -0,0 +1,142 @@ +/* + * This file is licensed under the MIT License, part of Roughly Enough Items. 
+ * Copyright (c) 2018, 2019, 2020, 2021, 2022 shedaniel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package me.shedaniel.rei.impl.client.search.method.unihan; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import net.minecraft.network.chat.Component; +import net.minecraft.network.chat.TranslatableComponent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * MIT License + *

+ * Copyright (c) 2019 Juntong Liu + *

+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + *

+ * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + *

+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +public class BomopofoInputMethod extends PinyinInputMethod { + private static final Map CONVERSION = Stream.of(new String[][]{ + {"", ""}, {"0", ""}, {"1", " "}, {"2", "6"}, {"3", "3"}, + {"4", "4"}, {"a", "8"}, {"ai", "9"}, {"an", "0"}, {"ang", ";"}, + {"ao", "l"}, {"b", "1"}, {"c", "h"}, {"ch", "t"}, {"d", "2"}, + {"e", "k"}, {"ei", "o"}, {"en", "p"}, {"eng", "/"}, {"er", "-"}, + {"f", "z"}, {"g", "e"}, {"h", "c"}, {"i", "u"}, {"ia", "u8"}, + {"ian", "u0"}, {"iang", "u;"}, {"iao", "ul"}, {"ie", "u,"}, {"in", "up"}, + {"ing", "u/"}, {"iong", "m/"}, {"iu", "u."}, {"j", "r"}, {"k", "d"}, + {"l", "x"}, {"m", "a"}, {"n", "s"}, {"o", "i"}, {"ong", "j/"}, + {"ou", "."}, {"p", "q"}, {"q", "f"}, {"r", "b"}, {"s", "n"}, + {"sh", "g"}, {"t", "w"}, {"u", "j"}, {"ua", "j8"}, {"uai", "j9"}, + {"uan", "j0"}, {"uang", "j;"}, {"uen", "mp"}, {"ueng", "j/"}, {"ui", "jo"}, + {"un", "jp"}, {"uo", "ji"}, {"v", "m"}, {"van", "m0"}, {"vang", "m;"}, + {"ve", "m,"}, {"vn", "mp"}, {"w", "j"}, {"x", "v"}, {"y", "u"}, + {"z", "y"}, {"zh", "5"}, + }).collect(Collectors.toMap(d -> IntList.of(d[0].codePoints().toArray()), d -> IntList.of(d[1].trim().codePoints().toArray()))); + + public BomopofoInputMethod(UniHanManager manager) { + super(manager); + } + + @Override + public Component getName() { + return new TranslatableComponent("text.rei.input.methods.bopomofo"); + } + + @Override + public Component getDescription() { + return new TranslatableComponent("text.rei.input.methods.bopomofo.description"); + } + 
+ @Override + protected ExpendedChar asExpendedChar(String string) { + IntList codepoints = new IntArrayList(string.length() + 1); + int[] tone = {-1}; + string.codePoints().forEach(codepoint -> { + if (codepoint == 'ü') { + codepoints.add('v'); + return; + } + ToneEntry toneEntry = toneMap.get(codepoint); + if (toneEntry == null) { + codepoints.add(codepoint); + } else { + codepoints.add(toneEntry.codepoint()); + tone[0] = toneEntry.tone(); + } + }); + if (tone[0] != -1) { + codepoints.add(Character.forDigit(tone[0], 10)); + } + List phonemes = standard(codepoints).stream().map(str -> CONVERSION.getOrDefault(str, str)).toList(); + return new ExpendedChar(phonemes); + } + + private static List standard(IntList s) { + List ret = new ArrayList<>(); + int cursor = 0; + + // initial + if (hasInitial(s)) { + cursor = s.size() > 2 && s.getInt(1) == 'h' ? 2 : 1; + ret.add(s.subList(0, cursor)); + } + + // final + if (s.size() != cursor + 1 && s.size() - 1 > cursor) { + ret.add(s.subList(cursor, s.size() - 1)); + } + + // tone + if (s.size() >= 1) { + ret.add(s.subList(s.size() - 1, s.size())); + } + + return ret; + } + + private static boolean hasInitial(IntList s) { + return Stream.of('a', 'e', 'i', 'o', 'u', 'v').noneMatch(i -> s.getInt(0) == i); + } +} diff --git a/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/JyutpingInputMethod.java b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/JyutpingInputMethod.java new file mode 100644 index 000000000..5a48d55f6 --- /dev/null +++ b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/JyutpingInputMethod.java @@ -0,0 +1,77 @@ +/* + * This file is licensed under the MIT License, part of Roughly Enough Items. 
+ * Copyright (c) 2018, 2019, 2020, 2021, 2022 shedaniel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package me.shedaniel.rei.impl.client.search.method.unihan; + +import it.unimi.dsi.fastutil.ints.IntList; +import me.shedaniel.rei.api.client.search.method.CharacterUnpackingInputMethod; +import me.shedaniel.rei.api.client.search.method.InputMethod; +import me.shedaniel.rei.api.common.util.CollectionUtils; +import net.minecraft.network.chat.Component; +import net.minecraft.network.chat.TranslatableComponent; + +import java.util.Collections; +import java.util.List; +
/**
 * Input method backed by the {@code kCantonese} field of the UniHan data, exposed under
 * the {@code text.rei.input.methods.jyutping} translation keys.
 * <p>
 * Readings are parsed by the inherited {@code asExpendedChar}; this class only selects
 * the field and answers lookups from {@code dataMap}.
 */
public class JyutpingInputMethod extends UniHanInputMethod implements CharacterUnpackingInputMethod {
    /**
     * @param manager manager that provides the UniHan data
     */
    public JyutpingInputMethod(UniHanManager manager) {
        super(manager);
    }

    /**
     * Returns the filter unchanged, as a single sequence of its code points.
     */
    @Override
    public Iterable<IntList> expendFilter(String filter) {
        return Collections.singletonList(IntList.of(filter.codePoints().toArray()));
    }

    /**
     * Looks up the readings loaded for a character.
     *
     * @param codePoint the character to expand
     * @return the loaded readings, or a single entry containing just {@code codePoint}
     *         when nothing was loaded for it
     */
    @Override
    public List<ExpendedChar> expendSourceChar(int codePoint) {
        List<ExpendedChar> sequences = dataMap.get(codePoint);
        if (sequences == null || sequences.isEmpty()) {
            return List.of(new ExpendedChar(List.of(IntList.of(codePoint))));
        }
        return sequences;
    }

    @Override
    protected String getFieldKey() {
        return "kCantonese";
    }

    @Override
    protected String getFieldDelimiter() {
        return " ";
    }

    /**
     * Returns every known locale whose code starts with {@code zh_}.
     */
    @Override
    public List<Locale> getMatchingLocales() {
        return CollectionUtils.filterToList(InputMethod.getAllLocales(), locale -> locale.code().startsWith("zh_"));
    }

    @Override
    public Component getName() {
        return new TranslatableComponent("text.rei.input.methods.jyutping");
    }

    @Override
    public Component getDescription() {
        return new TranslatableComponent("text.rei.input.methods.jyutping.description");
    }
}
diff --git a/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/PinyinInputMethod.java b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/PinyinInputMethod.java new file mode 100644 index 000000000..6c317e0dd --- /dev/null +++
b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/PinyinInputMethod.java @@ -0,0 +1,136 @@ +/* + * This file is licensed under the MIT License, part of Roughly Enough Items. + * Copyright (c) 2018, 2019, 2020, 2021, 2022 shedaniel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package me.shedaniel.rei.impl.client.search.method.unihan; + +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntList; +import me.shedaniel.rei.api.client.search.method.CharacterUnpackingInputMethod; +import me.shedaniel.rei.api.client.search.method.InputMethod; +import me.shedaniel.rei.api.common.util.CollectionUtils; +import net.minecraft.network.chat.Component; +import net.minecraft.network.chat.TranslatableComponent; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +
/**
 * Input method backed by the {@code kMandarin} field of the UniHan data, exposed under
 * the {@code text.rei.input.methods.pinyin} translation keys.
 * <p>
 * Tone-marked vowels in a reading are replaced by the plain letter and the tone number is
 * appended as a trailing digit, so {@code "zhōng"} becomes the letters of {@code zhong}
 * followed by {@code 1}.
 */
public class PinyinInputMethod extends UniHanInputMethod implements CharacterUnpackingInputMethod {
    /** Maps a tone-marked vowel to its base letter and tone number. */
    protected final Int2ObjectMap<ToneEntry> toneMap;

    /**
     * Base letter and tone number (1-4) of one tone-marked vowel.
     */
    protected record ToneEntry(int codepoint, int tone) {}

    /**
     * Creates the input method and fills {@link #toneMap} with the four marked forms of
     * a, e, i, o, u and ü (stored as {@code v}).
     *
     * @param manager manager that provides the UniHan data
     */
    public PinyinInputMethod(UniHanManager manager) {
        super(manager);
        toneMap = new Int2ObjectOpenHashMap<>();
        addTone('ā', "a1");
        addTone('á', "a2");
        addTone('ǎ', "a3");
        addTone('à', "a4");
        addTone('ē', "e1");
        addTone('é', "e2");
        addTone('ě', "e3");
        addTone('è', "e4");
        addTone('ī', "i1");
        addTone('í', "i2");
        addTone('ǐ', "i3");
        addTone('ì', "i4");
        addTone('ō', "o1");
        addTone('ó', "o2");
        addTone('ǒ', "o3");
        addTone('ò', "o4");
        addTone('ū', "u1");
        addTone('ú', "u2");
        addTone('ǔ', "u3");
        addTone('ù', "u4");
        addTone('ǖ', "v1");
        addTone('ǘ', "v2");
        addTone('ǚ', "v3");
        addTone('ǜ', "v4");
    }

    /**
     * Registers one tone-marked vowel.
     *
     * @param c the marked character
     * @param s two characters: the base letter followed by the tone digit
     */
    private void addTone(char c, String s) {
        toneMap.put(c, new ToneEntry(s.charAt(0), Character.digit(s.charAt(1), 10)));
    }

    /**
     * Returns the filter unchanged, as a single sequence of its code points.
     */
    @Override
    public Iterable<IntList> expendFilter(String filter) {
        return Collections.singletonList(IntList.of(filter.codePoints().toArray()));
    }

    /**
     * Looks up the readings loaded for a character.
     *
     * @param codePoint the character to expand
     * @return the loaded readings, or a single entry containing just {@code codePoint}
     *         when nothing was loaded for it
     */
    @Override
    public List<ExpendedChar> expendSourceChar(int codePoint) {
        List<ExpendedChar> sequences = dataMap.get(codePoint);
        if (sequences == null || sequences.isEmpty()) {
            return List.of(new ExpendedChar(List.of(IntList.of(codePoint))));
        }
        return sequences;
    }

    @Override
    protected String getFieldKey() {
        return "kMandarin";
    }

    @Override
    protected String getFieldDelimiter() {
        return " ";
    }

    /**
     * Returns every known locale whose code starts with {@code zh_}.
     */
    @Override
    public List<Locale> getMatchingLocales() {
        return CollectionUtils.filterToList(InputMethod.getAllLocales(), locale -> locale.code().startsWith("zh_"));
    }

    @Override
    public Component getName() {
        return new TranslatableComponent("text.rei.input.methods.pinyin");
    }

    @Override
    public Component getDescription() {
        return new TranslatableComponent("text.rei.input.methods.pinyin.description");
    }

    /**
     * Converts one reading into a list of single-code-point phonemes.
     * <p>
     * {@code ü} becomes {@code v}; a tone-marked vowel becomes its base letter. If the
     * reading contained a tone mark, the tone digit is appended as the last phoneme;
     * readings without a mark get no digit.
     *
     * @param string a single reading, e.g. {@code "zhōng"}
     * @return one phoneme per letter, plus the tone digit when a mark was present
     */
    @Override
    protected ExpendedChar asExpendedChar(String string) {
        List<IntList> codepoints = new ArrayList<>(string.length() + 1);
        int tone = -1; // -1 = no tone mark seen
        for (int codepoint : string.codePoints().toArray()) {
            if (codepoint == 'ü') {
                codepoints.add(IntList.of('v'));
                continue;
            }
            ToneEntry toneEntry = toneMap.get(codepoint);
            if (toneEntry == null) {
                codepoints.add(IntList.of(codepoint));
            } else {
                codepoints.add(IntList.of(toneEntry.codepoint()));
                tone = toneEntry.tone();
            }
        }
        if (tone != -1) {
            codepoints.add(IntList.of(Character.forDigit(tone, 10)));
        }
        return new ExpendedChar(codepoints);
    }
}
diff --git a/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanInputMethod.java b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanInputMethod.java new file mode 100644 index 000000000..1402c54ba --- /dev/null +++ b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanInputMethod.java @@ -0,0 +1,82 @@ +/* + * This file is licensed under the MIT License, part of Roughly Enough Items.
+ * Copyright (c) 2018, 2019, 2020, 2021, 2022 shedaniel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package me.shedaniel.rei.impl.client.search.method.unihan; + +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntList; +import me.shedaniel.rei.api.client.search.method.CharacterUnpackingInputMethod.ExpendedChar; +import me.shedaniel.rei.api.client.search.method.InputMethod; +import me.shedaniel.rei.api.common.util.CollectionUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +
/**
 * Base class for input methods whose data comes from one field of the UniHan data.
 * <p>
 * Subclasses name the field ({@link #getFieldKey()}) and the delimiter between readings
 * ({@link #getFieldDelimiter()}); {@link #load()} then fills {@link #dataMap} with the
 * parsed readings of every character that has that field.
 */
public abstract class UniHanInputMethod implements InputMethod {
    protected final UniHanManager manager;
    /** Readings per character code point, filled by {@link #load()}. */
    protected Int2ObjectMap<List<ExpendedChar>> dataMap = new Int2ObjectOpenHashMap<>();

    /**
     * @param manager manager used to download and read the UniHan data
     */
    public UniHanInputMethod(UniHanManager manager) {
        this.manager = manager;
    }

    /**
     * @return the UniHan field name whose records this input method consumes
     */
    protected abstract String getFieldKey();

    /**
     * @return the regular expression that separates the readings within one record
     *         (it is passed to {@link String#split(String)})
     */
    protected abstract String getFieldDelimiter();

    /**
     * Clears previously loaded data, downloads the UniHan archive if needed and loads
     * this input method's field, each step running on {@code executor}.
     */
    @Override
    public CompletableFuture<Void> prepare(Executor executor) {
        return dispose(executor)
                .thenRunAsync(manager::download, executor)
                .thenRunAsync(this::load, executor);
    }

    /**
     * Reads the UniHan data and appends the readings of every record whose field name
     * equals {@link #getFieldKey()} to {@link #dataMap}.
     *
     * @throws RuntimeException if reading the data fails with an {@link IOException}
     */
    public void load() {
        // Both values are loop-invariant; resolve them once instead of once per record.
        String wantedField = getFieldKey();
        String delimiter = getFieldDelimiter();
        try {
            manager.load((codepoint, fieldKey, data) -> {
                if (!fieldKey.equals(wantedField)) {
                    return;
                }
                String[] strings = data.split(delimiter);
                List<ExpendedChar> sequences = dataMap.computeIfAbsent(codepoint, value -> new ArrayList<>(strings.length));
                for (String string : strings) {
                    sequences.add(asExpendedChar(string));
                }
            });
        } catch (IOException e) {
            throw new RuntimeException("Failed to load UniHan field " + wantedField, e);
        }
    }

    /**
     * Converts one reading into phonemes; by default every code point of the reading is
     * its own phoneme.
     *
     * @param string a single reading taken from the field data
     * @return the reading as a sequence of phonemes
     */
    protected ExpendedChar asExpendedChar(String string) {
        return new ExpendedChar(CollectionUtils.map(IntList.of(string.codePoints().toArray()), IntList::of));
    }

    /**
     * Clears {@link #dataMap} on {@code executor}.
     */
    @Override
    public CompletableFuture<Void> dispose(Executor executor) {
        return CompletableFuture.runAsync(dataMap::clear, executor);
    }
}
diff --git
a/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanManager.java b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanManager.java new file mode 100644 index 000000000..bf36fc130 --- /dev/null +++ b/runtime-engine/default-runtime-plugin/src/main/java/me/shedaniel/rei/impl/client/search/method/unihan/UniHanManager.java @@ -0,0 +1,123 @@ +/* + * This file is licensed under the MIT License, part of Roughly Enough Items. + * Copyright (c) 2018, 2019, 2020, 2021, 2022 shedaniel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package me.shedaniel.rei.impl.client.search.method.unihan; + +import me.shedaniel.rei.impl.common.InternalLogger; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.LineIterator; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.zip.ZipInputStream; +
/**
 * Downloads the UniHan zip archive to a local path and streams its records to a
 * {@link DataConsumer}.
 */
public class UniHanManager {
    private final Path unihanPath;

    /**
     * @param unihanPath where the downloaded archive is stored
     */
    public UniHanManager(Path unihanPath) {
        this.unihanPath = unihanPath;
    }

    /**
     * @return {@code true} if a file already exists at the archive path
     */
    public boolean downloaded() {
        return Files.exists(unihanPath);
    }

    /**
     * Downloads the archive unless a file already exists at the archive path.
     * <p>
     * The response is buffered in memory and written to disk only after it has been read
     * completely, so an interrupted transfer leaves no file behind. The input stream and
     * the connection are released on every path, including failures.
     *
     * @throws RuntimeException if the download or the write fails
     */
    public void download() {
        if (downloaded()) return;
        try {
            URL url = new URL("https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip");
            Files.deleteIfExists(unihanPath);
            Path parent = unihanPath.getParent();
            if (parent != null) Files.createDirectories(parent);
            HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
            try {
                // -1 when the server does not announce a length; progress is then not reported.
                long completeFileSize = httpConnection.getContentLengthLong();
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
                try (BufferedInputStream inputStream = new BufferedInputStream(httpConnection.getInputStream())) {
                    byte[] data = new byte[1024];
                    long downloadedFileSize = 0;
                    int lastPercent = 0;
                    int x;
                    while ((x = inputStream.read(data, 0, data.length)) >= 0) {
                        outputStream.write(data, 0, x);
                        downloadedFileSize += x;
                        if (completeFileSize > 0) {
                            int percent = (int) (downloadedFileSize * 100 / completeFileSize);
                            if (percent > lastPercent) {
                                lastPercent = percent;
                                InternalLogger.getInstance().debug("Downloading UniHan Progress: %d%%".formatted(percent));
                            }
                        }
                    }
                }
                Files.write(unihanPath, outputStream.toByteArray(), StandardOpenOption.CREATE);
                InternalLogger.getInstance().debug("Downloaded UniHan");
            } finally {
                httpConnection.disconnect();
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to download UniHan to " + unihanPath, e);
        }
    }

    /**
     * @return the path the archive is stored at
     */
    public Path getUnihanPath() {
        return unihanPath;
    }

    /**
     * Reads every entry of the archive as UTF-8 text and passes each record to
     * {@code consumer}.
     *
     * @param consumer receives one call per data line
     * @throws IOException              if the archive cannot be opened or read
     * @throws IllegalArgumentException if a data line is malformed
     */
    public void load(DataConsumer consumer) throws IOException {
        try (ZipInputStream inputStream = new ZipInputStream(Files.newInputStream(getUnihanPath()))) {
            while (inputStream.getNextEntry() != null) {
                // NOTE(review): the LineIterator is not closed here; closing it would
                // presumably close the shared zip stream as well — confirm before changing.
                read(IOUtils.lineIterator(inputStream, StandardCharsets.UTF_8), consumer);
            }
        }
    }

    /**
     * Parses the lines of one archive entry. Empty lines and lines starting with
     * {@code #} are skipped; every other line must have the form
     * {@code U+<hex code point> TAB <field> TAB <data>}.
     *
     * @throws IllegalArgumentException if a line does not start with {@code U+}, lacks
     *                                  one of the two tabs, or has a non-hex code point
     */
    private void read(LineIterator lines, DataConsumer consumer) {
        int i = 0;
        while (lines.hasNext()) {
            i++;
            String line = lines.nextLine();
            if (line.startsWith("#") || line.isEmpty()) continue;
            if (!line.startsWith("U+")) {
                throw new IllegalArgumentException("Invalid line: " + i + ", " + line);
            }
            int firstTab = line.indexOf('\t');
            int secondTab = firstTab < 0 ? -1 : line.indexOf('\t', firstTab + 1);
            if (secondTab < 0) {
                // Report a missing separator the same way as a missing "U+" prefix instead
                // of failing later with a StringIndexOutOfBoundsException.
                throw new IllegalArgumentException("Invalid line: " + i + ", " + line);
            }
            int codePoint = Integer.parseInt(line.substring(2, firstTab), 16);
            String fieldKey = line.substring(firstTab + 1, secondTab);
            String data = line.substring(secondTab + 1);
            consumer.read(codePoint, fieldKey, data);
        }
    }

    /**
     * Callback receiving one UniHan record.
     */
    @FunctionalInterface
    public interface DataConsumer {
        /**
         * @param codepoint the character the record belongs to
         * @param fieldKey  the field name of the record
         * @param data      the remainder of the line after the second tab
         */
        void read(int codepoint, String fieldKey, String data);
    }
}
-- cgit