summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
committer nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
commit e19b7e8198c771c6d9c533765b5a68c4a52080f7 (patch)
tree 6f3e3bb20069b15e577004eb8e73f58fb04d6698
downloadwowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.gz
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.bz2
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.zip
Initial commit
-rw-r--r--.gitignore46
-rw-r--r--build.gradle.kts27
-rw-r--r--gradle.properties1
-rw-r--r--gradle/wrapper/gradle-wrapper.jarbin0 -> 60756 bytes
-rw-r--r--gradle/wrapper/gradle-wrapper.properties5
-rwxr-xr-xgradlew234
-rw-r--r--gradlew.bat89
-rw-r--r--settings.gradle.kts12
-rw-r--r--src/main/kotlin/DefMain.kt183
-rw-r--r--src/main/kotlin/ScraperMain.kt115
10 files changed, 712 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..57c2565
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,46 @@
+.gradle
+build/
+!gradle/wrapper/gradle-wrapper.jar
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### IntelliJ IDEA ###
+.idea/modules.xml
+.idea/jarRepositories.xml
+.idea/compiler.xml
+.idea/libraries/
+*.iws
+*.iml
+*.ipr
+out/
+!**/src/main/**/out/
+!**/src/test/**/out/
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+bin/
+!**/src/main/**/bin/
+!**/src/test/**/bin/
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store
+
+/*.html
+/*.json
+LinneasNPCList
diff --git a/build.gradle.kts b/build.gradle.kts
new file mode 100644
index 0000000..c47c75d
--- /dev/null
+++ b/build.gradle.kts
@@ -0,0 +1,27 @@
+// Build script for the project: Kotlin/JVM with the kotlinx.serialization compiler plugin.
+plugins {
+ kotlin("jvm") version "1.9.0"
+ kotlin("plugin.serialization") version "1.9.0"
+}
+
+group = "moe.nea"
+version = "1.0-SNAPSHOT"
+
+repositories {
+ mavenCentral()
+}
+
+// Runtime dependencies: Ktor HTTP client (core + CIO engine) and kotlinx.serialization JSON.
+// NOTE(review): neither DefMain.kt nor ScraperMain.kt in this commit imports Selenium —
+// confirm whether the selenium-java dependency is still needed.
+dependencies {
+ testImplementation(kotlin("test"))
+ implementation("io.ktor:ktor-client-core:2.3.3")
+ implementation("io.ktor:ktor-client-cio:2.3.3")
+ implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.0")
+ implementation("org.seleniumhq.selenium:selenium-java:4.12.1")
+}
+
+tasks.test {
+ useJUnitPlatform()
+}
+
+// Compile and run against a Java 17 toolchain.
+kotlin {
+ jvmToolchain(17)
+} \ No newline at end of file
diff --git a/gradle.properties b/gradle.properties
new file mode 100644
index 0000000..7fc6f1f
--- /dev/null
+++ b/gradle.properties
@@ -0,0 +1 @@
+kotlin.code.style=official
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000..249e583
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.jar
Binary files differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..06febab
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists \ No newline at end of file
diff --git a/gradlew b/gradlew
new file mode 100755
index 0000000..1b6c787
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,234 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+# Gradle start up script for POSIX generated by Gradle.
+#
+# Important for running:
+#
+# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+# noncompliant, but you have some other compliant shell such as ksh or
+# bash, then to run this script, type that shell name before the whole
+# command line, like:
+#
+# ksh Gradle
+#
+# Busybox and similar reduced shells will NOT work, because this script
+# requires all of these POSIX shell features:
+# * functions;
+# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+# * compound commands having a testable exit status, especially «case»;
+# * various built-in commands including «command», «set», and «ulimit».
+#
+# Important for patching:
+#
+# (2) This script targets any POSIX shell, so it avoids extensions provided
+# by Bash, Ksh, etc; in particular arrays are avoided.
+#
+# The "traditional" practice of packing multiple parameters into a
+# space-separated string is a well documented source of bugs and security
+# problems, so this is (mostly) avoided, by progressively accumulating
+# options in "$@", and eventually passing that to Java.
+#
+# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+# see the in-line comments for details.
+#
+# There are tweaks for specific operating systems such as AIX, CygWin,
+# Darwin, MinGW, and NonStop.
+#
+# (3) This script is generated from the Groovy template
+# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+# within the Gradle project.
+#
+# You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+ APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
+ [ -h "$app_path" ]
+do
+ ls=$( ls -ld "$app_path" )
+ link=${ls#*' -> '}
+ case $link in #(
+ /*) app_path=$link ;; #(
+ *) app_path=$APP_HOME$link ;;
+ esac
+done
+
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
+
+APP_NAME="Gradle"
+APP_BASE_NAME=${0##*/}
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+# Print all arguments as one message on stderr (the whole function body is redirected with >&2).
+warn () {
+ echo "$*"
+} >&2
+
+# Print all arguments as one message on stderr, framed by blank lines, then exit with status 1.
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in #(
+ CYGWIN* ) cygwin=true ;; #(
+ Darwin* ) darwin=true ;; #(
+ MSYS* | MINGW* ) msys=true ;; #(
+ NONSTOP* ) nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD=$JAVA_HOME/jre/sh/java
+ else
+ JAVACMD=$JAVA_HOME/bin/java
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD=java
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+ case $MAX_FD in #(
+ max*)
+ MAX_FD=$( ulimit -H -n ) ||
+ warn "Could not query maximum file descriptor limit"
+ esac
+ case $MAX_FD in #(
+ '' | soft) :;; #(
+ *)
+ ulimit -n "$MAX_FD" ||
+ warn "Could not set maximum file descriptor limit to $MAX_FD"
+ esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+# * args from the command line
+# * the main class name
+# * -classpath
+# * -D...appname settings
+# * --module-path (only if needed)
+# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+ APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+ CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+ JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ for arg do
+ if
+ case $arg in #(
+ -*) false ;; # don't mess with options #(
+ /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
+ [ -e "$t" ] ;; #(
+ *) false ;;
+ esac
+ then
+ arg=$( cygpath --path --ignore --mixed "$arg" )
+ fi
+ # Roll the args list around exactly as many times as the number of
+ # args, so each arg winds up back in the position where it started, but
+ # possibly modified.
+ #
+ # NB: a `for` loop captures its iteration list before it begins, so
+ # changing the positional parameters here affects neither the number of
+ # iterations, nor the values presented in `arg`.
+ shift # remove old arg
+ set -- "$@" "$arg" # push replacement arg
+ done
+fi
+
+# Collect all arguments for the java command;
+# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+# shell script including quotes and variable substitutions, so put them in
+# double quotes to make sure that they get re-expanded; and
+# * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+ "-Dorg.gradle.appname=$APP_BASE_NAME" \
+ -classpath "$CLASSPATH" \
+ org.gradle.wrapper.GradleWrapperMain \
+ "$@"
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+# set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+ printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+ xargs -n1 |
+ sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+ tr '\n' ' '
+ )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 0000000..107acd3
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/settings.gradle.kts b/settings.gradle.kts
new file mode 100644
index 0000000..f177aed
--- /dev/null
+++ b/settings.gradle.kts
@@ -0,0 +1,12 @@
+// Gradle settings: where plugins are resolved from, settings-level plugins, and the root project name.
+pluginManagement {
+ repositories {
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+
+// NOTE(review): presumably this resolver is what provisions the JDK requested via
+// jvmToolchain(17) in build.gradle.kts — confirm.
+plugins {
+ id("org.gradle.toolchains.foojay-resolver-convention") version "0.5.0"
+}
+
+rootProject.name = "WowHeadScraper" \ No newline at end of file
diff --git a/src/main/kotlin/DefMain.kt b/src/main/kotlin/DefMain.kt
new file mode 100644
index 0000000..6d58a99
--- /dev/null
+++ b/src/main/kotlin/DefMain.kt
@@ -0,0 +1,183 @@
+import kotlinx.serialization.DeserializationStrategy
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.*
+import java.io.File
+import java.io.PrintWriter
+
+/**
+ * One element of the top-level list that [main] decodes from `generated.json`.
+ *
+ * @property Namespace optional Lua table name; when present the generator emits `Namespace = {}`
+ * and prefixes class and function names with it.
+ * @property Functions functions declared by the module; empty when the key is absent.
+ * @property Tables structures and enumerations declared by the module; empty when the key is absent.
+ */
+@Serializable
+data class LModule(
+    val Namespace: String? = null,
+    val Functions: List<LFunction> = emptyList(),
+    val Tables: List<LTable> = emptyList(),
+)
+
+/**
+ * A typed, named slot: used for function arguments, function returns and structure fields.
+ *
+ * Property names are the JSON keys of the input document and must not be renamed.
+ *
+ * @property Type the declared type name; emitted as-is when [InnerType] is null.
+ * @property InnerType when non-null the slot is rendered as an array of this type (`InnerType[]`).
+ * @property Name the argument, return value or field name.
+ * @property Nilable whether the generator marks the slot as optional (`|nil` or `?`).
+ */
+@Serializable
+data class LTypeUse(
+ val Type: String,
+ val InnerType: String? = null,
+ val Name: String,
+ val Nilable: Boolean,
+)
+
+/**
+ * A single member of an [LEnumeration].
+ *
+ * The generator emits it as `Name = EnumValue;`. [Type] is decoded but not read anywhere in this file.
+ */
+@Serializable
+data class LEnumValue(
+ val Type: String,
+ val Name: String,
+ val EnumValue: Long,
+)
+
+/**
+ * A function declared by an [LModule].
+ *
+ * @property Returns return values, emitted as `---@return` lines; empty when the key is absent.
+ * @property Name function name, emitted after the module namespace (if any).
+ * @property Arguments parameters, emitted as `---@param` lines and in the Lua signature;
+ * empty when the key is absent.
+ */
+@Serializable
+data class LFunction(
+    val Returns: List<LTypeUse> = emptyList(),
+    val Name: String,
+    val Arguments: List<LTypeUse> = emptyList(),
+)
+
+
+/**
+ * Base type of every entry in [LModule.Tables].
+ *
+ * The concrete subtype is chosen from the JSON `"Type"` discriminator by [Serializer].
+ */
+@Serializable(with = LTable.Serializer::class)
+sealed class LTable {
+    /**
+     * Content-based polymorphic serializer: `"Structure"` maps to [LStructure], `"Enumeration"` to
+     * [LEnumeration], anything else to [LUnknownObject] after logging the offending value.
+     */
+    object Serializer : JsonContentPolymorphicSerializer<LTable>(LTable::class) {
+        override fun selectDeserializer(element: JsonElement): DeserializationStrategy<LTable> {
+            // Safe casts: a missing or non-primitive "Type" (or a non-object element) falls through to
+            // the unknown-type branch instead of dying with an NPE / ClassCastException.
+            val type = (element as? JsonObject)?.get("Type")
+            return when ((type as? JsonPrimitive)?.content) {
+                "Structure" -> LStructure.serializer()
+                "Enumeration" -> LEnumeration.serializer()
+                else -> {
+                    println("Unknown type " + type)
+                    LUnknownObject.serializer()
+                }
+            }
+        }
+    }
+}
+
+/**
+ * A table whose `"Type"` is `"Structure"`.
+ *
+ * The generator emits it as a `---@class` annotation followed by one `---@field` line per entry of [Fields].
+ */
+@Serializable
+data class LStructure(
+ val Name: String,
+ val Fields: List<LTypeUse>,
+) : LTable()
+
+/** Placeholder for tables whose `"Type"` is not recognised by [LTable.Serializer]; the generator emits nothing for it. */
+@Serializable
+data object LUnknownObject : LTable()
+
+/**
+ * A table whose `"Type"` is `"Enumeration"`.
+ *
+ * The generator emits it as an `---@enum` annotation followed by a Lua table with one entry per
+ * element of [Fields]. [MinValue], [NumValues] and [MaxValue] are required in the input but are not
+ * read by the generator in this file.
+ */
+@Serializable
+data class LEnumeration(
+ val Name: String,
+ val MinValue: Long,
+ val NumValues: Long,
+ val MaxValue: Long,
+ val Fields: List<LEnumValue>,
+) : LTable()
+
+/**
+ * Appends [text] to the receiver, or yields the empty string when the receiver is null or blank.
+ *
+ * @param text suffix to append to a non-blank receiver.
+ * @return `this + text` for a non-blank receiver, otherwise `""`.
+ */
+fun String?.postFixedWithOrEmpty(text: String): String =
+    if (!this.isNullOrBlank()) this + text else ""
+
+/**
+ * Renders the Lua type annotation for [field].
+ *
+ * @param module module the field belongs to; its namespace prefixes array element types.
+ * @param field the slot to render.
+ * @return `field.Type` when there is no inner type, otherwise `<Namespace.>InnerType[]`, where the
+ * namespace prefix is omitted if the module namespace is null or blank.
+ */
+fun compositeTypeStr(module: LModule, field: LTypeUse): String {
+    val elementType = field.InnerType ?: return field.Type
+    val namespacePrefix = module.Namespace.postFixedWithOrEmpty(".")
+    return namespacePrefix + elementType + "[]"
+}
+
+/**
+ * Reads the module list from `generated.json` and writes annotated Lua stubs to `definitions.lua`.
+ *
+ * Output layout: a `---@meta` header, a fixed list of `---@alias` lines, then for every module its
+ * optional namespace table, its enumerations (`---@enum`) and structures (`---@class`), and one stub
+ * per function with `---@return` / `---@param` annotations.
+ *
+ * The writer is wrapped in `use` so it is flushed and closed even when reading or decoding the
+ * input throws.
+ */
+fun main() {
+    val json1 = Json { ignoreUnknownKeys = true }
+    // Alias lines emitted verbatim right after the header.
+    val aliasLines = listOf(
+        "---@alias bool boolean",
+        "---@alias BigInteger number",
+        "---@alias BigUInteger number",
+        "---@alias cstring string",
+        "---@alias CalendarEventID string",
+        "---@alias ClubId string",
+        "---@alias ClubInvitationId string",
+        "---@alias ClubStreamId string",
+        "---@alias FileAsset string",
+        "---@alias fileID number",
+        "---@alias GarrisonFollower string",
+        "---@alias IDOrLink string|number",
+        "---@alias kstringClubMessage string",
+        "---@alias kstringLfgListApplicant string",
+        "---@alias kstringLfgListSearch string",
+        "---@alias luaIndex number",
+        "---@alias ModelAsset string",
+        "---@alias normalizedValue string",
+        "---@alias RecruitAcceptanceID string",
+        "---@alias ScriptRegion SimpleScriptRegion",
+        "---@alias SimpleButtonStateToken string",
+        "---@alias SingleColorValue number",
+        "---@alias size number",
+        "---@alias TBFFlags string",
+        "---@alias TBFStyleFlags string",
+        "---@alias textureAtlas string",
+        "---@alias textureKit string",
+        "---@alias time_t number",
+        "---@alias uiAddon string",
+        "---@alias uiFontHeight number",
+        "---@alias uiMapID number",
+        "---@alias uiUnit number",
+        "---@alias UnitToken string",
+        "---@alias WeeklyRewardItemDBID string",
+        "---@alias WOWGUID string",
+        "---@alias WOWMONEY string",
+    )
+    PrintWriter(File("definitions.lua")).use { luaMeta ->
+        luaMeta.println("---@meta")
+        luaMeta.println()
+
+        for (aliasLine in aliasLines) {
+            luaMeta.println(aliasLine)
+        }
+
+        for (module in json1.decodeFromString<List<LModule>>(File("generated.json").readText())) {
+            luaMeta.println()
+            if (module.Namespace != null) {
+                luaMeta.println("${module.Namespace} = {}")
+                luaMeta.println()
+            }
+            for (table in module.Tables) {
+                when (table) {
+                    is LEnumeration -> {
+                        luaMeta.println("---@enum ${table.Name}")
+                        luaMeta.println("${table.Name} = {")
+                        for (field in table.Fields) {
+                            luaMeta.println(" ${field.Name} = ${field.EnumValue};")
+                        }
+                        luaMeta.println("}")
+                        luaMeta.println()
+                    }
+
+                    is LStructure -> {
+                        luaMeta.println("---@class ${module.Namespace.postFixedWithOrEmpty(".")}${table.Name}")
+                        for (field in table.Fields) {
+                            luaMeta.println(
+                                "---@field ${field.Name} ${
+                                    compositeTypeStr(
+                                        module,
+                                        field
+                                    )
+                                }${if (field.Nilable) "|nil" else ""}"
+                            )
+                        }
+                        luaMeta.println()
+                    }
+
+                    // Unrecognised table kinds were already reported during decoding; emit nothing.
+                    LUnknownObject -> {
+                    }
+                }
+            }
+            for (func in module.Functions) {
+                for (ret in func.Returns) {
+                    luaMeta.println(
+                        "---@return ${
+                            compositeTypeStr(
+                                module,
+                                ret
+                            )
+                        }${if (ret.Nilable) "|nil" else ""} ${ret.Name}"
+                    )
+                }
+                for (par in func.Arguments) {
+                    luaMeta.println("---@param ${par.Name}${if (par.Nilable) "?" else ""} ${compositeTypeStr(module, par)}")
+                }
+                luaMeta.println("function ${module.Namespace.postFixedWithOrEmpty(".")}${func.Name}(${func.Arguments.joinToString { it.Name }}) end")
+                luaMeta.println()
+            }
+        }
+    }
+}
+
diff --git a/src/main/kotlin/ScraperMain.kt b/src/main/kotlin/ScraperMain.kt
new file mode 100644
index 0000000..674a87e
--- /dev/null
+++ b/src/main/kotlin/ScraperMain.kt
@@ -0,0 +1,115 @@
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import kotlinx.serialization.*
+import kotlinx.serialization.descriptors.PolymorphicKind
+import kotlinx.serialization.descriptors.SerialDescriptor
+import kotlinx.serialization.descriptors.buildSerialDescriptor
+import kotlinx.serialization.encoding.Decoder
+import kotlinx.serialization.json.*
+import java.io.File
+
+/**
+ * One NPC entry as decoded from the `data` array of a listing page (see [getNpcList]).
+ *
+ * @property displayName NPC name; also used to build the detail-page URL in [getNpcData].
+ * @property id numeric NPC id.
+ * @property tag optional; null when the key is absent from the entry.
+ */
+@Serializable
+data class NpcListData(
+ val displayName: String,
+ val id: NpcId,
+ val tag: String? = null,
+)
+
+/** Envelope with a single `data` list; used by [getNpcList] to decode the listing payload. */
+@Serializable
+data class ListData<T>(
+ val data: List<T>
+)
+
+
+/** Shared JSON codec; keys that the target classes do not declare are skipped. */
+val json: Json = Json { ignoreUnknownKeys = true }
+
+/** Captures the argument of the first `new Listview(...);` call that ends a line. */
+val ListDataPattern: Regex = Regex("new Listview\\((.*)\\);\n")
+
+/** Captures the value assigned to `g_mapperData` on a line ending with `;`. */
+val NpcLocationDataPattern: Regex = Regex("g_mapperData = (.*);\n")
+
+/** Shared HTTP client backed by the CIO engine. */
+val client: HttpClient = HttpClient(CIO)
+
+/**
+ * Fetches the NPC listings for [zoneId] with the `react-a` URL segment set to 1, 0 and -1 (in that
+ * order) and returns the three result lists concatenated.
+ *
+ * NOTE(review): `react-a` presumably selects a reaction filter on the site — confirm.
+ */
+suspend fun getNpcList(zoneId: Int): List<NpcListData> {
+    val combined = mutableListOf<NpcListData>()
+    for (reactValue in listOf(1, 0, -1)) {
+        combined += getNpcList("https://www.wowhead.com/npcs/react-a:$reactValue?filter=6;$zoneId;0")
+    }
+    return combined
+}
+
+/**
+ * Downloads [url], extracts the argument of the first `new Listview(...)` call and decodes its
+ * `data` array.
+ *
+ * Prints a warning when exactly 1000 entries come back, which this code treats as the sign that
+ * the listing was truncated.
+ *
+ * @param url full listing URL to fetch.
+ * @return the decoded NPC entries.
+ * @throws IllegalStateException if the page contains no `new Listview(...)` block.
+ */
+suspend fun getNpcList(url: String): List<NpcListData> {
+    val string = client.get(url).bodyAsText()
+    // Fail with a message naming the URL instead of a bare NullPointerException from `!!`.
+    val match = ListDataPattern.find(string)
+        ?: error("No Listview data found in the response for $url")
+    val jsonData = match.groupValues[1]
+    val data = json.decodeFromString<ListData<NpcListData>>(jsonData).data
+    if (data.size == 1000) {
+        println("Warning: Encountered NPC limit for url $url")
+    }
+    return data
+}
+
+/** Type-safe wrapper around a numeric zone id; used as the key of the per-NPC location map. */
+@Serializable
+@JvmInline
+value class ZoneId(val int: Int)
+
+/** Type-safe wrapper around a numeric map id (see [MapCoordinates.uiMapId]). */
+@Serializable
+@JvmInline
+value class MapId(val int: Int)
+
+/** Type-safe wrapper around a numeric NPC id; interpolated into the detail-page URL by [getNpcData]. */
+@Serializable
+@JvmInline
+value class NpcId(val int: Int)
+
+/**
+ * One element of the array stored per zone in the `g_mapperData` payload.
+ *
+ * @property count decoded as-is; NOTE(review): presumably the number of entries in [coords] — confirm.
+ * @property coords list of float lists; NOTE(review): presumably `[x, y]` pairs — confirm against the payload.
+ * @property uiMapId optional map id; null when absent.
+ * @property uiMapName optional map name; null when absent.
+ */
+@Serializable
+data class MapCoordinates(
+ val count: Int,
+ val coords: List<List<Float>>,
+ val uiMapId: MapId? = null,
+ val uiMapName: String? = null,
+)
+
+/**
+ * JSON-only deserializer for the `g_mapperData` object: a JSON object whose keys are zone ids and
+ * whose values are arrays of [MapCoordinates].
+ *
+ * Entries whose value is not a JSON array are skipped. A key that is not an integer still fails
+ * with [NumberFormatException].
+ */
+@OptIn(InternalSerializationApi::class, ExperimentalSerializationApi::class)
+object NpcZoneMapSerializer : DeserializationStrategy<Map<ZoneId, List<MapCoordinates>>> {
+    // Built once instead of on every property access.
+    override val descriptor: SerialDescriptor =
+        buildSerialDescriptor("NpcZoneMapSerializer", PolymorphicKind.SEALED)
+
+    /**
+     * @throws SerializationException if [decoder] is not a JSON decoder or the input is not a JSON object.
+     */
+    override fun deserialize(decoder: Decoder): Map<ZoneId, List<MapCoordinates>> {
+        val jsonDecoder = decoder as? JsonDecoder
+            ?: throw SerializationException("NpcZoneMapSerializer can only be used with the JSON format")
+        val root = jsonDecoder.decodeJsonElement() as? JsonObject
+            ?: throw SerializationException("Expected a JSON object keyed by zone id")
+
+        return root.mapNotNull { (zoneKey, value) ->
+            if (value is JsonArray)
+                ZoneId(zoneKey.toInt()) to jsonDecoder.json.decodeFromJsonElement<List<MapCoordinates>>(value)
+            else null
+        }.toMap()
+    }
+}
+
+/**
+ * Downloads the detail page of one NPC and decodes its `g_mapperData` block.
+ *
+ * The page URL is built from the numeric id and the name, lower-cased with spaces replaced by `-`.
+ * When the block cannot be found or decoded, the raw page is written to `crash.html` for inspection
+ * and the failure is rethrown; successful calls leave no dump behind.
+ *
+ * @param id NPC id used in the URL.
+ * @param name NPC display name used for the URL slug.
+ * @return coordinates grouped by zone id.
+ * @throws IllegalStateException if the page contains no `g_mapperData` assignment.
+ */
+suspend fun getNpcData(id: NpcId, name: String): Map<ZoneId, List<MapCoordinates>> {
+    val string = client.get("https://www.wowhead.com/npc=${id.int}/${name.replace(" ", "-").lowercase()}").bodyAsText()
+    try {
+        // The lookup sits inside the try so that a missing block also produces a dump.
+        val match = NpcLocationDataPattern.find(string)
+            ?: error("No g_mapperData block found on the page of NPC ${id.int} ($name)")
+        return json.decodeFromString(NpcZoneMapSerializer, match.groupValues[1])
+    } catch (e: Exception) {
+        // Dump only on failure; the previous `finally` rewrote the file on every call.
+        File("crash.html").writeText(string)
+        throw e
+    }
+}
+
+/** Listing entry of an NPC together with its per-zone coordinates; the value type written to `database.json`. */
+@Serializable
+data class CompleteNpcData(
+ val metadata: NpcListData,
+ val locations: Map<ZoneId, List<MapCoordinates>>,
+)
+
+/**
+ * Wrapper around the id-to-data map.
+ *
+ * NOTE(review): not referenced anywhere in this file — `main` serializes the bare map instead; confirm intent.
+ */
+@Serializable
+data class AllNpcData(
+ val npcData: Map<NpcId, CompleteNpcData>
+)
+
+/**
+ * Scrapes every NPC of the configured zones and writes the result to `database.json`.
+ *
+ * NPCs are de-duplicated by id before their detail pages are fetched. The shared HTTP client is
+ * closed when the run finishes, whether it succeeded or failed, so the engine's resources are released.
+ */
+suspend fun main() {
+    try {
+        val zoneIdsToScrape = listOf(13646)
+        val npcsPerZone = zoneIdsToScrape.associateWith { getNpcList(it) }
+        val indexedNpcIds = npcsPerZone.flatMap { it.value }.associateBy { it.id }
+        val npcData = indexedNpcIds.values.associate { it.id to CompleteNpcData(it, getNpcData(it.id, it.displayName)) }
+        File("database.json").writeText(json.encodeToString(npcData))
+    } finally {
+        client.close()
+    }
+} \ No newline at end of file