summaryrefslogtreecommitdiff
path: root/src/main
diff options
context:
space:
mode:
author nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
committer nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
commit e19b7e8198c771c6d9c533765b5a68c4a52080f7 (patch)
tree 6f3e3bb20069b15e577004eb8e73f58fb04d6698 /src/main
download wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.gz
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.bz2
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.zip
Initial commit
Diffstat (limited to 'src/main')
-rw-r--r-- src/main/kotlin/DefMain.kt 183
-rw-r--r-- src/main/kotlin/ScraperMain.kt 115
2 files changed, 298 insertions, 0 deletions
diff --git a/src/main/kotlin/DefMain.kt b/src/main/kotlin/DefMain.kt
new file mode 100644
index 0000000..6d58a99
--- /dev/null
+++ b/src/main/kotlin/DefMain.kt
@@ -0,0 +1,183 @@
+import kotlinx.serialization.DeserializationStrategy
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.*
+import java.io.File
+import java.io.PrintWriter
+
+@Serializable // One entry of the module list decoded from generated.json; property names are the JSON keys.
+data class LModule(
+ val Namespace: String? = null, // When non-null, main() emits `<Namespace> = {}`; also used as the `Namespace.` prefix.
+ val Functions: List<LFunction> = listOf(), // Emitted as annotated `function ... end` stubs; defaults to empty.
+ val Tables: List<LTable> = listOf(), // Structures/enumerations (or unknown entries) of the module; defaults to empty.
+)
+
+@Serializable // A named, typed slot: used for structure fields, function arguments and function returns.
+data class LTypeUse(
+ val Type: String, // Type name, emitted verbatim when InnerType is null.
+ val InnerType: String? = null, // When set, compositeTypeStr renders `<Namespace.>InnerType[]` instead of Type.
+ val Name: String, // Identifier emitted after ---@field / ---@param / ---@return.
+ val Nilable: Boolean, // Required key; adds `|nil` (fields, returns) or `?` (params) to the annotation.
+)
+
+@Serializable // One member of an LEnumeration.
+data class LEnumValue(
+ val Type: String, // Decoded but not read by the generator code in this file.
+ val Name: String, // Member name, emitted as the Lua table key.
+ val EnumValue: Long, // Member value, emitted as `Name = EnumValue;`.
+)
+
+@Serializable // A function of a module; main() turns it into annotations plus an empty Lua stub.
+data class LFunction(
+ val Returns: List<LTypeUse> = listOf(), // One `---@return` line per entry; defaults to empty.
+ val Name: String, // Function name, prefixed with `Namespace.` when the module has a namespace.
+ val Arguments: List<LTypeUse> = listOf(), // One `---@param` line per entry and the stub's parameter list.
+)
+
+
+/**
+ * Base type for the entries of a module's `Tables` list.
+ *
+ * Deserialization is content-based: [Serializer] looks at the JSON `Type` property of each entry to pick
+ * the concrete subclass.
+ */
+@Serializable(with = LTable.Serializer::class)
+sealed class LTable {
+    /** Picks the concrete [LTable] deserializer from the `Type` property of the JSON element. */
+    object Serializer : JsonContentPolymorphicSerializer<LTable>(LTable::class) {
+        /**
+         * Returns the deserializer for `"Structure"` or `"Enumeration"` entries and falls back to
+         * [LUnknownObject] for everything else, logging the unrecognised value to stdout.
+         *
+         * A missing or non-primitive `Type` is treated as unknown instead of failing on a hard cast.
+         */
+        override fun selectDeserializer(element: JsonElement): DeserializationStrategy<LTable> {
+            // Safe casts: a malformed entry must reach the fallback branch, not throw NPE/ClassCastException.
+            val typeElement = (element as? JsonObject)?.get("Type")
+            return when ((typeElement as? JsonPrimitive)?.content) {
+                "Structure" -> LStructure.serializer()
+                "Enumeration" -> LEnumeration.serializer()
+                else -> {
+                    println("Unknown type $typeElement")
+                    LUnknownObject.serializer()
+                }
+            }
+        }
+    }
+}
+
+@Serializable // Table entry selected when the JSON `Type` is "Structure"; emitted as a `---@class`.
+data class LStructure(
+ val Name: String, // Class name, prefixed with `Namespace.` when the module has a namespace.
+ val Fields: List<LTypeUse>, // Required key; one `---@field` line per entry.
+) : LTable()
+
+@Serializable // Fallback for table entries whose `Type` is not recognised by LTable.Serializer.
+data object LUnknownObject : LTable() // main() matches it with an empty branch, so it produces no output.
+
+@Serializable // Table entry selected when the JSON `Type` is "Enumeration"; emitted as a `---@enum` table.
+data class LEnumeration(
+ val Name: String, // Enum name; emitted without a namespace prefix.
+ val MinValue: Long, // Required key; not read by the generator code in this file.
+ val NumValues: Long, // Required key; not read by the generator code in this file.
+ val MaxValue: Long, // Required key; not read by the generator code in this file.
+ val Fields: List<LEnumValue>, // Members, emitted in list order.
+) : LTable()
+
+/**
+ * Appends [text] to the receiver.
+ *
+ * @return the receiver followed by [text], or `""` when the receiver is `null`, empty or whitespace-only.
+ */
+fun String?.postFixedWithOrEmpty(text: String): String =
+    if (this == null || isBlank()) "" else "$this$text"
+
+/**
+ * Renders the annotation type for [field].
+ *
+ * Without an `InnerType` the field's `Type` is returned verbatim. Otherwise the result is the array type
+ * `<Namespace.>InnerType[]`; the namespace prefix is left out when [module] has no (non-blank) namespace.
+ */
+fun compositeTypeStr(module: LModule, field: LTypeUse): String {
+    val elementType = field.InnerType ?: return field.Type
+    val prefix = module.Namespace.postFixedWithOrEmpty(".")
+    return prefix + elementType + "[]"
+}
+
+/**
+ * Generates `definitions.lua` from `generated.json` (both relative to the working directory).
+ *
+ * The output starts with `---@meta` and a fixed list of `---@alias` lines. For every module it then
+ * writes a blank line, an optional `<Namespace> = {}` declaration, the module's tables
+ * (`---@enum` tables and `---@class`/`---@field` blocks) and finally one annotated, empty
+ * `function ... end` stub per function. Unknown table entries are skipped.
+ */
+fun main() {
+    val json1 = Json { ignoreUnknownKeys = true }
+    // Parse the input before opening the output so a missing or malformed generated.json
+    // does not leave a truncated definitions.lua behind.
+    val modules = json1.decodeFromString<List<LModule>>(File("generated.json").readText())
+
+    // Alias name -> target type, written in this order.
+    val aliases = listOf(
+        "bool" to "boolean",
+        "BigInteger" to "number",
+        "BigUInteger" to "number",
+        "cstring" to "string",
+        "CalendarEventID" to "string",
+        "ClubId" to "string",
+        "ClubInvitationId" to "string",
+        "ClubStreamId" to "string",
+        "FileAsset" to "string",
+        "fileID" to "number",
+        "GarrisonFollower" to "string",
+        "IDOrLink" to "string|number",
+        "kstringClubMessage" to "string",
+        "kstringLfgListApplicant" to "string",
+        "kstringLfgListSearch" to "string",
+        "luaIndex" to "number",
+        "ModelAsset" to "string",
+        "normalizedValue" to "string",
+        "RecruitAcceptanceID" to "string",
+        "ScriptRegion" to "SimpleScriptRegion",
+        "SimpleButtonStateToken" to "string",
+        "SingleColorValue" to "number",
+        "size" to "number",
+        "TBFFlags" to "string",
+        "TBFStyleFlags" to "string",
+        "textureAtlas" to "string",
+        "textureKit" to "string",
+        "time_t" to "number",
+        "uiAddon" to "string",
+        "uiFontHeight" to "number",
+        "uiMapID" to "number",
+        "uiUnit" to "number",
+        "UnitToken" to "string",
+        "WeeklyRewardItemDBID" to "string",
+        "WOWGUID" to "string",
+        "WOWMONEY" to "string",
+    )
+
+    // `use` closes (and thereby flushes) the writer even when generation throws.
+    PrintWriter(File("definitions.lua")).use { luaMeta ->
+        luaMeta.println("---@meta")
+        luaMeta.println()
+
+        for ((alias, target) in aliases) {
+            luaMeta.println("---@alias $alias $target")
+        }
+
+        for (module in modules) {
+            // "Namespace." or "" when the module has no (non-blank) namespace.
+            val prefix = module.Namespace.postFixedWithOrEmpty(".")
+
+            luaMeta.println()
+            if (module.Namespace != null) {
+                luaMeta.println("${module.Namespace} = {}")
+                luaMeta.println()
+            }
+
+            for (table in module.Tables) {
+                when (table) {
+                    is LEnumeration -> {
+                        luaMeta.println("---@enum ${table.Name}")
+                        luaMeta.println("${table.Name} = {")
+                        for (field in table.Fields) {
+                            luaMeta.println(" ${field.Name} = ${field.EnumValue};")
+                        }
+                        luaMeta.println("}")
+                        luaMeta.println()
+                    }
+
+                    is LStructure -> {
+                        luaMeta.println("---@class $prefix${table.Name}")
+                        for (field in table.Fields) {
+                            val nilSuffix = if (field.Nilable) "|nil" else ""
+                            luaMeta.println("---@field ${field.Name} ${compositeTypeStr(module, field)}$nilSuffix")
+                        }
+                        luaMeta.println()
+                    }
+
+                    // Unrecognised table kinds were already reported while decoding; nothing to emit.
+                    LUnknownObject -> Unit
+                }
+            }
+
+            for (func in module.Functions) {
+                for (ret in func.Returns) {
+                    val nilSuffix = if (ret.Nilable) "|nil" else ""
+                    luaMeta.println("---@return ${compositeTypeStr(module, ret)}$nilSuffix ${ret.Name}")
+                }
+                for (par in func.Arguments) {
+                    val optionalMark = if (par.Nilable) "?" else ""
+                    luaMeta.println("---@param ${par.Name}$optionalMark ${compositeTypeStr(module, par)}")
+                }
+                val parameterList = func.Arguments.joinToString { it.Name }
+                luaMeta.println("function $prefix${func.Name}($parameterList) end")
+                luaMeta.println()
+            }
+        }
+    }
+}
+
diff --git a/src/main/kotlin/ScraperMain.kt b/src/main/kotlin/ScraperMain.kt
new file mode 100644
index 0000000..674a87e
--- /dev/null
+++ b/src/main/kotlin/ScraperMain.kt
@@ -0,0 +1,115 @@
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import kotlinx.serialization.*
+import kotlinx.serialization.descriptors.PolymorphicKind
+import kotlinx.serialization.descriptors.SerialDescriptor
+import kotlinx.serialization.descriptors.buildSerialDescriptor
+import kotlinx.serialization.encoding.Decoder
+import kotlinx.serialization.json.*
+import java.io.File
+
+@Serializable // One NPC row decoded from a listing page's Listview payload; other keys are ignored by `json`.
+data class NpcListData(
+ val displayName: String, // Name shown in the listing; also turned into the URL slug by getNpcData callers.
+ val id: NpcId, // Numeric NPC id; used as the key of the scraped database.
+ val tag: String? = null, // Optional; absent keys decode to null.
+)
+
+@Serializable // Generic envelope of a Listview payload; only its `data` array is decoded.
+data class ListData<T>(
+ val data: List<T> // The listed rows, in page order.
+)
+
+
+/** Shared JSON codec; keys that the target classes do not declare are ignored while decoding. */
+val json = Json {
+    ignoreUnknownKeys = true
+}
+
+/** Captures the argument of a `new Listview(...);` call that is followed directly by a line break. */
+val ListDataPattern = Regex("new Listview\\((.*)\\);\n")
+
+/** Captures the value assigned to `g_mapperData`, up to the `;` that is followed directly by a line break. */
+val NpcLocationDataPattern = Regex("g_mapperData = (.*);\n")
+
+/** Shared Ktor HTTP client (CIO engine) used for every request of the scraper. */
+val client = HttpClient(CIO)
+
+/**
+ * Fetches the NPC listings of [zoneId] for the three `react-a` URL variants (`1`, `0`, `-1`),
+ * one request after the other, and returns the rows concatenated in that order.
+ */
+suspend fun getNpcList(zoneId: Int): List<NpcListData> {
+    val collected = mutableListOf<NpcListData>()
+    for (reaction in listOf("1", "0", "-1")) {
+        collected += getNpcList("https://www.wowhead.com/npcs/react-a:${reaction}?filter=6;$zoneId;0")
+    }
+    return collected
+}
+
+/**
+ * Downloads the listing page at [url] and decodes the NPC rows embedded in its `new Listview(...)` call.
+ *
+ * Prints a warning when exactly 1000 rows come back.
+ *
+ * @throws IllegalStateException if the page contains no Listview payload.
+ */
+suspend fun getNpcList(url: String): List<NpcListData> {
+    val html = client.get(url).bodyAsText()
+    // Fail with the offending URL instead of a bare NullPointerException when the page layout differs.
+    val match = ListDataPattern.find(html)
+        ?: error("No Listview data found in response for $url")
+    val data = json.decodeFromString<ListData<NpcListData>>(match.groupValues[1]).data
+    // NOTE(review): 1000 is presumably the site's per-listing row cap, so results may be truncated - confirm.
+    if (data.size == 1000) {
+        println("Warning: Encountered NPC limit for url $url")
+    }
+    return data
+}
+
+@Serializable
+@JvmInline
+value class ZoneId(val int: Int) // Typed wrapper around a raw Int zone id; keys the per-zone location map of an NPC.
+
+@Serializable
+@JvmInline
+value class MapId(val int: Int) // Typed wrapper around a raw Int map id; used by MapCoordinates.uiMapId.
+
+@Serializable
+@JvmInline
+value class NpcId(val int: Int) // Typed wrapper around a raw Int NPC id; used in NPC page URLs and as database key.
+
+@Serializable // One location group of an NPC inside a zone, decoded from the page's g_mapperData value.
+data class MapCoordinates(
+ val count: Int, // Required key; NOTE(review): presumably the number of entries in coords - confirm.
+ val coords: List<List<Float>>, // Required key; NOTE(review): presumably [x, y] pairs - confirm.
+ val uiMapId: MapId? = null, // Optional; absent keys decode to null.
+ val uiMapName: String? = null, // Optional; absent keys decode to null.
+)
+
+/**
+ * Decodes a JSON object keyed by zone id into a map of [ZoneId] to its [MapCoordinates] list.
+ *
+ * Entries whose value is not a JSON array are skipped. Keys must be decimal integers, otherwise
+ * `String.toInt` throws [NumberFormatException]. Only JSON input is supported.
+ */
+@OptIn(InternalSerializationApi::class, ExperimentalSerializationApi::class)
+object NpcZoneMapSerializer : DeserializationStrategy<Map<ZoneId, List<MapCoordinates>>> {
+    // Built once and stored; a getter would construct a new descriptor on every access.
+    override val descriptor: SerialDescriptor =
+        buildSerialDescriptor("NpcZoneMapSerializer", PolymorphicKind.SEALED)
+
+    /**
+     * @throws SerializationException if [decoder] is not a JSON decoder or the input is not a JSON object.
+     */
+    override fun deserialize(decoder: Decoder): Map<ZoneId, List<MapCoordinates>> {
+        val jsonDecoder = decoder as? JsonDecoder
+            ?: throw SerializationException("NpcZoneMapSerializer can only decode JSON")
+        val root = jsonDecoder.decodeJsonElement() as? JsonObject
+            ?: throw SerializationException("Expected a JSON object keyed by zone id")
+
+        return root.mapNotNull { (zoneKey, value) ->
+            // Non-array values carry no coordinate list and are dropped.
+            if (value is JsonArray) {
+                ZoneId(zoneKey.toInt()) to jsonDecoder.json.decodeFromJsonElement<List<MapCoordinates>>(value)
+            } else {
+                null
+            }
+        }.toMap()
+    }
+}
+
+/**
+ * Downloads the page of NPC [id] and decodes the per-zone locations from its `g_mapperData` assignment.
+ *
+ * The URL slug is [name] with spaces replaced by `-`, lowercased. When the page cannot be parsed, the
+ * downloaded HTML is written to `crash.html` in the working directory before the error is rethrown.
+ *
+ * @throws IllegalStateException if the page contains no `g_mapperData` assignment.
+ */
+suspend fun getNpcData(id: NpcId, name: String): Map<ZoneId, List<MapCoordinates>> {
+    val slug = name.replace(" ", "-").lowercase()
+    val html = client.get("https://www.wowhead.com/npc=${id.int}/$slug").bodyAsText()
+    try {
+        // The match is inside the try so that a page without mapper data is dumped as well.
+        val match = NpcLocationDataPattern.find(html)
+            ?: error("No g_mapperData found for NPC ${id.int} ($name)")
+        return json.decodeFromString(NpcZoneMapSerializer, match.groupValues[1])
+    } catch (e: Exception) {
+        // Dump only on failure; a `finally` here would rewrite the file after every successful fetch too.
+        File("crash.html").writeText(html)
+        throw e
+    }
+}
+
+@Serializable // Value type of the map that the scraper's main() writes to database.json.
+data class CompleteNpcData(
+ val metadata: NpcListData, // The listing row the NPC was discovered through.
+ val locations: Map<ZoneId, List<MapCoordinates>>, // Result of getNpcData for this NPC.
+)
+
+@Serializable // Wrapper around the NPC map; not referenced by the other code visible in this file.
+data class AllNpcData(
+ val npcData: Map<NpcId, CompleteNpcData> // Scraped data keyed by NPC id.
+)
+
+/**
+ * Scrapes every zone in the hard-coded zone list and writes the result to `database.json`.
+ *
+ * NPCs are de-duplicated by id across zones (the last listing row for an id wins), their location data
+ * is fetched one NPC at a time, and the resulting map of NPC id to [CompleteNpcData] is encoded as JSON.
+ * The shared HTTP client is closed when the run ends, whether it succeeded or failed.
+ */
+suspend fun main() {
+    val zoneIdsToScrape = listOf(13646)
+    try {
+        val npcsPerZone = zoneIdsToScrape.associateWith { getNpcList(it) }
+        val indexedNpcIds = npcsPerZone.flatMap { it.value }.associateBy { it.id }
+        val npcData = indexedNpcIds.mapValues { (_, npc) ->
+            CompleteNpcData(npc, getNpcData(npc.id, npc.displayName))
+        }
+        File("database.json").writeText(json.encodeToString(npcData))
+    } finally {
+        // Release the client's connections and threads; nothing else uses it after this point.
+        client.close()
+    }
+}
\ No newline at end of file