diff options
author | nea <nea@nea.moe> | 2023-09-15 17:06:05 +0200 |
---|---|---|
committer | nea <nea@nea.moe> | 2023-09-15 17:06:05 +0200 |
commit | e19b7e8198c771c6d9c533765b5a68c4a52080f7 (patch) | |
tree | 6f3e3bb20069b15e577004eb8e73f58fb04d6698 /src/main | |
download | wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.gz wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.bz2 wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.zip |
Initial commit
Diffstat (limited to 'src/main')
-rw-r--r-- | src/main/kotlin/DefMain.kt | 183 | ||||
-rw-r--r-- | src/main/kotlin/ScraperMain.kt | 115 |
2 files changed, 298 insertions, 0 deletions
// ============================================================================
// File: src/main/kotlin/DefMain.kt (new file, mode 100644)
//
// Reads `generated.json` (a list of modules containing functions, structures
// and enumerations) and writes `definitions.lua`, a Lua file consisting of
// `---@...` annotation comments plus stub declarations.
// ============================================================================
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.*
import java.io.File
import java.io.PrintWriter

/**
 * One module entry of the list decoded from `generated.json`.
 *
 * @property Namespace optional prefix; when present it is emitted as a Lua table and used to
 * qualify function, class and array element type names.
 * @property Functions functions declared by the module (empty when the key is absent).
 * @property Tables structures / enumerations declared by the module (empty when the key is absent).
 */
@Serializable
data class LModule(
    val Namespace: String? = null,
    val Functions: List<LFunction> = listOf(),
    val Tables: List<LTable> = listOf(),
)

/**
 * A typed, named slot: a function argument, a function return value or a structure field.
 *
 * @property Type the type name, used verbatim when [InnerType] is absent.
 * @property InnerType when present the slot is rendered as an array of this type (see [compositeTypeStr]).
 * @property Name the slot name.
 * @property Nilable whether the slot is rendered as optional / `|nil`.
 */
@Serializable
data class LTypeUse(
    val Type: String,
    val InnerType: String? = null,
    val Name: String,
    val Nilable: Boolean,
)

/** A single named constant of an [LEnumeration]. */
@Serializable
data class LEnumValue(
    val Type: String,
    val Name: String,
    val EnumValue: Long,
)

/** A function declaration: its name, arguments and return values. */
@Serializable
data class LFunction(
    val Returns: List<LTypeUse> = listOf(),
    val Name: String,
    val Arguments: List<LTypeUse> = listOf(),
)

/**
 * A table declared by a module. The concrete subtype is chosen by [Serializer] from the
 * JSON object's `"Type"` value.
 */
@Serializable(with = LTable.Serializer::class)
sealed class LTable {
    /**
     * Picks the deserializer for a table from its `"Type"` value:
     * `"Structure"` -> [LStructure], `"Enumeration"` -> [LEnumeration], anything else -> [LUnknownObject].
     */
    object Serializer : JsonContentPolymorphicSerializer<LTable>(LTable::class) {
        override fun selectDeserializer(element: JsonElement): DeserializationStrategy<LTable> {
            // Safe casts: an entry that is not an object, or that has no primitive "Type", takes the
            // same "unknown" fallback as an unrecognised type name instead of failing with a cast error.
            val typeElement = (element as? JsonObject)?.get("Type")
            return when ((typeElement as? JsonPrimitive)?.content) {
                "Structure" -> LStructure.serializer()
                "Enumeration" -> LEnumeration.serializer()
                else -> {
                    println("Unknown type $typeElement")
                    LUnknownObject.serializer()
                }
            }
        }
    }
}

/** A structure: rendered as a `---@class` with one `---@field` per entry of [Fields]. */
@Serializable
data class LStructure(
    val Name: String,
    val Fields: List<LTypeUse>,
) : LTable()

/** Placeholder for tables whose `"Type"` is not recognised; the generator emits nothing for it. */
@Serializable
data object LUnknownObject : LTable()

/** An enumeration: rendered as a `---@enum` table with one entry per element of [Fields]. */
@Serializable
data class LEnumeration(
    val Name: String,
    val MinValue: Long,
    val NumValues: Long,
    val MaxValue: Long,
    val Fields: List<LEnumValue>,
) : LTable()

/**
 * Returns the receiver followed by [text], or the empty string when the receiver is
 * `null`, empty or blank.
 */
fun String?.postFixedWithOrEmpty(text: String): String =
    if (isNullOrBlank()) "" else this + text

/**
 * Renders the Lua type of [field].
 *
 * Without an `InnerType` this is simply `field.Type`. With one, the result is the inner type
 * qualified by the module namespace (when there is one) and suffixed with `[]`.
 */
fun compositeTypeStr(module: LModule, field: LTypeUse): String {
    val elementType = field.InnerType ?: return field.Type
    return module.Namespace.postFixedWithOrEmpty(".") + elementType + "[]"
}

/** `alias name -> aliased Lua type`, emitted as `---@alias` lines at the top of the output. */
private val LUA_ALIASES: List<Pair<String, String>> = listOf(
    "bool" to "boolean",
    "BigInteger" to "number",
    "BigUInteger" to "number",
    "cstring" to "string",
    "CalendarEventID" to "string",
    "ClubId" to "string",
    "ClubInvitationId" to "string",
    "ClubStreamId" to "string",
    "FileAsset" to "string",
    "fileID" to "number",
    "GarrisonFollower" to "string",
    "IDOrLink" to "string|number",
    "kstringClubMessage" to "string",
    "kstringLfgListApplicant" to "string",
    "kstringLfgListSearch" to "string",
    "luaIndex" to "number",
    "ModelAsset" to "string",
    "normalizedValue" to "string",
    "RecruitAcceptanceID" to "string",
    "ScriptRegion" to "SimpleScriptRegion",
    "SimpleButtonStateToken" to "string",
    "SingleColorValue" to "number",
    "size" to "number",
    "TBFFlags" to "string",
    "TBFStyleFlags" to "string",
    "textureAtlas" to "string",
    "textureKit" to "string",
    "time_t" to "number",
    "uiAddon" to "string",
    "uiFontHeight" to "number",
    "uiMapID" to "number",
    "uiUnit" to "number",
    "UnitToken" to "string",
    "WeeklyRewardItemDBID" to "string",
    "WOWGUID" to "string",
    "WOWMONEY" to "string",
)

/** Suffix appended to the type of a nilable return value or field. */
private fun nilSuffix(use: LTypeUse): String = if (use.Nilable) "|nil" else ""

/** Writes the declaration of one table; unknown tables produce no output. */
private fun writeTable(out: PrintWriter, module: LModule, table: LTable) {
    when (table) {
        is LEnumeration -> {
            out.println("---@enum ${table.Name}")
            out.println("${table.Name} = {")
            for (entry in table.Fields) {
                // NOTE(review): the indent inside this literal is reproduced as it appears in the
                // reviewed page, where runs of whitespace may have been collapsed - confirm the width.
                out.println(" ${entry.Name} = ${entry.EnumValue};")
            }
            out.println("}")
            out.println()
        }

        is LStructure -> {
            out.println("---@class ${module.Namespace.postFixedWithOrEmpty(".")}${table.Name}")
            for (field in table.Fields) {
                out.println("---@field ${field.Name} ${compositeTypeStr(module, field)}${nilSuffix(field)}")
            }
            out.println()
        }

        LUnknownObject -> Unit
    }
}

/** Writes the `---@return` / `---@param` annotations and the empty stub for one function. */
private fun writeFunction(out: PrintWriter, module: LModule, func: LFunction) {
    for (ret in func.Returns) {
        out.println("---@return ${compositeTypeStr(module, ret)}${nilSuffix(ret)} ${ret.Name}")
    }
    for (par in func.Arguments) {
        // Optional parameters are marked on the name (`name?`), not on the type.
        out.println("---@param ${par.Name}${if (par.Nilable) "?" else ""} ${compositeTypeStr(module, par)}")
    }
    val qualifiedName = module.Namespace.postFixedWithOrEmpty(".") + func.Name
    out.println("function $qualifiedName(${func.Arguments.joinToString { it.Name }}) end")
    out.println()
}

/** Writes one module: a blank line, the namespace table (if any), then all tables and functions. */
private fun writeModule(out: PrintWriter, module: LModule) {
    out.println()
    if (module.Namespace != null) {
        out.println("${module.Namespace} = {}")
        out.println()
    }
    module.Tables.forEach { writeTable(out, module, it) }
    module.Functions.forEach { writeFunction(out, module, it) }
}

/**
 * Entry point of the definition generator: decodes `generated.json` and writes `definitions.lua`.
 *
 * The input is decoded *before* the output file is opened, so a missing or malformed
 * `generated.json` no longer leaves behind a truncated `definitions.lua`. The writer is closed
 * through `use`, so it is flushed and released even if writing fails part-way.
 */
fun main() {
    val lenientJson = Json { ignoreUnknownKeys = true }
    val modules = lenientJson.decodeFromString<List<LModule>>(File("generated.json").readText())

    PrintWriter(File("definitions.lua")).use { luaMeta ->
        luaMeta.println("---@meta")
        luaMeta.println()
        for ((alias, target) in LUA_ALIASES) {
            luaMeta.println("---@alias $alias $target")
        }
        for (module in modules) {
            writeModule(luaMeta, module)
        }
    }
}

// ============================================================================
// File: src/main/kotlin/ScraperMain.kt (new file, mode 100644)
//
// Downloads NPC listings and per-NPC location data from wowhead.com and writes
// the combined result to `database.json`.
// ============================================================================
import io.ktor.client.*
import io.ktor.client.engine.cio.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import kotlinx.serialization.*
import kotlinx.serialization.descriptors.PolymorphicKind
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.descriptors.buildSerialDescriptor
import kotlinx.serialization.encoding.Decoder
import kotlinx.serialization.json.*
import java.io.File

/** One NPC row of a listing page. */
@Serializable
data class NpcListData(
    val displayName: String,
    val id: NpcId,
    val tag: String? = null,
)

/** The listing payload; only its `data` array is read. */
@Serializable
data class ListData<T>(
    val data: List<T>
)

/** List size at which [getNpcList] warns that the listing hit a limit (presumably truncated - verify). */
private const val NPC_LIST_LIMIT = 1000

val json = Json { ignoreUnknownKeys = true }
val ListDataPattern = "new Listview\\((.*)\\);\n".toRegex()
val NpcLocationDataPattern = "g_mapperData = (.*);\n".toRegex()
val client = HttpClient(CIO)

/**
 * Fetches the NPCs of zone [zoneId] by querying the listing once per `react-a` value
 * (`1`, `0`, `-1`, in that order) and concatenating the results.
 */
suspend fun getNpcList(zoneId: Int): List<NpcListData> =
    listOf(1, 0, -1).flatMap { reaction ->
        getNpcList("https://www.wowhead.com/npcs/react-a:${reaction}?filter=6;$zoneId;0")
    }

/**
 * Downloads [url] and decodes the argument of the first `new Listview(...)` call found in it.
 *
 * Prints a warning when exactly [NPC_LIST_LIMIT] entries come back.
 *
 * @throws IllegalStateException if the page contains no `new Listview(...)` call.
 */
suspend fun getNpcList(url: String): List<NpcListData> {
    val page = client.get(url).bodyAsText()
    val match = ListDataPattern.find(page)
        ?: error("No `new Listview(...)` data found in response from $url")
    val npcs = json.decodeFromString<ListData<NpcListData>>(match.groupValues[1]).data
    if (npcs.size == NPC_LIST_LIMIT) {
        println("Warning: Encountered NPC limit for url $url")
    }
    return npcs
}

@Serializable
@JvmInline
value class ZoneId(val int: Int)

@Serializable
@JvmInline
value class MapId(val int: Int)

@Serializable
@JvmInline
value class NpcId(val int: Int)

/** Location data of an NPC within one map. */
@Serializable
data class MapCoordinates(
    val count: Int,
    val coords: List<List<Float>>,
    val uiMapId: MapId? = null,
    val uiMapName: String? = null,
)

/**
 * Decodes a JSON object keyed by zone id into a map of [MapCoordinates] lists.
 * Entries whose value is not a JSON array are skipped.
 */
@OptIn(InternalSerializationApi::class, ExperimentalSerializationApi::class)
object NpcZoneMapSerializer : DeserializationStrategy<Map<ZoneId, List<MapCoordinates>>> {
    override val descriptor: SerialDescriptor
        get() = buildSerialDescriptor("NpcZoneMapSerializer", PolymorphicKind.SEALED)

    /**
     * @throws SerializationException if [decoder] is not a JSON decoder.
     * @throws IllegalArgumentException if the root element is not a JSON object or a key is not an integer.
     */
    override fun deserialize(decoder: Decoder): Map<ZoneId, List<MapCoordinates>> {
        val jsonDecoder = decoder as? JsonDecoder
            ?: throw SerializationException("NpcZoneMapSerializer can only decode JSON input")
        val zones = jsonDecoder.decodeJsonElement().jsonObject

        return zones.mapNotNull { (zoneKey, locations) ->
            if (locations is JsonArray)
                ZoneId(zoneKey.toInt()) to jsonDecoder.json.decodeFromJsonElement<List<MapCoordinates>>(locations)
            else null
        }.toMap()
    }
}

/**
 * Downloads the page of NPC [id] and decodes the `g_mapperData` assignment found in it.
 *
 * When the data cannot be located or decoded, the downloaded page is saved to `crash.html`
 * for inspection and the failure is rethrown. Nothing is written on success.
 *
 * @param name display name of the NPC; spaces become `-` and the result is lower-cased to form the URL.
 * @throws IllegalStateException if the page contains no `g_mapperData` assignment.
 */
suspend fun getNpcData(id: NpcId, name: String): Map<ZoneId, List<MapCoordinates>> {
    val slug = name.replace(" ", "-").lowercase()
    val page = client.get("https://www.wowhead.com/npc=${id.int}/$slug").bodyAsText()
    try {
        val match = NpcLocationDataPattern.find(page)
            ?: error("No g_mapperData found on the page of NPC ${id.int} ($name)")
        return json.decodeFromString(NpcZoneMapSerializer, match.groupValues[1])
    } catch (e: Exception) {
        // Keep the original failure even if the dump itself cannot be written.
        runCatching { File("crash.html").writeText(page) }.onFailure(e::addSuppressed)
        throw e
    }
}

/** Listing metadata of an NPC together with its locations per zone. */
@Serializable
data class CompleteNpcData(
    val metadata: NpcListData,
    val locations: Map<ZoneId, List<MapCoordinates>>,
)

@Serializable
data class AllNpcData(
    val npcData: Map<NpcId, CompleteNpcData>
)

/**
 * Entry point of the scraper: lists the NPCs of every configured zone, fetches the location
 * data of each distinct NPC and writes the resulting map to `database.json`.
 */
suspend fun main() {
    val zoneIdsToScrape = listOf(13646)
    try {
        val npcsPerZone = zoneIdsToScrape.associateWith { getNpcList(it) }
        // An NPC listed in several zones (or several listings) is fetched only once.
        val indexedNpcIds = npcsPerZone.flatMap { it.value }.associateBy { it.id }
        val npcData = indexedNpcIds.mapValues { (_, npc) ->
            CompleteNpcData(npc, getNpcData(npc.id, npc.displayName))
        }
        File("database.json").writeText(json.encodeToString(npcData))
    } finally {
        // Release the HTTP client once scraping has finished or failed.
        client.close()
    }
}
\ No newline at end of file |