summaryrefslogtreecommitdiff
path: root/src/main/kotlin/ScraperMain.kt
diff options
context:
space:
mode:
author nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
committer nea <nea@nea.moe> 2023-09-15 17:06:05 +0200
commit e19b7e8198c771c6d9c533765b5a68c4a52080f7 (patch)
tree 6f3e3bb20069b15e577004eb8e73f58fb04d6698 /src/main/kotlin/ScraperMain.kt
download wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.gz
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.bz2
wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.zip
Initial commit
Diffstat (limited to 'src/main/kotlin/ScraperMain.kt')
-rw-r--r-- src/main/kotlin/ScraperMain.kt 115
1 files changed, 115 insertions, 0 deletions
diff --git a/src/main/kotlin/ScraperMain.kt b/src/main/kotlin/ScraperMain.kt
new file mode 100644
index 0000000..674a87e
--- /dev/null
+++ b/src/main/kotlin/ScraperMain.kt
@@ -0,0 +1,115 @@
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import kotlinx.serialization.*
+import kotlinx.serialization.descriptors.PolymorphicKind
+import kotlinx.serialization.descriptors.SerialDescriptor
+import kotlinx.serialization.descriptors.buildSerialDescriptor
+import kotlinx.serialization.encoding.Decoder
+import kotlinx.serialization.json.*
+import java.io.File
+
+/**
+ * One NPC entry as decoded from the `data` array of a listing payload (see [getNpcList]).
+ *
+ * @property displayName name of the NPC; [getNpcData] also uses it to build the URL slug of the NPC page.
+ * @property id numeric identifier of the NPC.
+ * @property tag optional extra label; `null` when the payload does not provide one.
+ *   NOTE(review): the meaning of this field is not visible in this file.
+ */
+@Serializable
+data class NpcListData(
+    val displayName: String,
+    val id: NpcId,
+    val tag: String? = null,
+)
+
+/**
+ * Generic envelope of a listing payload: only the `data` array is modelled.
+ *
+ * @param T element type of the `data` array ([NpcListData] for the NPC listings decoded in this file).
+ * @property data the decoded rows, in payload order.
+ */
+@Serializable
+data class ListData<T>(
+    val data: List<T>,
+)
+
+
+/** Shared JSON codec; keys that the data classes do not declare are ignored while decoding. */
+val json = Json {
+    ignoreUnknownKeys = true
+}
+
+/**
+ * Captures the argument of a `new Listview(...);` call that is immediately followed by a line break.
+ * `.` does not match line terminators, so the capture is confined to a single line.
+ */
+val ListDataPattern = Regex("new Listview\\((.*)\\);\n")
+
+/** Captures the right-hand side of a single-line `g_mapperData = ...;` assignment followed by a line break. */
+val NpcLocationDataPattern = Regex("g_mapperData = (.*);\n")
+
+/** HTTP client (CIO engine) shared by every request made in this file. */
+val client = HttpClient(CIO)
+
+/**
+ * Collects the NPC listings of one zone.
+ *
+ * Three listing pages are requested one after another, with `react-a` set to `1`, `0` and `-1`,
+ * and their rows are concatenated in that order.
+ * NOTE(review): presumably the three values are the reaction towards one faction and the split keeps
+ * each page below the 1000-row cap that the URL overload warns about; confirm.
+ *
+ * @param zoneId zone identifier inserted into the `filter=6;<zoneId>;0` query parameter.
+ * @return all rows of the three listings; rows are not de-duplicated here.
+ */
+suspend fun getNpcList(zoneId: Int): List<NpcListData> {
+    val reactions = listOf(1, 0, -1)
+    return reactions.flatMap { reaction ->
+        getNpcList("https://www.wowhead.com/npcs/react-a:${reaction}?filter=6;${zoneId};0")
+    }
+}
+
+/**
+ * Downloads one listing page and decodes the NPC rows embedded in it.
+ *
+ * The page is searched with [ListDataPattern]; the captured text is decoded as [ListData] of
+ * [NpcListData] with the shared [json] codec.
+ *
+ * @param url full URL of the listing page.
+ * @return the decoded rows, in payload order.
+ * @throws IllegalStateException if the page contains no `new Listview(...)` payload
+ *   (for example after a layout change or when an error page was served).
+ */
+suspend fun getNpcList(url: String): List<NpcListData> {
+    val page = client.get(url).bodyAsText()
+    // Fail with the offending URL instead of a bare NullPointerException from `!!`.
+    val match = ListDataPattern.find(page)
+        ?: error("No Listview payload found in the response for $url")
+    val npcs = json.decodeFromString<ListData<NpcListData>>(match.groupValues[1]).data
+    // Exactly 1000 rows is treated as a sign that the listing was truncated.
+    if (npcs.size == 1000) {
+        println("Warning: Encountered NPC limit for url $url")
+    }
+    return npcs
+}
+
+/** Type-safe wrapper around the integer identifier of a zone; used as the key of NPC location maps. */
+@JvmInline
+@Serializable
+value class ZoneId(val int: Int)
+
+/** Type-safe wrapper around the integer identifier of a UI map (see [MapCoordinates.uiMapId]). */
+@JvmInline
+@Serializable
+value class MapId(val int: Int)
+
+/** Type-safe wrapper around the integer identifier of an NPC. */
+@JvmInline
+@Serializable
+value class NpcId(val int: Int)
+
+/**
+ * One group of locations of an NPC, as found in the per-zone arrays of the `g_mapperData` payload.
+ *
+ * @property count number reported by the payload.
+ *   NOTE(review): presumably the number of entries in [coords]; confirm.
+ * @property coords list of coordinate tuples.
+ *   NOTE(review): presumably `[x, y]` pairs; the tuple layout is not visible in this file.
+ * @property uiMapId identifier of the UI map, or `null` when the payload omits it.
+ * @property uiMapName name of the UI map, or `null` when the payload omits it.
+ */
+@Serializable
+data class MapCoordinates(
+    val count: Int,
+    val coords: List<List<Float>>,
+    val uiMapId: MapId? = null,
+    val uiMapName: String? = null,
+)
+
+/**
+ * Decode-only strategy for a JSON object that maps zone ids to arrays of [MapCoordinates].
+ *
+ * Entries whose value is not a JSON array are dropped silently. It only works with JSON input:
+ * the decoder is cast to [JsonDecoder] and the root element to [JsonObject], so anything else
+ * fails with a `ClassCastException`. A non-numeric key fails with a `NumberFormatException`.
+ */
+@OptIn(InternalSerializationApi::class, ExperimentalSerializationApi::class)
+object NpcZoneMapSerializer : DeserializationStrategy<Map<ZoneId, List<MapCoordinates>>> {
+    // A fresh descriptor is built on every access.
+    // NOTE(review): the SEALED kind looks like a placeholder; nothing in this file inspects it.
+    override val descriptor: SerialDescriptor
+        get() = buildSerialDescriptor("NpcZoneMapSerializer", PolymorphicKind.SEALED)
+
+    override fun deserialize(decoder: Decoder): Map<ZoneId, List<MapCoordinates>> {
+        val jsonDecoder = decoder as JsonDecoder
+        val root = jsonDecoder.decodeJsonElement() as JsonObject
+
+        return root.entries
+            // Only array-valued entries carry coordinate groups; everything else is ignored.
+            .filter { (_, value) -> value is JsonArray }
+            .associate { (zoneKey, value) ->
+                ZoneId(zoneKey.toInt()) to jsonDecoder.json.decodeFromJsonElement<List<MapCoordinates>>(value)
+            }
+    }
+}
+
+/**
+ * Downloads the page of one NPC and decodes its per-zone location data.
+ *
+ * The page URL is built from the numeric id plus a slug derived from [name] (spaces replaced by
+ * `-`, then lower-cased). The `g_mapperData` assignment is located with [NpcLocationDataPattern]
+ * and decoded with [NpcZoneMapSerializer].
+ *
+ * If the payload is missing or cannot be decoded, the downloaded page is written to `crash.html`
+ * in the working directory for inspection and the failure is rethrown. Nothing is written when
+ * decoding succeeds.
+ *
+ * @param id identifier of the NPC.
+ * @param name display name of the NPC, used only for the URL slug.
+ * @return coordinate groups keyed by zone.
+ * @throws IllegalStateException if the page contains no `g_mapperData` assignment.
+ */
+suspend fun getNpcData(id: NpcId, name: String): Map<ZoneId, List<MapCoordinates>> {
+    val slug = name.replace(" ", "-").lowercase()
+    val page = client.get("https://www.wowhead.com/npc=${id.int}/$slug").bodyAsText()
+    try {
+        val match = NpcLocationDataPattern.find(page)
+            ?: error("No g_mapperData assignment found on the page of NPC ${id.int} ($name)")
+        return json.decodeFromString(NpcZoneMapSerializer, match.groupValues[1])
+    } catch (e: Exception) {
+        // Keep the offending page for debugging, then let the original failure propagate.
+        // The try block has no suspension point, so no cancellation is intercepted here.
+        File("crash.html").writeText(page)
+        throw e
+    }
+}
+
+/**
+ * Everything gathered about one NPC.
+ *
+ * @property metadata the listing row of the NPC (see [NpcListData]).
+ * @property locations coordinate groups keyed by zone, as returned by [getNpcData].
+ */
+@Serializable
+data class CompleteNpcData(
+    val metadata: NpcListData,
+    val locations: Map<ZoneId, List<MapCoordinates>>,
+)
+
+/**
+ * Wrapper around the complete scrape result.
+ *
+ * @property npcData the gathered data of every NPC, keyed by NPC id.
+ */
+@Serializable
+data class AllNpcData(
+    val npcData: Map<NpcId, CompleteNpcData>,
+)
+
+/**
+ * Entry point: scrapes every NPC of the configured zones and writes the result to `database.json`.
+ *
+ * NPCs are indexed by id, so an NPC listed more than once is fetched and stored only once (the
+ * last listing row wins). The output is the JSON encoding of a map from NPC id to
+ * [CompleteNpcData], written to the working directory. The shared HTTP client is closed before
+ * returning, whether scraping succeeded or failed.
+ */
+suspend fun main() {
+    val zoneIdsToScrape = listOf(13646)
+    try {
+        val npcsById = zoneIdsToScrape
+            .flatMap { getNpcList(it) }
+            .associateBy { it.id }
+        val npcData = npcsById.mapValues { (id, npc) ->
+            CompleteNpcData(npc, getNpcData(id, npc.displayName))
+        }
+        File("database.json").writeText(json.encodeToString(npcData))
+    } finally {
+        // Release the client's connections and threads once no further request will be made.
+        client.close()
+    }
+}
\ No newline at end of file