diff options
author | nea <nea@nea.moe> | 2023-09-15 17:06:05 +0200 |
---|---|---|
committer | nea <nea@nea.moe> | 2023-09-15 17:06:05 +0200 |
commit | e19b7e8198c771c6d9c533765b5a68c4a52080f7 (patch) | |
tree | 6f3e3bb20069b15e577004eb8e73f58fb04d6698 /src/main/kotlin/ScraperMain.kt | |
download | wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.gz wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.tar.bz2 wowheadscraper-e19b7e8198c771c6d9c533765b5a68c4a52080f7.zip |
Initial commit
Diffstat (limited to 'src/main/kotlin/ScraperMain.kt')
-rw-r--r-- | src/main/kotlin/ScraperMain.kt | 115 |
1 files changed, 115 insertions, 0 deletions
import io.ktor.client.*
import io.ktor.client.engine.cio.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import kotlinx.serialization.*
import kotlinx.serialization.descriptors.PolymorphicKind
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.descriptors.buildSerialDescriptor
import kotlinx.serialization.encoding.Decoder
import kotlinx.serialization.json.*
import java.io.File
import java.io.IOException

/**
 * One row of an NPC listing, as decoded from the `data` array of a Listview payload.
 *
 * Only these fields are read; any other keys in the payload are dropped because [json]
 * is configured with `ignoreUnknownKeys`.
 */
@Serializable
data class NpcListData(
    val displayName: String,
    val id: NpcId,
    val tag: String? = null,
)

/** Generic envelope for a Listview payload: the rows live under the `data` key. */
@Serializable
data class ListData<T>(
    val data: List<T>
)

/** Shared JSON codec; tolerates keys that the data classes in this file do not declare. */
val json = Json { ignoreUnknownKeys = true }

/** Captures the argument of a `new Listview(...);` call that is terminated by a newline. */
val ListDataPattern = "new Listview\\((.*)\\);\n".toRegex()

/** Captures the value assigned to `g_mapperData` on a line terminated by `;` and a newline. */
val NpcLocationDataPattern = "g_mapperData = (.*);\n".toRegex()

/** Shared HTTP client used for every request; closed by [main] once scraping has finished. */
val client = HttpClient(CIO)

/** Row count at which [getNpcList] warns that a listing has presumably been truncated. */
private const val NPC_LIST_LIMIT = 1000

/**
 * Values substituted into the `react-a:` URL segment, in request order.
 * NOTE(review): presumably the listing is split by reaction to stay under the row limit — confirm.
 */
private val REACTION_FILTERS = listOf(1, 0, -1)

/**
 * Fetches the NPC listings of [zoneId], issuing one request per entry of [REACTION_FILTERS]
 * and concatenating the results in that order.
 */
suspend fun getNpcList(zoneId: Int): List<NpcListData> =
    REACTION_FILTERS.flatMap { reaction ->
        getNpcList("https://www.wowhead.com/npcs/react-a:$reaction?filter=6;$zoneId;0")
    }

/**
 * Downloads [url], extracts the first [ListDataPattern] match and decodes its rows.
 *
 * Prints a warning when exactly [NPC_LIST_LIMIT] rows come back.
 *
 * @throws IllegalStateException if the page contains no Listview payload.
 * @throws SerializationException if the payload cannot be decoded.
 */
suspend fun getNpcList(url: String): List<NpcListData> {
    val page = client.get(url).bodyAsText()
    val match = ListDataPattern.find(page) ?: error("No Listview payload found at $url")
    val npcs = json.decodeFromString<ListData<NpcListData>>(match.groupValues[1]).data
    if (npcs.size == NPC_LIST_LIMIT) {
        println("Warning: Encountered NPC limit for url $url")
    }
    return npcs
}

/** Type-safe wrapper around a numeric zone id. */
@Serializable
@JvmInline
value class ZoneId(val int: Int)

/** Type-safe wrapper around a numeric UI map id. */
@Serializable
@JvmInline
value class MapId(val int: Int)

/** Type-safe wrapper around a numeric NPC id. */
@Serializable
@JvmInline
value class NpcId(val int: Int)

/**
 * One entry of the per-zone location list decoded by [NpcZoneMapSerializer].
 *
 * NOTE(review): each inner list of [coords] is presumably an `[x, y]` pair — confirm against real data.
 */
@Serializable
data class MapCoordinates(
    val count: Int,
    val coords: List<List<Float>>,
    val uiMapId: MapId? = null,
    val uiMapName: String? = null,
)

/**
 * Decodes a JSON object keyed by zone id into a map of location lists.
 *
 * Entries whose value is not a JSON array are skipped. Works with JSON input only.
 */
@OptIn(InternalSerializationApi::class, ExperimentalSerializationApi::class)
object NpcZoneMapSerializer : DeserializationStrategy<Map<ZoneId, List<MapCoordinates>>> {
    // Built once instead of on every property access.
    override val descriptor: SerialDescriptor =
        buildSerialDescriptor("NpcZoneMapSerializer", PolymorphicKind.SEALED)

    /**
     * @throws SerializationException if [decoder] is not a [JsonDecoder] or the input is not a JSON object.
     * @throws NumberFormatException if a key whose value is an array is not an integer.
     */
    override fun deserialize(decoder: Decoder): Map<ZoneId, List<MapCoordinates>> {
        val jsonDecoder = decoder as? JsonDecoder
            ?: throw SerializationException("NpcZoneMapSerializer only supports JSON input")
        val root = jsonDecoder.decodeJsonElement() as? JsonObject
            ?: throw SerializationException("Expected a JSON object keyed by zone id")

        return root.mapNotNull { (zoneKey, value) ->
            if (value is JsonArray) {
                ZoneId(zoneKey.toInt()) to jsonDecoder.json.decodeFromJsonElement<List<MapCoordinates>>(value)
            } else {
                null
            }
        }.toMap()
    }
}

/**
 * Downloads the page of NPC [id] and decodes its `g_mapperData` assignment.
 *
 * The URL slug is [name] lowercased with spaces replaced by dashes. When extraction or
 * decoding fails, the downloaded page is written to `crash.html` for inspection and the
 * failure is rethrown; nothing is written on success.
 *
 * @throws IllegalStateException if the page contains no `g_mapperData` assignment.
 * @throws SerializationException if the captured data cannot be decoded.
 */
suspend fun getNpcData(id: NpcId, name: String): Map<ZoneId, List<MapCoordinates>> {
    val url = "https://www.wowhead.com/npc=${id.int}/${name.replace(" ", "-").lowercase()}"
    val page = client.get(url).bodyAsText()
    return try {
        val match = NpcLocationDataPattern.find(page) ?: error("No g_mapperData assignment found at $url")
        json.decodeFromString(NpcZoneMapSerializer, match.groupValues[1])
    } catch (e: Exception) {
        // Keep the offending page for debugging, but never let a failed dump hide the real error.
        try {
            File("crash.html").writeText(page)
        } catch (dumpFailure: IOException) {
            e.addSuppressed(dumpFailure)
        }
        throw e
    }
}

/** Listing metadata of an NPC together with its locations per zone. */
@Serializable
data class CompleteNpcData(
    val metadata: NpcListData,
    val locations: Map<ZoneId, List<MapCoordinates>>,
)

/** Wrapper holding the data of all NPCs keyed by id. Not referenced elsewhere in this file. */
@Serializable
data class AllNpcData(
    val npcData: Map<NpcId, CompleteNpcData>
)

/**
 * Scrapes every NPC of the configured zones and writes the result to `database.json`
 * as a JSON object keyed by NPC id. The shared [client] is closed when scraping ends,
 * whether it succeeded or failed.
 */
suspend fun main() {
    val zoneIdsToScrape = listOf(13646)
    try {
        // An NPC listed more than once is kept only once; the last occurrence wins.
        val npcsById = zoneIdsToScrape
            .flatMap { getNpcList(it) }
            .associateBy { it.id }
        val npcData = npcsById.mapValues { (_, npc) ->
            CompleteNpcData(npc, getNpcData(npc.id, npc.displayName))
        }
        File("database.json").writeText(json.encodeToString(npcData))
    } finally {
        client.close()
    }
}
\ No newline at end of file |