#!/usr/bin/env python3
# Challenge 165
#
# Task 2: Line of Best Fit
# Submitted by: Ryan J Thompson
#
# When you have a scatter plot of points, a line of best fit is the line that
# best describes the relationship between the points, and is very useful in
# statistics. Otherwise known as linear regression, here is an example of what
# such a line might look like:
#
# Hull
#
# The method most often used is known as the least squares method, as it is
# straightforward and efficient, but you may use any method that generates the
# correct result.
#
# Calculate the line of best fit for the following 48 points:
#
# 333,129 39,189 140,156 292,134 393,52 160,166 362,122 13,193
# 341,104 320,113 109,177 203,152 343,100 225,110 23,186 282,102
# 284,98 205,133 297,114 292,126 339,112 327,79 253,136 61,169
# 128,176 346,72 316,103 124,162 65,181 159,137 212,116 337,86
# 215,136 153,137 390,104 100,180 76,188 77,181 69,195 92,186
# 275,96 250,147 34,174 213,134 186,129 189,154 361,82 363,89
import sys
def svg_header(width, height):
    """Return the opening lines of an SVG document.

    The result is an XML declaration followed by the opening ``<svg>`` root
    tag, newline-terminated. The caller is responsible for appending the
    drawing elements and the matching closing tag.

    Args:
        width: Canvas width in user units, interpolated into ``width="..."``.
        height: Canvas height in user units, interpolated into
            ``height="..."``.

    Returns:
        The header markup as a ``str``.
    """
    # No leading newline: an XML declaration is only valid at the very start
    # of the document.
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
        '<svg xmlns="http://www.w3.org/2000/svg" version="1.1" '
        f'width="{width}" height="{height}">\n'
    )
def svg_circle(cx, cy, r):
    """Return an SVG ``<circle>`` element as a newline-terminated string.

    Args:
        cx: X coordinate of the circle's centre.
        cy: Y coordinate of the circle's centre.
        r: Radius of the circle.

    Returns:
        The element markup as a ``str``. No fill is specified, so renderers
        apply the SVG default fill.
    """
    return f'<circle cx="{cx}" cy="{cy}" r="{r}" />\n'
def svg_point(cx, cy):
    """Return the markup for one data point.

    A point is rendered as whatever ``svg_circle`` produces for a circle of
    radius 1 centred on ``(cx, cy)``.
    """
    point_radius = 1
    return svg_circle(cx, cy, point_radius)
def svg_line(x1, y1, x2, y2):
    """Return an SVG ``<line>`` element as a newline-terminated string.

    Args:
        x1: X coordinate of the start point.
        y1: Y coordinate of the start point.
        x2: X coordinate of the end point.
        y2: Y coordinate of the end point.

    Returns:
        The element markup as a ``str``.
    """
    # A stroke is given explicitly: an SVG line has no interior to fill and
    # the default stroke is "none", so without it nothing would be drawn.
    return (
        f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" '
        'stroke="black" />\n'
    )
def least_squares(points):
    """Fit the line ``y = m * x + b`` to *points* by ordinary least squares.

    Args:
        points: A sized iterable of ``(x, y)`` pairs.

    Returns:
        A ``(m, b)`` tuple: the slope and the y-intercept of the fitted line.

    Raises:
        ZeroDivisionError: If *points* is empty, or if the denominator
            ``N * sum(x^2) - sum(x)^2`` is zero (for example when every
            point has the same x).
    """
    count = len(points)

    # Accumulate the four running totals the closed-form solution needs in
    # a single pass over the data.
    total_x = total_y = total_xx = total_xy = 0
    for px, py in points:
        total_x, total_y, total_xx, total_xy = (
            total_x + px,
            total_y + py,
            total_xx + px * px,
            total_xy + px * py,
        )

    numerator = count * total_xy - total_x * total_y
    denominator = count * total_xx - total_x * total_x
    slope = numerator / denominator
    intercept = (total_y - slope * total_x) / count
    return slope, intercept
def svg_footer():
    """Return the closing tag of the SVG document, newline-terminated.

    The script below calls ``svg_footer()``; no definition of it is visible
    anywhere else in this file, so it is provided here to keep the final
    write from failing with ``NameError``.
    """
    return '</svg>\n'


# Command line: the single argument is the path of the SVG file to create.
file = sys.argv[1] if len(sys.argv) > 1 else None
if file is None:
    # sys.exit prints the message to stderr and exits with status 1, without
    # burying the usage hint in a traceback.
    sys.exit("usage: ch-2.py file.svg")

# Read and fit the data BEFORE opening the output file, so that malformed or
# empty input cannot leave a truncated SVG behind.
# Input format: whitespace-separated "x,y" integer pairs on standard input.
points = []
for line in sys.stdin:
    for point in line.split():
        x, y = map(int, point.split(','))
        points.append((x, y))

if not points:
    sys.exit("no points read from standard input")

m, b = least_squares(points)

with open(file, "w", encoding="utf-8") as f:
    f.write(svg_header(500, 500))
    f.writelines(svg_point(x, y) for x, y in points)
    # Draw the fitted line across the full canvas width, x = 0 to x = 500.
    f.write(svg_line(0, b, 500, m * 500 + b))
    f.write(svg_footer())