1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
#!/usr/bin/python3
# Challenge 017
#
# Task #2
# Create a script to parse a URL and print its components. According to
# the Wiki page, the URL syntax is as follows:
#
# scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
#
# For example: jdbc://user:password@localhost:3306/pwc?profile=true#h1
#
# scheme: jdbc
# userinfo: user:password
# host: localhost
# port: 3306
# path: /pwc
# query: profile=true
# fragment: h1
import sys
import re
# Scheme: a letter followed by letters, digits, "+", "." or "-" (RFC 3986,
# section 3.1). "_" is tolerated as well.
_SCHEME_RE = re.compile(r"([A-Za-z_][A-Za-z_0-9+.-]*):")
# Authority: everything between "//" and the next "/", "?" or "#".
_AUTHORITY_RE = re.compile(r"//([^/?#]*)")
# host[:port], where host may be a bracketed IPv6 literal such as "[::1]".
_HOSTPORT_RE = re.compile(r"(\[[^\]]*\]|[^:]*)(?::(\d*))?")
# path[?query][#fragment]; every part may be empty, so this always matches.
_TAIL_RE = re.compile(r"([^?#]*)(?:\?([^#]*))?(?:#(.*))?", re.DOTALL)


def _split_authority(authority: str) -> tuple:
    """Split "[userinfo@]host[:port]" into (userinfo, host, port)."""
    # Split on the LAST "@": the host itself can never contain one, while a
    # sloppy password might.
    userinfo, _, hostport = authority.rpartition("@")
    found = _HOSTPORT_RE.fullmatch(hostport)
    if found:
        return userinfo, found.group(1), found.group(2) or ""
    # A non-numeric "port" is not split off; the text is reported as the host.
    return userinfo, hostport, ""


def parse_url(url: str) -> dict:
    """Split *url* into its components.

    The accepted syntax is

        scheme:[//[userinfo@]host[:port]]path[?query][#fragment]

    Returns a dict with the keys "scheme", "userinfo", "host", "port",
    "path", "query" and "fragment", in that order.  Every value is a string;
    a component that is absent from *url* is the empty string.
    """
    scheme = userinfo = host = port = ""

    found = _SCHEME_RE.match(url)
    if found:
        scheme = found.group(1)
        url = url[found.end():]

    found = _AUTHORITY_RE.match(url)
    if found:
        userinfo, host, port = _split_authority(found.group(1))
        url = url[found.end():]

    # Whatever is left is path[?query][#fragment].
    path, query, fragment = _TAIL_RE.fullmatch(url).groups(default="")

    return {
        "scheme": scheme,
        "userinfo": userinfo,
        "host": host,
        "port": port,
        "path": path,
        "query": query,
        "fragment": fragment,
    }


def main(argv):
    """Print the components of the URL given as the first argument.

    Returns the process exit status: 0 on success, 2 when no URL was given.
    """
    if len(argv) < 2:
        print("usage: " + argv[0] + " URL", file=sys.stderr)
        return 2
    for name, value in parse_url(argv[1]).items():
        print(name + ": " + value)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|