aboutsummaryrefslogtreecommitdiff
path: root/challenge-017/paulo-custodio/python/ch-2.py
blob: 7493986e88a82a58ad020814544d5b45f0eb8ae6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/python3

# Challenge 017
#
# Task #2
# Create a script to parse a URL and print its components. According to the
# Wiki page, the URL syntax is as follows:
#
# scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
#
# For example: jdbc://user:password@localhost:3306/pwc?profile=true#h1
#
#   scheme:   jdbc
#   userinfo: user:password
#   host:     localhost
#   port:     3306
#   path:     /pwc
#   query:    profile=true
#   fragment: h1

import sys
import re

# Component names, in the order they are printed.
FIELDS = ("scheme", "userinfo", "host", "port", "path", "query", "fragment")

# All patterns are compiled once and are free of nested quantifiers, so
# matching time grows linearly with the length of the input.

# scheme(1): a letter or underscore, then letters, digits, "_", "+", "."
# or "-", terminated by ":".
SCHEME_RE = re.compile(r"([a-z_][a-z_0-9+.-]*):", re.IGNORECASE)

# authority(1), path(2), query(3), fragment(4).  Modelled on the generic
# splitting expression of RFC 3986, Appendix B.  Every part is optional and
# the last part swallows the remainder, so this matches any string in full.
REST_RE = re.compile(
    r"(?://([^/?#]*))?"     # "//" authority, up to the next "/", "?" or "#"
    r"([^?#]*)"             # path (may be empty)
    r"(?:[?]([^#]*))?"      # "?" query
    r"(?:[#](.*))?",        # "#" fragment
    re.DOTALL,
)

# host(1), port(2).  The host is either a bracketed literal such as "[::1]"
# (which may itself contain colons) or a run of characters without ":".
HOSTPORT_RE = re.compile(r"(\[[^\]]*\]|[^:]*)(?::(\d*))?")


def parse_url(url):
    """Split *url* into its generic components.

    The syntax handled is

        scheme:[//[userinfo@]host[:port]]path[?query][#fragment]

    The parser is deliberately lenient: it never raises on malformed input.
    A component that is absent is reported as the empty string, and nothing
    is percent-decoded or case-normalised.

    Args:
        url: The URL (or relative reference) to split.

    Returns:
        A dict with the keys listed in FIELDS, each mapped to a string.
    """
    # The scheme is optional; without one the whole input is treated as
    # "[//authority]path[?query][#fragment]".
    scheme = ""
    found = SCHEME_RE.match(url)
    if found:
        scheme = found.group(1)
        url = url[found.end():]

    # REST_RE matches every string, so fullmatch() cannot return None here.
    authority, path, query, fragment = REST_RE.fullmatch(url).groups(default="")

    # Userinfo is everything before the last "@" of the authority.  Because
    # the authority already stops at "/", "?" and "#", an "@" that appears
    # later in the path or query can no longer be mistaken for the delimiter.
    userinfo, _, hostport = authority.rpartition("@")

    found = HOSTPORT_RE.fullmatch(hostport)
    if found:
        host, port = found.groups(default="")
    else:
        # Not of the form host[:digits] (e.g. a non-numeric port): report
        # the text unsplit rather than silently dropping it.
        host, port = hostport, ""

    return {
        "scheme": scheme,
        "userinfo": userinfo,
        "host": host,
        "port": port,
        "path": path,
        "query": query,
        "fragment": fragment,
    }


def main(argv=None):
    """Parse the URL given as the first command-line argument and print it.

    Args:
        argv: Argument vector including the program name; defaults to
            sys.argv.

    Exits with a usage message on stderr when no URL argument is supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "ch-2.py"
        sys.exit("usage: " + prog + " URL")

    parts = parse_url(argv[1])
    for name in FIELDS:
        # Labels are padded to 10 columns: "scheme:   ", "userinfo: ", ...
        print((name + ":").ljust(10) + parts[name])


if __name__ == "__main__":
    main()