From de05f6da01e450cfba35c0ef29fd857b443c020c Mon Sep 17 00:00:00 2001 From: Icelain <50962640+Icelain@users.noreply.github.com> Date: Fri, 16 Oct 2020 16:24:18 +0530 Subject: [PATCH 1/4] Update link_checker.py --- pyanchor/link_checker.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pyanchor/link_checker.py b/pyanchor/link_checker.py index 9e0e825..5bcc0e6 100644 --- a/pyanchor/link_checker.py +++ b/pyanchor/link_checker.py @@ -12,7 +12,7 @@ import requests from bs4 import BeautifulSoup - +import re class LinkResults: def __init__(self, url: str): @@ -42,11 +42,23 @@ def check_link_for_http_scheme(self, href: str) -> str: if href.startswith(self.base_url): return href + elif href.startswith("/"): href = self.base_url + href.lstrip("/") return href - else: # This catches any href set to '#' - return None # TODO: Deal with ./ or ../ relative links. + + elif href.startswith("./"): + return self.base_url + re.sub("./", "", href) #using re.sub to remove all instances of ./ in href + + elif href.startswith("../"): + return self.base_url + re.sub("../", "", href) #using re.sub to remove all instances of ./ in href + + elif href.startswith("#"): + if "#" not in list(self.base_url): + return self.base_url + re.sub("#", "", href) #checks if # exists in base_url + else: + return None #returns none if # already exists in self.base_url + def find_all_atags(self, url: str): """Find all anchor tags on a given URL. From c68bd27ebab120634c4d0b5017c2e686b8d020e6 Mon Sep 17 00:00:00 2001 From: Icelain <50962640+Icelain@users.noreply.github.com> Date: Fri, 16 Oct 2020 16:26:10 +0530 Subject: [PATCH 2/4] Update test_cli.py --- tests/test_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index e619773..ddd95d9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,6 +16,8 @@ def test_exception_on_invalid_url_http_scheme(self): def test_successful_result_prints(self): results = runner.invoke(app, ["http://127.0.0.1:5000/", "--verbose"]) assert "[ 200 ] - http://127.0.0.1:5000/about" in results.output + assert "[ 200 ] - http://127.0.0.1:5000/rel" in results.output + assert "[ 200 ] - http://127.0.0.1:5000/rel2" in results.output def test_failing_result_prints(self): results = runner.invoke(app, ["http://127.0.0.1:5000/"]) From 8469f0be9d5e670c32b26d5668bb89e050519248 Mon Sep 17 00:00:00 2001 From: Icelain <50962640+Icelain@users.noreply.github.com> Date: Fri, 16 Oct 2020 16:27:58 +0530 Subject: [PATCH 3/4] Update app.py --- tests/test_webapp/app.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_webapp/app.py b/tests/test_webapp/app.py index a262397..2970b9b 100644 --- a/tests/test_webapp/app.py +++ b/tests/test_webapp/app.py @@ -29,12 +29,18 @@ def about_link_2(): def contact(): return render_template("success.html", name="Contact") +@app.route("/rel") +def relative_link(): + return render_template("success.html",name="Rel") + +@app.route("/rel2") +def relative_link2(): + return render_template("success.html",name="Rel2") @app.route("/login") def login(): return render_template("error.html", status_code=401), 401 - @app.route("/500") def five_hundred(): return render_template("error.html", status_code=500), 500 @@ -47,3 +53,4 @@ def sitemap(): if __name__ == "__main__": app.run() + From adb99e82032a11d0b15c1b12b5232b81e71ab2ac Mon Sep 17 00:00:00 2001 From: Icelain <50962640+Icelain@users.noreply.github.com> Date: Fri, 16 Oct 2020 16:28:19 +0530 Subject: [PATCH 4/4] Add files via upload --- tests/test_webapp/templates/index.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_webapp/templates/index.html b/tests/test_webapp/templates/index.html index 0345daa..ac5fe80 100644 --- a/tests/test_webapp/templates/index.html +++ b/tests/test_webapp/templates/index.html @@ -11,7 +11,8 @@ About Page Contact Page Hash Link - Rel Link + Rel Link + Second Rel Link Failing Link