From de05f6da01e450cfba35c0ef29fd857b443c020c Mon Sep 17 00:00:00 2001
From: Icelain <50962640+Icelain@users.noreply.github.com>
Date: Fri, 16 Oct 2020 16:24:18 +0530
Subject: [PATCH 1/4] Update link_checker.py
---
pyanchor/link_checker.py | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/pyanchor/link_checker.py b/pyanchor/link_checker.py
index 9e0e825..5bcc0e6 100644
--- a/pyanchor/link_checker.py
+++ b/pyanchor/link_checker.py
@@ -12,7 +12,7 @@
import requests
from bs4 import BeautifulSoup
-
+import re
class LinkResults:
def __init__(self, url: str):
@@ -42,11 +42,23 @@ def check_link_for_http_scheme(self, href: str) -> str:
if href.startswith(self.base_url):
return href
+
elif href.startswith("/"):
href = self.base_url + href.lstrip("/")
return href
- else: # This catches any href set to '#'
- return None # TODO: Deal with ./ or ../ relative links.
+
+        elif href.startswith(("./", "../")):
+            # Resolve against base_url like the "/" branch: drop every leading
+            # "./" or "../" segment and keep the rest of the path untouched.
+            # (re.sub("./", ...) treated "." as a wildcard and also ate "a/".)
+            while href.startswith(("./", "../")):
+                href = href[href.index("/") + 1:]
+            return self.base_url + href
+
+        elif href.startswith("#"):
+            # A fragment-only link targets the current page; None if base_url has one.
+            return None if "#" in self.base_url else self.base_url
+
def find_all_atags(self, url: str):
"""Find all anchor tags on a given URL.
From c68bd27ebab120634c4d0b5017c2e686b8d020e6 Mon Sep 17 00:00:00 2001
From: Icelain <50962640+Icelain@users.noreply.github.com>
Date: Fri, 16 Oct 2020 16:26:10 +0530
Subject: [PATCH 2/4] Update test_cli.py
---
tests/test_cli.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e619773..ddd95d9 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -16,6 +16,8 @@ def test_exception_on_invalid_url_http_scheme(self):
def test_successful_result_prints(self):
results = runner.invoke(app, ["http://127.0.0.1:5000/", "--verbose"])
assert "[ 200 ] - http://127.0.0.1:5000/about" in results.output
+ assert "[ 200 ] - http://127.0.0.1:5000/rel" in results.output  # /rel route added to the test webapp by this series
+ assert "[ 200 ] - http://127.0.0.1:5000/rel2" in results.output  # /rel2 route; presumably reached via a relative href - confirm in index.html
def test_failing_result_prints(self):
results = runner.invoke(app, ["http://127.0.0.1:5000/"])
From 8469f0be9d5e670c32b26d5668bb89e050519248 Mon Sep 17 00:00:00 2001
From: Icelain <50962640+Icelain@users.noreply.github.com>
Date: Fri, 16 Oct 2020 16:27:58 +0530
Subject: [PATCH 3/4] Update app.py
---
tests/test_webapp/app.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/test_webapp/app.py b/tests/test_webapp/app.py
index a262397..2970b9b 100644
--- a/tests/test_webapp/app.py
+++ b/tests/test_webapp/app.py
@@ -29,12 +29,18 @@ def about_link_2():
def contact():
return render_template("success.html", name="Contact")
+@app.route("/rel")  # fixture page; test_cli expects the checker to report 200 for it
+def relative_link():
+ return render_template("success.html",name="Rel")
+
+@app.route("/rel2")  # second fixture page; test_cli expects the checker to report 200 for it
+def relative_link2():
+ return render_template("success.html",name="Rel2")
@app.route("/login")
def login():
return render_template("error.html", status_code=401), 401
-
@app.route("/500")
def five_hundred():
return render_template("error.html", status_code=500), 500
@@ -47,3 +53,4 @@ def sitemap():
if __name__ == "__main__":
app.run()
+
From adb99e82032a11d0b15c1b12b5232b81e71ab2ac Mon Sep 17 00:00:00 2001
From: Icelain <50962640+Icelain@users.noreply.github.com>
Date: Fri, 16 Oct 2020 16:28:19 +0530
Subject: [PATCH 4/4] Add files via upload
---
tests/test_webapp/templates/index.html | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/test_webapp/templates/index.html b/tests/test_webapp/templates/index.html
index 0345daa..ac5fe80 100644
--- a/tests/test_webapp/templates/index.html
+++ b/tests/test_webapp/templates/index.html
@@ -11,7 +11,8 @@
About Page
Contact Page
Hash Link
- Rel Link
+ Rel Link
+ Second Rel Link
Failing Link