
Commit bc1d944

images in the examples working
1 parent e6fee97 commit bc1d944

3 files changed: +42 lines, -14 lines


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -33,3 +33,7 @@ env/
 
 # Project specific
 problems/
+leetcode_last_solution.py
+debug_soup.html
+debug_content_html.txt
+git_setup_commands.txt

leetcode_scraper.py

Lines changed: 37 additions & 13 deletions
@@ -97,9 +97,20 @@ def _process_problem_data(self, question):
         }
 
         # Process content with BeautifulSoup to extract description, examples, and constraints
+        import os
         content_html = question.get('content', '')
+        debug_dir = os.path.dirname(os.path.abspath(__file__))
+        debug_content_path = os.path.join(debug_dir, 'debug_content_html.txt')
+        debug_soup_path = os.path.join(debug_dir, 'debug_soup.html')
+        # Write content_html to a debug file for inspection
+        with open(debug_content_path, 'w', encoding='utf-8') as f:
+            f.write(content_html)
+        print(f"[DEBUG] Wrote content_html to {debug_content_path}")
         soup = BeautifulSoup(content_html, 'html.parser')
-
+        # Write soup prettified HTML to a debug file for inspection
+        with open(debug_soup_path, 'w', encoding='utf-8') as f:
+            f.write(soup.prettify())
+        print(f"[DEBUG] Wrote soup HTML to {debug_soup_path}")
         # Get description (text before the first <strong>Example</strong>)
         description = []
         current_element = soup.find()
@@ -113,14 +124,23 @@ def _process_problem_data(self, question):
 
         problem_data['description'] = '\n'.join([d for d in description if d])
 
-        # Extract examples
+        # Extract examples and attach the closest preceding image to each
        examples = []
         example_blocks = soup.find_all('pre')
         for i, example in enumerate(example_blocks, 1):
-            examples.append({
+            example_dict = {
                 'example_num': i,
-                'example_text': example.get_text().strip()
-            })
+                'example_text': example.get_text().strip(),
+                'images': []
+            }
+            # Find the closest preceding <img> tag before this <pre>
+            prev = example.previous_element
+            while prev:
+                if getattr(prev, 'name', None) == 'img' and prev.has_attr('src'):
+                    example_dict['images'].append(prev['src'])
+                    break
+                prev = prev.previous_element
+            examples.append(example_dict)
         problem_data['examples'] = examples
 
         # Extract constraints
@@ -212,18 +232,22 @@ def scrape_problem_list(self, limit=10):
 
         return problem_list
 
-if __name__ == "__main__":
-    scraper = LeetCodeScraper()
+# if __name__ == "__main__":
+#     scraper = LeetCodeScraper()
 
     # Option 1: Scrape a specific problem
     # problem_data = scraper.scrape_problem("two-sum")
     # print(json.dumps(problem_data, indent=2))
 
+if __name__ == "__main__":
+    scraper = LeetCodeScraper()
+    problem_data = scraper.scrape_problem("linked-list-cycle")
+    print(json.dumps(problem_data, indent=2))
     # Option 2: Scrape multiple problems from the list
-    problem_list = scraper.scrape_problem_list(limit=5)
+    # problem_list = scraper.scrape_problem_list(limit=5)
 
-    # Add a delay between requests to avoid being blocked
-    for problem in problem_list:
-        print(f"Scraping problem: {problem['title']} ({problem['slug']})")
-        scraper.scrape_problem(problem['slug'])
-        time.sleep(2) # Wait 2 seconds between requests
+    # # Add a delay between requests to avoid being blocked
+    # for problem in problem_list:
+    #     print(f"Scraping problem: {problem['title']} ({problem['slug']})")
+    #     scraper.scrape_problem(problem['slug'])
+    #     time.sleep(2) # Wait 2 seconds between requests
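The image-attachment logic added in the second hunk can be exercised on its own. The following minimal sketch is not part of the commit: it runs the same previous_element walk against a made-up HTML snippet (the markup and image URL are illustrative only) to show how each <pre> example block picks up the nearest preceding <img src>.

# standalone_image_lookup_sketch.py -- illustrative only, not from the commit
from bs4 import BeautifulSoup

# Hypothetical problem content shaped like LeetCode's HTML:
# an image, then an "Example 1" heading, then a <pre> block.
html = """
<p>Given head, determine if the linked list has a cycle in it.</p>
<img src="https://example.com/circularlinkedlist.png" alt="diagram">
<p><strong>Example 1:</strong></p>
<pre>Input: head = [3,2,0,-4], pos = 1
Output: true</pre>
"""

soup = BeautifulSoup(html, 'html.parser')

for i, pre in enumerate(soup.find_all('pre'), 1):
    images = []
    prev = pre.previous_element
    while prev:
        # previous_element yields tags and text nodes in reverse document
        # order; stop at the first <img> that carries a src attribute.
        if getattr(prev, 'name', None) == 'img' and prev.has_attr('src'):
            images.append(prev['src'])
            break
        prev = prev.previous_element
    print(f"Example {i}: images={images}")
    # Prints: Example 1: images=['https://example.com/circularlinkedlist.png']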

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-requests==2.28.2
+requests>=2.25.1
 beautifulsoup4==4.12.2
 lxml==4.9.2
