Skip to content

Commit ab0d17a

Browse files
authored
Merge pull request #58 from ScrapeGraphAI/added-mock-and-plain-text-param
added mock and plain text param
2 parents 4f627dc + ebead2c commit ab0d17a

File tree

7 files changed

+26
-9
lines changed

7 files changed

+26
-9
lines changed

scrapegraph-js/src/smartScraper.js

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
1818
* @returns {Promise<string>} Extracted data in JSON format matching the provided schema
1919
* @throws - Will throw an error in case of an HTTP failure.
2020
*/
21-
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}) {
21+
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false) {
2222
const { mock = null } = options;
2323

2424
// Check if mock mode is enabled
@@ -41,6 +41,7 @@ export async function smartScraper(apiKey, url, prompt, schema = null, numberOfS
4141
const payload = {
4242
website_url: url,
4343
user_prompt: prompt,
44+
plain_text: plain_text,
4445
};
4546

4647
if (cookies) {

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
# Client implementation goes here
2-
from typing import Any, Dict, Optional, Callable
2+
import uuid as _uuid
3+
from typing import Any, Callable, Dict, Optional
4+
from urllib.parse import urlparse
35

46
import requests
57
import urllib3
68
from pydantic import BaseModel
79
from requests.exceptions import RequestException
8-
from urllib.parse import urlparse
9-
import uuid as _uuid
1010

1111
from scrapegraph_py.config import API_BASE_URL, DEFAULT_HEADERS
1212
from scrapegraph_py.exceptions import APIError
@@ -17,8 +17,8 @@
1717
)
1818
from scrapegraph_py.models.crawl import CrawlRequest, GetCrawlRequest
1919
from scrapegraph_py.models.feedback import FeedbackRequest
20-
from scrapegraph_py.models.scrape import GetScrapeRequest, ScrapeRequest
2120
from scrapegraph_py.models.markdownify import GetMarkdownifyRequest, MarkdownifyRequest
21+
from scrapegraph_py.models.scrape import GetScrapeRequest, ScrapeRequest
2222
from scrapegraph_py.models.searchscraper import (
2323
GetSearchScraperRequest,
2424
SearchScraperRequest,
@@ -255,13 +255,13 @@ def new_id(prefix: str) -> str:
255255
# Generic fallback
256256
return {"status": "mock", "url": url, "method": method, "kwargs": kwargs}
257257

258-
def markdownify(self, website_url: str, headers: Optional[dict[str, str]] = None):
258+
def markdownify(self, website_url: str, headers: Optional[dict[str, str]] = None, mock:bool=False):
259259
"""Send a markdownify request"""
260260
logger.info(f"🔍 Starting markdownify request for {website_url}")
261261
if headers:
262262
logger.debug("🔧 Using custom headers")
263263

264-
request = MarkdownifyRequest(website_url=website_url, headers=headers)
264+
request = MarkdownifyRequest(website_url=website_url, headers=headers, mock=mock)
265265
logger.debug("✅ Request validation passed")
266266

267267
result = self._make_request(
@@ -287,6 +287,7 @@ def scrape(
287287
website_url: str,
288288
render_heavy_js: bool = False,
289289
headers: Optional[dict[str, str]] = None,
290+
mock:bool=False,
290291
):
291292
"""Send a scrape request to get HTML content from a website
292293
@@ -304,6 +305,7 @@ def scrape(
304305
website_url=website_url,
305306
render_heavy_js=render_heavy_js,
306307
headers=headers,
308+
mock=mock
307309
)
308310
logger.debug("✅ Request validation passed")
309311

@@ -335,6 +337,8 @@ def smartscraper(
335337
output_schema: Optional[BaseModel] = None,
336338
number_of_scrolls: Optional[int] = None,
337339
total_pages: Optional[int] = None,
340+
mock:bool=False,
341+
plain_text:bool=False
338342
):
339343
"""Send a smartscraper request with optional pagination support and cookies"""
340344
logger.info("🔍 Starting smartscraper request")
@@ -361,6 +365,8 @@ def smartscraper(
361365
output_schema=output_schema,
362366
number_of_scrolls=number_of_scrolls,
363367
total_pages=total_pages,
368+
mock=mock,
369+
plain_text=plain_text,
364370
)
365371
logger.debug("✅ Request validation passed")
366372

@@ -420,6 +426,7 @@ def searchscraper(
420426
num_results: Optional[int] = 3,
421427
headers: Optional[dict[str, str]] = None,
422428
output_schema: Optional[BaseModel] = None,
429+
mock: bool=False
423430
):
424431
"""Send a searchscraper request
425432
@@ -443,6 +450,7 @@ def searchscraper(
443450
num_results=num_results,
444451
headers=headers,
445452
output_schema=output_schema,
453+
mock=mock
446454
)
447455
logger.debug("✅ Request validation passed")
448456

@@ -547,6 +555,7 @@ def agenticscraper(
547555
user_prompt: Optional[str] = None,
548556
output_schema: Optional[Dict[str, Any]] = None,
549557
ai_extraction: bool = False,
558+
mock: bool=False,
550559
):
551560
"""Send an agentic scraper request to perform automated actions on a webpage
552561
@@ -573,6 +582,7 @@ def agenticscraper(
573582
user_prompt=user_prompt,
574583
output_schema=output_schema,
575584
ai_extraction=ai_extraction,
585+
mock=mock
576586
)
577587
logger.debug("✅ Request validation passed")
578588

scrapegraph-py/scrapegraph_py/models/agenticscraper.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ class AgenticScraperRequest(BaseModel):
4949
description="Whether to use AI for data extraction from the scraped content"
5050
)
5151

52+
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
5253
@model_validator(mode="after")
5354
def validate_url(self) -> "AgenticScraperRequest":
5455
if not self.url.strip():

scrapegraph-py/scrapegraph_py/models/markdownify.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ class MarkdownifyRequest(BaseModel):
1818
description="Optional headers to send with the request, including cookies "
1919
"and user agent",
2020
)
21+
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
2122

2223
@model_validator(mode="after")
2324
def validate_url(self) -> "MarkdownifyRequest":

scrapegraph-py/scrapegraph_py/models/scrape.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ class ScrapeRequest(BaseModel):
2121
},
2222
description="Optional headers to send with the request, including cookies "
2323
"and user agent",
24-
)
24+
),
25+
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
2526

2627
@model_validator(mode="after")
2728
def validate_url(self) -> "ScrapeRequest":

scrapegraph-py/scrapegraph_py/models/searchscraper.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ class SearchScraperRequest(BaseModel):
2727
"and user agent",
2828
)
2929
output_schema: Optional[Type[BaseModel]] = None
30+
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
3031

3132
@model_validator(mode="after")
3233
def validate_user_prompt(self) -> "SearchScraperRequest":

scrapegraph-py/scrapegraph_py/models/smartscraper.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ class SmartScraperRequest(BaseModel):
4949
"page will be scraped.",
5050
example=5,
5151
)
52-
52+
mock: bool = Field(default=False, description="Whether to use mock mode for the request")
53+
plain_text: bool = Field(default=False, description="Whether to return the result as plain text")
54+
5355
@model_validator(mode="after")
5456
def validate_user_prompt(self) -> "SmartScraperRequest":
5557
if self.user_prompt is None or not self.user_prompt.strip():

0 commit comments

Comments (0)