Skip to content

Commit a30bb5e

Browse files
committed
example on copying pages between two OSW instances added
1 parent cb77ceb commit a30bb5e

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed

examples/inter_osw_copy_page.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""This script provides the ability to copy a page from one OSW instance to another
2+
OSW instance."""
3+
from pathlib import Path
4+
5+
from typing_extensions import List, Optional, Union
6+
7+
from osw.auth import CredentialManager
8+
from osw.core import OSW
9+
from osw.model.static import OswBaseModel
10+
from osw.utils import util
11+
from osw.wtsite import SLOTS, WtPage, WtSite
12+
13+
14+
class OswInstance(OswBaseModel):
    """Handle for one OSW instance: its domain, credentials and site objects.

    The constructor builds a ``CredentialManager`` from the credentials file,
    wraps it in a ``WtSite`` and an ``OSW`` object, and keeps references to all
    of them on the instance.
    """

    # Domain of the OSW instance; also used to build the page URL that is printed
    domain: str
    # Path of the credentials file handed to the CredentialManager
    cred_fp: Union[str, Path]
    # The three attributes below are populated in __init__
    credentials_manager: Optional[CredentialManager]
    osw: Optional[OSW]
    wtsite: Optional[WtSite]

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, domain: str, cred_fp: Union[str, Path]):
        """Create the credential manager and the OSW / WtSite objects.

        Parameters
        ----------
        domain:
            Domain of the OSW instance; passed as ``iri`` to the site config.
        cred_fp:
            Path of the credentials file.
        """
        super().__init__(domain=domain, cred_fp=cred_fp)
        self.credentials_manager = CredentialManager(cred_filepath=cred_fp)
        site_config = WtSite.WtSiteConfig(
            iri=domain, cred_mngr=self.credentials_manager
        )
        self.osw = OSW(site=WtSite(site_config))
        self.wtsite = self.osw.site

    def get_page_content(self, full_page_titles: List[str]) -> dict:
        """Read the slot contents of the given pages.

        Parameters
        ----------
        full_page_titles:
            Titles of the pages to fetch.

        Returns
        -------
        dict
            ``{page title: {slot: content}}``; slots whose content is ``None``
            are left out.
        """
        fetched: WtSite.GetPageResult = self.wtsite.get_page(
            WtSite.GetPageParam(titles=full_page_titles)
        )

        contents_by_title = {}
        for page in fetched.pages:
            title = page.title
            # Each slot is read exactly once; None means "no content"
            slot_pairs = ((slot, page.get_slot_content(slot)) for slot in SLOTS)
            contents_by_title[title] = {
                slot: content for slot, content in slot_pairs if content is not None
            }

        return contents_by_title

    def set_single_page_content(
        self,
        handover_dict: dict,
        comment: str,
        overwrite: bool = False,
    ):
        """Write the given slot contents to one page of this instance.

        Parameters
        ----------
        handover_dict:
            Dict with the keys ``"full_page_title"`` (title of the page) and
            ``"content_dict"`` (mapping of slot to content).
        comment:
            Edit comment passed to ``WtPage.edit``.
        overwrite:
            If ``False``, a page that already exists is left untouched.

        Returns
        -------
        dict
            ``{full_page_title: True}`` if the page was edited,
            ``{full_page_title: False}`` if it was skipped.
        """
        full_page_title: str = handover_dict["full_page_title"]
        new_content: dict = handover_dict["content_dict"]
        page = WtPage(
            wtSite=self.wtsite,
            title=full_page_title,
        )

        if page.exists:
            if overwrite is False:
                print(
                    f"Page '{full_page_title}' already exists. It will not be updated."
                )
                return {full_page_title: False}

            # Compare every known slot against the content that is to be written
            changed_slots = [
                slot
                for slot in SLOTS
                if page.get_slot_content(slot) != new_content.get(slot, None)
            ]
            if not changed_slots:
                print(
                    f"Page '{full_page_title}' already has the same content."
                    f" It will not be updated."
                )
                return {full_page_title: False}

            print(
                f"Page '{full_page_title}' has different content in slots "
                f"{changed_slots}. It will be updated."
            )

        for slot, content in new_content.items():
            page.create_slot(
                slot_key=slot,
                content_model=SLOTS[slot]["content_model"],
            )
            page.set_slot_content(
                slot_key=slot,
                content=content,
            )
        page.edit(
            comment=comment,
        )
        print(f"Page updated: 'https://{self.domain}/wiki/{full_page_title}'")
        return {full_page_title: True}

    def set_page_contents(
        self, content_list: List[dict], comment: str, overwrite: bool = False
    ) -> list:
        """Run ``set_single_page_content`` for every entry of ``content_list``.

        The calls are dispatched through ``util.parallelize``; ``comment`` and
        ``overwrite`` are forwarded unchanged.

        Returns
        -------
        list
            Whatever ``util.parallelize`` returns -- presumably one result dict
            per entry (to be confirmed against ``osw.utils.util``).
        """
        return util.parallelize(
            self.set_single_page_content,
            content_list,
            comment=comment,
            overwrite=overwrite,
            flush_at_end=True,
        )
111+
112+
113+
def copy_pages_from(
    source_domain: str,
    to_target_domains: List[str],
    page_titles: List[str],
    cred_fp: Union[str, Path],
    comment: Optional[str] = None,
    overwrite: bool = False,
):
    """Copy pages from one OSW instance to one or more other OSW instances.

    Parameters
    ----------
    source_domain:
        Domain of the OSW instance the pages are read from.
    to_target_domains:
        Domains of the OSW instances the pages are written to.
    page_titles:
        Full titles of the pages to copy.
    cred_fp:
        Path of the credentials file; the same file is used for the source and
        for every target.
    comment:
        Edit comment for the written pages. Defaults to
        ``"[bot edit] Copied from <source_domain>"``.
    overwrite:
        Forwarded to ``OswInstance.set_page_contents``; if ``False``, pages that
        already exist on a target are not updated.

    Returns
    -------
    dict
        ``{target domain: result of OswInstance.set_page_contents}``.
    """
    if comment is None:
        # The default was previously a bare expression and was never assigned,
        # so edits were made with ``comment=None``.
        comment = f"[bot edit] Copied from {source_domain}"
    osw_source = OswInstance(
        domain=source_domain,
        cred_fp=cred_fp,
    )
    osw_targets = [
        OswInstance(
            domain=domain,
            cred_fp=cred_fp,
        )
        for domain in to_target_domains
    ]
    page_contents = osw_source.get_page_content(full_page_titles=page_titles)
    result = {}
    for osw_target in osw_targets:  # could also be parallelized!
        result[osw_target.domain] = osw_target.set_page_contents(
            content_list=[
                {
                    "full_page_title": full_page_title,
                    "content_dict": page_content,
                }
                for full_page_title, page_content in page_contents.items()
            ],
            comment=comment,
            overwrite=overwrite,
        )
    return result
149+
150+
151+
if __name__ == "__main__":
    # Example: copy two items from the source wiki to a single target wiki.
    source = "onto-wiki.eu"
    targets = ["wiki-dev.open-semantic-lab.org"]
    titles = [
        "Item:OSW8dca6aaebe005c5faca05bac33264e4d",
        "Item:OSWaeffcee25ccb5dd8b42a434dc644d62c",
    ]
    # Credentials file; the same file is used for source and target instances
    credentials_fp = Path(r"accounts.pwd.yaml")
    # Existing pages on the target are left untouched (overwrite=False)
    copied_pages = copy_pages_from(
        source, targets, titles, credentials_fp, overwrite=False
    )

0 commit comments

Comments
 (0)