|
| 1 | +"""This script provides the ability to copy pages from one OSW instance to other |
| 2 | +OSW instances.""" |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +from typing_extensions import List, Optional, Union |
| 6 | + |
| 7 | +from osw.auth import CredentialManager |
| 8 | +from osw.core import OSW |
| 9 | +from osw.model.static import OswBaseModel |
| 10 | +from osw.utils import util |
| 11 | +from osw.wtsite import SLOTS, WtPage, WtSite |
| 12 | + |
| 13 | + |
class OswInstance(OswBaseModel):
    """Handle for a single OSW instance.

    Stores the domain and credentials file path it was created with, together
    with the ``CredentialManager``, ``OSW`` and ``WtSite`` objects that
    ``__init__`` builds from them.
    """

    # Domain of the instance; handed to the site config as ``iri`` and used to
    # build the page URL printed after a successful edit.
    domain: str
    # Path of the credentials file handed to the CredentialManager.
    cred_fp: Union[str, Path]
    # The three attributes below are populated in __init__.
    credentials_manager: Optional[CredentialManager]
    osw: Optional[OSW]
    wtsite: Optional[WtSite]

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, domain: str, cred_fp: Union[str, Path]):
        """Create the credential manager, the OSW object and its site.

        Parameters
        ----------
        domain
            Domain of the OSW instance.
        cred_fp
            Path of the credentials file.
        """
        super().__init__(domain=domain, cred_fp=cred_fp)
        self.credentials_manager = CredentialManager(cred_filepath=cred_fp)
        site_config = WtSite.WtSiteConfig(
            iri=domain, cred_mngr=self.credentials_manager
        )
        self.osw = OSW(site=WtSite(site_config))
        self.wtsite = self.osw.site

    def get_page_content(self, full_page_titles: List[str]) -> dict:
        """Fetch the slot contents of the given pages.

        Parameters
        ----------
        full_page_titles
            Titles of the pages to fetch.

        Returns
        -------
        dict
            Maps each fetched page's title to a dict ``{slot: content}`` that
            contains only the slots whose content is not ``None``.
        """
        fetched: WtSite.GetPageResult = self.wtsite.get_page(
            WtSite.GetPageParam(titles=full_page_titles)
        )

        contents_by_title = {}
        for page in fetched.pages:
            page_title = page.title
            every_slot = {key: page.get_slot_content(key) for key in SLOTS}
            # Slots without content are left out of the result.
            contents_by_title[page_title] = {
                key: value for key, value in every_slot.items() if value is not None
            }
        return contents_by_title

    def set_single_page_content(
        self,
        handover_dict: dict,
        comment: str,
        overwrite: bool = False,
    ):
        """Write the given slot contents to one page of this instance.

        An existing page is left untouched when ``overwrite`` is ``False`` or
        when none of its slots differs from the given content.

        Parameters
        ----------
        handover_dict
            Dict with the keys ``"full_page_title"`` (title of the page) and
            ``"content_dict"`` (mapping of slot key to slot content).
        comment
            Comment passed to the page edit.
        overwrite
            Whether an already existing page may be updated.

        Returns
        -------
        dict
            ``{full_page_title: True}`` if the page was edited, otherwise
            ``{full_page_title: False}``.
        """
        full_page_title: str = handover_dict["full_page_title"]
        new_content: dict = handover_dict["content_dict"]
        target_page = WtPage(wtSite=self.wtsite, title=full_page_title)

        if target_page.exists:
            if overwrite is False:
                print(
                    f"Page '{full_page_title}' already exists. It will not be updated."
                )
                return {full_page_title: False}
            # Compare every known slot of the remote page with the new content.
            changed_slots = [
                slot
                for slot in SLOTS
                if target_page.get_slot_content(slot) != new_content.get(slot)
            ]
            if not changed_slots:
                print(
                    f"Page '{full_page_title}' already has the same content."
                    f" It will not be updated."
                )
                return {full_page_title: False}
            print(
                f"Page '{full_page_title}' has different content in slots "
                f"{changed_slots}. It will be updated."
            )

        for slot_key, slot_content in new_content.items():
            target_page.create_slot(
                slot_key=slot_key,
                content_model=SLOTS[slot_key]["content_model"],
            )
            target_page.set_slot_content(slot_key=slot_key, content=slot_content)
        target_page.edit(comment=comment)
        print(f"Page updated: 'https://{self.domain}/wiki/{full_page_title}'")
        return {full_page_title: True}

    def set_page_contents(
        self, content_list: List[dict], comment: str, overwrite: bool = False
    ) -> list:
        """Apply ``set_single_page_content`` to every entry of ``content_list``.

        The calls are dispatched through ``util.parallelize``.

        Parameters
        ----------
        content_list
            One handover dict per page, as expected by
            ``set_single_page_content``.
        comment
            Comment passed to every page edit.
        overwrite
            Whether already existing pages may be updated.

        Returns
        -------
        list
            Whatever ``util.parallelize`` returns for the dispatched calls.
        """
        return util.parallelize(
            self.set_single_page_content,
            content_list,
            comment=comment,
            overwrite=overwrite,
            flush_at_end=True,
        )
| 111 | + |
| 112 | + |
def copy_pages_from(
    source_domain: str,
    to_target_domains: List[str],
    page_titles: List[str],
    cred_fp: Union[str, Path],
    comment: Optional[str] = None,
    overwrite: bool = False,
) -> dict:
    """Copy pages from one OSW instance to one or more other OSW instances.

    Parameters
    ----------
    source_domain
        Domain of the instance the pages are read from.
    to_target_domains
        Domains of the instances the pages are written to.
    page_titles
        Full titles of the pages to copy.
    cred_fp
        Path of the credentials file; the same file is used for the source
        and for every target.
    comment
        Edit comment used on the targets. Defaults to
        ``"[bot edit] Copied from <source_domain>"`` when ``None``.
    overwrite
        Whether pages that already exist on a target may be updated.

    Returns
    -------
    dict
        Maps each target domain to the result of
        ``OswInstance.set_page_contents`` for that target.
    """
    if comment is None:
        # The default must be assigned; a bare f-string expression would be
        # evaluated and discarded, leaving the comment as None.
        comment = f"[bot edit] Copied from {source_domain}"

    osw_source = OswInstance(domain=source_domain, cred_fp=cred_fp)
    osw_targets = [
        OswInstance(domain=domain, cred_fp=cred_fp) for domain in to_target_domains
    ]

    page_contents = osw_source.get_page_content(full_page_titles=page_titles)
    # The payload is identical for every target, so it is built only once.
    content_list = [
        {
            "full_page_title": full_page_title,
            "content_dict": page_content,
        }
        for full_page_title, page_content in page_contents.items()
    ]

    result = {}
    for osw_target in osw_targets:  # could also be parallelized!
        result[osw_target.domain] = osw_target.set_page_contents(
            content_list=content_list,
            comment=comment,
            overwrite=overwrite,
        )
    return result
| 149 | + |
| 150 | + |
if __name__ == "__main__":
    # Example run: copy two item pages from the source wiki to one target
    # wiki, leaving pages that already exist on the target untouched.
    copied_pages = copy_pages_from(
        source_domain="onto-wiki.eu",
        to_target_domains=["wiki-dev.open-semantic-lab.org"],
        page_titles=[
            "Item:OSW8dca6aaebe005c5faca05bac33264e4d",
            "Item:OSWaeffcee25ccb5dd8b42a434dc644d62c",
        ],
        cred_fp=Path(r"accounts.pwd.yaml"),
        overwrite=False,
    )
0 commit comments