43 lines
1.6 KiB
Python
Executable File
43 lines
1.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from seleniumbase import SB
|
|
from seleniumbase.common.exceptions import WebDriverException
|
|
import re
|
|
|
|
def main():
    """Fetch the OpenSSH goals page with SeleniumBase and print parts of it.

    Written to stdout, each section followed by a line of 80 dashes:

    1. the full page source,
    2. the text of the ``<h2 id="OpenBSD">`` heading via ``sb.get_text``,
    3. the same heading's ``innerText`` attribute (line breaks are
       presented differently from ``get_text``).

    Failures are reported on stderr instead of being raised, so stdout only
    ever carries page content.
    """
    import sys  # local import: only needed for error reporting

    separator = "-" * 80

    try:
        # UC mode is designed to make browser automation appear human and
        # thus evade detection by anti-bot systems. CDP mode (Chrome DevTools
        # Protocol, enabled with sb.activate_cdp_mode(url)) allows more
        # direct, lower-level control over the browser; it is not needed here.
        with SB(uc=True, headless=True) as sb:
            sb.open("https://www.openssh.org/goals.html")

            # Page source.
            print(sb.get_page_source())
            print(separator)

            # NOTE: enumerating every child of <body> (find_elements with
            # the XPath "/html/body/*", then printing each tag_name) was
            # tried and, per the original author's note, causes a
            # TimeoutError -- so it is intentionally left out.

            # Specific HTML element, selected by tag and element id.
            print(sb.get_text("/html/body/h2[@id='OpenBSD']"))
            print(separator)

            # Similar to the above, but line breaks are presented differently.
            # Selector comes first and the strategy is passed as ``by``.
            heading = sb.get_element("//h2[@id='OpenBSD']", by="xpath")
            print(heading.get_attribute("innerText"))
            print(separator)
    except WebDriverException as e:
        print(f"Failed to get page: {e}", file=sys.stderr)
    except Exception as e:
        # Top-level boundary: report anything unexpected without a traceback,
        # but include the exception type so empty messages stay diagnosable.
        print(f"Unexpected error: {type(e).__name__}: {e}", file=sys.stderr)
|
|
|
|
|
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()