diff --git a/test/common/testlib.py b/test/common/testlib.py index 7a31afd9f71c..00f4428074b7 100644 --- a/test/common/testlib.py +++ b/test/common/testlib.py @@ -818,7 +818,12 @@ def logout(self): self.click('#go-logout') else: self.open_session_menu() - self.click('#logout') + try: + self.click('#logout') + except RuntimeError as e: + # logging out does destroy the current frame context, it races with the CDP driver finishing the command + if "Execution context was destroyed" not in str(e): + raise self.wait_visible('#login') self.machine.allow_restart_journal_messages() @@ -1530,10 +1535,13 @@ def cleanup_home_dirs(): "for dev in $(ls /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*:*/block); do " " for s in /sys/block/*/slaves/${dev}*; do [ -e $s ] || break; " " d=/dev/$(dirname $(dirname ${s#/sys/block/})); " + " while fuser --mount $d --kill; do sleep 0.1; done; " " umount $d || true; dmsetup remove --force $d || true; " " done; " - " umount /dev/$dev 2>/dev/null || true; " - "done; until rmmod scsi_debug; do sleep 0.2; done") + " while fuser --mount /dev/$dev --kill; do sleep 0.1; done; " + " umount /dev/$dev || true; " + " swapon --show=NAME --noheadings | grep $dev | xargs -r swapoff; " + "done; until rmmod scsi_debug; do sleep 0.2; done", stdout=None) def terminate_sessions(): # on OSTree we don't get "web console" sessions with the cockpit/ws container; just SSH; but also, some tests start diff --git a/test/verify/check-client b/test/verify/check-client index 9699334695f8..ab8d2179e525 100755 --- a/test/verify/check-client +++ b/test/verify/check-client @@ -67,9 +67,7 @@ Command = {self.libexecdir}/cockpit-beiboot def logout(self, check_last_host=None): b = self.browser - b.assert_no_oops() - b.open_session_menu() - b.click('#logout') + b.logout() # FIXME: This is broken, nothing appears # b.wait_text("#brand", "Connect to:") if check_last_host: @@ -83,17 +81,16 @@ Command = {self.libexecdir}/cockpit-beiboot timeout=30) def testBeibootNoBridge(self): - self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") # set up target machine: no cockpit self.m_target.execute("rm /usr/bin/cockpit-bridge; rm -r /usr/share/cockpit") - self.checkLoginScenarios(local_bridge=False) def testBeibootWithBridge(self): - self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") self.checkLoginScenarios(local_bridge=True) def checkLoginScenarios(self, *, local_bridge=True): + self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") + b = self.browser b.open("/") diff --git a/test/verify/check-metrics b/test/verify/check-metrics index dbc0f7761d1e..872d6edbd481 100755 --- a/test/verify/check-metrics +++ b/test/verify/check-metrics @@ -1486,15 +1486,6 @@ class TestGrafanaClient(testlib.MachineCase): bg.click("button:contains('Log in')") bg.wait_in_text("body", "Add your first data source") - # HACK Unsigned plugin needs to be enabled manually - # See https://github.com/performancecopilot/grafana-pcp/issues/94 - bg.open("/plugins/performancecopilot-pcp-app") - with bg.wait_timeout(30): - bg.wait_visible(".gf-form-button-row button") - if bg.text(".gf-form-button-row button") == "Enable": - bg.click(".gf-form-button-row button") - bg.wait_text(".gf-form-button-row button", "Disable") - # Add the PCP redis data source for our client machine # Cog (Configuration) menu → Data Sources → Add # Select PCP redis, HTTP URL http://10.111.112.1:44322 @@ -1523,7 +1514,7 @@ class TestGrafanaClient(testlib.MachineCase): # .. and the dashboard name becomes clickable bg.click("a:contains('PCP Redis: Host Overview')") - bg.wait_in_text(".submenu-controls", "grafana-client") + bg.wait_in_text("#var-host", "grafana-client") # expect a "Load average" panel with a sensible number max_load = bg.text("div:contains('Load average') .graph-legend-series:contains('1 minute') .max") diff --git a/test/verify/check-packagekit b/test/verify/check-packagekit index 2d5b27e9f0b1..e254b78c5a7a 100755 --- a/test/verify/check-packagekit +++ b/test/verify/check-packagekit @@ -24,16 +24,6 @@ import time import packagelib import testlib -WAIT_SCRIPT = """ -for x in $(seq 1 200); do - if curl --insecure -s https://%(addr)s:8443/candlepin; then - break - else - sleep 1 - fi -done -""" - OSesWithoutTracer = ["debian-stable", "debian-testing", "ubuntu-2204", "ubuntu-stable", "fedora-coreos", "rhel4edge"] OSesWithoutKpatch = ["debian-*", "ubuntu-*", "arch", "fedora-*", "rhel4edge", "centos-*"] @@ -1275,7 +1265,7 @@ class TestUpdatesSubscriptions(packagelib.PackageCase): def register(self): # this fails with "Unable to find available subscriptions for all your installed products", but works anyway self.machine.execute( - "LC_ALL=C.UTF-8 subscription-manager register --insecure --serverurl https://10.111.112.100:8443/candlepin --org=admin --activationkey=awesome_os_pool || true") + "LC_ALL=C.UTF-8 subscription-manager register --serverurl https://services.cockpit.lan:8443/candlepin --org=admin --activationkey=awesome_os_pool || true") self.machine.execute("LC_ALL=C.UTF-8 subscription-manager attach --auto") def setUp(self): @@ -1297,11 +1287,15 @@ class TestUpdatesSubscriptions(packagelib.PackageCase): m.execute("mkdir -p /etc/pki/product") m.upload([product_file], "/etc/pki/product") - # make sure that rhsm skips certificate checks for the server - self.sed_file("s/insecure = 0/insecure = 1/g", "/etc/rhsm/rhsm.conf") + # set up CA + ca = self.candlepin.execute("cat /home/admin/candlepin/certs/candlepin-ca.crt") + m.write("/etc/pki/ca-trust/source/anchors/candlepin-ca.crt", ca) + m.write("/etc/hosts", "10.111.112.100 services.cockpit.lan\n", append=True) + m.execute("cp /etc/pki/ca-trust/source/anchors/candlepin-ca.crt /etc/rhsm/ca/candlepin-ca.pem") + m.execute("update-ca-trust") # Wait for the web service to be accessible - m.execute(WAIT_SCRIPT % {"addr": "10.111.112.100"}, timeout=360) + m.execute("until curl --fail --silent --show-error https://services.cockpit.lan:8443/candlepin/status; do sleep 1; done") self.update_icon = "#page_status_notification_updates svg" self.update_text = "#page_status_notification_updates" self.update_text_action = "#page_status_notification_updates a" diff --git a/test/verify/check-storage-mounting b/test/verify/check-storage-mounting index 5cf374d032bc..abbc4c7e6145 100755 --- a/test/verify/check-storage-mounting +++ b/test/verify/check-storage-mounting @@ -428,6 +428,11 @@ class TestStorageMountingLUKS(storagelib.StorageCase): self.login_and_go("/storage") + self.addCleanup(m.execute, + "umount /run/data || true;" + "cryptsetup close $(lsblk -lno NAME /dev/test/one | tail -1) || true;" + "vgremove --force test 2>/dev/null || true") + # Quickly make two logical volumes disk = self.add_ram_disk() b.wait_in_text("#drives", disk) diff --git a/test/verify/check-storage-used b/test/verify/check-storage-used index c15a88fd520f..4a0fb295b940 100755 --- a/test/verify/check-storage-used +++ b/test/verify/check-storage-used @@ -89,7 +89,21 @@ ExecStart=/usr/bin/sleep infinity b.wait_visible("#dialog tr:first-child button:contains(Currently in use)") b.assert_pixels('#dialog', "format-disk") self.dialog_apply() - self.dialog_wait_close() + try: + self.dialog_wait_close() + except testlib.Error: + if "Timed out waiting for object" in b.text("#dialog"): + # Sometimes /dev/sda1 is still held open by something + # immediately after locking it. This prevents the + # kernel from reading the new partition table. Let's + # just try again. + print("WARNING: Retrying partition table creation") + self.dialog_cancel() + self.dialog_wait_close() + b.click('button:contains(Create partition table)') + self.confirm() + else: + raise m.execute("! systemctl --quiet is-active keep-mnt-busy") diff --git a/test/verify/check-system-info b/test/verify/check-system-info index c90d981240ef..4433c62218c8 100755 --- a/test/verify/check-system-info +++ b/test/verify/check-system-info @@ -753,6 +753,7 @@ machine : 8561 spoof_threads(2, expect_link_present=True, expect_smt_state=self.expect_smt_default, cmdline=None) @testlib.skipImage("TODO: add Arch Linux grub entry support", "arch") + @testlib.timeout(1200) def testCPUSecurityMitigationsEnable(self): b = self.browser m = self.machine @@ -1041,6 +1042,8 @@ password=foobar b.click("#crypto-policy-button") func = b.wait_not_present if m.image.startswith('rhel-8') or m.image.startswith('centos-8') else b.wait_visible func(".pf-v5-c-menu__item-main .pf-v5-c-menu__item-text:contains('DEFAULT:SHA1')") + b.click("#crypto-policy-dialog button:contains('Cancel')") + b.wait_not_present("#crypto-policy-dialog") # Test if a new subpolicy can be set new_profile = "LEGACY:AD-SUPPORT" diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 3821c5f46fef..bbdf787a7b8b 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -84,6 +84,15 @@ ExecStart=/bin/true @testlib.skipDistroPackage() class CommonTests: + + def wait_discover(self): + with self.browser.wait_timeout(60): + self.browser.wait_attr("#realms-op-address", "data-discover", "done") + + def wait_address_helper(self, expected=None): + with self.browser.wait_timeout(60): + self.browser.wait_text("#realms-op-address-helper", expected or "Contacted domain") + @testlib.timeout(900) def testQualifiedUsers(self): m = self.machine @@ -94,10 +103,8 @@ class CommonTests: # Test that we reconnect on privileges change self.login_and_go("/system", superuser=False) - b.click("button:contains('Turn on administrative access')") - b.set_input_text("#switch-to-admin-access-password", "foobar") - b.click("button:contains('Authenticate')") - b.wait_not_present("#switch-to-admin-access-password") + b.wait_visible(f"{self.domain_sel}:disabled") + b.become_superuser() def wait_number_domains(n): if n == 0: @@ -108,22 +115,15 @@ class CommonTests: wait_number_domains(0) - def set_address(): - # old realmd/IPA don't support realmd auto-detection yet - if m.image == "rhel-8-7": - b.wait_attr("#realms-op-address", "data-discover", "done") - b.wait_val(self.op_address, "") - b.wait_not_present("#realms-op-address-helper") - b.set_input_text(self.op_address, "cockpit.lan") - else: - # on current OSes, domain and suggested admin get auto-detected + def wait_domain_detected(): + with b.wait_timeout(60): b.wait_val(self.op_address, "cockpit.lan") # Join cockpit.lan b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - set_address() - b.wait_text("#realms-op-address-helper", "Contacted domain") + wait_domain_detected() + self.wait_address_helper() # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, self.admin_password) @@ -150,13 +150,14 @@ class CommonTests: m.execute("! su -c klist " + self.admin_user) b.logout() + # wait until IPA user works + m.execute(f'while ! su - -c "echo {self.admin_password} | sudo -S true" {self.admin_user}@cockpit.lan; do ' + ' sleep 5; sss_cache -E || true; systemctl reset-failed sssd; systemctl try-restart sssd; done', + timeout=300) + # change existing local "admin" home dir to domain "admin" user m.execute(f"chown -R {self.admin_user}@cockpit.lan /home/admin") - # wait until IPA user works - m.execute('while ! su - -c "echo %s | sudo -S true" %s@cockpit.lan; do sleep 5; sss_cache -E || true; systemctl try-restart sssd; done' % ( - self.admin_password, self.admin_user), timeout=300) - # log in as domain admin and check that we can do privileged operations b.login_and_go('/system/services#/systemd-tmpfiles-clean.timer', user=f'{self.admin_user}@cockpit.lan', password=self.admin_password) b.wait_in_text("#statuses", "Running") @@ -206,7 +207,7 @@ class CommonTests: # b.assert_pixels("#realms-leave-dialog", "realm-leave", [".pf-v5-c-expandable-section__toggle-icon"]) b.click("#realms-op-leave") - with b.wait_timeout(30): + with b.wait_timeout(60): b.wait_not_present("#realms-leave-dialog") wait_number_domains(0) # re-enables hostname changing @@ -224,7 +225,7 @@ class CommonTests: # Send a wrong password b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - set_address() + wait_domain_detected() b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, "foo") b.click(f"#realms-join-dialog button{self.primary_btn_class}") @@ -260,10 +261,9 @@ class CommonTests: b.click(self.domain_sel) b.wait_popup("realms-join-dialog") # wait for auto-detection - set_address() + wait_domain_detected() b.set_input_text(self.op_address, "NOPE") - with b.wait_timeout(30): - b.wait_text("#realms-op-address-helper", "Domain could not be contacted") + self.wait_address_helper("Domain could not be contacted") b.wait_visible(f"#realms-join-dialog button{self.primary_btn_class}:disabled") b.click("#realms-join-dialog button.pf-m-link") b.wait_not_present("#realms-join-dialog") @@ -271,9 +271,9 @@ class CommonTests: # Join a domain with the server as address (input differs from domain name) b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() b.set_input_text(self.op_address, "f0.cockpit.lan") - b.wait_text("#realms-op-address-helper", "Contacted domain") + self.wait_address_helper() # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, self.admin_password) @@ -361,16 +361,17 @@ class CommonTests: self.login_and_go("/system") b.click("#system_information_domain_button") b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() + b.set_input_text("#realms-op-address", "cockpit.lan") - b.wait_text("#realms-op-address-helper", "Contacted domain") + self.wait_address_helper() b.set_input_text("#realms-op-admin", self.admin_user) b.set_input_text("#realms-op-admin-password", self.admin_password) b.click(f"#realms-join-dialog button{self.primary_btn_class}") with b.wait_timeout(300): b.wait_not_present("#realms-join-dialog") b.logout() - m.execute('while ! id alice; do sleep 5; systemctl restart sssd; done', timeout=300) + m.execute('while ! id alice; do sleep 5; done', timeout=300) # alice's certificate was written by testClientCertAuthentication() alice_cert_key = ['--cert', "/var/tmp/alice.pem", '--key', "/var/tmp/alice.key"] @@ -394,13 +395,21 @@ class CommonTests: # certificates; it just rejects cert requests. For interactive tests, grab src/tls/ca/alice.p12 and import # it into the browser. - def do_test(authopts, expected, not_expected=None, session_leader=None): + def do_test(authopts, expected, not_expected=None, session_leader=None, retry=False): m.start_cockpit(tls=True) - output = m.execute(['curl', '-ksS', '-D-', *authopts, 'https://localhost:9090/cockpit/login']) - for s in expected: - self.assertIn(s, output) - for s in (not_expected or []): - self.assertNotIn(s, output) + + def try_auth(): + output = m.execute(['curl', '-ksS', '-D-', *authopts, 'https://localhost:9090/cockpit/login']) + for s in expected: + self.assertIn(s, output) + for s in (not_expected or []): + self.assertNotIn(s, output) + return True + + if retry: + testlib.wait(try_auth, delay=5, tries=10) + else: + try_auth() # sessions/users often hang around in State=closing for a long time, ignore these if session_leader: @@ -426,11 +435,13 @@ class CommonTests: # from sssd self.allow_journal_messages("alice is not allowed to run sudo on x0. This incident will be reported.") + # occasional intermediate error during password auth + self.allow_journal_messages("cockpit-session: user account access failed: 4 alice: System error") # cert auth should not be enabled by default do_test(alice_cert_key, ["HTTP/1.1 401 Authentication required", '"authorize"']) - # password auth should work - do_test(alice_user_pass, ['HTTP/1.1 200 OK', '"csrf-token"'], session_leader='cockpit-session') + # password auth should work (but might need to be retried) + do_test(alice_user_pass, ['HTTP/1.1 200 OK', '"csrf-token"'], session_leader='cockpit-session', retry=True) # enable cert based auth m.write("/etc/cockpit/cockpit.conf", '[WebService]\nClientCertAuthentication = true\n', append=True) @@ -681,9 +692,9 @@ class TestIPA(TestRealms, CommonTests): # Join cockpit.lan b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() b.set_input_text(self.op_address, "cockpit.lan") - b.wait_in_text("#realms-op-address-helper", "Domain is not supported") + self.wait_address_helper("Domain is not supported") # no admin name auto-detection for unsupported domains b.wait_val(self.op_admin, "") b.set_input_text(self.op_admin, self.admin_user) @@ -820,8 +831,8 @@ ipa-advise enable-admins-sudo | sh -ex # ssh -K is supposed to forward the credentials cache, but doesn't; klist in the ssh session is empty # and there is no ccache; so, emulate what cockpit-ssh could eventually do and check that *if* the # session had the ticket forwarded, it *could* do sudo. See https://issues.redhat.com/browse/COCKPIT-643 - b.open("/@x0.cockpit.lan/system/terminal") with b.wait_timeout(60): + b.open("/@x0.cockpit.lan/system/terminal") b.enter_page("/system/terminal", host="x0.cockpit.lan") b.wait_in_text(".terminal .xterm-accessibility-tree", "alice") b.key_press(f"{ccache_env} sudo whoami\r") @@ -843,8 +854,6 @@ class TestAD(TestRealms, CommonTests): self.admin_password = "foobarFoo123" self.alice_password = 'WonderLand123' self.expected_server_software = "active-directory" - # necessary to run ldapmodify; FIXME: change this on the services image itself - self.machines['services'].execute("sed -i 's/-e/-e INSECURELDAP=true &/' /root/run-samba-domain") self.machines['services'].execute("/root/run-samba-domain") m = self.machine @@ -860,9 +869,6 @@ class TestAD(TestRealms, CommonTests): # similar to "ipa-advise enable-admins-sudo"? m.write("/etc/sudoers.d/domain-admins", r"%domain\ admins@COCKPIT.LAN ALL=(ALL) ALL") - # HACK: work around https://bugzilla.redhat.com/show_bug.cgi?id=1839805 - m.write("/etc/sssd/conf.d/rhbz1839805.conf", "[domain/cockpit.lan]\nad_gpo_access_control=disabled\n", perm="0600") - # HACK: Figure out why this happens self.allow_journal_messages(""".*didn't receive expected "authorize" message""", 'cockpit-session:$') @@ -890,7 +896,7 @@ class TestAD(TestRealms, CommonTests): # create another AD user self.machines['services'].execute(f"podman exec -i samba samba-tool user add alice {self.alice_password}") # ensure it works - m.execute('id alice') + m.execute('while ! id alice; do sleep 5; done', timeout=300) b.login_and_go('/system', user='alice', password=self.alice_password) b.wait_visible("#overview") b.logout() @@ -906,15 +912,19 @@ class TestAD(TestRealms, CommonTests): alice_cert = f.read().strip() # mangle into form palatable for LDAP alice_cert = ''.join([line for line in alice_cert.splitlines() if not line.startswith("----")]) - # set up an AD user and import their TLS certificate; avoid using the common "userCertificate;binary", - # as that does not work with Samba - services_machine.execute(r"""podman exec -i samba sh -exc ' -samba-tool user add alice %(alice_pass)s -printf "version: 1\ndn: cn=alice,cn=users,dc=cockpit,dc=lan\nchangetype: modify\nadd: userCertificate\nuserCertificate: %(alice_cert)s\n" | \ - ldapmodify -v -U Administrator -w '%(admin_pass)s' + # set up an AD user and import their TLS certificate + services_machine.write("/tmp/alice_edit", f'''#!/bin/sh -eu +sed -i "/^$/d" "$1" +echo "userCertificate: {alice_cert}" >> "$1" +''', perm="755") + services_machine.execute(f""" +podman cp /tmp/alice_edit samba:/tmp/ +podman exec -i samba sh -exc ' +samba-tool user add alice {self.alice_password} +samba-tool user edit --editor=/tmp/alice_edit alice # for debugging: -ldapsearch -v -U Administrator -w '%(admin_pass)s' -b 'cn=alice,cn=users,dc=cockpit,dc=lan' -' """ % {"alice_pass": self.alice_password, "admin_pass": self.admin_password, "alice_cert": alice_cert}) +samba-tool user show alice +' """, stdout=None) # set up sssd for certificate mapping to AD # see sssd.conf(5) "CERTIFICATE MAPPING SECTION" and sss-certmap(5) @@ -1217,7 +1227,8 @@ class TestPackageInstall(packagelib.PackageCase): b.wait_visible("#realms-join-dialog") # no auto-detected domain/admin - b.wait_attr("#realms-op-address", "data-discover", "done") + with b.wait_timeout(60): + b.wait_attr("#realms-op-address", "data-discover", "done") self.assertEqual(b.val("#realms-op-address"), "") self.assertEqual(b.val("#realms-op-admin"), "") diff --git a/test/verify/check-system-services b/test/verify/check-system-services index 2a8f99f8da21..6514bd10313c 100755 --- a/test/verify/check-system-services +++ b/test/verify/check-system-services @@ -162,10 +162,11 @@ trap "echo STOP" 0 if [ $(id -u) -eq 0 ]; then journalctl --sync -else - # increase the chance for journal to catch up - sleep 5 fi + +# increase the chance for journal to catch up +sleep 5 + echo START while true; do sleep 5 @@ -590,11 +591,13 @@ WantedBy=default.target b.wait_visible(self.svc_sel('test-onboot.timer')) b.wait_text(self.svc_sel('test-onboot.timer') + ' .service-unit-triggers', '') self.run_systemctl(user, "start test-onboot.timer") - # Check the next run. Since it triggers 200mins after the boot, it might be today or tomorrow (after 20:40) - today_stamp = int(m.execute("date +%s").strip()) - time_zone = b.eval_js("Intl.DateTimeFormat().resolvedOptions().timeZone") # get browser's time zone - today_plus_200min = m.execute(f"TZ='{time_zone}' date --date=@{today_stamp + 200 * 60} '+%b %-d, %Y'").strip() - b.wait_in_text(self.svc_sel('test-onboot.timer') + ' .service-unit-next-trigger', today_plus_200min) + # Check the next run. Since it triggers 200mins after the boot, it might be today or tomorrow + # this is too racy to predict accurately + today = m.execute("date '+%b %-d, %Y'").strip() + tomorrow = m.execute("date --date tomorrow '+%b %-d, %Y'").strip() + sel_next = self.svc_sel('test-onboot.timer') + ' .service-unit-next-trigger' + b.wait_in_text(sel_next, ", ") + self.assertRegex(b.text(sel_next), f"{today}|{tomorrow}") b.wait_in_text(self.svc_sel('test-onboot.timer') + ' .service-unit-last-trigger', "unknown") # last trigger self.run_systemctl(user, "stop test-onboot.timer")