| 21 Oct 2021 |
K900 | Stack trace? | 19:52:47 |
Janne Heß | qemu or python? | 19:52:57 |
K900 | QEMU | 19:53:10 |
Janne Heß | /proc/44797/task/44797/stack
[<0>] do_sys_poll+0x3ab/0x5b0
[<0>] __x64_sys_ppoll+0xbc/0x150
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
/proc/44797/task/44805/stack
[<0>] futex_wait_queue_me+0xb6/0x110
[<0>] futex_wait+0xe9/0x240
[<0>] do_futex+0x174/0xbf0
[<0>] __x64_sys_futex+0x146/0x1c0
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
/proc/44797/task/44810/stack
[<0>] kvm_vcpu_block+0x58/0x2f0 [kvm]
[<0>] kvm_arch_vcpu_ioctl_run+0x6c4/0x1720 [kvm]
[<0>] kvm_vcpu_ioctl+0x211/0x5a0 [kvm]
[<0>] __x64_sys_ioctl+0x83/0xb0
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
/proc/44797/stack
[<0>] do_sys_poll+0x3ab/0x5b0
[<0>] __x64_sys_ppoll+0xbc/0x150
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
| 19:53:31 |
K900 | What state is the QEMU process in? | 19:54:03 |
Janne Heß | sleeping | 19:54:07 |
K900 | Try to SIGKILL it manually? | 19:54:42 |
K900 | Oh god wait fuck | 19:56:00 |
K900 | I think I know what the problem is | 19:56:05 |
Janne Heß | yeah that works | 19:56:06 |
K900 | It could be ordering the atexit hooks wrong | 19:56:28 |
K900 | I'm not sure if they have a deterministic order | 19:56:36 |
K900 | diff --git a/nixos/lib/test-driver/test-driver.py b/nixos/lib/test-driver/test-driver.py
index 3ee8b3227c6..dd50696bfce 100755
--- a/nixos/lib/test-driver/test-driver.py
+++ b/nixos/lib/test-driver/test-driver.py
@@ -8,7 +8,6 @@ import queue
import io
import threading
import argparse
-import atexit
import base64
import codecs
import os
@@ -1128,11 +1127,13 @@ class Driver:
for cmd in cmd(start_scripts)
]
- @atexit.register
- def clean_up() -> None:
- with rootlog.nested("clean up"):
- for machine in self.machines:
- machine.release()
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *_):
+ with rootlog.nested("clean up"):
+ for machine in self.machines:
+ machine.release()
def subtest(self, name: str) -> Iterator[None]:
"""Group logs under a given test name"""
@@ -1307,14 +1308,13 @@ if __name__ == "__main__":
if not args.keep_vm_state:
rootlog.info("Machine state will be reset. To keep it, pass --keep-vm-state")
- driver = Driver(
+ with Driver(
args.start_scripts, args.vlans, args.testscript.read_text(), args.keep_vm_state
- )
-
- if args.interactive:
- ptpython.repl.embed(driver.test_symbols(), {})
- else:
- tic = time.time()
- driver.run_tests()
- toc = time.time()
- rootlog.info(f"test script finished in {(toc-tic):.2f}s")
+ ) as driver:
+ if args.interactive:
+ ptpython.repl.embed(driver.test_symbols(), {})
+ else:
+ tic = time.time()
+ driver.run_tests()
+ toc = time.time()
+ rootlog.info(f"test script finished in {(toc-tic):.2f}s")
| 19:58:16 |
K900 | Try this | 19:58:17 |
K900 | Ugh | 20:02:35 |
K900 | mypy | 20:02:36 |
K900 | def __enter__(self) -> "Driver":
return self
def __exit__(self, *_: Any) -> None:
with rootlog.nested("clean up"):
for machine in self.machines:
machine.release()
| 20:02:38 |
K900 | This should be correct (with type hints added) | 20:02:45 |
Janne Heß | it exits \o/ | 20:06:13 |
K900 | Run it a few more times? | 20:07:10 |
Janne Heß | doing that rn | 20:07:19 |
Janne Heß | you could do that in parallel, gives us twice the computing ;) | 20:07:32 |
K900 | Do you have a specific commit? | 20:07:44 |
K900 | Just to make sure we're on the same code | 20:07:56 |
Janne Heß | if chunk == "":
sys.exit(1)
| 20:08:06 |
K900 | Also the previous version has literally never locked up for me either | 20:08:12 |
Janne Heß | * diff --git a/nixos/lib/test-driver/test-driver.py b/nixos/lib/test-driver/test-driver.py
index 70a9ada3f3c..23278d1ce7b 100755
--- a/nixos/lib/test-driver/test-driver.py
+++ b/nixos/lib/test-driver/test-driver.py
@@ -8,7 +8,6 @@ import queue
import io
import threading
import argparse
-import atexit
import base64
import codecs
import os
@@ -593,6 +592,7 @@ class Machine:
status_code_pattern = re.compile(r"(.*)\|\!=EOF\s+(\d+)", flags=re.DOTALL)
while True:
+ print(f"current output is '{output}'")
chunk = self.shell.recv(4096).decode(errors="ignore")
match = status_code_pattern.match(output + chunk)
if match:
@@ -1034,7 +1034,7 @@ class Machine:
assert self.monitor
assert self.serial_thread
- self.process.terminate()
+ self.process.kill()
self.shell.close()
self.monitor.close()
self.serial_thread.join()
@@ -1128,11 +1128,13 @@ class Driver:
for cmd in cmd(start_scripts)
]
- @atexit.register
- def clean_up() -> None:
- with rootlog.nested("clean up"):
- for machine in self.machines:
- machine.release()
+ def __enter__(self) -> "Driver":
+ return self
+
+ def __exit__(self, *_: Any) -> None:
+ with rootlog.nested("clean up"):
+ for machine in self.machines:
+ machine.release()
def subtest(self, name: str) -> Iterator[None]:
"""Group logs under a given test name"""
@@ -1307,14 +1309,13 @@ if __name__ == "__main__":
if not args.keep_vm_state:
rootlog.info("Machine state will be reset. To keep it, pass --keep-vm-state")
- driver = Driver(
+ with Driver(
args.start_scripts, args.vlans, args.testscript.read_text(), args.keep_vm_state
- )
-
- if args.interactive:
- ptpython.repl.embed(driver.test_symbols(), {})
- else:
- tic = time.time()
- driver.run_tests()
- toc = time.time()
- rootlog.info(f"test script finished in {(toc-tic):.2f}s")
+ ) as driver:
+ if args.interactive:
+ ptpython.repl.embed(driver.test_symbols(), {})
+ else:
+ tic = time.time()
+ driver.run_tests()
+ toc = time.time()
+ rootlog.info(f"test script finished in {(toc-tic):.2f}s")
| 20:08:21 |
K900 | What test are you runnign? | 20:09:59 |
K900 | * What test are you running? | 20:10:01 |
Janne Heß | switch-test because I was initially testing something else | 20:10:14 |