Fix QEMU memory stats JSON mode
[libvirt.git] / src / qemu / qemu_driver.c
1 /*
2  * driver.c: core driver methods for managing qemu guests
3  *
4  * Copyright (C) 2006, 2007, 2008, 2009, 2010 Red Hat, Inc.
5  * Copyright (C) 2006 Daniel P. Berrange
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
20  *
21  * Author: Daniel P. Berrange <berrange@redhat.com>
22  */
23
24 #include <config.h>
25
26 #include <sys/types.h>
27 #include <sys/poll.h>
28 #include <sys/time.h>
29 #include <dirent.h>
30 #include <limits.h>
31 #include <string.h>
32 #include <stdbool.h>
33 #include <stdio.h>
34 #include <strings.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/utsname.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #include <signal.h>
43 #include <paths.h>
44 #include <pwd.h>
45 #include <stdio.h>
46 #include <sys/wait.h>
47 #include <sys/ioctl.h>
48 #include <sys/un.h>
49
50 #ifdef __linux__
51 # include <sys/vfs.h>
52 # ifndef NFS_SUPER_MAGIC
53 #  define NFS_SUPER_MAGIC 0x6969
54 # endif /* NFS_SUPER_MAGIC */
55 #endif /* __linux__ */
56
57 #include "virterror_internal.h"
58 #include "logging.h"
59 #include "datatypes.h"
60 #include "qemu_driver.h"
61 #include "qemu_conf.h"
62 #include "qemu_monitor.h"
63 #include "qemu_bridge_filter.h"
64 #include "c-ctype.h"
65 #include "event.h"
66 #include "buf.h"
67 #include "util.h"
68 #include "nodeinfo.h"
69 #include "stats_linux.h"
70 #include "capabilities.h"
71 #include "memory.h"
72 #include "uuid.h"
73 #include "domain_conf.h"
74 #include "node_device_conf.h"
75 #include "pci.h"
76 #include "hostusb.h"
77 #include "processinfo.h"
78 #include "qemu_security_stacked.h"
79 #include "qemu_security_dac.h"
80 #include "cgroup.h"
81 #include "libvirt_internal.h"
82 #include "xml.h"
83 #include "cpu/cpu.h"
84 #include "macvtap.h"
85 #include "nwfilter/nwfilter_gentech_driver.h"
86 #include "hooks.h"
87
88
89 #define VIR_FROM_THIS VIR_FROM_QEMU
90
/*
 * At most one job may be active on a domain at any time.
 * A job covers *every* monitor command -- including those that
 * merely query information -- not only state-changing actions.
 */
enum qemuDomainJob {
    /* Must stay 0 so that plain "if (jobActive)" conditions work */
    QEMU_JOB_NONE        = 0,
    QEMU_JOB_UNSPECIFIED = 1,
    QEMU_JOB_MIGRATION   = 2,
};
99
/* Bit flags that can be raised against the currently running job */
enum qemuDomainJobSignals {
    /* Request job cancellation */
    QEMU_JOB_SIGNAL_CANCEL           = (1 << 0),
    /* Request VM suspend to finish live migration offline */
    QEMU_JOB_SIGNAL_SUSPEND          = (1 << 1),
    /* Request migration downtime change */
    QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME = (1 << 2),
};
105
/* Payload accompanying the job signals that carry a value */
struct qemuDomainJobSignalsData {
    /* Data for QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME */
    unsigned long long migrateDowntime;
};
109
110 typedef struct _qemuDomainObjPrivate qemuDomainObjPrivate;
111 typedef qemuDomainObjPrivate *qemuDomainObjPrivatePtr;
112 struct _qemuDomainObjPrivate {
113     virCond jobCond; /* Use in conjunction with main virDomainObjPtr lock */
114     enum qemuDomainJob jobActive;   /* Currently running job */
115     unsigned int jobSignals;        /* Signals for running job */
116     struct qemuDomainJobSignalsData jobSignalsData; /* Signal specific data */
117     virDomainJobInfo jobInfo;
118     unsigned long long jobStart;
119
120     qemuMonitorPtr mon;
121     virDomainChrDefPtr monConfig;
122     int monJSON;
123
124     int nvcpupids;
125     int *vcpupids;
126
127     qemuDomainPCIAddressSetPtr pciaddrs;
128     int persistentAddrs;
129 };
130
131 static int qemudShutdown(void);
132
133 static void qemuDriverLock(struct qemud_driver *driver)
134 {
135     virMutexLock(&driver->lock);
136 }
137 static void qemuDriverUnlock(struct qemud_driver *driver)
138 {
139     virMutexUnlock(&driver->lock);
140 }
141
142 static void qemuDomainEventFlush(int timer, void *opaque);
143 static void qemuDomainEventQueue(struct qemud_driver *driver,
144                                  virDomainEventPtr event);
145
146 static int qemudStartVMDaemon(virConnectPtr conn,
147                               struct qemud_driver *driver,
148                               virDomainObjPtr vm,
149                               const char *migrateFrom,
150                               int stdin_fd);
151
152 static void qemudShutdownVMDaemon(struct qemud_driver *driver,
153                                   virDomainObjPtr vm);
154
155 static int qemudDomainGetMaxVcpus(virDomainPtr dom);
156
157 static int qemuDetectVcpuPIDs(struct qemud_driver *driver,
158                               virDomainObjPtr vm);
159
160 static int qemuUpdateActivePciHostdevs(struct qemud_driver *driver,
161                                        virDomainDefPtr def);
162
163 static struct qemud_driver *qemu_driver = NULL;
164
165
166 static void *qemuDomainObjPrivateAlloc(void)
167 {
168     qemuDomainObjPrivatePtr priv;
169
170     if (VIR_ALLOC(priv) < 0)
171         return NULL;
172
173     return priv;
174 }
175
176 static void qemuDomainObjPrivateFree(void *data)
177 {
178     qemuDomainObjPrivatePtr priv = data;
179
180     qemuDomainPCIAddressSetFree(priv->pciaddrs);
181     virDomainChrDefFree(priv->monConfig);
182     VIR_FREE(priv->vcpupids);
183
184     /* This should never be non-NULL if we get here, but just in case... */
185     if (priv->mon) {
186         VIR_ERROR0("Unexpected QEMU monitor still active during domain deletion");
187         qemuMonitorClose(priv->mon);
188     }
189     VIR_FREE(priv);
190 }
191
192
193 static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data)
194 {
195     qemuDomainObjPrivatePtr priv = data;
196     const char *monitorpath;
197
198     /* priv->monitor_chr is set only for qemu */
199     if (priv->monConfig) {
200         switch (priv->monConfig->type) {
201         case VIR_DOMAIN_CHR_TYPE_UNIX:
202             monitorpath = priv->monConfig->data.nix.path;
203             break;
204         default:
205         case VIR_DOMAIN_CHR_TYPE_PTY:
206             monitorpath = priv->monConfig->data.file.path;
207             break;
208         }
209
210         virBufferEscapeString(buf, "  <monitor path='%s'", monitorpath);
211         if (priv->monJSON)
212             virBufferAddLit(buf, " json='1'");
213         virBufferVSprintf(buf, " type='%s'/>\n",
214                           virDomainChrTypeToString(priv->monConfig->type));
215     }
216
217
218     if (priv->nvcpupids) {
219         int i;
220         virBufferAddLit(buf, "  <vcpus>\n");
221         for (i = 0 ; i < priv->nvcpupids ; i++) {
222             virBufferVSprintf(buf, "    <vcpu pid='%d'/>\n", priv->vcpupids[i]);
223         }
224         virBufferAddLit(buf, "  </vcpus>\n");
225     }
226
227     return 0;
228 }
229
230 static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data)
231 {
232     qemuDomainObjPrivatePtr priv = data;
233     char *monitorpath;
234     char *tmp;
235     int n, i;
236     xmlNodePtr *nodes = NULL;
237
238     if (VIR_ALLOC(priv->monConfig) < 0) {
239         virReportOOMError();
240         goto error;
241     }
242
243     if (!(priv->monConfig->info.alias = strdup("monitor"))) {
244         virReportOOMError();
245         goto error;
246     }
247
248     if (!(monitorpath =
249           virXPathString("string(./monitor[1]/@path)", ctxt))) {
250         qemuReportError(VIR_ERR_INTERNAL_ERROR,
251                         "%s", _("no monitor path"));
252         goto error;
253     }
254
255     tmp = virXPathString("string(./monitor[1]/@type)", ctxt);
256     if (tmp)
257         priv->monConfig->type = virDomainChrTypeFromString(tmp);
258     else
259         priv->monConfig->type = VIR_DOMAIN_CHR_TYPE_PTY;
260     VIR_FREE(tmp);
261
262     if (virXPathBoolean("count(./monitor[@json = '1']) > 0", ctxt)) {
263         priv->monJSON = 1;
264     } else {
265         priv->monJSON = 0;
266     }
267
268     switch (priv->monConfig->type) {
269     case VIR_DOMAIN_CHR_TYPE_PTY:
270         priv->monConfig->data.file.path = monitorpath;
271         break;
272     case VIR_DOMAIN_CHR_TYPE_UNIX:
273         priv->monConfig->data.nix.path = monitorpath;
274         break;
275     default:
276         VIR_FREE(monitorpath);
277         qemuReportError(VIR_ERR_INTERNAL_ERROR,
278                         _("unsupported monitor type '%s'"),
279                         virDomainChrTypeToString(priv->monConfig->type));
280         goto error;
281     }
282
283     n = virXPathNodeSet("./vcpus/vcpu", ctxt, &nodes);
284     if (n < 0)
285         goto error;
286     if (n) {
287         priv->nvcpupids = n;
288         if (VIR_REALLOC_N(priv->vcpupids, priv->nvcpupids) < 0) {
289             virReportOOMError();
290             goto error;
291         }
292
293         for (i = 0 ; i < n ; i++) {
294             char *pidstr = virXMLPropString(nodes[i], "pid");
295             if (!pidstr)
296                 goto error;
297
298             if (virStrToLong_i(pidstr, NULL, 10, &(priv->vcpupids[i])) < 0) {
299                 VIR_FREE(pidstr);
300                 goto error;
301             }
302             VIR_FREE(pidstr);
303         }
304         VIR_FREE(nodes);
305     }
306
307     return 0;
308
309 error:
310     virDomainChrDefFree(priv->monConfig);
311     priv->monConfig = NULL;
312     VIR_FREE(nodes);
313     return -1;
314 }
315
316
317
318 /*
319  * obj must be locked before calling, qemud_driver must NOT be locked
320  *
321  * This must be called by anything that will change the VM state
322  * in any way, or anything that will use the QEMU monitor.
323  *
324  * Upon successful return, the object will have its ref count increased,
325  * successful calls must be followed by EndJob eventually
326  */
327
328 /* Give up waiting for mutex after 30 seconds */
329 #define QEMU_JOB_WAIT_TIME (1000ull * 30)
330
331 static int qemuDomainObjBeginJob(virDomainObjPtr obj) ATTRIBUTE_RETURN_CHECK;
332 static int qemuDomainObjBeginJob(virDomainObjPtr obj)
333 {
334     qemuDomainObjPrivatePtr priv = obj->privateData;
335     struct timeval now;
336     unsigned long long then;
337
338     if (gettimeofday(&now, NULL) < 0) {
339         virReportSystemError(errno, "%s",
340                              _("cannot get time of day"));
341         return -1;
342     }
343     then = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
344     then += QEMU_JOB_WAIT_TIME;
345
346     virDomainObjRef(obj);
347
348     while (priv->jobActive) {
349         if (virCondWaitUntil(&priv->jobCond, &obj->lock, then) < 0) {
350             virDomainObjUnref(obj);
351             if (errno == ETIMEDOUT)
352                 qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
353                                 "%s", _("cannot acquire state change lock"));
354             else
355                 virReportSystemError(errno,
356                                      "%s", _("cannot acquire job mutex"));
357             return -1;
358         }
359     }
360     priv->jobActive = QEMU_JOB_UNSPECIFIED;
361     priv->jobSignals = 0;
362     memset(&priv->jobSignalsData, 0, sizeof(priv->jobSignalsData));
363     priv->jobStart = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
364     memset(&priv->jobInfo, 0, sizeof(priv->jobInfo));
365
366     return 0;
367 }
368
369 /*
370  * obj must be locked before calling, qemud_driver must be locked
371  *
372  * This must be called by anything that will change the VM state
373  * in any way, or anything that will use the QEMU monitor.
374  */
375 static int qemuDomainObjBeginJobWithDriver(struct qemud_driver *driver,
376                                            virDomainObjPtr obj) ATTRIBUTE_RETURN_CHECK;
377 static int qemuDomainObjBeginJobWithDriver(struct qemud_driver *driver,
378                                            virDomainObjPtr obj)
379 {
380     qemuDomainObjPrivatePtr priv = obj->privateData;
381     struct timeval now;
382     unsigned long long then;
383
384     if (gettimeofday(&now, NULL) < 0) {
385         virReportSystemError(errno, "%s",
386                              _("cannot get time of day"));
387         return -1;
388     }
389     then = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
390     then += QEMU_JOB_WAIT_TIME;
391
392     virDomainObjRef(obj);
393     qemuDriverUnlock(driver);
394
395     while (priv->jobActive) {
396         if (virCondWaitUntil(&priv->jobCond, &obj->lock, then) < 0) {
397             virDomainObjUnref(obj);
398             if (errno == ETIMEDOUT)
399                 qemuReportError(VIR_ERR_OPERATION_TIMEOUT,
400                                 "%s", _("cannot acquire state change lock"));
401             else
402                 virReportSystemError(errno,
403                                      "%s", _("cannot acquire job mutex"));
404             qemuDriverLock(driver);
405             return -1;
406         }
407     }
408     priv->jobActive = QEMU_JOB_UNSPECIFIED;
409     priv->jobSignals = 0;
410     memset(&priv->jobSignalsData, 0, sizeof(priv->jobSignalsData));
411     priv->jobStart = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
412     memset(&priv->jobInfo, 0, sizeof(priv->jobInfo));
413
414     virDomainObjUnlock(obj);
415     qemuDriverLock(driver);
416     virDomainObjLock(obj);
417
418     return 0;
419 }
420
/*
 * obj must be locked before calling, qemud_driver does not matter
 *
 * To be called after completing the work associated with the
 * earlier qemuDomainObjBeginJob() call
 *
 * Returns the remaining refcount on 'obj', which may be 0 to
 * indicate it was deleted
 */
430 static int ATTRIBUTE_RETURN_CHECK qemuDomainObjEndJob(virDomainObjPtr obj)
431 {
432     qemuDomainObjPrivatePtr priv = obj->privateData;
433
434     priv->jobActive = QEMU_JOB_NONE;
435     priv->jobSignals = 0;
436     memset(&priv->jobSignalsData, 0, sizeof(priv->jobSignalsData));
437     priv->jobStart = 0;
438     memset(&priv->jobInfo, 0, sizeof(priv->jobInfo));
439     virCondSignal(&priv->jobCond);
440
441     return virDomainObjUnref(obj);
442 }
443
444
/*
 * obj must be locked before calling, qemud_driver must be unlocked
 *
 * To be called immediately before any QEMU monitor API call.
 * Must have already called qemuDomainObjBeginJob().
 *
 * To be followed with qemuDomainObjExitMonitor() once complete
 */
453 static void qemuDomainObjEnterMonitor(virDomainObjPtr obj)
454 {
455     qemuDomainObjPrivatePtr priv = obj->privateData;
456
457     qemuMonitorLock(priv->mon);
458     qemuMonitorRef(priv->mon);
459     virDomainObjUnlock(obj);
460 }
461
462
463 /* obj must NOT be locked before calling, qemud_driver must be unlocked
464  *
465  * Should be paired with an earlier  qemuDomainObjEnterMonitor() call
466  */
467 static void qemuDomainObjExitMonitor(virDomainObjPtr obj)
468 {
469     qemuDomainObjPrivatePtr priv = obj->privateData;
470     int refs;
471
472     refs = qemuMonitorUnref(priv->mon);
473
474     if (refs > 0)
475         qemuMonitorUnlock(priv->mon);
476
477     virDomainObjLock(obj);
478
479     if (refs == 0) {
480         virDomainObjUnref(obj);
481         priv->mon = NULL;
482     }
483 }
484
485
/*
 * obj must be locked before calling, qemud_driver must be locked
 *
 * To be called immediately before any QEMU monitor API call.
 * Must have already called qemuDomainObjBeginJob().
 *
 * To be followed with qemuDomainObjExitMonitorWithDriver() once complete
 */
494 static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj)
495 {
496     qemuDomainObjPrivatePtr priv = obj->privateData;
497
498     qemuMonitorLock(priv->mon);
499     qemuMonitorRef(priv->mon);
500     virDomainObjUnlock(obj);
501     qemuDriverUnlock(driver);
502 }
503
504
/* obj must NOT be locked before calling, qemud_driver must be unlocked,
 * and will be locked after returning
 *
 * Should be paired with an earlier qemuDomainObjEnterMonitorWithDriver() call
 */
510 static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj)
511 {
512     qemuDomainObjPrivatePtr priv = obj->privateData;
513     int refs;
514
515     refs = qemuMonitorUnref(priv->mon);
516
517     if (refs > 0)
518         qemuMonitorUnlock(priv->mon);
519
520     qemuDriverLock(driver);
521     virDomainObjLock(obj);
522
523     if (refs == 0) {
524         virDomainObjUnref(obj);
525         priv->mon = NULL;
526     }
527 }
528
529
530 static int qemuCgroupControllerActive(struct qemud_driver *driver,
531                                       int controller)
532 {
533     if (driver->cgroup == NULL)
534         return 0;
535     if (driver->cgroupControllers & (1 << controller))
536         return 1;
537     return 0;
538 }
539
540 static int
541 qemudLogFD(struct qemud_driver *driver, const char* name)
542 {
543     char logfile[PATH_MAX];
544     mode_t logmode;
545     int ret, fd = -1;
546
547     if ((ret = snprintf(logfile, sizeof(logfile), "%s/%s.log",
548                         driver->logDir, name))
549         < 0 || ret >= sizeof(logfile)) {
550         virReportOOMError();
551         return -1;
552     }
553
554     logmode = O_CREAT | O_WRONLY;
555     /* Only logrotate files in /var/log, so only append if running privileged */
556     if (driver->privileged)
557         logmode |= O_APPEND;
558     else
559         logmode |= O_TRUNC;
560
561     if ((fd = open(logfile, logmode, S_IRUSR | S_IWUSR)) < 0) {
562         virReportSystemError(errno,
563                              _("failed to create logfile %s"),
564                              logfile);
565         return -1;
566     }
567     if (virSetCloseExec(fd) < 0) {
568         virReportSystemError(errno, "%s",
569                              _("Unable to set VM logfile close-on-exec flag"));
570         close(fd);
571         return -1;
572     }
573     return fd;
574 }
575
576
577 static int
578 qemudLogReadFD(const char* logDir, const char* name, off_t pos)
579 {
580     char logfile[PATH_MAX];
581     mode_t logmode = O_RDONLY;
582     int ret, fd = -1;
583
584     if ((ret = snprintf(logfile, sizeof(logfile), "%s/%s.log", logDir, name))
585         < 0 || ret >= sizeof(logfile)) {
586         qemuReportError(VIR_ERR_INTERNAL_ERROR,
587                         _("failed to build logfile name %s/%s.log"),
588                         logDir, name);
589         return -1;
590     }
591
592
593     if ((fd = open(logfile, logmode)) < 0) {
594         virReportSystemError(errno,
595                              _("failed to create logfile %s"),
596                              logfile);
597         return -1;
598     }
599     if (virSetCloseExec(fd) < 0) {
600         virReportSystemError(errno, "%s",
601                              _("Unable to set VM logfile close-on-exec flag"));
602         close(fd);
603         return -1;
604     }
605     if (pos < 0 || lseek(fd, pos, SEEK_SET) < 0) {
606       virReportSystemError(pos < 0 ? 0 : errno,
607                              _("Unable to seek to %lld in %s"),
608                              (long long) pos, logfile);
609         close(fd);
610     }
611     return fd;
612 }
613
614
615 struct qemuAutostartData {
616     struct qemud_driver *driver;
617     virConnectPtr conn;
618 };
619 static void
620 qemuAutostartDomain(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaque)
621 {
622     virDomainObjPtr vm = payload;
623     struct qemuAutostartData *data = opaque;
624
625     virDomainObjLock(vm);
626     if (vm->autostart &&
627         !virDomainObjIsActive(vm)) {
628         int ret;
629
630         virResetLastError();
631         ret = qemudStartVMDaemon(data->conn, data->driver, vm, NULL, -1);
632         if (ret < 0) {
633             virErrorPtr err = virGetLastError();
634             VIR_ERROR(_("Failed to autostart VM '%s': %s"),
635                       vm->def->name,
636                       err ? err->message : "");
637         } else {
638             virDomainEventPtr event =
639                 virDomainEventNewFromObj(vm,
640                                          VIR_DOMAIN_EVENT_STARTED,
641                                          VIR_DOMAIN_EVENT_STARTED_BOOTED);
642             if (event)
643                 qemuDomainEventQueue(data->driver, event);
644         }
645     }
646     virDomainObjUnlock(vm);
647 }
648
649 static void
650 qemudAutostartConfigs(struct qemud_driver *driver) {
651     /* XXX: Figure out a better way todo this. The domain
652      * startup code needs a connection handle in order
653      * to lookup the bridge associated with a virtual
654      * network
655      */
656     virConnectPtr conn = virConnectOpen(driver->privileged ?
657                                         "qemu:///system" :
658                                         "qemu:///session");
659     /* Ignoring NULL conn which is mostly harmless here */
660     struct qemuAutostartData data = { driver, conn };
661
662     qemuDriverLock(driver);
663     virHashForEach(driver->domains.objs, qemuAutostartDomain, &data);
664     qemuDriverUnlock(driver);
665
666     if (conn)
667         virConnectClose(conn);
668 }
669
670
671 /**
672  * qemudRemoveDomainStatus
673  *
674  * remove all state files of a domain from statedir
675  *
676  * Returns 0 on success
677  */
678 static int
679 qemudRemoveDomainStatus(struct qemud_driver *driver,
680                         virDomainObjPtr vm)
681 {
682     char ebuf[1024];
683     char *file = NULL;
684
685     if (virAsprintf(&file, "%s/%s.xml", driver->stateDir, vm->def->name) < 0) {
686         virReportOOMError();
687         return(-1);
688     }
689
690     if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
691         VIR_WARN(_("Failed to remove domain XML for %s: %s"),
692                  vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));
693     VIR_FREE(file);
694
695     if (virFileDeletePid(driver->stateDir, vm->def->name) != 0)
696         VIR_WARN(_("Failed to remove PID file for %s: %s"),
697                  vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));
698
699
700     return 0;
701 }
702
703
704 /*
705  * This is a callback registered with a qemuMonitorPtr  instance,
706  * and to be invoked when the monitor console hits an end of file
707  * condition, or error, thus indicating VM shutdown should be
708  * performed
709  */
710 static void
711 qemuHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
712                      virDomainObjPtr vm,
713                      int hasError) {
714     struct qemud_driver *driver = qemu_driver;
715     virDomainEventPtr event = NULL;
716
717     VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
718     virDomainObjLock(vm);
719
720     event = virDomainEventNewFromObj(vm,
721                                      VIR_DOMAIN_EVENT_STOPPED,
722                                      hasError ?
723                                      VIR_DOMAIN_EVENT_STOPPED_FAILED :
724                                      VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
725
726     qemudShutdownVMDaemon(driver, vm);
727     if (!vm->persistent)
728         virDomainRemoveInactive(&driver->domains, vm);
729     else
730         virDomainObjUnlock(vm);
731
732     if (event) {
733         qemuDriverLock(driver);
734         qemuDomainEventQueue(driver, event);
735         qemuDriverUnlock(driver);
736     }
737 }
738
739
740 static virDomainDiskDefPtr
741 findDomainDiskByPath(virDomainObjPtr vm,
742                      const char *path)
743 {
744     int i;
745
746     for (i = 0; i < vm->def->ndisks; i++) {
747         virDomainDiskDefPtr disk;
748
749         disk = vm->def->disks[i];
750         if (disk->src != NULL && STREQ(disk->src, path))
751             return disk;
752     }
753
754     qemuReportError(VIR_ERR_INTERNAL_ERROR,
755                     _("no disk found with path %s"),
756                     path);
757     return NULL;
758 }
759
760 static virDomainDiskDefPtr
761 findDomainDiskByAlias(virDomainObjPtr vm,
762                       const char *alias)
763 {
764     int i;
765
766     for (i = 0; i < vm->def->ndisks; i++) {
767         virDomainDiskDefPtr disk;
768
769         disk = vm->def->disks[i];
770         if (disk->info.alias != NULL && STREQ(disk->info.alias, alias))
771             return disk;
772     }
773
774     qemuReportError(VIR_ERR_INTERNAL_ERROR,
775                     _("no disk found with alias %s"),
776                     alias);
777     return NULL;
778 }
779
780 static int
781 getVolumeQcowPassphrase(virConnectPtr conn,
782                         virDomainDiskDefPtr disk,
783                         char **secretRet,
784                         size_t *secretLen)
785 {
786     virSecretPtr secret;
787     char *passphrase;
788     unsigned char *data;
789     size_t size;
790     int ret = -1;
791     virStorageEncryptionPtr enc;
792
793     if (!disk->encryption) {
794         qemuReportError(VIR_ERR_INTERNAL_ERROR,
795                         _("disk %s does not have any encryption information"),
796                         disk->src);
797         return -1;
798     }
799     enc = disk->encryption;
800
801     if (!conn) {
802         qemuReportError(VIR_ERR_NO_SUPPORT,
803                         "%s", _("cannot find secrets without a connection"));
804         goto cleanup;
805     }
806
807     if (conn->secretDriver == NULL ||
808         conn->secretDriver->lookupByUUID == NULL ||
809         conn->secretDriver->getValue == NULL) {
810         qemuReportError(VIR_ERR_NO_SUPPORT, "%s",
811                         _("secret storage not supported"));
812         goto cleanup;
813     }
814
815     if (enc->format != VIR_STORAGE_ENCRYPTION_FORMAT_QCOW ||
816         enc->nsecrets != 1 ||
817         enc->secrets[0]->type !=
818         VIR_STORAGE_ENCRYPTION_SECRET_TYPE_PASSPHRASE) {
819         qemuReportError(VIR_ERR_INVALID_DOMAIN,
820                         _("invalid <encryption> for volume %s"), disk->src);
821         goto cleanup;
822     }
823
824     secret = conn->secretDriver->lookupByUUID(conn,
825                                               enc->secrets[0]->uuid);
826     if (secret == NULL)
827         goto cleanup;
828     data = conn->secretDriver->getValue(secret, &size,
829                                         VIR_SECRET_GET_VALUE_INTERNAL_CALL);
830     virUnrefSecret(secret);
831     if (data == NULL)
832         goto cleanup;
833
834     if (memchr(data, '\0', size) != NULL) {
835         memset(data, 0, size);
836         VIR_FREE(data);
837         qemuReportError(VIR_ERR_INVALID_SECRET,
838                         _("format='qcow' passphrase for %s must not contain a "
839                           "'\\0'"), disk->src);
840         goto cleanup;
841     }
842
843     if (VIR_ALLOC_N(passphrase, size + 1) < 0) {
844         memset(data, 0, size);
845         VIR_FREE(data);
846         virReportOOMError();
847         goto cleanup;
848     }
849     memcpy(passphrase, data, size);
850     passphrase[size] = '\0';
851
852     memset(data, 0, size);
853     VIR_FREE(data);
854
855     *secretRet = passphrase;
856     *secretLen = size;
857
858     ret = 0;
859
860 cleanup:
861     return ret;
862 }
863
864 static int
865 findVolumeQcowPassphrase(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
866                          virConnectPtr conn,
867                          virDomainObjPtr vm,
868                          const char *path,
869                          char **secretRet,
870                          size_t *secretLen)
871 {
872     virDomainDiskDefPtr disk;
873     int ret = -1;
874
875     virDomainObjLock(vm);
876     disk = findDomainDiskByPath(vm, path);
877
878     if (!disk)
879         goto cleanup;
880
881     ret = getVolumeQcowPassphrase(conn, disk, secretRet, secretLen);
882
883 cleanup:
884     virDomainObjUnlock(vm);
885     return ret;
886 }
887
888
889 static int
890 qemuHandleDomainReset(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
891                       virDomainObjPtr vm)
892 {
893     struct qemud_driver *driver = qemu_driver;
894     virDomainEventPtr event;
895
896     virDomainObjLock(vm);
897     event = virDomainEventRebootNewFromObj(vm);
898     virDomainObjUnlock(vm);
899
900     if (event) {
901         qemuDriverLock(driver);
902         qemuDomainEventQueue(driver, event);
903         qemuDriverUnlock(driver);
904     }
905
906     return 0;
907 }
908
909
910 static int
911 qemuHandleDomainStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
912                      virDomainObjPtr vm)
913 {
914     struct qemud_driver *driver = qemu_driver;
915     virDomainEventPtr event = NULL;
916
917     virDomainObjLock(vm);
918     if (vm->state == VIR_DOMAIN_RUNNING) {
919         VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name);
920
921         vm->state = VIR_DOMAIN_PAUSED;
922         event = virDomainEventNewFromObj(vm,
923                                          VIR_DOMAIN_EVENT_SUSPENDED,
924                                          VIR_DOMAIN_EVENT_SUSPENDED_PAUSED);
925
926         if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
927             VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
928     }
929     virDomainObjUnlock(vm);
930
931     if (event) {
932         qemuDriverLock(driver);
933         if (event)
934             qemuDomainEventQueue(driver, event);
935         qemuDriverUnlock(driver);
936     }
937
938     return 0;
939 }
940
941
942 static int
943 qemuHandleDomainRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
944                           virDomainObjPtr vm,
945                           long long offset)
946 {
947     struct qemud_driver *driver = qemu_driver;
948     virDomainEventPtr event;
949
950     virDomainObjLock(vm);
951     event = virDomainEventRTCChangeNewFromObj(vm, offset);
952
953     if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE)
954         vm->def->clock.data.adjustment = offset;
955
956     if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
957         VIR_WARN0("unable to save domain status with RTC change");
958
959     virDomainObjUnlock(vm);
960
961     if (event) {
962         qemuDriverLock(driver);
963         qemuDomainEventQueue(driver, event);
964         qemuDriverUnlock(driver);
965     }
966
967     return 0;
968 }
969
970
971 static int
972 qemuHandleDomainWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
973                          virDomainObjPtr vm,
974                          int action)
975 {
976     struct qemud_driver *driver = qemu_driver;
977     virDomainEventPtr watchdogEvent = NULL;
978     virDomainEventPtr lifecycleEvent = NULL;
979
980     virDomainObjLock(vm);
981     watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);
982
983     if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
984         vm->state == VIR_DOMAIN_RUNNING) {
985         VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);
986
987         vm->state = VIR_DOMAIN_PAUSED;
988         lifecycleEvent = virDomainEventNewFromObj(vm,
989                                                   VIR_DOMAIN_EVENT_SUSPENDED,
990                                                   VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);
991
992         if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
993             VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
994     }
995     virDomainObjUnlock(vm);
996
997     if (watchdogEvent || lifecycleEvent) {
998         qemuDriverLock(driver);
999         if (watchdogEvent)
1000             qemuDomainEventQueue(driver, watchdogEvent);
1001         if (lifecycleEvent)
1002             qemuDomainEventQueue(driver, lifecycleEvent);
1003         qemuDriverUnlock(driver);
1004     }
1005
1006     return 0;
1007 }
1008
1009
1010 static int
1011 qemuHandleDomainIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1012                         virDomainObjPtr vm,
1013                         const char *diskAlias,
1014                         int action)
1015 {
1016     struct qemud_driver *driver = qemu_driver;
1017     virDomainEventPtr ioErrorEvent = NULL;
1018     virDomainEventPtr lifecycleEvent = NULL;
1019     const char *srcPath;
1020     const char *devAlias;
1021     virDomainDiskDefPtr disk;
1022
1023     virDomainObjLock(vm);
1024     disk = findDomainDiskByAlias(vm, diskAlias);
1025
1026     if (disk) {
1027         srcPath = disk->src;
1028         devAlias = disk->info.alias;
1029     } else {
1030         srcPath = "";
1031         devAlias = "";
1032     }
1033
1034     ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
1035
1036     if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE &&
1037         vm->state == VIR_DOMAIN_RUNNING) {
1038         VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);
1039
1040         vm->state = VIR_DOMAIN_PAUSED;
1041         lifecycleEvent = virDomainEventNewFromObj(vm,
1042                                                   VIR_DOMAIN_EVENT_SUSPENDED,
1043                                                   VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
1044
1045         if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
1046             VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
1047     }
1048     virDomainObjUnlock(vm);
1049
1050     if (ioErrorEvent || lifecycleEvent) {
1051         qemuDriverLock(driver);
1052         if (ioErrorEvent)
1053             qemuDomainEventQueue(driver, ioErrorEvent);
1054         if (lifecycleEvent)
1055             qemuDomainEventQueue(driver, lifecycleEvent);
1056         qemuDriverUnlock(driver);
1057     }
1058
1059     return 0;
1060 }
1061
1062
1063 static int
1064 qemuHandleDomainGraphics(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1065                          virDomainObjPtr vm,
1066                          int phase,
1067                          int localFamily,
1068                          const char *localNode,
1069                          const char *localService,
1070                          int remoteFamily,
1071                          const char *remoteNode,
1072                          const char *remoteService,
1073                          const char *authScheme,
1074                          const char *x509dname,
1075                          const char *saslUsername)
1076 {
1077     struct qemud_driver *driver = qemu_driver;
1078     virDomainEventPtr event;
1079     virDomainEventGraphicsAddressPtr localAddr = NULL;
1080     virDomainEventGraphicsAddressPtr remoteAddr = NULL;
1081     virDomainEventGraphicsSubjectPtr subject = NULL;
1082     int i;
1083
1084     virDomainObjLock(vm);
1085
1086     if (VIR_ALLOC(localAddr) < 0)
1087         goto no_memory;
1088     localAddr->family = localFamily;
1089     if (!(localAddr->service = strdup(localService)) ||
1090         !(localAddr->node = strdup(localNode)))
1091         goto no_memory;
1092
1093     if (VIR_ALLOC(remoteAddr) < 0)
1094         goto no_memory;
1095     remoteAddr->family = remoteFamily;
1096     if (!(remoteAddr->service = strdup(remoteService)) ||
1097         !(remoteAddr->node = strdup(remoteNode)))
1098         goto no_memory;
1099
1100     if (VIR_ALLOC(subject) < 0)
1101         goto no_memory;
1102     if (x509dname) {
1103         if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1104             goto no_memory;
1105         if (!(subject->identities[subject->nidentity].type = strdup("x509dname")) ||
1106             !(subject->identities[subject->nidentity].name = strdup(x509dname)))
1107             goto no_memory;
1108         subject->nidentity++;
1109     }
1110     if (saslUsername) {
1111         if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1112             goto no_memory;
1113         if (!(subject->identities[subject->nidentity].type = strdup("saslUsername")) ||
1114             !(subject->identities[subject->nidentity].name = strdup(saslUsername)))
1115             goto no_memory;
1116         subject->nidentity++;
1117     }
1118
1119     event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
1120     virDomainObjUnlock(vm);
1121
1122     if (event) {
1123         qemuDriverLock(driver);
1124         qemuDomainEventQueue(driver, event);
1125         qemuDriverUnlock(driver);
1126     }
1127
1128     return 0;
1129
1130 no_memory:
1131     virReportOOMError();
1132     if (localAddr) {
1133         VIR_FREE(localAddr->service);
1134         VIR_FREE(localAddr->node);
1135         VIR_FREE(localAddr);
1136     }
1137     if (remoteAddr) {
1138         VIR_FREE(remoteAddr->service);
1139         VIR_FREE(remoteAddr->node);
1140         VIR_FREE(remoteAddr);
1141     }
1142     if (subject) {
1143         for (i = 0 ; i < subject->nidentity ; i++) {
1144             VIR_FREE(subject->identities[i].type);
1145             VIR_FREE(subject->identities[i].name);
1146         }
1147         VIR_FREE(subject->identities);
1148         VIR_FREE(subject);
1149     }
1150
1151     return -1;
1152 }
1153
1154
/*
 * Table of handlers this driver registers with the QEMU monitor.
 * It is passed to qemuMonitorOpen() by qemuConnectMonitor(); each
 * entry points at the driver function handling that notification.
 */
static qemuMonitorCallbacks monitorCallbacks = {
    .eofNotify = qemuHandleMonitorEOF,
    .diskSecretLookup = findVolumeQcowPassphrase,
    .domainStop = qemuHandleDomainStop,
    .domainReset = qemuHandleDomainReset,
    .domainRTCChange = qemuHandleDomainRTCChange,
    .domainWatchdog = qemuHandleDomainWatchdog,
    .domainIOError = qemuHandleDomainIOError,
    .domainGraphics = qemuHandleDomainGraphics,
};
1165
1166 static int
1167 qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm)
1168 {
1169     qemuDomainObjPrivatePtr priv = vm->privateData;
1170     int ret;
1171
1172     /* Hold an extra reference because we can't allow 'vm' to be
1173      * deleted while the monitor is active */
1174     virDomainObjRef(vm);
1175
1176     if ((priv->mon = qemuMonitorOpen(vm,
1177                                      priv->monConfig,
1178                                      priv->monJSON,
1179                                      &monitorCallbacks)) == NULL) {
1180         VIR_ERROR(_("Failed to connect monitor for %s"), vm->def->name);
1181         return -1;
1182     }
1183
1184     qemuDomainObjEnterMonitorWithDriver(driver, vm);
1185     ret = qemuMonitorSetCapabilities(priv->mon);
1186     qemuDomainObjExitMonitorWithDriver(driver, vm);
1187
1188     if (ret < 0) {
1189         qemuMonitorClose(priv->mon);
1190         priv->mon = NULL;
1191     }
1192
1193     return ret;
1194 }
1195
/*
 * Open an existing VM's monitor, re-detect VCPU threads
 * and re-reserve the security labels in use
 *
 * Hash iterator callback (see qemuReconnectDomains):
 * @payload: the virDomainObjPtr stored in the domain hash
 * @name:    hash key (unused)
 * @opaque:  the struct qemud_driver the domain belongs to
 *
 * On any failure the VM is shut down, and a non-persistent domain is
 * additionally removed from the driver's domain list.
 */
static void
qemuReconnectDomain(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaque)
{
    virDomainObjPtr obj = payload;
    struct qemud_driver *driver = opaque;
    qemuDomainObjPrivatePtr priv;
    unsigned long long qemuCmdFlags;

    virDomainObjLock(obj);

    VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);

    priv = obj->privateData;

    /* XXX check PID liveliness & EXE path */
    if (qemuConnectMonitor(driver, obj) < 0)
        goto error;

    if (qemuUpdateActivePciHostdevs(driver, obj->def) < 0) {
        goto error;
    }

    /* XXX we should be persisting the original flags in the XML
     * not re-detecting them, since the binary may have changed
     * since launch time */
    /* Failure to extract the version info is not fatal: the
     * persistentAddrs flag is simply left unchanged */
    if (qemudExtractVersionInfo(obj->def->emulator,
                                NULL,
                                &qemuCmdFlags) >= 0 &&
        (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE))
        priv->persistentAddrs = 1;

    if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(obj->def)))
        goto error;

    /* The security driver and its reserve hook are both optional */
    if (driver->securityDriver &&
        driver->securityDriver->domainReserveSecurityLabel &&
        driver->securityDriver->domainReserveSecurityLabel(obj) < 0)
        goto error;

    /* Keep the next domain id to hand out above every id already in use */
    if (obj->def->id >= driver->nextvmid)
        driver->nextvmid = obj->def->id + 1;

    virDomainObjUnlock(obj);
    return;

error:
    /* We can't get the monitor back, so must kill the VM
     * to remove danger of it ending up running twice if
     * user tries to start it again later */
    qemudShutdownVMDaemon(driver, obj);
    /* NOTE(review): only the persistent branch unlocks obj explicitly;
     * presumably virDomainRemoveInactive() releases the lock itself -
     * confirm against its implementation */
    if (!obj->persistent)
        virDomainRemoveInactive(&driver->domains, obj);
    else
        virDomainObjUnlock(obj);
}
1255
/**
 * qemuReconnectDomains
 * @driver: qemu driver whose domain list is walked
 *
 * Try to re-open the resources for live VMs that we care
 * about, by running qemuReconnectDomain() on every entry of
 * the driver's domain hash.
 */
static void
qemuReconnectDomains(struct qemud_driver *driver)
{
    virHashForEach(driver->domains.objs, qemuReconnectDomain, driver);
}
1267
1268
1269 static int
1270 qemudSecurityInit(struct qemud_driver *qemud_drv)
1271 {
1272     int ret;
1273     virSecurityDriverPtr security_drv;
1274
1275     qemuSecurityStackedSetDriver(qemud_drv);
1276     qemuSecurityDACSetDriver(qemud_drv);
1277
1278     ret = virSecurityDriverStartup(&security_drv,
1279                                    qemud_drv->securityDriverName);
1280     if (ret == -1) {
1281         VIR_ERROR0(_("Failed to start security driver"));
1282         return -1;
1283     }
1284
1285     /* No primary security driver wanted to be enabled: just setup
1286      * the DAC driver on its own */
1287     if (ret == -2) {
1288         qemud_drv->securityDriver = &qemuDACSecurityDriver;
1289         VIR_INFO0(_("No security driver available"));
1290     } else {
1291         qemud_drv->securityPrimaryDriver = security_drv;
1292         qemud_drv->securitySecondaryDriver = &qemuDACSecurityDriver;
1293         qemud_drv->securityDriver = &qemuStackedSecurityDriver;
1294         VIR_INFO("Initialized security driver %s", security_drv->name);
1295     }
1296
1297     return 0;
1298 }
1299
1300
1301 static virCapsPtr
1302 qemuCreateCapabilities(virCapsPtr oldcaps,
1303                        struct qemud_driver *driver)
1304 {
1305     virCapsPtr caps;
1306
1307     /* Basic host arch / guest machine capabilities */
1308     if (!(caps = qemudCapsInit(oldcaps))) {
1309         virReportOOMError();
1310         return NULL;
1311     }
1312
1313     /* Domain XML parser hooks */
1314     caps->privateDataAllocFunc = qemuDomainObjPrivateAlloc;
1315     caps->privateDataFreeFunc = qemuDomainObjPrivateFree;
1316     caps->privateDataXMLFormat = qemuDomainObjPrivateXMLFormat;
1317     caps->privateDataXMLParse = qemuDomainObjPrivateXMLParse;
1318
1319
1320     /* Security driver data */
1321     if (driver->securityPrimaryDriver) {
1322         const char *doi, *model;
1323
1324         doi = virSecurityDriverGetDOI(driver->securityPrimaryDriver);
1325         model = virSecurityDriverGetModel(driver->securityPrimaryDriver);
1326
1327         if (!(caps->host.secModel.model = strdup(model)))
1328             goto no_memory;
1329         if (!(caps->host.secModel.doi = strdup(doi)))
1330             goto no_memory;
1331
1332         VIR_DEBUG("Initialized caps for security driver \"%s\" with "
1333                   "DOI \"%s\"", model, doi);
1334     }
1335
1336     return caps;
1337
1338 no_memory:
1339     virReportOOMError();
1340     virCapabilitiesFree(caps);
1341     return NULL;
1342 }
1343
1344 static void qemuDomainSnapshotLoad(void *payload,
1345                                    const char *name ATTRIBUTE_UNUSED,
1346                                    void *data)
1347 {
1348     virDomainObjPtr vm = (virDomainObjPtr)payload;
1349     char *baseDir = (char *)data;
1350     char *snapDir = NULL;
1351     DIR *dir = NULL;
1352     struct dirent *entry;
1353     char *xmlStr;
1354     int ret;
1355     char *fullpath;
1356     virDomainSnapshotDefPtr def = NULL;
1357     char ebuf[1024];
1358
1359     virDomainObjLock(vm);
1360     if (virAsprintf(&snapDir, "%s/%s", baseDir, vm->def->name) < 0) {
1361         VIR_ERROR("Failed to allocate memory for snapshot directory for domain %s",
1362                    vm->def->name);
1363         goto cleanup;
1364     }
1365
1366     VIR_INFO("Scanning for snapshots for domain %s in %s", vm->def->name,
1367              snapDir);
1368
1369     if (!(dir = opendir(snapDir))) {
1370         if (errno != ENOENT)
1371             VIR_ERROR("Failed to open snapshot directory %s for domain %s: %s",
1372                       snapDir, vm->def->name,
1373                       virStrerror(errno, ebuf, sizeof(ebuf)));
1374         goto cleanup;
1375     }
1376
1377     while ((entry = readdir(dir))) {
1378         if (entry->d_name[0] == '.')
1379             continue;
1380
1381         /* NB: ignoring errors, so one malformed config doesn't
1382            kill the whole process */
1383         VIR_INFO("Loading snapshot file '%s'", entry->d_name);
1384
1385         if (virAsprintf(&fullpath, "%s/%s", snapDir, entry->d_name) < 0) {
1386             VIR_ERROR0("Failed to allocate memory for path");
1387             continue;
1388         }
1389
1390         ret = virFileReadAll(fullpath, 1024*1024*1, &xmlStr);
1391         VIR_FREE(fullpath);
1392         if (ret < 0) {
1393             /* Nothing we can do here, skip this one */
1394             VIR_ERROR("Failed to read snapshot file %s: %s", fullpath,
1395                       virStrerror(errno, ebuf, sizeof(ebuf)));
1396             continue;
1397         }
1398
1399         def = virDomainSnapshotDefParseString(xmlStr, 0);
1400         if (def == NULL) {
1401             /* Nothing we can do here, skip this one */
1402             VIR_ERROR("Failed to parse snapshot XML from file '%s'", fullpath);
1403             VIR_FREE(xmlStr);
1404             continue;
1405         }
1406
1407         virDomainSnapshotAssignDef(&vm->snapshots, def);
1408
1409         VIR_FREE(xmlStr);
1410     }
1411
1412     /* FIXME: qemu keeps internal track of snapshots.  We can get access
1413      * to this info via the "info snapshots" monitor command for running
1414      * domains, or via "qemu-img snapshot -l" for shutoff domains.  It would
1415      * be nice to update our internal state based on that, but there is a
1416      * a problem.  qemu doesn't track all of the same metadata that we do.
1417      * In particular we wouldn't be able to fill in the <parent>, which is
1418      * pretty important in our metadata.
1419      */
1420
1421     virResetLastError();
1422
1423 cleanup:
1424     if (dir)
1425         closedir(dir);
1426     VIR_FREE(snapDir);
1427     virDomainObjUnlock(vm);
1428 }
1429
1430 /**
1431  * qemudStartup:
1432  *
1433  * Initialization function for the QEmu daemon
1434  */
1435 static int
1436 qemudStartup(int privileged) {
1437     char *base = NULL;
1438     char driverConf[PATH_MAX];
1439     int rc;
1440
1441     if (VIR_ALLOC(qemu_driver) < 0)
1442         return -1;
1443
1444     if (virMutexInit(&qemu_driver->lock) < 0) {
1445         VIR_ERROR("%s", _("cannot initialize mutex"));
1446         VIR_FREE(qemu_driver);
1447         return -1;
1448     }
1449     qemuDriverLock(qemu_driver);
1450     qemu_driver->privileged = privileged;
1451
1452     /* Don't have a dom0 so start from 1 */
1453     qemu_driver->nextvmid = 1;
1454
1455     if (virDomainObjListInit(&qemu_driver->domains) < 0)
1456         goto out_of_memory;
1457
1458     /* Init callback list */
1459     if (VIR_ALLOC(qemu_driver->domainEventCallbacks) < 0)
1460         goto out_of_memory;
1461     if (!(qemu_driver->domainEventQueue = virDomainEventQueueNew()))
1462         goto out_of_memory;
1463
1464     if ((qemu_driver->domainEventTimer =
1465          virEventAddTimeout(-1, qemuDomainEventFlush, qemu_driver, NULL)) < 0)
1466         goto error;
1467
1468     if (privileged) {
1469         if (virAsprintf(&qemu_driver->logDir,
1470                         "%s/log/libvirt/qemu", LOCAL_STATE_DIR) == -1)
1471             goto out_of_memory;
1472
1473         if ((base = strdup (SYSCONF_DIR "/libvirt")) == NULL)
1474             goto out_of_memory;
1475
1476         if (virAsprintf(&qemu_driver->stateDir,
1477                       "%s/run/libvirt/qemu", LOCAL_STATE_DIR) == -1)
1478             goto out_of_memory;
1479
1480         if (virAsprintf(&qemu_driver->libDir,
1481                       "%s/lib/libvirt/qemu", LOCAL_STATE_DIR) == -1)
1482             goto out_of_memory;
1483
1484         if (virAsprintf(&qemu_driver->cacheDir,
1485                       "%s/cache/libvirt/qemu", LOCAL_STATE_DIR) == -1)
1486             goto out_of_memory;
1487         if (virAsprintf(&qemu_driver->saveDir,
1488                       "%s/lib/libvirt/qemu/save/", LOCAL_STATE_DIR) == -1)
1489             goto out_of_memory;
1490         if (virAsprintf(&qemu_driver->snapshotDir,
1491                         "%s/lib/libvirt/qemu/snapshot", LOCAL_STATE_DIR) == -1)
1492             goto out_of_memory;
1493     } else {
1494         uid_t uid = geteuid();
1495         char *userdir = virGetUserDirectory(uid);
1496         if (!userdir)
1497             goto error;
1498
1499         if (virAsprintf(&qemu_driver->logDir,
1500                         "%s/.libvirt/qemu/log", userdir) == -1) {
1501             VIR_FREE(userdir);
1502             goto out_of_memory;
1503         }
1504
1505         if (virAsprintf(&base, "%s/.libvirt", userdir) == -1) {
1506             VIR_FREE(userdir);
1507             goto out_of_memory;
1508         }
1509         VIR_FREE(userdir);
1510
1511         if (virAsprintf(&qemu_driver->stateDir, "%s/qemu/run", base) == -1)
1512             goto out_of_memory;
1513         if (virAsprintf(&qemu_driver->libDir, "%s/qemu/lib", base) == -1)
1514             goto out_of_memory;
1515         if (virAsprintf(&qemu_driver->cacheDir, "%s/qemu/cache", base) == -1)
1516             goto out_of_memory;
1517         if (virAsprintf(&qemu_driver->saveDir, "%s/qemu/save", base) == -1)
1518             goto out_of_memory;
1519         if (virAsprintf(&qemu_driver->snapshotDir, "%s/qemu/snapshot", base) == -1)
1520             goto out_of_memory;
1521     }
1522
1523     if (virFileMakePath(qemu_driver->stateDir) != 0) {
1524         char ebuf[1024];
1525         VIR_ERROR(_("Failed to create state dir '%s': %s"),
1526                   qemu_driver->stateDir, virStrerror(errno, ebuf, sizeof ebuf));
1527         goto error;
1528     }
1529     if (virFileMakePath(qemu_driver->libDir) != 0) {
1530         char ebuf[1024];
1531         VIR_ERROR(_("Failed to create lib dir '%s': %s"),
1532                   qemu_driver->libDir, virStrerror(errno, ebuf, sizeof ebuf));
1533         goto error;
1534     }
1535     if (virFileMakePath(qemu_driver->cacheDir) != 0) {
1536         char ebuf[1024];
1537         VIR_ERROR(_("Failed to create cache dir '%s': %s"),
1538                   qemu_driver->cacheDir, virStrerror(errno, ebuf, sizeof ebuf));
1539         goto error;
1540     }
1541     if (virFileMakePath(qemu_driver->saveDir) != 0) {
1542         char ebuf[1024];
1543         VIR_ERROR(_("Failed to create save dir '%s': %s"),
1544                   qemu_driver->saveDir, virStrerror(errno, ebuf, sizeof ebuf));
1545         goto error;
1546     }
1547     if (virFileMakePath(qemu_driver->snapshotDir) != 0) {
1548         char ebuf[1024];
1549         VIR_ERROR(_("Failed to create save dir '%s': %s"),
1550                   qemu_driver->snapshotDir, virStrerror(errno, ebuf, sizeof ebuf));
1551         goto error;
1552     }
1553
1554     /* Configuration paths are either ~/.libvirt/qemu/... (session) or
1555      * /etc/libvirt/qemu/... (system).
1556      */
1557     if (snprintf (driverConf, sizeof(driverConf), "%s/qemu.conf", base) == -1)
1558         goto out_of_memory;
1559     driverConf[sizeof(driverConf)-1] = '\0';
1560
1561     if (virAsprintf(&qemu_driver->configDir, "%s/qemu", base) == -1)
1562         goto out_of_memory;
1563
1564     if (virAsprintf(&qemu_driver->autostartDir, "%s/qemu/autostart", base) == -1)
1565         goto out_of_memory;
1566
1567     VIR_FREE(base);
1568
1569     rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1);
1570     if (rc < 0) {
1571         char buf[1024];
1572         VIR_WARN("Unable to create cgroup for driver: %s",
1573                  virStrerror(-rc, buf, sizeof(buf)));
1574     }
1575
1576     if (qemudLoadDriverConfig(qemu_driver, driverConf) < 0) {
1577         goto error;
1578     }
1579
1580     if (qemudSecurityInit(qemu_driver) < 0)
1581         goto error;
1582
1583     if ((qemu_driver->caps = qemuCreateCapabilities(NULL,
1584                                                     qemu_driver)) == NULL)
1585         goto error;
1586
1587     if ((qemu_driver->activePciHostdevs = pciDeviceListNew()) == NULL)
1588         goto error;
1589
1590     if (privileged) {
1591         if (chown(qemu_driver->libDir, qemu_driver->user, qemu_driver->group) < 0) {
1592             virReportSystemError(errno,
1593                                  _("unable to set ownership of '%s' to user %d:%d"),
1594                                  qemu_driver->libDir, qemu_driver->user, qemu_driver->group);
1595             goto error;
1596         }
1597         if (chown(qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group) < 0) {
1598             virReportSystemError(errno,
1599                                  _("unable to set ownership of '%s' to %d:%d"),
1600                                  qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group);
1601             goto error;
1602         }
1603         if (chown(qemu_driver->saveDir, qemu_driver->user, qemu_driver->group) < 0) {
1604             virReportSystemError(errno,
1605                                  _("unable to set ownership of '%s' to %d:%d"),
1606                                  qemu_driver->saveDir, qemu_driver->user, qemu_driver->group);
1607             goto error;
1608         }
1609         if (chown(qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group) < 0) {
1610             virReportSystemError(errno,
1611                                  _("unable to set ownership of '%s' to %d:%d"),
1612                                  qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group);
1613             goto error;
1614         }
1615     }
1616
1617     /* If hugetlbfs is present, then we need to create a sub-directory within
1618      * it, since we can't assume the root mount point has permissions that
1619      * will let our spawned QEMU instances use it.
1620      *
1621      * NB the check for '/', since user may config "" to disable hugepages
1622      * even when mounted
1623      */
1624     if (qemu_driver->hugetlbfs_mount &&
1625         qemu_driver->hugetlbfs_mount[0] == '/') {
1626         char *mempath = NULL;
1627         if (virAsprintf(&mempath, "%s/libvirt/qemu", qemu_driver->hugetlbfs_mount) < 0)
1628             goto out_of_memory;
1629
1630         if ((rc = virFileMakePath(mempath)) != 0) {
1631             virReportSystemError(rc,
1632                                  _("unable to create hugepage path %s"), mempath);
1633             VIR_FREE(mempath);
1634             goto error;
1635         }
1636         if (qemu_driver->privileged &&
1637             chown(mempath, qemu_driver->user, qemu_driver->group) < 0) {
1638             virReportSystemError(errno,
1639                                  _("unable to set ownership on %s to %d:%d"),
1640                                  mempath, qemu_driver->user, qemu_driver->group);
1641             VIR_FREE(mempath);
1642             goto error;
1643         }
1644
1645         qemu_driver->hugepage_path = mempath;
1646     }
1647
1648     /* Get all the running persistent or transient configs first */
1649     if (virDomainLoadAllConfigs(qemu_driver->caps,
1650                                 &qemu_driver->domains,
1651                                 qemu_driver->stateDir,
1652                                 NULL,
1653                                 1, NULL, NULL) < 0)
1654         goto error;
1655
1656     qemuReconnectDomains(qemu_driver);
1657
1658     /* Then inactive persistent configs */
1659     if (virDomainLoadAllConfigs(qemu_driver->caps,
1660                                 &qemu_driver->domains,
1661                                 qemu_driver->configDir,
1662                                 qemu_driver->autostartDir,
1663                                 0, NULL, NULL) < 0)
1664         goto error;
1665
1666
1667     virHashForEach(qemu_driver->domains.objs, qemuDomainSnapshotLoad,
1668                    qemu_driver->snapshotDir);
1669
1670     qemuDriverUnlock(qemu_driver);
1671
1672     qemudAutostartConfigs(qemu_driver);
1673
1674
1675     return 0;
1676
1677 out_of_memory:
1678     virReportOOMError();
1679 error:
1680     if (qemu_driver)
1681         qemuDriverUnlock(qemu_driver);
1682     VIR_FREE(base);
1683     qemudShutdown();
1684     return -1;
1685 }
1686
1687 static void qemudNotifyLoadDomain(virDomainObjPtr vm, int newVM, void *opaque)
1688 {
1689     struct qemud_driver *driver = opaque;
1690
1691     if (newVM) {
1692         virDomainEventPtr event =
1693             virDomainEventNewFromObj(vm,
1694                                      VIR_DOMAIN_EVENT_DEFINED,
1695                                      VIR_DOMAIN_EVENT_DEFINED_ADDED);
1696         if (event)
1697             qemuDomainEventQueue(driver, event);
1698     }
1699 }
1700
1701 /**
1702  * qemudReload:
1703  *
1704  * Function to restart the QEmu daemon, it will recheck the configuration
1705  * files and update its state and the networking
1706  */
1707 static int
1708 qemudReload(void) {
1709     if (!qemu_driver)
1710         return 0;
1711
1712     qemuDriverLock(qemu_driver);
1713     virDomainLoadAllConfigs(qemu_driver->caps,
1714                             &qemu_driver->domains,
1715                             qemu_driver->configDir,
1716                             qemu_driver->autostartDir,
1717                             0, qemudNotifyLoadDomain, qemu_driver);
1718     qemuDriverUnlock(qemu_driver);
1719
1720     qemudAutostartConfigs(qemu_driver);
1721
1722     return 0;
1723 }
1724
1725 /**
1726  * qemudActive:
1727  *
1728  * Checks if the QEmu daemon is active, i.e. has an active domain or
1729  * an active network
1730  *
1731  * Returns 1 if active, 0 otherwise
1732  */
1733 static int
1734 qemudActive(void) {
1735     int active = 0;
1736
1737     if (!qemu_driver)
1738         return 0;
1739
1740     /* XXX having to iterate here is not great because it requires many locks */
1741     qemuDriverLock(qemu_driver);
1742     active = virDomainObjListNumOfDomains(&qemu_driver->domains, 1);
1743     qemuDriverUnlock(qemu_driver);
1744     return active;
1745 }
1746
1747 /**
1748  * qemudShutdown:
1749  *
1750  * Shutdown the QEmu daemon, it will stop all active domains and networks
1751  */
1752 static int
1753 qemudShutdown(void) {
1754     int i;
1755
1756     if (!qemu_driver)
1757         return -1;
1758
1759     qemuDriverLock(qemu_driver);
1760     pciDeviceListFree(qemu_driver->activePciHostdevs);
1761     virCapabilitiesFree(qemu_driver->caps);
1762
1763     virDomainObjListDeinit(&qemu_driver->domains);
1764
1765     VIR_FREE(qemu_driver->securityDriverName);
1766     VIR_FREE(qemu_driver->logDir);
1767     VIR_FREE(qemu_driver->configDir);
1768     VIR_FREE(qemu_driver->autostartDir);
1769     VIR_FREE(qemu_driver->stateDir);
1770     VIR_FREE(qemu_driver->libDir);
1771     VIR_FREE(qemu_driver->cacheDir);
1772     VIR_FREE(qemu_driver->saveDir);
1773     VIR_FREE(qemu_driver->snapshotDir);
1774     VIR_FREE(qemu_driver->vncTLSx509certdir);
1775     VIR_FREE(qemu_driver->vncListen);
1776     VIR_FREE(qemu_driver->vncPassword);
1777     VIR_FREE(qemu_driver->vncSASLdir);
1778     VIR_FREE(qemu_driver->saveImageFormat);
1779     VIR_FREE(qemu_driver->hugetlbfs_mount);
1780     VIR_FREE(qemu_driver->hugepage_path);
1781
1782     if (qemu_driver->cgroupDeviceACL) {
1783         for (i = 0 ; qemu_driver->cgroupDeviceACL[i] != NULL ; i++)
1784             VIR_FREE(qemu_driver->cgroupDeviceACL[i]);
1785         VIR_FREE(qemu_driver->cgroupDeviceACL);
1786     }
1787
1788     /* Free domain callback list */
1789     virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks);
1790     virDomainEventQueueFree(qemu_driver->domainEventQueue);
1791
1792     if (qemu_driver->domainEventTimer != -1)
1793         virEventRemoveTimeout(qemu_driver->domainEventTimer);
1794
1795     if (qemu_driver->brctl)
1796         brShutdown(qemu_driver->brctl);
1797
1798     virCgroupFree(&qemu_driver->cgroup);
1799
1800     qemuDriverUnlock(qemu_driver);
1801     virMutexDestroy(&qemu_driver->lock);
1802     VIR_FREE(qemu_driver);
1803
1804     return 0;
1805 }
1806
1807 typedef int qemuLogHandleOutput(virDomainObjPtr vm,
1808                                 const char *output,
1809                                 int fd);
1810
1811 /*
1812  * Returns -1 for error, 0 on success
1813  */
1814 static int
1815 qemudReadLogOutput(virDomainObjPtr vm,
1816                    int fd,
1817                    char *buf,
1818                    size_t buflen,
1819                    qemuLogHandleOutput func,
1820                    const char *what,
1821                    int timeout)
1822 {
1823     int retries = (timeout*10);
1824     int got = 0;
1825     buf[0] = '\0';
1826
1827     while (retries) {
1828         ssize_t func_ret, ret;
1829         int isdead = 0;
1830
1831         func_ret = func(vm, buf, fd);
1832
1833         if (kill(vm->pid, 0) == -1 && errno == ESRCH)
1834             isdead = 1;
1835
1836         /* Any failures should be detected before we read the log, so we
1837          * always have something useful to report on failure. */
1838         ret = saferead(fd, buf+got, buflen-got-1);
1839         if (ret < 0) {
1840             virReportSystemError(errno,
1841                                  _("Failure while reading %s log output"),
1842                                  what);
1843             return -1;
1844         }
1845
1846         got += ret;
1847         buf[got] = '\0';
1848         if (got == buflen-1) {
1849             qemuReportError(VIR_ERR_INTERNAL_ERROR,
1850                             _("Out of space while reading %s log output: %s"),
1851                             what, buf);
1852             return -1;
1853         }
1854
1855         if (isdead) {
1856             qemuReportError(VIR_ERR_INTERNAL_ERROR,
1857                             _("Process exited while reading %s log output: %s"),
1858                             what, buf);
1859             return -1;
1860         }
1861
1862         if (func_ret <= 0)
1863             return func_ret;
1864
1865         usleep(100*1000);
1866         retries--;
1867     }
1868
1869     qemuReportError(VIR_ERR_INTERNAL_ERROR,
1870                     _("Timed out while reading %s log output: %s"),
1871                     what, buf);
1872     return -1;
1873 }
1874
1875
/*
 * Look at a chunk of data from the QEMU stdout logs and try to
 * find a TTY device, as indicated by a line like
 *
 * char device redirected to /dev/pts/3
 *
 * @haystack: NUL terminated log data collected so far
 * @offset: position in @haystack at which to start searching; on
 *          success it is updated to the position of the whitespace
 *          character that terminated the extracted path
 * @path: any previous value is freed; on success it is replaced with a
 *        newly allocated copy of the path
 *
 * Returns -1 for error, 0 success, 1 continue reading
 */
static int
qemudExtractTTYPath(const char *haystack,
                    size_t *offset,
                    char **path)
{
    static const char needle[] = "char device redirected to";
    const char *tmp, *dev;

    VIR_FREE(*path);
    /* First look for our magic string */
    if (!(tmp = strstr(haystack + *offset, needle))) {
        return 1;
    }

    /* Step over the magic string itself (sizeof counts its NUL) */
    tmp += sizeof(needle) - 1;

    /*
     * The log is read incrementally, so the data may end right after
     * the magic string. Stepping over the separator in that case would
     * walk past the terminating NUL, so ask for more data instead.
     */
    if (*tmp == '\0')
        return 1;

    /* Skip the single separator between the magic string and the path */
    tmp++;
    dev = tmp;

    /*
     * And look for first whitespace character and nul terminate
     * to mark end of the pty path
     */
    while (*tmp) {
        if (c_isspace(*tmp)) {
            *path = strndup(dev, tmp-dev);
            if (*path == NULL) {
                virReportOOMError();
                return -1;
            }

            /* ... now further update offset till we get EOL */
            *offset = tmp - haystack;
            return 0;
        }
        tmp++;
    }

    /*
     * We found a path, but didn't find any whitespace,
     * so it must be still incomplete - we should at
     * least see a \n - indicate that we want to carry
     * on trying again
     */
    return 1;
}
1927
1928 static int
1929 qemudFindCharDevicePTYsMonitor(virDomainObjPtr vm,
1930                                virHashTablePtr paths)
1931 {
1932     int i;
1933
1934 #define LOOKUP_PTYS(array, arraylen, idprefix)                            \
1935     for (i = 0 ; i < (arraylen) ; i++) {                                  \
1936         virDomainChrDefPtr chr = (array)[i];                              \
1937         if (chr->type == VIR_DOMAIN_CHR_TYPE_PTY) {                       \
1938             char id[16];                                                  \
1939                                                                           \
1940             if (snprintf(id, sizeof(id), idprefix "%i", i) >= sizeof(id)) \
1941                 return -1;                                                \
1942                                                                           \
1943             const char *path = (const char *) virHashLookup(paths, id);   \
1944             if (path == NULL) {                                           \
1945                 if (chr->data.file.path == NULL) {                        \
1946                     /* neither the log output nor 'info chardev' had a */ \
1947                     /* pty path for this chardev, report an error */      \
1948                     qemuReportError(VIR_ERR_INTERNAL_ERROR,               \
1949                                     _("no assigned pty for device %s"), id); \
1950                     return -1;                                            \
1951                 } else {                                                  \
1952                     /* 'info chardev' had no pty path for this chardev, */\
1953                     /* but the log output had, so we're fine */           \
1954                     continue;                                             \
1955                 }                                                         \
1956             }                                                             \
1957                                                                           \
1958             VIR_FREE(chr->data.file.path);                                \
1959             chr->data.file.path = strdup(path);                           \
1960                                                                           \
1961             if (chr->data.file.path == NULL) {                            \
1962                 virReportOOMError();                                      \
1963                 return -1;                                                \
1964             }                                                             \
1965         }                                                                 \
1966     }
1967
1968     LOOKUP_PTYS(vm->def->serials,   vm->def->nserials,   "serial");
1969     LOOKUP_PTYS(vm->def->parallels, vm->def->nparallels, "parallel");
1970     LOOKUP_PTYS(vm->def->channels,  vm->def->nchannels,  "channel");
1971 #undef LOOKUP_PTYS
1972
1973     return 0;
1974 }
1975
1976 static int
1977 qemudFindCharDevicePTYs(virDomainObjPtr vm,
1978                         const char *output,
1979                         int fd ATTRIBUTE_UNUSED)
1980 {
1981     size_t offset = 0;
1982     int ret, i;
1983
1984     /* The order in which QEMU prints out the PTY paths is
1985        the order in which it procsses its serial and parallel
1986        device args. This code must match that ordering.... */
1987
1988     /* first comes the serial devices */
1989     for (i = 0 ; i < vm->def->nserials ; i++) {
1990         virDomainChrDefPtr chr = vm->def->serials[i];
1991         if (chr->type == VIR_DOMAIN_CHR_TYPE_PTY) {
1992             if ((ret = qemudExtractTTYPath(output, &offset,
1993                                            &chr->data.file.path)) != 0)
1994                 return ret;
1995         }
1996     }
1997
1998     /* then the parallel devices */
1999     for (i = 0 ; i < vm->def->nparallels ; i++) {
2000         virDomainChrDefPtr chr = vm->def->parallels[i];
2001         if (chr->type == VIR_DOMAIN_CHR_TYPE_PTY) {
2002             if ((ret = qemudExtractTTYPath(output, &offset,
2003                                            &chr->data.file.path)) != 0)
2004                 return ret;
2005         }
2006     }
2007
2008     /* then the channel devices */
2009     for (i = 0 ; i < vm->def->nchannels ; i++) {
2010         virDomainChrDefPtr chr = vm->def->channels[i];
2011         if (chr->type == VIR_DOMAIN_CHR_TYPE_PTY) {
2012             if ((ret = qemudExtractTTYPath(output, &offset,
2013                                            &chr->data.file.path)) != 0)
2014                 return ret;
2015         }
2016     }
2017
2018     return 0;
2019 }
2020
2021 static void qemudFreePtyPath(void *payload, const char *name ATTRIBUTE_UNUSED)
2022 {
2023     VIR_FREE(payload);
2024 }
2025
2026 static int
2027 qemudWaitForMonitor(struct qemud_driver* driver,
2028                     virDomainObjPtr vm, off_t pos)
2029 {
2030     char buf[4096]; /* Plenty of space to get startup greeting */
2031     int logfd;
2032     int ret = -1;
2033
2034     if ((logfd = qemudLogReadFD(driver->logDir, vm->def->name, pos))
2035         < 0)
2036         return -1;
2037
2038     ret = qemudReadLogOutput(vm, logfd, buf, sizeof(buf),
2039                              qemudFindCharDevicePTYs,
2040                              "console", 30);
2041     if (close(logfd) < 0) {
2042         char ebuf[4096];
2043         VIR_WARN(_("Unable to close logfile: %s"),
2044                  virStrerror(errno, ebuf, sizeof ebuf));
2045     }
2046
2047     if (ret < 0)
2048         return -1;
2049
2050     VIR_DEBUG("Connect monitor to %p '%s'", vm, vm->def->name);
2051     if (qemuConnectMonitor(driver, vm) < 0)
2052         return -1;
2053
2054     /* Try to get the pty path mappings again via the monitor. This is much more
2055      * reliable if it's available.
2056      * Note that the monitor itself can be on a pty, so we still need to try the
2057      * log output method. */
2058     virHashTablePtr paths = virHashCreate(0);
2059     if (paths == NULL) {
2060         virReportOOMError();
2061         goto cleanup;
2062     }
2063
2064     qemuDomainObjEnterMonitorWithDriver(driver, vm);
2065     qemuDomainObjPrivatePtr priv = vm->privateData;
2066     ret = qemuMonitorGetPtyPaths(priv->mon, paths);
2067     qemuDomainObjExitMonitorWithDriver(driver, vm);
2068
2069     VIR_DEBUG("qemuMonitorGetPtyPaths returned %i", ret);
2070     if (ret == 0) {
2071         ret = qemudFindCharDevicePTYsMonitor(vm, paths);
2072     }
2073
2074 cleanup:
2075     if (paths) {
2076         virHashFree(paths, qemudFreePtyPath);
2077     }
2078
2079     return ret;
2080 }
2081
2082 static int
2083 qemuDetectVcpuPIDs(struct qemud_driver *driver,
2084                    virDomainObjPtr vm) {
2085     pid_t *cpupids = NULL;
2086     int ncpupids;
2087     qemuDomainObjPrivatePtr priv = vm->privateData;
2088
2089     if (vm->def->virtType != VIR_DOMAIN_VIRT_KVM) {
2090         priv->nvcpupids = 1;
2091         if (VIR_ALLOC_N(priv->vcpupids, priv->nvcpupids) < 0) {
2092             virReportOOMError();
2093             return -1;
2094         }
2095         priv->vcpupids[0] = vm->pid;
2096         return 0;
2097     }
2098
2099     /* What follows is now all KVM specific */
2100
2101     qemuDomainObjEnterMonitorWithDriver(driver, vm);
2102     if ((ncpupids = qemuMonitorGetCPUInfo(priv->mon, &cpupids)) < 0) {
2103         qemuDomainObjExitMonitorWithDriver(driver, vm);
2104         return -1;
2105     }
2106     qemuDomainObjExitMonitorWithDriver(driver, vm);
2107
2108     /* Treat failure to get VCPU<->PID mapping as non-fatal */
2109     if (ncpupids == 0)
2110         return 0;
2111
2112     if (ncpupids != vm->def->vcpus) {
2113         qemuReportError(VIR_ERR_INTERNAL_ERROR,
2114                         _("got wrong number of vCPU pids from QEMU monitor. got %d, wanted %d"),
2115                         ncpupids, (int)vm->def->vcpus);
2116         VIR_FREE(cpupids);
2117         return -1;
2118     }
2119
2120     priv->nvcpupids = ncpupids;
2121     priv->vcpupids = cpupids;
2122     return 0;
2123 }
2124
2125 /*
2126  * To be run between fork/exec of QEMU only
2127  */
2128 static int
2129 qemudInitCpuAffinity(virDomainObjPtr vm)
2130 {
2131     int i, hostcpus, maxcpu = QEMUD_CPUMASK_LEN;
2132     virNodeInfo nodeinfo;
2133     unsigned char *cpumap;
2134     int cpumaplen;
2135
2136     DEBUG0("Setting CPU affinity");
2137
2138     if (nodeGetInfo(NULL, &nodeinfo) < 0)
2139         return -1;
2140
2141     /* setaffinity fails if you set bits for CPUs which
2142      * aren't present, so we have to limit ourselves */
2143     hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo);
2144     if (maxcpu > hostcpus)
2145         maxcpu = hostcpus;
2146
2147     cpumaplen = VIR_CPU_MAPLEN(maxcpu);
2148     if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) {
2149         virReportOOMError();
2150         return -1;
2151     }
2152
2153     if (vm->def->cpumask) {
2154         /* XXX why don't we keep 'cpumask' in the libvirt cpumap
2155          * format to start with ?!?! */
2156         for (i = 0 ; i < maxcpu && i < vm->def->cpumasklen ; i++)
2157             if (vm->def->cpumask[i])
2158                 VIR_USE_CPU(cpumap, i);
2159     } else {
2160         /* You may think this is redundant, but we can't assume libvirtd
2161          * itself is running on all pCPUs, so we need to explicitly set
2162          * the spawned QEMU instance to all pCPUs if no map is given in
2163          * its config file */
2164         for (i = 0 ; i < maxcpu ; i++)
2165             VIR_USE_CPU(cpumap, i);
2166     }
2167
2168     /* We are pressuming we are running between fork/exec of QEMU
2169      * so use '0' to indicate our own process ID. No threads are
2170      * running at this point
2171      */
2172     if (virProcessInfoSetAffinity(0, /* Self */
2173                                   cpumap, cpumaplen, maxcpu) < 0) {
2174         VIR_FREE(cpumap);
2175         return -1;
2176     }
2177     VIR_FREE(cpumap);
2178
2179     return 0;
2180 }
2181
2182
2183 static int
2184 qemuInitPasswords(virConnectPtr conn,
2185                   struct qemud_driver *driver,
2186                   virDomainObjPtr vm,
2187                   unsigned long long qemuCmdFlags) {
2188     int ret = 0;
2189     qemuDomainObjPrivatePtr priv = vm->privateData;
2190
2191     if ((vm->def->ngraphics == 1) &&
2192         vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
2193         (vm->def->graphics[0]->data.vnc.passwd || driver->vncPassword)) {
2194
2195         qemuDomainObjEnterMonitorWithDriver(driver, vm);
2196         ret = qemuMonitorSetVNCPassword(priv->mon,
2197                                         vm->def->graphics[0]->data.vnc.passwd ?
2198                                         vm->def->graphics[0]->data.vnc.passwd :
2199                                         driver->vncPassword);
2200         qemuDomainObjExitMonitorWithDriver(driver, vm);
2201     }
2202
2203     if (ret < 0)
2204         goto cleanup;
2205
2206     if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
2207         int i;
2208
2209         for (i = 0 ; i < vm->def->ndisks ; i++) {
2210             char *secret;
2211             size_t secretLen;
2212
2213             if (!vm->def->disks[i]->encryption ||
2214                 !vm->def->disks[i]->src)
2215                 continue;
2216
2217             if (getVolumeQcowPassphrase(conn,
2218                                         vm->def->disks[i],
2219                                         &secret, &secretLen) < 0)
2220                 goto cleanup;
2221
2222             qemuDomainObjEnterMonitorWithDriver(driver, vm);
2223             ret = qemuMonitorSetDrivePassphrase(priv->mon,
2224                                                 vm->def->disks[i]->info.alias,
2225                                                 secret);
2226             VIR_FREE(secret);
2227             qemuDomainObjExitMonitorWithDriver(driver, vm);
2228             if (ret < 0)
2229                 goto cleanup;
2230         }
2231     }
2232
2233 cleanup:
2234     return ret;
2235 }
2236
2237
/*
 * PCI vendor and product IDs used by the qemuGetPCI*VendorProduct()
 * helpers below to recognise emulated devices among the PCI addresses
 * reported by the monitor.
 */

/* Vendor IDs */
#define QEMU_PCI_VENDOR_INTEL     0x8086
#define QEMU_PCI_VENDOR_LSI_LOGIC 0x1000
#define QEMU_PCI_VENDOR_REDHAT    0x1af4
#define QEMU_PCI_VENDOR_CIRRUS    0x1013
#define QEMU_PCI_VENDOR_REALTEK   0x10ec
#define QEMU_PCI_VENDOR_AMD       0x1022
#define QEMU_PCI_VENDOR_ENSONIQ   0x1274
#define QEMU_PCI_VENDOR_VMWARE    0x15ad
#define QEMU_PCI_VENDOR_QEMU      0x1234

/* Product ID: disks */
#define QEMU_PCI_PRODUCT_DISK_VIRTIO 0x1001

/* Product IDs: network cards */
#define QEMU_PCI_PRODUCT_NIC_NE2K     0x8029
#define QEMU_PCI_PRODUCT_NIC_PCNET    0x2000
#define QEMU_PCI_PRODUCT_NIC_RTL8139  0x8139
#define QEMU_PCI_PRODUCT_NIC_E1000    0x100E
#define QEMU_PCI_PRODUCT_NIC_VIRTIO   0x1000

/* Product IDs: video adapters */
#define QEMU_PCI_PRODUCT_VGA_CIRRUS 0x00b8
#define QEMU_PCI_PRODUCT_VGA_VMWARE 0x0405
#define QEMU_PCI_PRODUCT_VGA_STDVGA 0x1111

/* Product IDs: sound adapters */
#define QEMU_PCI_PRODUCT_AUDIO_AC97    0x2415
#define QEMU_PCI_PRODUCT_AUDIO_ES1370  0x5000

/* Product IDs: disk controllers */
#define QEMU_PCI_PRODUCT_CONTROLLER_PIIX 0x7010
#define QEMU_PCI_PRODUCT_CONTROLLER_LSI  0x0012

/* Product ID: watchdog.
 * NOTE(review): the name spells "I63000ESB" while the matching enum is
 * VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB; presumably a typo, left as is
 * because the name is referenced elsewhere in this file. */
#define QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB 0x25ab
2267
2268 static int
2269 qemuAssignNextPCIAddress(virDomainDeviceInfo *info,
2270                          int vendor,
2271                          int product,
2272                          qemuMonitorPCIAddress *addrs,
2273                          int naddrs)
2274 {
2275     int found = 0;
2276     int i;
2277
2278     VIR_DEBUG("Look for %x:%x out of %d", vendor, product, naddrs);
2279
2280     for (i = 0 ; (i < naddrs) && !found; i++) {
2281         VIR_DEBUG("Maybe %x:%x", addrs[i].vendor, addrs[i].product);
2282         if (addrs[i].vendor == vendor &&
2283             addrs[i].product == product) {
2284             VIR_DEBUG("Match %d", i);
2285             found = 1;
2286             break;
2287         }
2288     }
2289     if (!found) {
2290         return -1;
2291     }
2292
2293     /* Blank it out so this device isn't matched again */
2294     addrs[i].vendor = 0;
2295     addrs[i].product = 0;
2296
2297     if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
2298         info->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI;
2299
2300     if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) {
2301         info->addr.pci.domain = addrs[i].addr.domain;
2302         info->addr.pci.bus = addrs[i].addr.bus;
2303         info->addr.pci.slot = addrs[i].addr.slot;
2304         info->addr.pci.function = addrs[i].addr.function;
2305     }
2306
2307     return 0;
2308 }
2309
2310 static int
2311 qemuGetPCIDiskVendorProduct(virDomainDiskDefPtr def,
2312                             unsigned *vendor,
2313                             unsigned *product)
2314 {
2315     switch (def->bus) {
2316     case VIR_DOMAIN_DISK_BUS_VIRTIO:
2317         *vendor = QEMU_PCI_VENDOR_REDHAT;
2318         *product = QEMU_PCI_PRODUCT_DISK_VIRTIO;
2319         break;
2320
2321     default:
2322         return -1;
2323     }
2324
2325     return 0;
2326 }
2327
2328 static int
2329 qemuGetPCINetVendorProduct(virDomainNetDefPtr def,
2330                             unsigned *vendor,
2331                             unsigned *product)
2332 {
2333     if (!def->model)
2334         return -1;
2335
2336     if (STREQ(def->model, "ne2k_pci")) {
2337         *vendor = QEMU_PCI_VENDOR_REALTEK;
2338         *product = QEMU_PCI_PRODUCT_NIC_NE2K;
2339     } else if (STREQ(def->model, "pcnet")) {
2340         *vendor = QEMU_PCI_VENDOR_AMD;
2341         *product = QEMU_PCI_PRODUCT_NIC_PCNET;
2342     } else if (STREQ(def->model, "rtl8139")) {
2343         *vendor = QEMU_PCI_VENDOR_REALTEK;
2344         *product = QEMU_PCI_PRODUCT_NIC_RTL8139;
2345     } else if (STREQ(def->model, "e1000")) {
2346         *vendor = QEMU_PCI_VENDOR_INTEL;
2347         *product = QEMU_PCI_PRODUCT_NIC_E1000;
2348     } else if (STREQ(def->model, "virtio")) {
2349         *vendor = QEMU_PCI_VENDOR_REDHAT;
2350         *product = QEMU_PCI_PRODUCT_NIC_VIRTIO;
2351     } else {
2352         VIR_INFO("Unexpected NIC model %s, cannot get PCI address",
2353                  def->model);
2354         return -1;
2355     }
2356     return 0;
2357 }
2358
2359 static int
2360 qemuGetPCIControllerVendorProduct(virDomainControllerDefPtr def,
2361                                   unsigned *vendor,
2362                                   unsigned *product)
2363 {
2364     switch (def->type) {
2365     case VIR_DOMAIN_CONTROLLER_TYPE_SCSI:
2366         *vendor = QEMU_PCI_VENDOR_LSI_LOGIC;
2367         *product = QEMU_PCI_PRODUCT_CONTROLLER_LSI;
2368         break;
2369
2370     case VIR_DOMAIN_CONTROLLER_TYPE_FDC:
2371         /* XXX we could put in the ISA bridge address, but
2372            that's not technically the FDC's address */
2373         return -1;
2374
2375     case VIR_DOMAIN_CONTROLLER_TYPE_IDE:
2376         *vendor = QEMU_PCI_VENDOR_INTEL;
2377         *product = QEMU_PCI_PRODUCT_CONTROLLER_PIIX;
2378         break;
2379
2380     default:
2381         VIR_INFO("Unexpected controller type %s, cannot get PCI address",
2382                  virDomainControllerTypeToString(def->type));
2383         return -1;
2384     }
2385
2386     return 0;
2387 }
2388
2389 static int
2390 qemuGetPCIVideoVendorProduct(virDomainVideoDefPtr def,
2391                              unsigned *vendor,
2392                              unsigned *product)
2393 {
2394     switch (def->type) {
2395     case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
2396         *vendor = QEMU_PCI_VENDOR_CIRRUS;
2397         *product = QEMU_PCI_PRODUCT_VGA_CIRRUS;
2398         break;
2399
2400     case VIR_DOMAIN_VIDEO_TYPE_VGA:
2401         *vendor = QEMU_PCI_VENDOR_QEMU;
2402         *product = QEMU_PCI_PRODUCT_VGA_STDVGA;
2403         break;
2404
2405     case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
2406         *vendor = QEMU_PCI_VENDOR_VMWARE;
2407         *product = QEMU_PCI_PRODUCT_VGA_VMWARE;
2408         break;
2409
2410     default:
2411         return -1;
2412     }
2413     return 0;
2414 }
2415
2416 static int
2417 qemuGetPCISoundVendorProduct(virDomainSoundDefPtr def,
2418                              unsigned *vendor,
2419                              unsigned *product)
2420 {
2421     switch (def->model) {
2422     case VIR_DOMAIN_SOUND_MODEL_ES1370:
2423         *vendor = QEMU_PCI_VENDOR_ENSONIQ;
2424         *product = QEMU_PCI_PRODUCT_AUDIO_ES1370;
2425         break;
2426
2427     case VIR_DOMAIN_SOUND_MODEL_AC97:
2428         *vendor = QEMU_PCI_VENDOR_INTEL;
2429         *product = QEMU_PCI_PRODUCT_AUDIO_AC97;
2430         break;
2431
2432     default:
2433         return -1;
2434     }
2435
2436     return 0;
2437 }
2438
2439 static int
2440 qemuGetPCIWatchdogVendorProduct(virDomainWatchdogDefPtr def,
2441                                 unsigned *vendor,
2442                                 unsigned *product)
2443 {
2444     switch (def->model) {
2445     case VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB:
2446         *vendor = QEMU_PCI_VENDOR_INTEL;
2447         *product = QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB;
2448         break;
2449
2450     default:
2451         return -1;
2452     }
2453
2454     return 0;
2455 }
2456
2457
2458 /*
2459  * This entire method assumes that PCI devices in 'info pci'
2460  * match ordering of devices specified on the command line
2461  * wrt to devices of matching vendor+product
2462  *
2463  * XXXX this might not be a valid assumption if we assign
2464  * some static addrs on CLI. Have to check that...
2465  */
2466 static int
2467 qemuDetectPCIAddresses(virDomainObjPtr vm,
2468                        qemuMonitorPCIAddress *addrs,
2469                        int naddrs)
2470 {
2471     unsigned int vendor = 0, product = 0;
2472     int i;
2473
2474     /* XXX should all these vendor/product IDs be kept in the
2475      * actual device data structure instead ?
2476      */
2477
2478     for (i = 0 ; i < vm->def->ndisks ; i++) {
2479         if (qemuGetPCIDiskVendorProduct(vm->def->disks[i], &vendor, &product) < 0)
2480             continue;
2481
2482         if (qemuAssignNextPCIAddress(&(vm->def->disks[i]->info),
2483                                      vendor, product,
2484                                      addrs, naddrs) < 0) {
2485             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2486                             _("cannot find PCI address for VirtIO disk %s"),
2487                             vm->def->disks[i]->dst);
2488             return -1;
2489         }
2490     }
2491
2492     for (i = 0 ; i < vm->def->nnets ; i++) {
2493         if (qemuGetPCINetVendorProduct(vm->def->nets[i], &vendor, &product) < 0)
2494             continue;
2495
2496         if (qemuAssignNextPCIAddress(&(vm->def->nets[i]->info),
2497                                      vendor, product,
2498                                      addrs,  naddrs) < 0) {
2499             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2500                             _("cannot find PCI address for %s NIC"),
2501                             vm->def->nets[i]->model);
2502             return -1;
2503         }
2504     }
2505
2506     for (i = 0 ; i < vm->def->ncontrollers ; i++) {
2507         if (qemuGetPCIControllerVendorProduct(vm->def->controllers[i], &vendor, &product) < 0)
2508             continue;
2509
2510         if (qemuAssignNextPCIAddress(&(vm->def->controllers[i]->info),
2511                                      vendor, product,
2512                                      addrs,  naddrs) < 0) {
2513             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2514                             _("cannot find PCI address for controller %s"),
2515                             virDomainControllerTypeToString(vm->def->controllers[i]->type));
2516             return -1;
2517         }
2518     }
2519
2520     for (i = 0 ; i < vm->def->nvideos ; i++) {
2521         if (qemuGetPCIVideoVendorProduct(vm->def->videos[i], &vendor, &product) < 0)
2522             continue;
2523
2524         if (qemuAssignNextPCIAddress(&(vm->def->videos[i]->info),
2525                                      vendor, product,
2526                                      addrs,  naddrs) < 0) {
2527             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2528                             _("cannot find PCI address for video adapter %s"),
2529                             virDomainVideoTypeToString(vm->def->videos[i]->type));
2530             return -1;
2531         }
2532     }
2533
2534     for (i = 0 ; i < vm->def->nsounds ; i++) {
2535         if (qemuGetPCISoundVendorProduct(vm->def->sounds[i], &vendor, &product) < 0)
2536             continue;
2537
2538         if (qemuAssignNextPCIAddress(&(vm->def->sounds[i]->info),
2539                                      vendor, product,
2540                                      addrs,  naddrs) < 0) {
2541             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2542                             _("cannot find PCI address for sound adapter %s"),
2543                             virDomainSoundModelTypeToString(vm->def->sounds[i]->model));
2544             return -1;
2545         }
2546     }
2547
2548
2549     if (vm->def->watchdog &&
2550         qemuGetPCIWatchdogVendorProduct(vm->def->watchdog, &vendor, &product) == 0) {
2551         if (qemuAssignNextPCIAddress(&(vm->def->watchdog->info),
2552                                      vendor, product,
2553                                      addrs,  naddrs) < 0) {
2554             qemuReportError(VIR_ERR_INTERNAL_ERROR,
2555                             _("cannot find PCI address for watchdog %s"),
2556                             virDomainWatchdogModelTypeToString(vm->def->watchdog->model));
2557             return -1;
2558         }
2559     }
2560
2561     /* XXX console (virtio) */
2562
2563
2564     /* ... and now things we don't have in our xml */
2565
2566     /* XXX USB controller ? */
2567
2568     /* XXXX virtio balloon ? */
2569
2570     /* XXX what about other PCI devices (ie bridges) */
2571
2572     return 0;
2573 }
2574
2575 static int
2576 qemuInitPCIAddresses(struct qemud_driver *driver,
2577                      virDomainObjPtr vm)
2578 {
2579     qemuDomainObjPrivatePtr priv = vm->privateData;
2580     int naddrs;
2581     int ret;
2582     qemuMonitorPCIAddress *addrs = NULL;
2583
2584     qemuDomainObjEnterMonitorWithDriver(driver, vm);
2585     naddrs = qemuMonitorGetAllPCIAddresses(priv->mon,
2586                                            &addrs);
2587     qemuDomainObjExitMonitorWithDriver(driver, vm);
2588
2589     ret = qemuDetectPCIAddresses(vm, addrs, naddrs);
2590
2591     VIR_FREE(addrs);
2592
2593     return ret;
2594 }
2595
2596 static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) {
2597     int i;
2598
2599     for (i = 5900 ; i < 65535 ; i++) {
2600         int fd;
2601         int reuse = 1;
2602         struct sockaddr_in addr;
2603         addr.sin_family = AF_INET;
2604         addr.sin_port = htons(i);
2605         addr.sin_addr.s_addr = htonl(INADDR_ANY);
2606         fd = socket(PF_INET, SOCK_STREAM, 0);
2607         if (fd < 0)
2608             return -1;
2609
2610         if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&reuse, sizeof(reuse)) < 0) {
2611             close(fd);
2612             break;
2613         }
2614
2615         if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) {
2616             /* Not in use, lets grab it */
2617             close(fd);
2618             return i;
2619         }
2620         close(fd);
2621
2622         if (errno == EADDRINUSE) {
2623             /* In use, try next */
2624             continue;
2625         }
2626         /* Some other bad failure, get out.. */
2627         break;
2628     }
2629     return -1;
2630 }
2631
2632
2633 static int
2634 qemuAssignPCIAddresses(virDomainDefPtr def)
2635 {
2636     int ret = -1;
2637     unsigned long long qemuCmdFlags = 0;
2638     qemuDomainPCIAddressSetPtr addrs = NULL;
2639     struct stat sb;
2640
2641     if (stat(def->emulator, &sb) < 0) {
2642         virReportSystemError(errno,
2643                              _("Cannot find QEMU binary %s"),
2644                              def->emulator);
2645         goto cleanup;
2646     }
2647
2648     if (qemudExtractVersionInfo(def->emulator,
2649                                 NULL,
2650                                 &qemuCmdFlags) < 0)
2651         goto cleanup;
2652
2653     if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
2654         if (!(addrs = qemuDomainPCIAddressSetCreate(def)))
2655             goto cleanup;
2656
2657         if (qemuAssignDevicePCISlots(def, addrs) < 0)
2658             goto cleanup;
2659     }
2660
2661     ret = 0;
2662
2663 cleanup:
2664     qemuDomainPCIAddressSetFree(addrs);
2665
2666     return ret;
2667 }
2668
2669
2670 static pciDeviceList *
2671 qemuGetPciHostDeviceList(virDomainDefPtr def)
2672 {
2673     pciDeviceList *list;
2674     int i;
2675
2676     if (!(list = pciDeviceListNew()))
2677         return NULL;
2678
2679     for (i = 0 ; i < def->nhostdevs ; i++) {
2680         virDomainHostdevDefPtr hostdev = def->hostdevs[i];
2681         pciDevice *dev;
2682
2683         if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
2684             continue;
2685         if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI)
2686             continue;
2687
2688         dev = pciGetDevice(hostdev->source.subsys.u.pci.domain,
2689                            hostdev->source.subsys.u.pci.bus,
2690                            hostdev->source.subsys.u.pci.slot,
2691                            hostdev->source.subsys.u.pci.function);
2692         if (!dev) {
2693             pciDeviceListFree(list);
2694             return NULL;
2695         }
2696
2697         if (pciDeviceListAdd(list, dev) < 0) {
2698             pciFreeDevice(dev);
2699             pciDeviceListFree(list);
2700             return NULL;
2701         }
2702
2703         pciDeviceSetManaged(dev, hostdev->managed);
2704     }
2705
2706     return list;
2707 }
2708
2709 static int
2710 qemuUpdateActivePciHostdevs(struct qemud_driver *driver,
2711                             virDomainDefPtr def)
2712 {
2713     pciDeviceList *pcidevs;
2714     int i;
2715     int ret = -1;
2716
2717     if (!def->nhostdevs)
2718         return 0;
2719
2720     if (!(pcidevs = qemuGetPciHostDeviceList(def)))
2721         return -1;
2722
2723     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2724         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2725         pciDeviceListSteal(pcidevs, dev);
2726         if (pciDeviceListAdd(driver->activePciHostdevs, dev) < 0) {
2727             pciFreeDevice(dev);
2728             goto cleanup;
2729         }
2730     }
2731
2732     ret = 0;
2733
2734 cleanup:
2735     pciDeviceListFree(pcidevs);
2736     return ret;
2737 }
2738
2739
2740 static int
2741 qemuPrepareHostPCIDevices(struct qemud_driver *driver,
2742                           virDomainDefPtr def)
2743 {
2744     pciDeviceList *pcidevs;
2745     int i;
2746     int ret = -1;
2747
2748     if (!(pcidevs = qemuGetPciHostDeviceList(def)))
2749         return -1;
2750
2751     /* We have to use 3 loops here. *All* devices must
2752      * be detached before we reset any of them, because
2753      * in some cases you have to reset the whole PCI,
2754      * which impacts all devices on it. Also, all devices
2755      * must be reset before being marked as active.
2756      */
2757
2758     /* XXX validate that non-managed device isn't in use, eg
2759      * by checking that device is either un-bound, or bound
2760      * to pci-stub.ko
2761      */
2762
2763     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2764         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2765         if (!pciDeviceIsAssignable(dev, !driver->relaxedACS))
2766             goto cleanup;
2767
2768         if (pciDeviceGetManaged(dev) &&
2769             pciDettachDevice(dev) < 0)
2770             goto cleanup;
2771     }
2772
2773     /* Now that all the PCI hostdevs have be dettached, we can safely
2774      * reset them */
2775     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2776         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2777         if (pciResetDevice(dev, driver->activePciHostdevs) < 0)
2778             goto cleanup;
2779     }
2780
2781     /* Now mark all the devices as active */
2782     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2783         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2784         pciDeviceListSteal(pcidevs, dev);
2785         if (pciDeviceListAdd(driver->activePciHostdevs, dev) < 0) {
2786             pciFreeDevice(dev);
2787             goto cleanup;
2788         }
2789     }
2790
2791     ret = 0;
2792
2793 cleanup:
2794     pciDeviceListFree(pcidevs);
2795     return ret;
2796 }
2797
2798
2799 static int
2800 qemuPrepareHostUSBDevices(struct qemud_driver *driver ATTRIBUTE_UNUSED,
2801                           virDomainDefPtr def)
2802 {
2803     int i;
2804     for (i = 0 ; i < def->nhostdevs ; i++) {
2805         virDomainHostdevDefPtr hostdev = def->hostdevs[i];
2806
2807         if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
2808             continue;
2809         if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
2810             continue;
2811
2812         /* Resolve a vendor/product to bus/device */
2813         if (hostdev->source.subsys.u.usb.vendor) {
2814             usbDevice *usb
2815                 = usbFindDevice(hostdev->source.subsys.u.usb.vendor,
2816                                 hostdev->source.subsys.u.usb.product);
2817
2818             if (!usb)
2819                 return -1;
2820
2821             hostdev->source.subsys.u.usb.bus = usbDeviceGetBus(usb);
2822             hostdev->source.subsys.u.usb.device = usbDeviceGetDevno(usb);
2823
2824             usbFreeDevice(usb);
2825         }
2826     }
2827
2828     return 0;
2829 }
2830
2831 static int
2832 qemuPrepareHostDevices(struct qemud_driver *driver,
2833                        virDomainDefPtr def)
2834 {
2835     if (!def->nhostdevs)
2836         return 0;
2837
2838     if (qemuPrepareHostPCIDevices(driver, def) < 0)
2839         return -1;
2840
2841     if (qemuPrepareHostUSBDevices(driver, def) < 0)
2842         return -1;
2843
2844     return 0;
2845 }
2846
2847
2848 static void
2849 qemudReattachManagedDevice(pciDevice *dev)
2850 {
2851     int retries = 100;
2852
2853     if (pciDeviceGetManaged(dev)) {
2854         while (pciWaitForDeviceCleanup(dev, "kvm_assigned_device")
2855                && retries) {
2856             usleep(100*1000);
2857             retries--;
2858         }
2859         if (pciReAttachDevice(dev) < 0) {
2860             virErrorPtr err = virGetLastError();
2861             VIR_ERROR(_("Failed to re-attach PCI device: %s"),
2862                       err ? err->message : "");
2863             virResetError(err);
2864         }
2865     }
2866 }
2867
2868 static void
2869 qemuDomainReAttachHostDevices(struct qemud_driver *driver,
2870                               virDomainDefPtr def)
2871 {
2872     pciDeviceList *pcidevs;
2873     int i;
2874
2875     if (!def->nhostdevs)
2876         return;
2877
2878     if (!(pcidevs = qemuGetPciHostDeviceList(def))) {
2879         virErrorPtr err = virGetLastError();
2880         VIR_ERROR(_("Failed to allocate pciDeviceList: %s"),
2881                   err ? err->message : "");
2882         virResetError(err);
2883         return;
2884     }
2885
2886     /* Again 3 loops; mark all devices as inactive before reset
2887      * them and reset all the devices before re-attach */
2888
2889     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2890         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2891         pciDeviceListDel(driver->activePciHostdevs, dev);
2892     }
2893
2894     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2895         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2896         if (pciResetDevice(dev, driver->activePciHostdevs) < 0) {
2897             virErrorPtr err = virGetLastError();
2898             VIR_ERROR(_("Failed to reset PCI device: %s"),
2899                       err ? err->message : "");
2900             virResetError(err);
2901         }
2902     }
2903
2904     for (i = 0; i < pciDeviceListCount(pcidevs); i++) {
2905         pciDevice *dev = pciDeviceListGet(pcidevs, i);
2906         qemudReattachManagedDevice(dev);
2907     }
2908
2909     pciDeviceListFree(pcidevs);
2910 }
2911
2912 static const char *const defaultDeviceACL[] = {
2913     "/dev/null", "/dev/full", "/dev/zero",
2914     "/dev/random", "/dev/urandom",
2915     "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
2916     "/dev/rtc", "/dev/hpet", "/dev/net/tun",
2917     NULL,
2918 };
2919 #define DEVICE_PTY_MAJOR 136
2920 #define DEVICE_SND_MAJOR 116
2921
2922 static int qemuSetupCgroup(struct qemud_driver *driver,
2923                            virDomainObjPtr vm)
2924 {
2925     virCgroupPtr cgroup = NULL;
2926     int rc;
2927     unsigned int i;
2928     const char *const *deviceACL =
2929         driver->cgroupDeviceACL ?
2930         (const char *const *)driver->cgroupDeviceACL :
2931         defaultDeviceACL;
2932
2933     if (driver->cgroup == NULL)
2934         return 0; /* Not supported, so claim success */
2935
2936     rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 1);
2937     if (rc != 0) {
2938         virReportSystemError(-rc,
2939                              _("Unable to create cgroup for %s"),
2940                              vm->def->name);
2941         goto cleanup;
2942     }
2943
2944     if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_DEVICES)) {
2945         rc = virCgroupDenyAllDevices(cgroup);
2946         if (rc != 0) {
2947             if (rc == -EPERM) {
2948                 VIR_WARN0("Group devices ACL is not accessible, disabling whitelisting");
2949                 goto done;
2950             }
2951
2952             virReportSystemError(-rc,
2953                                  _("Unable to deny all devices for %s"), vm->def->name);
2954             goto cleanup;
2955         }
2956
2957         for (i = 0; i < vm->def->ndisks ; i++) {
2958             if (vm->def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK ||
2959                 vm->def->disks[i]->src == NULL)
2960                 continue;
2961
2962             rc = virCgroupAllowDevicePath(cgroup,
2963                                           vm->def->disks[i]->src);
2964             if (rc != 0) {
2965                 virReportSystemError(-rc,
2966                                      _("Unable to allow device %s for %s"),
2967                                      vm->def->disks[i]->src, vm->def->name);
2968                 goto cleanup;
2969             }
2970         }
2971
2972         rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_PTY_MAJOR);
2973         if (rc != 0) {
2974             virReportSystemError(-rc, "%s",
2975                                  _("unable to allow /dev/pts/ devices"));
2976             goto cleanup;
2977         }
2978
2979         if (vm->def->nsounds) {
2980             rc = virCgroupAllowDeviceMajor(cgroup, 'c', DEVICE_SND_MAJOR);
2981             if (rc != 0) {
2982                 virReportSystemError(-rc, "%s",
2983                                      _("unable to allow /dev/snd/ devices"));
2984                 goto cleanup;
2985             }
2986         }
2987
2988         for (i = 0; deviceACL[i] != NULL ; i++) {
2989             rc = virCgroupAllowDevicePath(cgroup,
2990                                           deviceACL[i]);
2991             if (rc < 0 &&
2992                 rc != -ENOENT) {
2993                 virReportSystemError(-rc,
2994                                      _("unable to allow device %s"),
2995                                      deviceACL[i]);
2996                 goto cleanup;
2997             }
2998         }
2999     }
3000
3001 done:
3002     virCgroupFree(&cgroup);
3003     return 0;
3004
3005 cleanup:
3006     if (cgroup) {
3007         virCgroupRemove(cgroup);
3008         virCgroupFree(&cgroup);
3009     }
3010     return -1;
3011 }
3012
3013
3014 static int qemuRemoveCgroup(struct qemud_driver *driver,
3015                             virDomainObjPtr vm,
3016                             int quiet)
3017 {
3018     virCgroupPtr cgroup;
3019     int rc;
3020
3021     if (driver->cgroup == NULL)
3022         return 0; /* Not supported, so claim success */
3023
3024     rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
3025     if (rc != 0) {
3026         if (!quiet)
3027             qemuReportError(VIR_ERR_INTERNAL_ERROR,
3028                             _("Unable to find cgroup for %s\n"),
3029                             vm->def->name);
3030         return rc;
3031     }
3032
3033     rc = virCgroupRemove(cgroup);
3034     virCgroupFree(&cgroup);
3035     return rc;
3036 }
3037
3038 static int qemuAddToCgroup(struct qemud_driver *driver,
3039                            virDomainDefPtr def)
3040 {
3041     virCgroupPtr cgroup = NULL;
3042     int ret = -1;
3043     int rc;
3044
3045     if (driver->cgroup == NULL)
3046         return 0; /* Not supported, so claim success */
3047
3048     rc = virCgroupForDomain(driver->cgroup, def->name, &cgroup, 0);
3049     if (rc != 0) {
3050         virReportSystemError(-rc,
3051                              _("unable to find cgroup for domain %s"),
3052                              def->name);
3053         goto cleanup;
3054     }
3055
3056     rc = virCgroupAddTask(cgroup, getpid());
3057     if (rc != 0) {
3058         virReportSystemError(-rc,
3059                              _("unable to add domain %s task %d to cgroup"),
3060                              def->name, getpid());
3061         goto cleanup;
3062     }
3063
3064     ret = 0;
3065
3066 cleanup:
3067     virCgroupFree(&cgroup);
3068     return ret;
3069 }
3070
3071
/* Context handed (as the opaque data pointer) to qemudSecurityHook(). */
struct qemudHookData {
    virConnectPtr conn;             /* connection passed to qemudStartVMDaemon() */
    virDomainObjPtr vm;             /* domain being started */
    struct qemud_driver *driver;    /* driver state (cgroup, security driver) */
};
3077
3078 static int qemudSecurityHook(void *data) {
3079     struct qemudHookData *h = data;
3080
3081     /* This must take place before exec(), so that all QEMU
3082      * memory allocation is on the correct NUMA node
3083      */
3084     if (qemudInitCpuAffinity(h->vm) < 0)
3085         return -1;
3086
3087     if (qemuAddToCgroup(h->driver, h->vm->def) < 0)
3088         return -1;
3089
3090     if (h->driver->securityDriver &&
3091         h->driver->securityDriver->domainSetSecurityProcessLabel &&
3092         h->driver->securityDriver->domainSetSecurityProcessLabel(h->driver->securityDriver, h->vm) < 0)
3093         return -1;
3094
3095     return 0;
3096 }
3097
3098 static int
3099 qemuPrepareMonitorChr(struct qemud_driver *driver,
3100                       virDomainChrDefPtr monConfig,
3101                       const char *vm)
3102 {
3103     monConfig->targetType = VIR_DOMAIN_CHR_TARGET_TYPE_MONITOR;
3104
3105     monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
3106     monConfig->data.nix.listen = 1;
3107
3108     if (!(monConfig->info.alias = strdup("monitor"))) {
3109         virReportOOMError();
3110         return -1;
3111     }
3112
3113     if (virAsprintf(&monConfig->data.nix.path, "%s/%s.monitor",
3114                     driver->libDir, vm) < 0) {
3115         virReportOOMError();
3116         return -1;
3117     }
3118
3119     return 0;
3120 }
3121
/* Forward declarations of the snapshot state helpers; their definitions
 * are not in this part of the file. qemudStartVMDaemon() below calls
 * qemuDomainSnapshotSetInactive(). */
static int qemuDomainSnapshotSetActive(virDomainObjPtr vm,
                                       char *snapshotDir);
static int qemuDomainSnapshotSetInactive(virDomainObjPtr vm,
                                         char *snapshotDir);
3126
3127 static int qemudStartVMDaemon(virConnectPtr conn,
3128                               struct qemud_driver *driver,
3129                               virDomainObjPtr vm,
3130                               const char *migrateFrom,
3131                               int stdin_fd) {
3132     const char **argv = NULL, **tmp;
3133     const char **progenv = NULL;
3134     int i, ret;
3135     struct stat sb;
3136     int *tapfds = NULL;
3137     int ntapfds = 0;
3138     unsigned long long qemuCmdFlags;
3139     fd_set keepfd;
3140     const char *emulator;
3141     pid_t child;
3142     int pos = -1;
3143     char ebuf[1024];
3144     char *pidfile = NULL;
3145     int logfile = -1;
3146     qemuDomainObjPrivatePtr priv = vm->privateData;
3147
3148     struct qemudHookData hookData;
3149     hookData.conn = conn;
3150     hookData.vm = vm;
3151     hookData.driver = driver;
3152
3153     FD_ZERO(&keepfd);
3154
3155     DEBUG0("Beginning VM startup process");
3156
3157     if (virDomainObjIsActive(vm)) {
3158         qemuReportError(VIR_ERR_OPERATION_INVALID,
3159                         "%s", _("VM is already active"));
3160         return -1;
3161     }
3162
3163     /* Must be run before security labelling */
3164     DEBUG0("Preparing host devices");
3165     if (qemuPrepareHostDevices(driver, vm->def) < 0)
3166         goto cleanup;
3167
3168     /* If you are using a SecurityDriver with dynamic labelling,
3169        then generate a security label for isolation */
3170     DEBUG0("Generating domain security label (if required)");
3171     if (driver->securityDriver &&
3172         driver->securityDriver->domainGenSecurityLabel &&
3173         driver->securityDriver->domainGenSecurityLabel(vm) < 0)
3174         return -1;
3175
3176     DEBUG0("Generating setting domain security labels (if required)");
3177     if (driver->securityDriver &&
3178         driver->securityDriver->domainSetSecurityAllLabel &&
3179         driver->securityDriver->domainSetSecurityAllLabel(vm) < 0)
3180         goto cleanup;
3181
3182     /* Ensure no historical cgroup for this VM is lying around bogus
3183      * settings */
3184     DEBUG0("Ensuring no historical cgroup is lying around");
3185     qemuRemoveCgroup(driver, vm, 1);
3186
3187     if ((vm->def->ngraphics == 1) &&
3188         vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
3189         vm->def->graphics[0]->data.vnc.autoport) {
3190         DEBUG0("Determining VNC port");
3191         int port = qemudNextFreeVNCPort(driver);
3192         if (port < 0) {
3193             qemuReportError(VIR_ERR_INTERNAL_ERROR,
3194                             "%s", _("Unable to find an unused VNC port"));
3195             goto cleanup;
3196         }
3197         vm->def->graphics[0]->data.vnc.port = port;
3198     }
3199
3200     if (virFileMakePath(driver->logDir) != 0) {
3201         virReportSystemError(errno,
3202                              _("cannot create log directory %s"),
3203                              driver->logDir);
3204         goto cleanup;
3205     }
3206
3207     DEBUG0("Creating domain log file");
3208     if ((logfile = qemudLogFD(driver, vm->def->name)) < 0)
3209         goto cleanup;
3210
3211     emulator = vm->def->emulator;
3212
3213     /* Make sure the binary we are about to try exec'ing exists.
3214      * Technically we could catch the exec() failure, but that's
3215      * in a sub-process so its hard to feed back a useful error
3216      */
3217     if (stat(emulator, &sb) < 0) {
3218         virReportSystemError(errno,
3219                              _("Cannot find QEMU binary %s"),
3220                              emulator);
3221         goto cleanup;
3222     }
3223
3224     DEBUG0("Determing emulator version");
3225     if (qemudExtractVersionInfo(emulator,
3226                                 NULL,
3227                                 &qemuCmdFlags) < 0)
3228         goto cleanup;
3229
3230     DEBUG0("Setting up domain cgroup (if required)");
3231     if (qemuSetupCgroup(driver, vm) < 0)
3232         goto cleanup;
3233
3234     if (VIR_ALLOC(priv->monConfig) < 0) {
3235         virReportOOMError();
3236         goto cleanup;
3237     }
3238
3239     DEBUG0("Preparing monitor state");
3240     if (qemuPrepareMonitorChr(driver, priv->monConfig, vm->def->name) < 0)
3241         goto cleanup;
3242
3243 #if HAVE_YAJL
3244     if (qemuCmdFlags & QEMUD_CMD_FLAG_MONITOR_JSON)
3245         priv->monJSON = 1;
3246     else
3247 #endif
3248         priv->monJSON = 0;
3249
3250     if ((ret = virFileDeletePid(driver->stateDir, vm->def->name)) != 0) {
3251         virReportSystemError(ret,
3252                              _("Cannot remove stale PID file for %s"),
3253                              vm->def->name);
3254         goto cleanup;
3255     }
3256
3257     if (!(pidfile = virFilePid(driver->stateDir, vm->def->name))) {
3258         virReportSystemError(errno,
3259                              "%s", _("Failed to build pidfile path."));
3260         goto cleanup;
3261     }
3262
3263     /*
3264      * Normally PCI addresses are assigned in the virDomainCreate
3265      * or virDomainDefine methods. We might still need to assign
3266      * some here to cope with the question of upgrades. Regardless
3267      * we also need to populate the PCi address set cache for later
3268      * use in hotplug
3269      */
3270     if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
3271         DEBUG0("Assigning domain PCI addresses");
3272         /* Populate cache with current addresses */
3273         if (priv->pciaddrs) {
3274             qemuDomainPCIAddressSetFree(priv->pciaddrs);
3275             priv->pciaddrs = NULL;
3276         }
3277         if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(vm->def)))
3278             goto cleanup;
3279
3280
3281         /* Assign any remaining addresses */
3282         if (qemuAssignDevicePCISlots(vm->def, priv->pciaddrs) < 0)
3283             goto cleanup;
3284
3285         priv->persistentAddrs = 1;
3286     } else {
3287         priv->persistentAddrs = 0;
3288     }
3289
3290     DEBUG0("Building emulator command line");
3291     vm->def->id = driver->nextvmid++;
3292     if (qemudBuildCommandLine(conn, driver, vm->def, priv->monConfig,
3293                               priv->monJSON, qemuCmdFlags, &argv, &progenv,
3294                               &tapfds, &ntapfds, migrateFrom,
3295                               vm->current_snapshot) < 0)
3296         goto cleanup;
3297
3298     if (qemuDomainSnapshotSetInactive(vm, driver->snapshotDir) < 0)
3299         goto cleanup;
3300
3301     /* now that we know it is about to start call the hook if present */
3302     if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
3303         char *xml = virDomainDefFormat(vm->def, 0);
3304         int hookret;
3305
3306         hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
3307                     VIR_HOOK_QEMU_OP_START, VIR_HOOK_SUBOP_BEGIN, NULL, xml);
3308         VIR_FREE(xml);
3309
3310         /*
3311          * If the script raised an error abort the launch
3312          */
3313         if (hookret < 0)
3314             goto cleanup;
3315     }
3316
3317     tmp = progenv;
3318     while (*tmp) {
3319         if (safewrite(logfile, *tmp, strlen(*tmp)) < 0)
3320             VIR_WARN(_("Unable to write envv to logfile: %s"),
3321                      virStrerror(errno, ebuf, sizeof ebuf));
3322         if (safewrite(logfile, " ", 1) < 0)
3323             VIR_WARN(_("Unable to write envv to logfile: %s"),
3324                      virStrerror(errno, ebuf, sizeof ebuf));
3325         tmp++;
3326     }
3327     tmp = argv;
3328     while (*tmp) {
3329         if (safewrite(logfile, *tmp, strlen(*tmp)) < 0)
3330             VIR_WARN(_("Unable to write argv to logfile: %s"),
3331                      virStrerror(errno, ebuf, sizeof ebuf));
3332         if (safewrite(logfile, " ", 1) < 0)
3333             VIR_WARN(_("Unable to write argv to logfile: %s"),
3334                      virStrerror(errno, ebuf, sizeof ebuf));
3335         tmp++;
3336     }
3337     if (safewrite(logfile, "\n", 1) < 0)
3338         VIR_WARN(_("Unable to write argv to logfile: %s"),
3339                  virStrerror(errno, ebuf, sizeof ebuf));
3340
3341     if ((pos = lseek(logfile, 0, SEEK_END)) < 0)
3342         VIR_WARN(_("Unable to seek to end of logfile: %s"),
3343                  virStrerror(errno, ebuf, sizeof ebuf));
3344
3345     for (i = 0 ; i < ntapfds ; i++)
3346         FD_SET(tapfds[i], &keepfd);
3347
3348     ret = virExecDaemonize(argv, progenv, &keepfd, &child,
3349                            stdin_fd, &logfile, &logfile,
3350                            VIR_EXEC_NONBLOCK | VIR_EXEC_CLEAR_CAPS,
3351                            qemudSecurityHook, &hookData,
3352                            pidfile);
3353     VIR_FREE(pidfile);
3354
3355     /* wait for qemu process to to show up */
3356     if (ret == 0) {
3357         if (virFileReadPid(driver->stateDir, vm->def->name, &vm->pid)) {
3358             qemuReportError(VIR_ERR_INTERNAL_ERROR,
3359                             _("Domain %s didn't show up\n"), vm->def->name);
3360             ret = -1;
3361         }
3362     } else if (ret == -2) {
3363         /* The virExec process that launches the daemon failed. Pending on
3364          * when it failed (we can't determine for sure), there may be
3365          * extra info in the domain log (if the hook failed for example).
3366          *
3367          * Pretend like things succeeded, and let 'WaitForMonitor' report
3368          * the log contents for us.
3369          */
3370         vm->pid = child;
3371         ret = 0;
3372     }
3373
3374     vm->state = migrateFrom ? VIR_DOMAIN_PAUSED : VIR_DOMAIN_RUNNING;
3375
3376     for (i = 0 ; argv[i] ; i++)
3377         VIR_FREE(argv[i]);
3378     VIR_FREE(argv);
3379
3380     for (i = 0 ; progenv[i] ; i++)
3381         VIR_FREE(progenv[i]);
3382     VIR_FREE(progenv);
3383
3384     if (ret == -1) /* The VM failed to start; tear filters before taps */
3385         virNWFilterTearVMNWFilters(vm);
3386
3387     if (tapfds) {
3388         for (i = 0 ; i < ntapfds ; i++) {
3389             close(tapfds[i]);
3390         }
3391         VIR_FREE(tapfds);
3392     }
3393
3394     if (ret == -1) /* The VM failed to start */
3395         goto cleanup;
3396
3397     DEBUG0("Waiting for monitor to show up");
3398     if (qemudWaitForMonitor(driver, vm, pos) < 0)
3399         goto abort;
3400
3401     DEBUG0("Detecting VCPU PIDs");
3402     if (qemuDetectVcpuPIDs(driver, vm) < 0)
3403         goto abort;
3404
3405     DEBUG0("Setting any required VM passwords");
3406     if (qemuInitPasswords(conn, driver, vm, qemuCmdFlags) < 0)
3407         goto abort;
3408
3409     /* If we have -device, then addresses are assigned explicitly.
3410      * If not, then we have to detect dynamic ones here */
3411     if (!(qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE)) {
3412         DEBUG0("Determining domain device PCI addresses");
3413         if (qemuInitPCIAddresses(driver, vm) < 0)
3414             goto abort;
3415     }
3416
3417     DEBUG0("Setting initial memory amount");
3418     qemuDomainObjEnterMonitorWithDriver(driver, vm);
3419     if (qemuMonitorSetBalloon(priv->mon, vm->def->memory) < 0) {
3420         qemuDomainObjExitMonitorWithDriver(driver, vm);
3421         goto abort;
3422     }
3423
3424     if (migrateFrom == NULL) {
3425         DEBUG0("Starting domain CPUs");
3426         /* Allow the CPUS to start executing */
3427         if (qemuMonitorStartCPUs(priv->mon, conn) < 0) {
3428             if (virGetLastError() == NULL)
3429                 qemuReportError(VIR_ERR_INTERNAL_ERROR,
3430                                 "%s", _("resume operation failed"));
3431             qemuDomainObjExitMonitorWithDriver(driver, vm);
3432             goto abort;
3433         }
3434     }
3435     qemuDomainObjExitMonitorWithDriver(driver, vm);
3436
3437
3438     DEBUG0("Writing domain status to disk");
3439     if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
3440         goto abort;
3441
3442     if (logfile != -1)
3443         close(logfile);
3444
3445     return 0;
3446
3447 cleanup:
3448     /* We jump here if we failed to start the VM for any reason
3449      * XXX investigate if we can kill this block and safely call
3450      * qemudShutdownVMDaemon even though no PID is running */
3451     qemuDomainReAttachHostDevices(driver, vm->def);
3452
3453     if (driver->securityDriver &&
3454         driver->securityDriver->domainRestoreSecurityAllLabel)
3455         driver->securityDriver->domainRestoreSecurityAllLabel(vm);
3456     if (driver->securityDriver &&
3457         driver->securityDriver->domainReleaseSecurityLabel)
3458         driver->securityDriver->domainReleaseSecurityLabel(vm);
3459     qemuRemoveCgroup(driver, vm, 1);
3460     if ((vm->def->ngraphics == 1) &&
3461         vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
3462         vm->def->graphics[0]->data.vnc.autoport)
3463         vm->def->graphics[0]->data.vnc.port = -1;
3464     if (logfile != -1)
3465         close(logfile);
3466     vm->def->id = -1;
3467     return -1;
3468
3469 abort:
3470     /* We jump here if we failed to initialize the now running VM
3471      * killing it off and pretend we never started it */
3472     qemudShutdownVMDaemon(driver, vm);
3473
3474     if (logfile != -1)
3475         close(logfile);
3476
3477     return -1;
3478 }
3479
3480
3481 static void qemudShutdownVMDaemon(struct qemud_driver *driver,
3482                                   virDomainObjPtr vm) {
3483     int ret;
3484     int retries = 0;
3485     qemuDomainObjPrivatePtr priv = vm->privateData;
3486     virErrorPtr orig_err;
3487     virDomainDefPtr def;
3488     int i;
3489
3490     if (!virDomainObjIsActive(vm))
3491         return;
3492
3493     VIR_DEBUG("Shutting down VM '%s'", vm->def->name);
3494
3495     /* This method is routinely used in clean up paths. Disable error
3496      * reporting so we don't squash a legit error. */
3497     orig_err = virSaveLastError();
3498
3499     virNWFilterTearVMNWFilters(vm);
3500
3501     if (driver->macFilter) {
3502         def = vm->def;
3503         for (i = 0 ; i < def->nnets ; i++) {
3504             virDomainNetDefPtr net = def->nets[i];
3505             if (net->ifname == NULL)
3506                 continue;
3507             if ((errno = networkDisallowMacOnPort(driver, net->ifname,
3508                                                   net->mac))) {
3509                 virReportSystemError(errno,
3510              _("failed to remove ebtables rule to allow MAC address on  '%s'"),
3511                                      net->ifname);
3512             }
3513         }
3514     }
3515
3516     if (virKillProcess(vm->pid, 0) == 0 &&
3517         virKillProcess(vm->pid, SIGTERM) < 0)
3518         virReportSystemError(errno,
3519                              _("Failed to send SIGTERM to %s (%d)"),
3520                              vm->def->name, vm->pid);
3521
3522     if (priv->mon &&
3523         qemuMonitorClose(priv->mon) == 0) {
3524         virDomainObjUnref(vm);
3525         priv->mon = NULL;
3526     }
3527
3528     if (priv->monConfig) {
3529         if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
3530             unlink(priv->monConfig->data.nix.path);
3531         virDomainChrDefFree(priv->monConfig);
3532         priv->monConfig = NULL;
3533     }
3534
3535     /* shut it off for sure */
3536     virKillProcess(vm->pid, SIGKILL);
3537
3538     /* now that we know it's stopped call the hook if present */
3539     if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
3540         char *xml = virDomainDefFormat(vm->def, 0);
3541
3542         /* we can't stop the operation even if the script raised an error */
3543         virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
3544                     VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END, NULL, xml);
3545         VIR_FREE(xml);
3546     }
3547
3548     /* Reset Security Labels */
3549     if (driver->securityDriver &&
3550         driver->securityDriver->domainRestoreSecurityAllLabel)
3551         driver->securityDriver->domainRestoreSecurityAllLabel(vm);
3552     if (driver->securityDriver &&
3553         driver->securityDriver->domainReleaseSecurityLabel)
3554         driver->securityDriver->domainReleaseSecurityLabel(vm);
3555
3556     /* Clear out dynamically assigned labels */
3557     if (vm->def->seclabel.type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
3558         VIR_FREE(vm->def->seclabel.model);
3559         VIR_FREE(vm->def->seclabel.label);
3560         VIR_FREE(vm->def->seclabel.imagelabel);
3561     }
3562
3563     virDomainDefClearDeviceAliases(vm->def);
3564     if (!priv->persistentAddrs) {
3565         virDomainDefClearPCIAddresses(vm->def);
3566         qemuDomainPCIAddressSetFree(priv->pciaddrs);
3567         priv->pciaddrs = NULL;
3568     }
3569
3570     qemuDomainReAttachHostDevices(driver, vm->def);
3571
3572 #if WITH_MACVTAP
3573     def = vm->def;
3574     for (i = 0; i < def->nnets; i++) {
3575         virDomainNetDefPtr net = def->nets[i];
3576         if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) {
3577             if (net->ifname)
3578                 delMacvtap(net->ifname);
3579         }
3580     }
3581 #endif
3582
3583 retry:
3584     if ((ret = qemuRemoveCgroup(driver, vm, 0)) < 0) {
3585         if (ret == -EBUSY && (retries++ < 5)) {
3586             usleep(200*1000);
3587             goto retry;
3588         }
3589         VIR_WARN("Failed to remove cgroup for %s",
3590                  vm->def->name);
3591     }
3592
3593     qemudRemoveDomainStatus(driver, vm);
3594
3595     vm->pid = -1;
3596     vm->def->id = -1;
3597     vm->state = VIR_DOMAIN_SHUTOFF;
3598     VIR_FREE(priv->vcpupids);
3599     priv->nvcpupids = 0;
3600
3601     if (vm->newDef) {
3602         virDomainDefFree(vm->def);
3603         vm->def = vm->newDef;
3604         vm->def->id = -1;
3605         vm->newDef = NULL;
3606     }
3607
3608     if (orig_err) {
3609         virSetError(orig_err);
3610         virFreeError(orig_err);
3611     }
3612 }
3613
3614
3615 static virDrvOpenStatus qemudOpen(virConnectPtr conn,
3616                                   virConnectAuthPtr auth ATTRIBUTE_UNUSED,
3617                                   int flags ATTRIBUTE_UNUSED) {
3618     if (conn->uri == NULL) {
3619         if (qemu_driver == NULL)
3620             return VIR_DRV_OPEN_DECLINED;
3621
3622         conn->uri = xmlParseURI(qemu_driver->privileged ?
3623                                 "qemu:///system" :