CVE-2013-6456: Avoid unsafe use of /proc/$PID/root in LXC disk hotplug
authorDaniel P. Berrange <berrange@redhat.com>
Thu, 30 Jan 2014 15:59:20 +0000 (15:59 +0000)
committerDaniel P. Berrange <berrange@redhat.com>
Tue, 18 Feb 2014 16:00:09 +0000 (16:00 +0000)
Rewrite lxcDomainAttachDeviceDiskLive function to use the
virProcessRunInMountNamespace helper. This avoids risk of
a malicious guest replacing /dev with a absolute symlink,
tricking the driver into changing the host OS filesystem.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
(cherry picked from commit 4dd3a7d5bc44980135a1b11810ba9aeab42a4a59)

src/lxc/lxc_driver.c

index cf45d8d..e38f037 100644 (file)
@@ -3062,6 +3062,115 @@ cleanup:
 }
 
 
+struct lxcDomainAttachDeviceMknodData {
+    virLXCDriverPtr driver;
+    mode_t mode;
+    dev_t dev;
+    virDomainObjPtr vm;
+    virDomainDeviceDefPtr def;
+    char *file;
+};
+
+static int
+lxcDomainAttachDeviceMknodHelper(pid_t pid ATTRIBUTE_UNUSED,
+                                 void *opaque)
+{
+    struct lxcDomainAttachDeviceMknodData *data = opaque;
+    int ret = -1;
+
+    virSecurityManagerPostFork(data->driver->securityManager);
+
+    if (virFileMakeParentPath(data->file) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to create %s"), data->file);
+        goto cleanup;
+    }
+
+    /* Yes, the device name we're creating may not
+     * actually correspond to the major:minor number
+     * we're using, but we've no other option at this
+     * time. Just have to hope that containerized apps
+     * don't get upset that the major:minor is different
+     * to that normally implied by the device name
+     */
+    VIR_DEBUG("Creating dev %s (%d,%d)",
+              data->file, major(data->dev), minor(data->dev));
+    if (mknod(data->file, data->mode, data->dev) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to create device %s"),
+                             data->file);
+        goto cleanup;
+    }
+
+    if (lxcContainerChown(data->vm->def, data->file) < 0)
+        goto cleanup;
+
+    /* Labelling normally operates on src, but we need
+     * to actually label the dst here, so hack the config */
+    switch (data->def->type) {
+    case VIR_DOMAIN_DEVICE_DISK: {
+        virDomainDiskDefPtr def = data->def->data.disk;
+        char *tmpsrc = def->src;
+        def->src = data->file;
+        if (virSecurityManagerSetImageLabel(data->driver->securityManager,
+                                            data->vm->def, def) < 0) {
+            def->src = tmpsrc;
+            goto cleanup;
+        }
+        def->src = tmpsrc;
+    }   break;
+
+    default:
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unexpected device type %d"),
+                       data->def->type);
+        goto cleanup;
+    }
+
+    ret = 0;
+
+ cleanup:
+    if (ret < 0)
+        unlink(data->file);
+    return ret;
+}
+
+
+static int
+lxcDomainAttachDeviceMknod(virLXCDriverPtr driver,
+                           mode_t mode,
+                           dev_t dev,
+                           virDomainObjPtr vm,
+                           virDomainDeviceDefPtr def,
+                           char *file)
+{
+    virLXCDomainObjPrivatePtr priv = vm->privateData;
+    struct lxcDomainAttachDeviceMknodData data;
+
+    memset(&data, 0, sizeof(data));
+
+    data.driver = driver;
+    data.mode = mode;
+    data.dev = dev;
+    data.vm = vm;
+    data.def = def;
+    data.file = file;
+
+    if (virSecurityManagerPreFork(driver->securityManager) < 0)
+        return -1;
+
+    if (virProcessRunInMountNamespace(priv->initpid,
+                                      lxcDomainAttachDeviceMknodHelper,
+                                      &data) < 0) {
+        virSecurityManagerPostFork(driver->securityManager);
+        return -1;
+    }
+
+    virSecurityManagerPostFork(driver->securityManager);
+    return 0;
+}
+
+
 static int
 lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
                               virDomainObjPtr vm,
@@ -3070,11 +3179,9 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
     virLXCDomainObjPrivatePtr priv = vm->privateData;
     virDomainDiskDefPtr def = dev->data.disk;
     int ret = -1;
-    char *dst = NULL;
     struct stat sb;
-    bool created = false;
-    mode_t mode = 0;
-    char *tmpsrc = def->src;
+    char *file = NULL;
+    int perms;
 
     if (!priv->initpid) {
         virReportError(VIR_ERR_OPERATION_INVALID, "%s",
@@ -3118,51 +3225,44 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
         goto cleanup;
     }
 
-    if (virAsprintf(&dst, "/proc/%llu/root/dev/%s",
-                    (unsigned long long)priv->initpid, def->dst) < 0)
-        goto cleanup;
-
-    if (VIR_REALLOC_N(vm->def->disks, vm->def->ndisks+1) < 0)
+    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) {
+        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                       _("devices cgroup isn't mounted"));
         goto cleanup;
+    }
 
-    mode = 0700 | S_IFBLK;
+    perms = (def->readonly ?
+             VIR_CGROUP_DEVICE_READ :
+             VIR_CGROUP_DEVICE_RW) |
+        VIR_CGROUP_DEVICE_MKNOD;
 
-    /* Yes, the device name we're creating may not
-     * actually correspond to the major:minor number
-     * we're using, but we've no other option at this
-     * time. Just have to hope that containerized apps
-     * don't get upset that the major:minor is different
-     * to that normally implied by the device name
-     */
-    VIR_DEBUG("Creating dev %s (%d,%d) from %s",
-              dst, major(sb.st_rdev), minor(sb.st_rdev), def->src);
-    if (mknod(dst, mode, sb.st_rdev) < 0) {
-        virReportSystemError(errno,
-                             _("Unable to create device %s"),
-                             dst);
+    if (virCgroupAllowDevice(priv->cgroup,
+                             'b',
+                             major(sb.st_rdev),
+                             minor(sb.st_rdev),
+                             perms) < 0)
         goto cleanup;
-    }
 
-    if (lxcContainerChown(vm->def, dst) < 0)
+    if (VIR_REALLOC_N(vm->def->disks, vm->def->ndisks + 1) < 0)
         goto cleanup;
 
-    created = true;
-
-    /* Labelling normally operates on src, but we need
-     * to actually label the dst here, so hack the config */
-    def->src = dst;
-    if (virSecurityManagerSetImageLabel(driver->securityManager,
-                                        vm->def, def) < 0)
+    if (virAsprintf(&file,
+                    "/dev/%s", def->dst) < 0)
         goto cleanup;
 
-    if (virCgroupAllowDevicePath(priv->cgroup, def->src,
-                                 (def->readonly ?
-                                  VIR_CGROUP_DEVICE_READ :
-                                  VIR_CGROUP_DEVICE_RW) |
-                                 VIR_CGROUP_DEVICE_MKNOD) != 0) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       _("cannot allow device %s for domain %s"),
-                       def->src, vm->def->name);
+    if (lxcDomainAttachDeviceMknod(driver,
+                                   0700 | S_IFBLK,
+                                   sb.st_rdev,
+                                   vm,
+                                   dev,
+                                   file) < 0) {
+        if (virCgroupDenyDevice(priv->cgroup,
+                                'b',
+                                major(sb.st_rdev),
+                                minor(sb.st_rdev),
+                                perms) < 0)
+            VIR_WARN("cannot deny device %s for domain %s",
+                     def->src, vm->def->name);
         goto cleanup;
     }
 
@@ -3171,11 +3271,8 @@ lxcDomainAttachDeviceDiskLive(virLXCDriverPtr driver,
     ret = 0;
 
 cleanup:
-    def->src = tmpsrc;
     virDomainAuditDisk(vm, NULL, def->src, "attach", ret == 0);
-    if (dst && created && ret < 0)
-        unlink(dst);
-    VIR_FREE(dst);
+    VIR_FREE(file);
     return ret;
 }