blob: 57a2b7c023ec22ff605e1bae7c504009e6d51de2 [file] [log] [blame]
// Copyright 2024 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "patchmaker/managed_directory.h"
#include <algorithm>
#include <cstdint>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/logging.h>
#include <brillo/proto_file_io.h>
#include "patchmaker/compression_util.h"
#include "patchmaker/directory_util.h"
#include "patchmaker/file_util.h"
#include "patchmaker/patch_util.h"
namespace {
std::vector<std::vector<base::FilePath>> ClusterFilesInDirectoryBySize(
const base::FilePath& src_path) {
util::SortableFileList file_entries = util::GetFilesInDirectory(src_path);
std::vector<std::vector<base::FilePath>> clusters{};
// Sort files by size so we can do a simple rolling cluster in one pass
std::sort(file_entries.begin(), file_entries.end(),
[](auto& left, auto& right) { return left.second < right.second; });
int num_clustered_files = 0, first_size_in_cluster;
for (const auto& entry : file_entries) {
if (clusters.empty()) {
clusters.emplace_back(std::vector<base::FilePath>{entry.first});
first_size_in_cluster = entry.second;
continue;
}
if (entry.second > kClusterRatio * first_size_in_cluster) {
num_clustered_files += clusters.back().size();
clusters.emplace_back(std::vector<base::FilePath>());
first_size_in_cluster = entry.second;
}
clusters.back().emplace_back(entry.first);
}
num_clustered_files += clusters.back().size();
CHECK(num_clustered_files == file_entries.size());
return clusters;
}
std::vector<int> IndicesMatchingNameAndDepth(
const base::FilePath& path, const std::vector<base::FilePath>& files) {
std::vector<int> matching_indices;
int reference_depth = path.GetComponents().size();
const base::FilePath reference_name = path.BaseName();
for (int i = 0; i < files.size(); i++) {
if (files[i].BaseName() == reference_name &&
files[i].GetComponents().size() == reference_depth) {
matching_indices.emplace_back(i);
}
}
return matching_indices;
}
std::vector<int> IndicesMatchingExtension(
const base::FilePath& path, const std::vector<base::FilePath>& files) {
std::vector<int> matching_indices;
const std::string reference_extension = path.FinalExtension();
for (int i = 0; i < files.size(); i++) {
if (files[i].FinalExtension() == reference_extension) {
matching_indices.emplace_back(i);
}
}
return matching_indices;
}
std::vector<int> IndicesMatchingAll(const base::FilePath& path,
const std::vector<base::FilePath>& files) {
std::vector<int> matching_indices;
for (int i = 0; i < files.size(); i++) {
matching_indices.emplace_back(i);
}
return matching_indices;
}
bool EntryIsMutable(const base::FilePath& entry,
const std::vector<base::FilePath>& immutable_paths) {
for (const auto& immutable_path : immutable_paths) {
if (entry == immutable_path || immutable_path.IsParent(entry)) {
return false;
}
}
return true;
}
int SelectBaseIdx(const base::FilePath& entry,
const std::vector<base::FilePath>& full_files,
const std::vector<base::FilePath>& immutable_paths,
const base::FilePath& temp_patch_file) {
std::set<int> visited_indices;
if (!EntryIsMutable(entry, immutable_paths)) {
LOG(INFO) << "Not touching immutable file " << entry;
return -1;
}
// Get baseline size for this file using compression
std::optional<size_t> compressed_size = util::GetCompressedSize(entry);
CHECK(compressed_size.has_value());
// Iterate a prioritized list of candidate base files
for (const auto& list : {IndicesMatchingNameAndDepth(entry, full_files),
IndicesMatchingExtension(entry, full_files),
IndicesMatchingAll(entry, full_files)}) {
for (auto base_candidate_idx : list) {
// Continue to the next file if we'd already visited this index
if (!visited_indices.insert(base_candidate_idx).second) {
continue;
}
// Set selected_base_candidate_idx and break if better than zstd
if (!util::DoBsDiff(full_files[base_candidate_idx], entry,
temp_patch_file)) {
// Something went wrong, continue to the next file
continue;
}
std::optional<int64_t> patched_size = base::GetFileSize(temp_patch_file);
CHECK(patched_size.has_value());
if (patched_size.value() < compressed_size) {
return base_candidate_idx;
}
}
}
return -1;
}
// Receive a cluster of similarly-sized files, and iteratively choose which
// files are shipped full-size, and which are patched.
std::vector<PatchManifestEntry> EncodeFileClusterFromSrcToDest(
const std::vector<base::FilePath>& cluster,
const base::FilePath& src_path,
const base::FilePath& dest_path,
const std::vector<base::FilePath>& immutable_paths) {
std::vector<base::FilePath> full_files;
std::vector<PatchManifestEntry> manifest_entries_for_cluster;
base::FilePath temp_patch_file;
base::CreateTemporaryFile(&temp_patch_file);
for (const auto& entry : cluster) {
PatchManifestEntry manifest_entry;
int selected_base_candidate_idx =
SelectBaseIdx(entry, full_files, immutable_paths, temp_patch_file);
base::FilePath dest_path_absl, src_path_rel, base_path_rel, patch_path_rel;
// Grab destination file for this entry in the destination directory
dest_path_absl = util::AppendRelativePathOn(src_path, entry, dest_path);
src_path_rel =
util::AppendRelativePathOn(src_path, entry, base::FilePath());
std::string md5_str = util::GetMD5SumForFile(entry);
if (selected_base_candidate_idx < 0) {
// We didn't find a good patch recipe, copy the full file
full_files.emplace_back(entry);
base::CopyFile(entry, dest_path_absl);
LOG(INFO) << "Compression is best, installed full at " << dest_path_absl;
} else {
// We found a good patch recipe, copy the patch
dest_path_absl = dest_path_absl.AddExtension(kPatchExtension);
base::CopyFile(temp_patch_file, dest_path_absl);
LOG(INFO) << "Found suitable patch, installed at " << dest_path_absl;
// Manifest bookkeeping
base_path_rel = util::AppendRelativePathOn(
src_path, full_files[selected_base_candidate_idx], base::FilePath());
patch_path_rel = util::AppendRelativePathOn(dest_path, dest_path_absl,
base::FilePath());
manifest_entry.set_base_file_name(base_path_rel.value());
manifest_entry.set_patch_file_name(patch_path_rel.value());
}
manifest_entry.set_original_file_md5_checksum(md5_str);
manifest_entry.set_original_file_name(src_path_rel.value());
manifest_entries_for_cluster.emplace_back(manifest_entry);
}
return manifest_entries_for_cluster;
}
bool GetManagedDirectoryRoot(const base::FilePath& target_path,
base::FilePath* directory_root) {
base::FilePath manifest_dir = target_path;
// Iterate upwards until we locate the patch manifest
while (!util::IsFile(manifest_dir.Append(kPatchManifestFilename))) {
if (manifest_dir.GetComponents().size() <= 1) {
// We already checked the root, quit
return false;
}
// Move up to parent directory
manifest_dir = manifest_dir.DirName();
}
*directory_root = manifest_dir;
return true;
}
} // namespace
// We are on the encode path, and we are creating a new managed directory. We
// may or may not have a precomputed patch manifest to follow
bool ManagedDirectory::CreateNew(
const base::FilePath& managed_dir_root,
std::optional<base::FilePath> input_manifest_path) {
directory_root_ = managed_dir_root;
if (!input_manifest_path.has_value()) {
return true;
}
if (!util::IsFile(*input_manifest_path)) {
LOG(ERROR) << "File " << *input_manifest_path << " doesn't exist";
return false;
}
if (!brillo::ReadTextProtobuf(*input_manifest_path, &manifest_)) {
LOG(ERROR) << "Couldn't load provided patch manifest";
return false;
}
return true;
}
bool ManagedDirectory::Encode(
const base::FilePath& src_path,
const base::FilePath& dest_path,
const std::vector<base::FilePath>& immutable_paths) {
// Create destination directory
if (!util::CopyEmptyTreeToDirectory(src_path, dest_path)) {
return false;
}
if (manifest_.entry().size()) {
// We were given a manifest, let's follow its recipe for each file.
for (const PatchManifestEntry& entry : manifest_.entry()) {
if (entry.has_patch_file_name()) {
if (!util::DoBsDiff(src_path.Append(entry.base_file_name()),
src_path.Append(entry.original_file_name()),
dest_path.Append(entry.patch_file_name()))) {
LOG(ERROR) << "Failed to call bsdiff";
return false;
}
} else {
if (!base::CopyFile(src_path.Append(entry.original_file_name()),
dest_path.Append(entry.original_file_name()))) {
return false;
}
}
}
// Verify that the resulting contents are identical in size to what the
// manifest anticipated.
CHECK(ComputeDirectorySize(src_path) == manifest_.directory_size_full());
CHECK(ComputeDirectorySize(dest_path) ==
manifest_.directory_size_patched());
} else {
// We were not given a manifest, let's find a recipe for each file.
// Gather files in clusters by size
std::vector<std::vector<base::FilePath>> file_clusters =
ClusterFilesInDirectoryBySize(src_path);
// Each cluster is processed individually for performance reasons, as files
// with very different sizes shouldn't be patched together anyways.
for (const auto& cluster : file_clusters) {
std::vector<PatchManifestEntry> entries = EncodeFileClusterFromSrcToDest(
cluster, src_path, dest_path, immutable_paths);
for (const auto& m_e : entries) {
*manifest_.add_entry() = m_e;
}
}
// Populate and commit manifest to file
manifest_.set_directory_size_full(ComputeDirectorySize(src_path));
manifest_.set_directory_size_patched(ComputeDirectorySize(dest_path));
}
return CommitManifestToFile();
}
bool ManagedDirectory::CommitManifestToFile() {
base::FilePath manifest_path = directory_root_.Append(kPatchManifestFilename);
base::File new_manifest(
manifest_path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
if (!brillo::WriteTextProtobuf(new_manifest.GetPlatformFile(), manifest_)) {
LOG(ERROR) << "Couldn't write new journal to temp file";
return false;
}
LOG(INFO) << "Wrote manifest to " << manifest_path;
return true;
}
// Create Managed Directory for the decode path. The input directory path may or
// may not be the root of the managed directory, as the caller may be preparing
// to decode an individual file or sub-directory.
bool ManagedDirectory::CreateFromExisting(const base::FilePath& managed_path) {
if (!GetManagedDirectoryRoot(managed_path, &directory_root_)) {
LOG(ERROR) << "Directory " << managed_path << " appears to be unmanaged";
return false;
}
base::FilePath manifest_path = directory_root_.Append(kPatchManifestFilename);
if (!brillo::ReadTextProtobuf(manifest_path, &manifest_)) {
LOG(ERROR) << "Failed to read manifest file, exiting";
return false;
}
return true;
}
bool ManagedDirectory::ManifestEntryIsUnderTargetPath(
const base::FilePath& target_path, const PatchManifestEntry& entry) {
// The entry's filename is a relative path within the managed directory.
// Here we convert the target path to be relative for comparison.
base::FilePath relative_target_path = util::AppendRelativePathOn(
directory_root_, target_path, base::FilePath());
// The entry's path will match the relative target_path exactly if the
// target is a single file, and will have the relative target path as a
// substring if the entry is below the target's directory
if (entry.original_file_name().find(relative_target_path.value()) !=
std::string::npos) {
return true;
}
return false;
}
bool ManagedDirectory::Decode(const base::FilePath& target_path,
const base::FilePath& dest_path) {
LOG(INFO) << "Decoding " << target_path << " to " << dest_path;
if (!util::CopyEmptyTreeToDirectory(directory_root_, dest_path)) {
LOG(ERROR) << "Failed to create empty tree";
return false;
}
base::FilePath dest_file;
for (const PatchManifestEntry& entry : manifest_.entry()) {
// Filter out any files outside the requested sub-tree
if (!ManifestEntryIsUnderTargetPath(target_path, entry)) {
continue;
}
dest_file = dest_path.Append(entry.original_file_name());
if (entry.has_patch_file_name()) {
if (!util::DoBsPatch(directory_root_.Append(entry.base_file_name()),
dest_file,
directory_root_.Append(entry.patch_file_name()))) {
LOG(ERROR) << "bspatch returned failure";
return false;
}
} else {
if (!CopyFile(directory_root_.Append(entry.original_file_name()),
dest_file)) {
LOG(ERROR) << "CopyFile returned false";
return false;
}
}
if (util::GetMD5SumForFile(dest_file) !=
entry.original_file_md5_checksum()) {
LOG(ERROR) << "MD5 checksum didn't match after reconstruction";
return false;
}
}
return true;
}