[client] Improve indexing performance for large repos.

This commit is contained in:
Jiaqiang Xu 2015-07-21 12:00:00 +08:00
parent 68cac1fd02
commit e779b1713a
9 changed files with 954 additions and 164 deletions

View File

@ -865,10 +865,10 @@ diff_results_to_description (GList *results)
{
GList *p;
DiffEntry *de;
char *new_file = NULL, *removed_file = NULL;
char *renamed_file = NULL, *modified_file = NULL;
char *add_mod_file = NULL, *removed_file = NULL;
char *renamed_file = NULL;
char *new_dir = NULL, *removed_dir = NULL;
int n_new = 0, n_removed = 0, n_renamed = 0, n_modified = 0;
int n_add_mod = 0, n_removed = 0, n_renamed = 0;
int n_new_dir = 0, n_removed_dir = 0;
GString *desc;
@ -879,9 +879,9 @@ diff_results_to_description (GList *results)
de = p->data;
switch (de->status) {
case DIFF_STATUS_ADDED:
if (n_new == 0)
new_file = get_basename(de->name);
n_new++;
if (n_add_mod == 0)
add_mod_file = get_basename(de->name);
n_add_mod++;
break;
case DIFF_STATUS_DELETED:
if (n_removed == 0)
@ -894,9 +894,9 @@ diff_results_to_description (GList *results)
n_renamed++;
break;
case DIFF_STATUS_MODIFIED:
if (n_modified == 0)
modified_file = get_basename(de->name);
n_modified++;
if (n_add_mod == 0)
add_mod_file = get_basename(de->name);
n_add_mod++;
break;
case DIFF_STATUS_DIR_ADDED:
if (n_new_dir == 0)
@ -913,11 +913,11 @@ diff_results_to_description (GList *results)
desc = g_string_new ("");
if (n_new == 1)
g_string_append_printf (desc, "Added \"%s\".\n", new_file);
else if (n_new > 1)
g_string_append_printf (desc, "Added \"%s\" and %d more files.\n",
new_file, n_new - 1);
if (n_add_mod == 1)
g_string_append_printf (desc, "Added or modified \"%s\".\n", add_mod_file);
else if (n_add_mod > 1)
g_string_append_printf (desc, "Added or modified \"%s\" and %d more files.\n",
add_mod_file, n_add_mod - 1);
if (n_removed == 1)
g_string_append_printf (desc, "Deleted \"%s\".\n", removed_file);
@ -931,12 +931,6 @@ diff_results_to_description (GList *results)
g_string_append_printf (desc, "Renamed \"%s\" and %d more files.\n",
renamed_file, n_renamed - 1);
if (n_modified == 1)
g_string_append_printf (desc, "Modified \"%s\".\n", modified_file);
else if (n_modified > 1)
g_string_append_printf (desc, "Modified \"%s\" and %d more files.\n",
modified_file, n_modified - 1);
if (n_new_dir == 1)
g_string_append_printf (desc, "Added directory \"%s\".\n", new_dir);
else if (n_new_dir > 1)

View File

@ -1578,6 +1578,7 @@ seaf_fs_manager_get_seafdir (SeafFSManager *mgr,
if (memcmp (dir_id, EMPTY_SHA1, 40) == 0) {
dir = g_new0 (SeafDir, 1);
dir->version = version;
memset (dir->dir_id, '0', 40);
return dir;
}

View File

@ -524,8 +524,8 @@ static int ce_match_stat_basic(struct cache_entry *ce, SeafStat *st)
}
if (ce->ce_mtime.sec != st->st_mtime)
changed |= MTIME_CHANGED;
if (ce->ce_ctime.sec != st->st_ctime)
changed |= CTIME_CHANGED;
/* if (ce->ce_ctime.sec != st->st_ctime) */
/* changed |= CTIME_CHANGED; */
#if 0
if (ce->ce_uid != (unsigned int) st->st_uid ||
@ -749,17 +749,21 @@ void remove_marked_cache_entries(struct index_state *istate)
{
struct cache_entry **ce_array = istate->cache;
unsigned int i, j;
gboolean removed = FALSE;
for (i = j = 0; i < istate->cache_nr; i++) {
if (ce_array[i]->ce_flags & CE_REMOVE) {
remove_name_hash(istate, ce_array[i]);
cache_entry_free (ce_array[i]);
removed = TRUE;
} else {
ce_array[j++] = ce_array[i];
}
}
istate->cache_changed = 1;
istate->cache_nr = j;
if (removed) {
istate->cache_changed = 1;
istate->cache_nr = j;
}
}
int remove_file_from_index(struct index_state *istate, const char *path)
@ -1176,7 +1180,7 @@ add_empty_dir_to_index (struct index_state *istate, const char *path, SeafStat *
return -1;
}
return 0;
return 1;
}
int

View File

@ -62,6 +62,7 @@ noinst_HEADERS = \
sync-status-tree.h \
filelock-mgr.h \
set-perm.h \
change-set.h \
$(proc_headers)
if LINUX
@ -103,6 +104,7 @@ common_src = \
sync-status-tree.c \
filelock-mgr.c \
set-perm.c \
change-set.c \
processors/check-tx-proc.c \
processors/check-tx-v2-proc.c \
processors/check-tx-v3-proc.c \

564
daemon/change-set.c Normal file
View File

@ -0,0 +1,564 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
#include "common.h"
#include "seafile-session.h"
#include "utils.h"
#include "log.h"
#include "index/index.h"
#include "diff-simple.h"
#include "change-set.h"
/*
 * Mutable, in-memory form of one directory in the change set tree.
 * Entries are kept in hash tables keyed by entry name instead of a list.
 */
struct _ChangeSetDir {
/* Version value passed to changeset_dir_new() when the dir is created. */
int version;
/* 40 hex characters; the struct is zero-allocated so the last byte stays NUL. */
char dir_id[41];
/* A hash table of dirents for fast lookup and insertion.
 * Keys are g_strdup'ed entry names; the table owns both keys and values
 * (values are released with changeset_dirent_free()).
 */
GHashTable *dents;
#if defined WIN32 || defined __APPLE__
/* Case-insensitive hash table.
 * Keys are the g_utf8_strdown()'ed entry names. Values point at the same
 * dirents as `dents` and are NOT owned by this table.
 */
GHashTable *dents_i;
#endif
};
typedef struct _ChangeSetDir ChangeSetDir;
/*
 * One entry (file or directory) inside a ChangeSetDir.
 * Carries the same fields that are passed to seaf_dirent_new() when the
 * tree is converted back, plus an optional expanded subdirectory.
 */
struct _ChangeSetDirent {
/* File mode bits; tested with S_ISDIR()/S_ISREG() when walking the tree. */
guint32 mode;
/* 40 hex characters of the object id, NUL-terminated by zero-allocation. */
char id[41];
/* Entry name; owned by the dirent (g_strdup'ed on creation). */
char *name;
gint64 mtime;
/* Modifier string; owned by the dirent, may be NULL. */
char *modifier;
gint64 size;
/* Only used for directories. Most of the time this is NULL
* unless we change the subdir too. Freed recursively together
* with the dirent.
*/
ChangeSetDir *subdir;
};
typedef struct _ChangeSetDirent ChangeSetDirent;
/* Change set dirent. */
/*
 * Allocate a zero-initialised dirent and fill it from the given values.
 * `name` and `modifier` are duplicated, so the caller keeps ownership of
 * its own strings. `id` must point at (at least) 40 characters.
 */
static ChangeSetDirent *
changeset_dirent_new (const char *id, guint32 mode, const char *name,
                      gint64 mtime, const char *modifier, gint64 size)
{
    ChangeSetDirent *entry = g_new0 (ChangeSetDirent, 1);

    /* Plain values first. */
    entry->mode = mode;
    entry->mtime = mtime;
    entry->size = size;

    /* Only 40 bytes are copied; the 41st stays NUL from g_new0(). */
    memcpy (entry->id, id, 40);

    /* Owned copies of the strings. */
    entry->name = g_strdup (name);
    entry->modifier = g_strdup (modifier);

    return entry;
}
/*
 * Build a change set dirent carrying the same id, mode, name, mtime,
 * modifier and size as the given SeafDirent.
 */
static ChangeSetDirent *
seaf_dirent_to_changeset_dirent (SeafDirent *seaf_dent)
{
    ChangeSetDirent *copy;

    copy = changeset_dirent_new (seaf_dent->id,
                                 seaf_dent->mode,
                                 seaf_dent->name,
                                 seaf_dent->mtime,
                                 seaf_dent->modifier,
                                 seaf_dent->size);
    return copy;
}
/*
 * Create a SeafDirent of the given version from a change set dirent.
 * The `subdir` pointer of the change set dirent is not carried over.
 */
static SeafDirent *
changeset_dirent_to_seaf_dirent (int version, ChangeSetDirent *dent)
{
    SeafDirent *converted;

    converted = seaf_dirent_new (version,
                                 dent->id,
                                 dent->mode,
                                 dent->name,
                                 dent->mtime,
                                 dent->modifier,
                                 dent->size);
    return converted;
}
/* Declared early because dirents and dirs free each other recursively. */
static void
changeset_dir_free (ChangeSetDir *dir);

/*
 * Release a dirent, its owned strings and (if present) its expanded
 * subdirectory. Passing NULL is a no-op.
 */
static void
changeset_dirent_free (ChangeSetDirent *dent)
{
    if (dent != NULL) {
        /* Recursively free subdir. */
        if (dent->subdir != NULL)
            changeset_dir_free (dent->subdir);

        g_free (dent->modifier);
        g_free (dent->name);
        g_free (dent);
    }
}
/* Change set dir. */
/*
 * Register `dent` in `dir` under its own name. The directory's main table
 * takes ownership of the dirent; on WIN32/APPLE the dirent is additionally
 * indexed under its lower-cased name.
 */
static void
add_dent_to_dir (ChangeSetDir *dir, ChangeSetDirent *dent)
{
    char *exact_key = g_strdup (dent->name);

    g_hash_table_insert (dir->dents, exact_key, dent);

#if defined WIN32 || defined __APPLE__
    char *folded_key = g_utf8_strdown (dent->name, -1);

    g_hash_table_insert (dir->dents_i, folded_key, dent);
#endif
}
/*
 * Detach the entry named `dname` from `dir` WITHOUT freeing the dirent
 * itself: the entry is stolen from the main table and only the stored key
 * string is released. The caller becomes responsible for the dirent.
 */
static void
remove_dent_from_dir (ChangeSetDir *dir, const char *dname)
{
    gpointer stored_key = NULL;

    if (g_hash_table_lookup_extended (dir->dents, dname, &stored_key, NULL)) {
        /* steal() skips the destroy notifiers, so free the key by hand. */
        g_hash_table_steal (dir->dents, dname);
        g_free (stored_key);
    }

#if defined WIN32 || defined __APPLE__
    char *folded = g_utf8_strdown (dname, -1);

    g_hash_table_remove (dir->dents_i, folded);
    g_free (folded);
#endif
}
/*
 * Create a change set dir with the given version and (optional) 40-char id,
 * populated with one change set dirent per SeafDirent in `dirents`.
 * `dirents` may be NULL, which yields an empty directory. The input list
 * and its elements are only read.
 */
static ChangeSetDir *
changeset_dir_new (int version, const char *id, GList *dirents)
{
    ChangeSetDir *new_dir = g_new0 (ChangeSetDir, 1);
    GList *node = dirents;

    new_dir->version = version;
    if (id != NULL)
        memcpy (new_dir->dir_id, id, 40);

    /* Main table owns key strings and dirents. */
    new_dir->dents = g_hash_table_new_full (g_str_hash, g_str_equal,
                                            g_free,
                                            (GDestroyNotify)changeset_dirent_free);
#if defined WIN32 || defined __APPLE__
    /* Case-insensitive index owns only its key strings. */
    new_dir->dents_i = g_hash_table_new_full (g_str_hash, g_str_equal,
                                              g_free, NULL);
#endif

    while (node != NULL) {
        SeafDirent *source = node->data;

        add_dent_to_dir (new_dir, seaf_dirent_to_changeset_dirent (source));
        node = node->next;
    }

    return new_dir;
}
/*
 * Destroy a change set dir together with its hash tables. Destroying the
 * main table releases every dirent it holds. Passing NULL is a no-op.
 */
static void
changeset_dir_free (ChangeSetDir *dir)
{
    if (dir != NULL) {
        g_hash_table_destroy (dir->dents);
#if defined WIN32 || defined __APPLE__
        g_hash_table_destroy (dir->dents_i);
#endif
        g_free (dir);
    }
}
/*
 * Convert a SeafDir into its change set form, copying version, id and
 * entries. `seaf_dir` is only read and must not be NULL.
 */
static ChangeSetDir *
seaf_dir_to_changeset_dir (SeafDir *seaf_dir)
{
    ChangeSetDir *converted;

    converted = changeset_dir_new (seaf_dir->version,
                                   seaf_dir->dir_id,
                                   seaf_dir->entries);
    return converted;
}
/*
 * GCompareFunc over SeafDirent pointers that orders names in descending
 * strcmp() order.
 */
static gint
compare_dents (gconstpointer a, gconstpointer b)
{
    const SeafDirent *lhs = a;
    const SeafDirent *rhs = b;

    /* Operands are swapped on purpose: larger names sort first. */
    return strcmp (rhs->name, lhs->name);
}
/*
 * Build a SeafDir from the current entries of a change set dir.
 * Every dirent is converted to a SeafDirent, the list is sorted by name in
 * descending order and handed to seaf_dir_new().
 */
static SeafDir *
changeset_dir_to_seaf_dir (ChangeSetDir *dir)
{
    GList *values = g_hash_table_get_values (dir->dents);
    GList *converted = NULL;
    GList *node;
    SeafDir *result;

    for (node = values; node != NULL; node = node->next) {
        ChangeSetDirent *source = node->data;

        converted = g_list_prepend (converted,
                                    changeset_dirent_to_seaf_dirent (dir->version,
                                                                     source));
    }
    /* Only the list container is released; the dirents stay in the table. */
    g_list_free (values);

    /* Sort it in descending order. */
    converted = g_list_sort (converted, compare_dents);

    /* seaf_dir_new() computes the dir id. */
    result = seaf_dir_new (NULL, converted, dir->version);

    return result;
}
/* Change set. */
/*
 * Create a change set for the repo identified by `repo_id`.
 *
 * The tree of the new change set is seeded with the root directory of the
 * repo's current head commit. Returns NULL if the repo, its head commit or
 * the root directory object cannot be found. The result must be released
 * with changeset_free().
 */
ChangeSet *
changeset_new (const char *repo_id)
{
    SeafRepo *repo;
    SeafCommit *commit = NULL;
    SeafDir *seaf_dir = NULL;
    ChangeSetDir *changeset_dir = NULL;
    ChangeSet *changeset = NULL;

    repo = seaf_repo_manager_get_repo (seaf->repo_mgr, repo_id);
    if (!repo) {
        seaf_warning ("Failed to find repo %s.\n", repo_id);
        return NULL;
    }

    commit = seaf_commit_manager_get_commit (seaf->commit_mgr,
                                             repo_id,
                                             repo->version,
                                             repo->head->commit_id);
    if (!commit) {
        seaf_warning ("Failed to find head commit %s for repo %s.\n",
                      repo->head->commit_id, repo_id);
        return NULL;
    }

    seaf_dir = seaf_fs_manager_get_seafdir_sorted (seaf->fs_mgr,
                                                   repo_id,
                                                   repo->version,
                                                   commit->root_id);
    if (!seaf_dir) {
        /* Report the id that was actually looked up (the commit's root). */
        seaf_warning ("Failed to find root dir %s in repo %s\n",
                      commit->root_id, repo_id);
        goto out;
    }

    changeset_dir = seaf_dir_to_changeset_dir (seaf_dir);
    if (!changeset_dir)
        goto out;

    changeset = g_new0 (ChangeSet, 1);
    /* 36 characters copied; the 37th byte stays NUL from g_new0(). */
    memcpy (changeset->repo_id, repo_id, 36);
    changeset->tree_root = changeset_dir;

out:
    /* The change set keeps its own copy of the tree; drop the originals. */
    seaf_commit_unref (commit);
    seaf_dir_free (seaf_dir);
    return changeset;
}
/*
 * Release a change set: its tree, its list of diff entries and the
 * container itself. Passing NULL is a no-op.
 */
void
changeset_free (ChangeSet *changeset)
{
    if (changeset != NULL) {
        changeset_dir_free (changeset->tree_root);
        g_list_free_full (changeset->diff, (GDestroyNotify)diff_entry_free);
        g_free (changeset);
    }
}
/*
 * Refresh an existing file dirent from a raw 20-byte id and stat data.
 * Nothing is changed when `sha1` or `st` is NULL or when `st` does not
 * describe a regular file.
 */
static void
update_file (ChangeSetDirent *dent,
             unsigned char *sha1,
             SeafStat *st,
             const char *modifier)
{
    gboolean usable = (sha1 != NULL) && (st != NULL) && S_ISREG(st->st_mode);

    if (usable) {
        rawdata_to_hex (sha1, dent->id, 20);

        dent->size = (gint64)st->st_size;
        dent->mtime = (gint64)st->st_mtime;
        dent->mode = create_ce_mode(st->st_mode);

        g_free (dent->modifier);
        dent->modifier = g_strdup(modifier);
    }
}
/*
 * Insert an entry named `dname` into `dir`.
 *
 * If `in_new_dent` is given, that dirent is renamed to `dname` and inserted
 * as is (sha1/st/modifier are not consulted). Otherwise a fresh dirent is
 * built from the raw 20-byte `sha1`, the stat data and `modifier`.
 */
static void
create_new_dent (ChangeSetDir *dir,
                 const char *dname,
                 unsigned char *sha1,
                 SeafStat *st,
                 const char *modifier,
                 ChangeSetDirent *in_new_dent)
{
    ChangeSetDirent *entry = in_new_dent;

    if (entry == NULL) {
        char hex_id[41];

        rawdata_to_hex (sha1, hex_id, 20);
        entry = changeset_dirent_new (hex_id, create_ce_mode(st->st_mode), dname,
                                      st->st_mtime, modifier, st->st_size);
    } else {
        /* Reuse the caller's dirent under its new name. */
        g_free (entry->name);
        entry->name = g_strdup(dname);
    }

    add_dent_to_dir (dir, entry);
}
/*
 * Add an empty directory entry named `dname` to `parent` and return the
 * (already expanded, empty) subdirectory it refers to. Both the entry and
 * the subdirectory start with EMPTY_SHA1 as their id.
 */
static ChangeSetDir *
create_intermediate_dir (ChangeSetDir *parent, const char *dname)
{
    ChangeSetDir *child_dir = changeset_dir_new (parent->version, EMPTY_SHA1, NULL);
    ChangeSetDirent *link = changeset_dirent_new (EMPTY_SHA1, S_IFDIR, dname,
                                                  0, NULL, 0);

    link->subdir = child_dir;
    add_dent_to_dir (parent, link);

    return child_dir;
}
/*
 * Add or update the entry at `path` in the tree rooted at `root`.
 *
 * The path is walked component by component. Directories that exist but
 * have not been expanded yet are loaded from the fs manager; missing
 * intermediate directories are created empty. At the last component:
 *   - an existing directory is left untouched;
 *   - an existing regular file is refreshed from sha1/st/modifier;
 *   - otherwise a new entry is created, or `new_dent` (if given, used by
 *     rename) is inserted under the new name.
 *
 * Ownership of `new_dent` always passes to this function: it is either
 * inserted into the tree or freed.
 * If a directory object cannot be loaded, a warning is logged and the
 * walk stops without modifying the tree further.
 */
static void
add_to_tree (const char *repo_id,
             ChangeSetDir *root,
             unsigned char *sha1,
             SeafStat *st,
             const char *modifier,
             const char *path,
             ChangeSetDirent *new_dent)
{
    char **parts, *dname;
    int n, i;
    ChangeSetDir *dir;
    ChangeSetDirent *dent;
    SeafDir *seaf_dir;
#if defined WIN32 || defined __APPLE__
    char *search_key;
#endif

    parts = g_strsplit (path, "/", 0);
    n = g_strv_length(parts);
    dir = root;

    for (i = 0; i < n; i++) {
    try_again:
        dname = parts[i];

        dent = g_hash_table_lookup (dir->dents, dname);
        if (dent) {
            if (S_ISDIR(dent->mode)) {
                if (i == (n-1))
                    /* Don't need to update empty dir */
                    break;

                if (!dent->subdir) {
                    seaf_dir = seaf_fs_manager_get_seafdir(seaf->fs_mgr,
                                                           repo_id,
                                                           root->version,
                                                           dent->id);
                    if (!seaf_dir) {
                        /* Cannot descend; stop instead of dereferencing NULL. */
                        seaf_warning ("Failed to load seafdir %s:%s\n",
                                      repo_id, dent->id);
                        break;
                    }
                    dent->subdir = seaf_dir_to_changeset_dir (seaf_dir);
                    /* The change set dir holds its own copies of the entries. */
                    seaf_dir_free (seaf_dir);
                }
                dir = dent->subdir;
            } else if (S_ISREG(dent->mode)) {
                if (i == (n-1)) {
                    /* File exists, update it. */
                    update_file (dent, sha1, st, modifier);
                    break;
                }
            }
        } else {
#if defined WIN32 || defined __APPLE__
            /* Only effective for add operation, not applicable to rename. */
            if (!new_dent) {
                search_key = g_utf8_strdown (dname, -1);
                dent = g_hash_table_lookup (dir->dents_i, search_key);
                g_free (search_key);
                if (dent) {
                    /* Same name in a different case: re-key the existing
                     * entry under the new spelling and look it up again. */
                    remove_dent_from_dir (dir, dent->name);
                    g_free (dent->name);
                    dent->name = g_strdup(dname);
                    add_dent_to_dir (dir, dent);
                    goto try_again;
                }
            }
#endif

            if (i == (n-1)) {
                create_new_dent (dir, dname, sha1, st, modifier, new_dent);
                /* The tree owns new_dent now. */
                new_dent = NULL;
            } else {
                dir = create_intermediate_dir (dir, dname);
            }
        }
    }

    /* Not inserted anywhere (walk stopped early): don't leak it. */
    changeset_dirent_free (new_dent);

    g_strfreev (parts);
}
/*
 * Detach the entry at `path` from the tree rooted at `root`.
 *
 * Directories along the path that have not been expanded yet are loaded
 * from the fs manager. When the last component is found (file or
 * directory) it is removed from its parent's tables without being freed
 * and returned; the caller owns the returned dirent.
 *
 * Returns NULL if any component is missing, or if a directory object on
 * the way cannot be loaded (a warning is logged in that case).
 */
static ChangeSetDirent *
delete_from_tree (const char *repo_id,
                  ChangeSetDir *root,
                  const char *path)
{
    char **parts, *dname;
    int n, i;
    ChangeSetDir *dir;
    ChangeSetDirent *dent, *ret = NULL;
    SeafDir *seaf_dir;

    parts = g_strsplit (path, "/", 0);
    n = g_strv_length(parts);
    dir = root;

    for (i = 0; i < n; i++) {
        dname = parts[i];

        dent = g_hash_table_lookup (dir->dents, dname);
        if (!dent)
            break;

        if (S_ISDIR(dent->mode)) {
            if (i == (n-1)) {
                /* Remove from hash table without freeing dent. */
                remove_dent_from_dir (dir, dname);
                ret = dent;
                break;
            }

            if (!dent->subdir) {
                seaf_dir = seaf_fs_manager_get_seafdir(seaf->fs_mgr,
                                                       repo_id,
                                                       root->version,
                                                       dent->id);
                if (!seaf_dir) {
                    /* Cannot descend; stop instead of dereferencing NULL. */
                    seaf_warning ("Failed to load seafdir %s:%s\n",
                                  repo_id, dent->id);
                    break;
                }
                dent->subdir = seaf_dir_to_changeset_dir (seaf_dir);
                /* The change set dir holds its own copies of the entries. */
                seaf_dir_free (seaf_dir);
            }
            dir = dent->subdir;
        } else if (S_ISREG(dent->mode)) {
            if (i == (n-1)) {
                /* Remove from hash table without freeing dent. */
                remove_dent_from_dir (dir, dname);
                ret = dent;
                break;
            }
        }
    }

    g_strfreev (parts);
    return ret;
}
/*
 * Apply one change of kind `status` (a DIFF_STATUS_* value) to the tree.
 *
 *   ADDED / MODIFIED / DIR_ADDED : add or update `path`;
 *   DELETED / DIR_DELETED        : detach `path` and free it;
 *   RENAMED                      : move the entry at `path` to `new_path`,
 *                                  replacing whatever was there.
 * Any other status is ignored.
 */
static void
apply_to_tree (const char *repo_id,
               ChangeSetDir *root,
               char status,
               unsigned char *sha1,
               SeafStat *st,
               const char *modifier,
               const char *path,
               const char *new_path)
{
    if (status == DIFF_STATUS_ADDED ||
        status == DIFF_STATUS_MODIFIED ||
        status == DIFF_STATUS_DIR_ADDED) {
        add_to_tree (repo_id, root, sha1, st, modifier, path, NULL);
        return;
    }

    if (status == DIFF_STATUS_DELETED ||
        status == DIFF_STATUS_DIR_DELETED) {
        ChangeSetDirent *removed = delete_from_tree (repo_id, root, path);

        changeset_dirent_free (removed);
        return;
    }

    if (status == DIFF_STATUS_RENAMED) {
        ChangeSetDirent *moved = delete_from_tree (repo_id, root, path);
        ChangeSetDirent *replaced;

        /* Source not in the tree: nothing to move. */
        if (moved == NULL)
            return;

        /* Drop whatever currently occupies the destination. */
        replaced = delete_from_tree (repo_id, root, new_path);
        changeset_dirent_free (replaced);

        add_to_tree (repo_id, root, NULL, NULL, NULL, new_path, moved);
    }
}
/*
 * Record one change in the change set.
 *
 * @changeset:   target change set; a NULL change set is ignored, because
 *               changeset_new() can fail and not every caller checks.
 * @status:      DIFF_STATUS_* value describing the change.
 * @sha1:        raw 20-byte id of the new content (used for add/modify).
 * @st:          stat data of the new content (used for add/modify).
 * @modifier:    modifier recorded on a new or updated entry.
 * @path:        path of the changed entry.
 * @new_path:    destination path; only used for DIFF_STATUS_RENAMED.
 * @add_to_diff: if TRUE, also prepend a diff entry for `path` to the
 *               change set's diff list; the tree is updated either way.
 */
void
add_to_changeset (ChangeSet *changeset,
                  char status,
                  unsigned char *sha1,
                  SeafStat *st,
                  const char *modifier,
                  const char *path,
                  const char *new_path,
                  gboolean add_to_diff)
{
    DiffEntry *de;
    unsigned char allzero[20] = {0};

    if (!changeset)
        return;

    if (add_to_diff) {
        /* The diff entry carries only status and path; its id is all zero. */
        de = diff_entry_new (DIFF_TYPE_INDEX, status, allzero, path);
        changeset->diff = g_list_prepend (changeset->diff, de);
    }

    apply_to_tree (changeset->repo_id, changeset->tree_root,
                   status, sha1, st, modifier, path, new_path);
}
/*
 * Save `dir` and every expanded subdirectory below it, bottom up.
 *
 * For each entry with an expanded subdir, the subdir is committed first and
 * the entry's id and mtime are replaced by the subdir's new id and mtime.
 * The directory object is then built and saved unless an object with that
 * id already exists.
 *
 * Returns a newly allocated copy of the directory id, or NULL on failure.
 * On success *new_mtime receives the largest mtime among the entries
 * (0 for an empty directory); on failure it is left untouched.
 */
static char *
commit_tree_recursive (const char *repo_id, ChangeSetDir *dir, gint64 *new_mtime)
{
    GHashTableIter it;
    gpointer entry_key, entry_value;
    gint64 latest_mtime = 0;
    gboolean stored = TRUE;
    SeafDir *dir_obj;
    char *result = NULL;

    g_hash_table_iter_init (&it, dir->dents);
    while (g_hash_table_iter_next (&it, &entry_key, &entry_value)) {
        ChangeSetDirent *child = entry_value;

        if (child->subdir != NULL) {
            gint64 child_mtime;
            char *child_id = commit_tree_recursive (repo_id, child->subdir,
                                                    &child_mtime);

            if (child_id == NULL)
                return NULL;

            memcpy (child->id, child_id, 40);
            child->mtime = child_mtime;
            g_free (child_id);
        }

        if (child->mtime > latest_mtime)
            latest_mtime = child->mtime;
    }

    dir_obj = changeset_dir_to_seaf_dir (dir);

    /* Only write the object if it is not in the store yet. */
    if (!seaf_fs_manager_object_exists (seaf->fs_mgr,
                                        repo_id, dir->version,
                                        dir_obj->dir_id)) {
        if (seaf_dir_save (seaf->fs_mgr, repo_id, dir->version, dir_obj) < 0) {
            seaf_warning ("Failed to save dir object %s to repo %s.\n",
                          dir_obj->dir_id, repo_id);
            stored = FALSE;
        }
    }

    if (stored) {
        result = g_strdup(dir_obj->dir_id);
        *new_mtime = latest_mtime;
    }

    seaf_dir_free (dir_obj);
    return result;
}
/*
 * Commit the whole tree of a change set:
 * - dir ids are calculated from the bottom up;
 * - seaf dir objects are created and saved.
 * Returns the root dir id of the new commit as a newly allocated string,
 * or NULL if saving failed.
 */
char *
commit_tree_from_changeset (ChangeSet *changeset)
{
    /* The root's mtime is computed by the recursion but not needed here. */
    gint64 root_mtime;

    return commit_tree_recursive (changeset->repo_id,
                                  changeset->tree_root,
                                  &root_mtime);
}

39
daemon/change-set.h Normal file
View File

@ -0,0 +1,39 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
#ifndef SEAF_CHANGE_SET_H
#define SEAF_CHANGE_SET_H
#include <glib.h>
#include "utils.h"
struct _ChangeSetDir;
/*
 * A set of pending changes for one repo: a partial directory tree that the
 * changes are applied to, plus the list of diff entries describing them.
 */
struct _ChangeSet {
/* 36-character repo id followed by a terminating NUL. */
char repo_id[37];
/* List of diff entries, used to generate commit description. */
GList *diff;
/* A partial tree for all changed directories. */
struct _ChangeSetDir *tree_root;
};
typedef struct _ChangeSet ChangeSet;
/*
 * Create a change set for the given repo, seeded with the root directory
 * of the repo's head commit. Returns NULL if the repo, its head commit or
 * the root directory cannot be found. Free with changeset_free().
 */
ChangeSet *
changeset_new (const char *repo_id);
/* Free a change set, its tree and its diff entries. NULL is accepted. */
void
changeset_free (ChangeSet *changeset);
/*
 * Apply one change to the change set's tree.
 *
 * status:      a DIFF_STATUS_* value.
 * sha1:        raw 20-byte id of the new content (add/modify).
 * st:          stat data of the new content (add/modify).
 * modifier:    modifier recorded on a new or updated entry.
 * path:        path of the changed entry.
 * new_path:    destination path, used for renames.
 * add_to_diff: if TRUE, a diff entry for `path` is also added to the
 *              change set's diff list.
 */
void
add_to_changeset (ChangeSet *changeset,
char status,
unsigned char *sha1,
SeafStat *st,
const char *modifier,
const char *path,
const char *new_path,
gboolean add_to_diff);
/*
 * Save the directory objects of the change set's tree and return the new
 * root dir id as a newly allocated string, or NULL on failure.
 */
char *
commit_tree_from_changeset (ChangeSet *changeset);
#endif

View File

@ -1811,7 +1811,6 @@ get_locked_files_thread (void *vdata)
}
if (status == HTTP_OK) {
seaf_message ("%s\n", rsp_content);
if (parse_locked_files (rsp_content, rsp_size, data) < 0)
goto out;
data->success = TRUE;

View File

@ -29,6 +29,7 @@
#include "index/cache-tree.h"
#include "unpack-trees.h"
#include "diff-simple.h"
#include "change-set.h"
#include "db.h"
@ -997,6 +998,7 @@ index_cb (const char *repo_id,
typedef struct _AddOptions {
LockedFileSet *fset;
ChangeSet *changeset;
gboolean is_repo_ro;
gboolean startup_scan;
} AddOptions;
@ -1017,6 +1019,7 @@ add_file (const char *repo_id,
gboolean added = FALSE;
int ret = 0;
gboolean is_writable = TRUE, is_locked = FALSE;
struct cache_entry *ce;
if (options)
is_writable = is_path_writable(repo_id,
@ -1026,11 +1029,9 @@ add_file (const char *repo_id,
repo_id, path);
if (options && options->startup_scan) {
struct cache_entry *ce;
SyncStatus status;
ce = index_name_exists (istate, path, strlen(path), 0);
if (!ce || ie_match_stat(ce, st, 0) != 0)
status = SYNC_STATUS_SYNCING;
else
@ -1078,6 +1079,18 @@ add_file (const char *repo_id,
S_IFREG,
SYNC_STATUS_SYNCED);
}
if (added && options && options->changeset) {
/* ce may be updated. */
ce = index_name_exists (istate, path, strlen(path), 0);
add_to_changeset (options->changeset,
DIFF_STATUS_ADDED,
ce->sha1,
st,
modifier,
path,
NULL,
TRUE);
}
} else if (*remain_files == NULL) {
ret = add_to_index (repo_id, version, istate, path, full_path,
st, 0, crypt, index_cb, modifier, &added);
@ -1092,6 +1105,18 @@ add_file (const char *repo_id,
S_IFREG,
SYNC_STATUS_SYNCED);
}
if (added && options && options->changeset) {
/* ce may be updated. */
ce = index_name_exists (istate, path, strlen(path), 0);
add_to_changeset (options->changeset,
DIFF_STATUS_ADDED,
ce->sha1,
st,
modifier,
path,
NULL,
TRUE);
}
} else
g_queue_push_tail (*remain_files, g_strdup(path));
@ -1227,9 +1252,20 @@ add_dir_recursive (const char *path, const char *full_path, SeafStat *st,
}
if (n == 0 && path[0] != 0 && is_writable) {
if (!params->remain_files || *(params->remain_files) == NULL)
add_empty_dir_to_index (params->istate, path, st);
else
if (!params->remain_files || *(params->remain_files) == NULL) {
int rc = add_empty_dir_to_index (params->istate, path, st);
if (rc == 1 && options && options->changeset) {
unsigned char allzero[20] = {0};
add_to_changeset (options->changeset,
DIFF_STATUS_DIR_ADDED,
allzero,
st,
NULL,
path,
NULL,
TRUE);
}
} else
g_queue_push_tail (*(params->remain_files), g_strdup(path));
}
@ -1464,9 +1500,20 @@ add_dir_recursive (const char *path, const char *full_path, SeafStat *st,
}
if (data.n == 0 && path[0] != 0 && !params->ignore_empty_dir && is_writable) {
if (!params->remain_files || *(params->remain_files) == NULL)
add_empty_dir_to_index (params->istate, path, st);
else
if (!params->remain_files || *(params->remain_files) == NULL) {
int rc = add_empty_dir_to_index (params->istate, path, st);
if (rc == 1 && options && options->changeset) {
unsigned char allzero[20] = {0};
add_to_changeset (options->changeset,
DIFF_STATUS_DIR_ADDED,
allzero,
st,
NULL,
path,
NULL,
TRUE);
}
} else
g_queue_push_tail (*(params->remain_files), g_strdup(path));
}
@ -1620,7 +1667,8 @@ check_locked_file_before_remove (LockedFileSet *fset, const char *path)
static void
remove_deleted (struct index_state *istate, const char *worktree, const char *prefix,
GList *ignore_list, LockedFileSet *fset,
const char *repo_id, gboolean is_repo_ro)
const char *repo_id, gboolean is_repo_ro,
ChangeSet *changeset)
{
struct cache_entry **ce_array = istate->cache;
struct cache_entry *ce;
@ -1650,10 +1698,24 @@ remove_deleted (struct index_state *istate, const char *worktree, const char *pr
not_exist = TRUE;
if (S_ISDIR (ce->ce_mode)) {
if ((not_exist || (ret == 0 && !S_ISDIR (st.st_mode))
|| !is_empty_dir (path, ignore_list)) &&
(ce->ce_ctime.sec != 0 || ce_stage(ce) != 0))
ce->ce_flags |= CE_REMOVE;
if (ce->ce_ctime.sec != 0 || ce_stage(ce) != 0) {
if (not_exist || (ret == 0 && !S_ISDIR (st.st_mode))) {
/* Add to changeset only if dir is removed. */
ce->ce_flags |= CE_REMOVE;
if (changeset)
add_to_changeset (changeset,
DIFF_STATUS_DIR_DELETED,
NULL,
NULL,
NULL,
ce->name,
NULL,
TRUE);
} else if (!is_empty_dir (path, ignore_list)) {
/* Don't add to changeset if empty dir became non-empty. */
ce->ce_flags |= CE_REMOVE;
}
}
} else {
/* If ce->ctime is 0 and stage is 0, it was not successfully checked out.
* In this case we don't want to mistakenly remove the file
@ -1662,7 +1724,18 @@ remove_deleted (struct index_state *istate, const char *worktree, const char *pr
if ((not_exist || (ret == 0 && !S_ISREG (st.st_mode))) &&
(ce->ce_ctime.sec != 0 || ce_stage(ce) != 0) &&
check_locked_file_before_remove (fset, ce->name))
{
ce_array[i]->ce_flags |= CE_REMOVE;
if (changeset)
add_to_changeset (changeset,
DIFF_STATUS_DELETED,
NULL,
NULL,
NULL,
ce->name,
NULL,
TRUE);
}
}
}
@ -1677,12 +1750,13 @@ scan_worktree_for_changes (struct index_state *istate, SeafRepo *repo,
LockedFileSet *fset)
{
remove_deleted (istate, repo->worktree, "", ignore_list, fset,
repo->id, repo->is_readonly);
repo->id, repo->is_readonly, repo->changeset);
AddOptions options;
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
options.changeset = repo->changeset;
if (add_recursive (repo->id, repo->version, repo->email,
istate, repo->worktree, "", crypt, FALSE, ignore_list,
@ -1737,12 +1811,13 @@ add_path_to_index (SeafRepo *repo, struct index_state *istate,
*/
if (path[0] == 0) {
remove_deleted (istate, repo->worktree, "", ignore_list, fset,
repo->id, repo->is_readonly);
repo->id, repo->is_readonly, repo->changeset);
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
options.startup_scan = TRUE;
options.changeset = repo->changeset;
add_recursive (repo->id, repo->version, repo->email, istate,
repo->worktree, path,
@ -1792,6 +1867,7 @@ add_path_to_index (SeafRepo *repo, struct index_state *istate,
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
options.changeset = repo->changeset;
/* Add is always recursive */
add_recursive (repo->id, repo->version, repo->email, istate, repo->worktree, path,
@ -1845,7 +1921,7 @@ add_path_to_index (SeafRepo *repo, struct index_state *istate,
return 0;
remove_deleted (istate, repo->worktree, path, ignore_list, NULL,
repo->id, repo->is_readonly);
repo->id, repo->is_readonly, repo->changeset);
*scanned_dirs = g_list_prepend (*scanned_dirs, g_strdup(path));
@ -1853,6 +1929,7 @@ add_path_to_index (SeafRepo *repo, struct index_state *istate,
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
options.changeset = repo->changeset;
/* When something is changed in the root directory, update active path
* sync status when scanning the worktree. This is inaccurate. This will
* be changed after we process fs events on Mac more precisely.
@ -1877,6 +1954,7 @@ add_remain_files (SeafRepo *repo, struct index_state *istate,
char *path;
char *full_path;
SeafStat st;
struct cache_entry *ce;
while ((path = g_queue_pop_head (remain_files)) != NULL) {
full_path = g_build_filename (repo->worktree, path, NULL);
@ -1905,9 +1983,33 @@ add_remain_files (SeafRepo *repo, struct index_state *istate,
S_IFREG,
SYNC_STATUS_SYNCED);
}
if (added) {
ce = index_name_exists (istate, path, strlen(path), 0);
add_to_changeset (repo->changeset,
DIFF_STATUS_ADDED,
ce->sha1,
&st,
repo->email,
path,
NULL,
TRUE);
}
} else if (S_ISDIR(st.st_mode)) {
if (is_empty_dir (full_path, ignore_list))
add_empty_dir_to_index (istate, path, &st);
if (is_empty_dir (full_path, ignore_list)) {
int rc = add_empty_dir_to_index (istate, path, &st);
if (rc == 1) {
unsigned char allzero[20] = {0};
add_to_changeset (repo->changeset,
DIFF_STATUS_DIR_ADDED,
allzero,
&st,
NULL,
path,
NULL,
TRUE);
}
}
}
g_free (path);
g_free (full_path);
@ -2024,7 +2126,6 @@ update_ce_mode (struct index_state *istate, const char *worktree, const char *pa
unsigned int new_mode = create_ce_mode (st.st_mode);
if (new_mode != ce->ce_mode)
ce->ce_mode = new_mode;
istate->cache_changed = 1;
}
#ifdef WIN32
@ -2036,13 +2137,15 @@ scan_subtree_for_deletion (const char *repo_id,
GList *ignore_list,
LockedFileSet *fset,
gboolean is_readonly,
GList **scanned_dirs)
GList **scanned_dirs,
ChangeSet *changeset)
{
wchar_t *path_w;
wchar_t *dir_w = NULL;
wchar_t *p;
char *dir = NULL;
char *p2;
gboolean convertion_failed = FALSE;
/* In most file systems, like NTFS, 8.3 format path should contain ~.
* Also note that *~ files are ignored.
@ -2060,11 +2163,14 @@ scan_subtree_for_deletion (const char *repo_id,
p = wcsrchr (path_w, L'\\');
if (p)
*p = L'\0';
else
break;
dir_w = win32_83_path_to_long_path (worktree, path_w, wcslen(path_w));
if (dir_w)
break;
if (!p)
break;
else
convertion_failed = TRUE;
}
if (!dir_w)
@ -2105,7 +2211,28 @@ scan_subtree_for_deletion (const char *repo_id,
*scanned_dirs = g_list_prepend (*scanned_dirs, g_strdup(dir));
remove_deleted (istate, worktree, dir, ignore_list, fset,
repo_id, is_readonly);
repo_id, is_readonly, changeset);
/* After remove_deleted(), empty dirs are left not removed in changeset.
* This can be fixed by removing the accurate deleted path. In most cases,
* basename doesn't contain ~, so we can always get the accurate path.
*/
if (!convertion_failed) {
char *basename = strrchr (path, '/');
char *deleted_path = NULL;
if (basename) {
deleted_path = g_build_path ("/", dir, basename, NULL);
add_to_changeset (changeset,
DIFF_STATUS_DELETED,
NULL,
NULL,
NULL,
deleted_path,
NULL,
FALSE);
g_free (deleted_path);
}
}
out:
g_free (path_w);
@ -2121,7 +2248,8 @@ scan_subtree_for_deletion (const char *repo_id,
GList *ignore_list,
LockedFileSet *fset,
gboolean is_readonly,
GList **scanned_dirs)
GList **scanned_dirs,
ChangeSet *changeset)
{
}
#endif
@ -2689,6 +2817,111 @@ update_path_sync_status (SeafRepo *repo, WTStatus *status,
}
}
/*
 * Handle a rename event (event->path -> event->new_path): update the index
 * and the repo's change set, then rescan the destination.
 *
 * The event is ignored when either path is not writable or is locked on
 * the server. If the destination is ignored, the rename is treated as a
 * removal of the source. Otherwise the index entries are renamed, the
 * rename is recorded in the change set (unless the source is ignored) and
 * the destination is added recursively.
 *
 * scanned_del_dirs is passed through to scan_subtree_for_deletion().
 */
static void
handle_rename (SeafRepo *repo, struct index_state *istate,
SeafileCrypt *crypt, GList *ignore_list,
LockedFileSet *fset,
WTEvent *event, GList **scanned_del_dirs)
{
gboolean not_found, src_ignored, dst_ignored;
/* Both ends of the rename must be writable. */
if (!is_path_writable(repo->id,
repo->is_readonly, event->path) ||
!is_path_writable(repo->id,
repo->is_readonly, event->new_path)) {
seaf_debug ("Rename: %s or %s is not writable, ignore.\n",
event->path, event->new_path);
return;
}
/* Neither end may be locked on the server. */
if (seaf_filelock_manager_is_file_locked (seaf->filelock_mgr,
repo->id, event->path) ||
seaf_filelock_manager_is_file_locked (seaf->filelock_mgr,
repo->id, event->new_path)) {
seaf_debug ("Rename: %s or %s is locked on server, ignore.\n", event->path, event->new_path);
return;
}
src_ignored = check_full_path_ignore(repo->worktree, event->path, ignore_list);
dst_ignored = check_full_path_ignore(repo->worktree, event->new_path, ignore_list);
/* If the destination path is ignored, just remove the source path. */
if (dst_ignored) {
if (check_locked_file_before_remove (fset, event->path)) {
not_found = FALSE;
remove_from_index_with_prefix (istate, event->path, &not_found);
/* Source not found in the index under this name: fall back to
 * scanning the subtree for deletions. */
if (not_found)
scan_subtree_for_deletion (repo->id,
istate,
repo->worktree, event->path,
ignore_list, fset,
repo->is_readonly,
scanned_del_dirs,
repo->changeset);
/* Remove the source from the change set tree only; no diff entry
 * is added here (add_to_diff is FALSE). */
add_to_changeset (repo->changeset,
DIFF_STATUS_DELETED,
NULL,
NULL,
NULL,
event->path,
NULL,
FALSE);
}
return;
}
if (check_locked_file_before_remove (fset, event->path)) {
not_found = FALSE;
rename_index_entries (istate, event->path, event->new_path, &not_found,
NULL, NULL);
if (not_found)
scan_subtree_for_deletion (repo->id,
istate,
repo->worktree, event->path,
ignore_list, fset,
repo->is_readonly,
scanned_del_dirs,
repo->changeset);
/* Moving files out of a dir may make it empty. */
try_add_empty_parent_dir_entry_from_wt (repo->worktree,
istate,
ignore_list,
event->path);
}
/* dst_ignored is always FALSE at this point (that case returned above),
 * so this effectively tests only src_ignored. */
if (!dst_ignored && !src_ignored)
add_to_changeset (repo->changeset,
DIFF_STATUS_RENAMED,
NULL,
NULL,
NULL,
event->path,
event->new_path,
TRUE);
AddOptions options;
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
options.changeset = repo->changeset;
/* We should always scan the destination to compare with the renamed
* index entries. For example, in the following case:
* 1. file a.txt is updated;
* 2. a.txt is moved to test/a.txt;
* If the two operations are executed in a batch, the updated content
* of a.txt won't be committed if we don't scan the destination, because
* when we process the update event, a.txt is already not in its original
* place.
*/
add_recursive (repo->id, repo->version, repo->email,
istate, repo->worktree, event->new_path,
crypt, FALSE, ignore_list,
NULL, NULL, &options);
}
static int
apply_worktree_changes_to_index (SeafRepo *repo, struct index_state *istate,
SeafileCrypt *crypt, GList *ignore_list,
@ -2788,7 +3021,17 @@ apply_worktree_changes_to_index (SeafRepo *repo, struct index_state *istate,
repo->worktree, event->path,
ignore_list, fset,
repo->is_readonly,
&scanned_del_dirs);
&scanned_del_dirs,
repo->changeset);
add_to_changeset (repo->changeset,
DIFF_STATUS_DELETED,
NULL,
NULL,
NULL,
event->path,
NULL,
TRUE);
try_add_empty_parent_dir_entry_from_wt (repo->worktree,
istate,
@ -2797,77 +3040,7 @@ apply_worktree_changes_to_index (SeafRepo *repo, struct index_state *istate,
}
break;
case WT_EVENT_RENAME:
if (!is_path_writable(repo->id,
repo->is_readonly, event->path) ||
!is_path_writable(repo->id,
repo->is_readonly, event->new_path)) {
seaf_debug ("Rename: %s or %s is not writable, ignore.\n",
event->path, event->new_path);
break;
}
if (seaf_filelock_manager_is_file_locked (seaf->filelock_mgr,
repo->id, event->path) ||
seaf_filelock_manager_is_file_locked (seaf->filelock_mgr,
repo->id, event->new_path)) {
seaf_debug ("Rename: %s or %s is locked on server, ignore.\n", event->path, event->new_path);
break;
}
/* If the destination path is ignored, just remove the source path. */
if (check_full_path_ignore (repo->worktree, event->new_path,
ignore_list)) {
if (check_locked_file_before_remove (fset, event->path)) {
not_found = FALSE;
remove_from_index_with_prefix (istate, event->path, &not_found);
if (not_found)
scan_subtree_for_deletion (repo->id,
istate,
repo->worktree, event->path,
ignore_list, fset,
repo->is_readonly,
&scanned_del_dirs);
}
break;
}
if (check_locked_file_before_remove (fset, event->path)) {
not_found = FALSE;
rename_index_entries (istate, event->path, event->new_path, &not_found,
NULL, NULL);
if (not_found)
scan_subtree_for_deletion (repo->id,
istate,
repo->worktree, event->path,
ignore_list, fset,
repo->is_readonly,
&scanned_del_dirs);
/* Moving files out of a dir may make it empty. */
try_add_empty_parent_dir_entry_from_wt (repo->worktree,
istate,
ignore_list,
event->path);
}
AddOptions options;
memset (&options, 0, sizeof(options));
options.fset = fset;
options.is_repo_ro = repo->is_readonly;
/* We should always scan the destination to compare with the renamed
* index entries. For example, in the following case:
* 1. file a.txt is updated;
* 2. a.txt is moved to test/a.txt;
* If the two operations are executed in a batch, the updated content
* of a.txt won't be committed if we don't scan the destination, because
* when we process the update event, a.txt is already not in its original
* place.
*/
add_recursive (repo->id, repo->version, repo->email,
istate, repo->worktree, event->new_path,
crypt, FALSE, ignore_list,
NULL, NULL, &options);
handle_rename (repo, istate, crypt, ignore_list, fset, event, &scanned_del_dirs);
break;
case WT_EVENT_ATTRIB:
if (!is_path_writable(repo->id,
@ -3072,7 +3245,7 @@ seaf_repo_index_worktree_files (const char *repo_id,
NULL, NULL, NULL) < 0)
goto error;
remove_deleted (&istate, worktree, "", ignore_list, NULL, repo_id, FALSE);
remove_deleted (&istate, worktree, "", ignore_list, NULL, repo_id, FALSE, NULL);
it = cache_tree ();
if (cache_tree_update (repo_id, repo_version, worktree,
@ -3252,14 +3425,11 @@ seaf_repo_is_index_unmerged (SeafRepo *repo)
}
static int
commit_tree (SeafRepo *repo, struct cache_tree *it,
commit_tree (SeafRepo *repo, const char *root_id,
const char *desc, char commit_id[],
gboolean unmerged)
{
SeafCommit *commit;
char root_id[41];
rawdata_to_hex (it->sha1, root_id, 20);
commit = seaf_commit_new (NULL, repo->id, root_id,
repo->email ? repo->email
@ -3337,16 +3507,26 @@ print_index (struct index_state *istate)
return 0;
}
/*
 * Log the wall-clock time elapsed between two timestamps, in milliseconds.
 *
 * @desc: label printed in front of the elapsed time.
 * @s:    start timestamp.
 * @e:    end timestamp.
 *
 * The difference is taken field by field first and then widened to 64 bits
 * before scaling to microseconds. Multiplying tv_sec by G_USEC_PER_SEC in
 * plain `glong` arithmetic overflows where long is 32 bits wide.
 */
static inline void
print_time (const char *desc, GTimeVal *s, GTimeVal *e)
{
    gint64 elapsed_usec;

    elapsed_usec = (gint64)(e->tv_sec - s->tv_sec) * G_USEC_PER_SEC
                   + (gint64)(e->tv_usec - s->tv_usec);

    /* The value is signed, so print it with a signed 64-bit format. */
    seaf_message ("%s: %" G_GINT64_FORMAT "\n", desc, elapsed_usec / 1000);
}
char *
seaf_repo_index_commit (SeafRepo *repo, const char *desc, gboolean is_force_commit,
GError **error)
{
SeafRepoManager *mgr = repo->manager;
struct index_state istate;
struct cache_tree *it;
char index_path[SEAF_PATH_MAX];
char *root_id = NULL;
char commit_id[41];
gboolean unmerged = FALSE;
ChangeSet *changeset = NULL;
char *my_desc = NULL;
char *ret = NULL;
if (!check_worktree_common (repo))
return NULL;
@ -3362,62 +3542,63 @@ seaf_repo_index_commit (SeafRepo *repo, const char *desc, gboolean is_force_comm
if (need_handle_unmerged_index (repo, &istate))
unmerged = TRUE;
GTimeVal s, e;
g_get_current_time (&s);
changeset = changeset_new (repo->id);
repo->changeset = changeset;
if (index_add (repo, &istate, is_force_commit, unmerged) < 0) {
g_set_error (error, SEAFILE_DOMAIN, SEAF_ERR_GENERAL, "Failed to add");
goto error;
goto out;
}
/* Commit before updating the index, so that new blocks won't be GC'ed. */
g_get_current_time (&e);
print_time ("index_add", &s, &e);
char *my_desc = g_strdup(desc);
if (my_desc[0] == '\0') {
char *gen_desc = gen_commit_description (repo, &istate);
if (!gen_desc) {
/* error not set. */
g_free (my_desc);
if (!istate.cache_changed)
goto out;
/* Still need to update index even nothing to commit. */
update_index (&istate, index_path);
discard_index (&istate);
g_get_current_time (&s);
return NULL;
}
g_free (my_desc);
my_desc = gen_desc;
my_desc = diff_results_to_description (changeset->diff);
if (!my_desc)
my_desc = g_strdup("");
g_get_current_time (&e);
print_time ("gen_commit_description", &s, &e);
g_get_current_time (&s);
root_id = commit_tree_from_changeset (changeset);
if (!root_id) {
seaf_warning ("Create commit tree failed for repo %s\n", repo->id);
goto out;
}
it = cache_tree ();
if (cache_tree_update (repo->id, repo->version,
repo->worktree,
it, istate.cache,
istate.cache_nr, 0, 0, commit_trees_cb) < 0) {
seaf_warning ("Failed to build cache tree");
g_set_error (error, SEAFILE_DOMAIN, SEAF_ERR_INTERNAL, "Internal data structure error");
cache_tree_free (&it);
goto error;
}
g_get_current_time (&e);
print_time ("cache_tree_update", &s, &e);
if (commit_tree (repo, it, my_desc, commit_id, unmerged) < 0) {
if (commit_tree (repo, root_id, my_desc, commit_id, unmerged) < 0) {
seaf_warning ("Failed to save commit file");
g_set_error (error, SEAFILE_DOMAIN, SEAF_ERR_INTERNAL, "Internal error");
cache_tree_free (&it);
goto error;
goto out;
}
g_free (my_desc);
cache_tree_free (&it);
if (update_index (&istate, index_path) < 0)
goto error;
discard_index (&istate);
goto out;
g_signal_emit_by_name (seaf, "repo-committed", repo);
return g_strdup(commit_id);
ret = g_strdup(commit_id);
error:
out:
g_free (my_desc);
g_free (root_id);
changeset_free (changeset);
discard_index (&istate);
return NULL;
return ret;
}
#ifdef DEBUG_UNPACK_TREES

View File

@ -33,6 +33,8 @@
struct _SeafRepoManager;
typedef struct _SeafRepo SeafRepo;
struct _ChangeSet;
/* The caller can use the properties directly. But the caller should
* always write on repos via the API.
*/
@ -92,6 +94,10 @@ struct _SeafRepo {
/* Can be server_url or server_url:8082, depends on which one works. */
char *effective_host;
gboolean use_fileserver_port;
/* Set of file changes detected during indexing.
* Stored here to avoid passing additional arguments. */
struct _ChangeSet *changeset;
};