- Factor out the pipe creation part of p4CmdList() as p4CmdListPipe()
- Factor readP4Files() into openP4Files() and readP4File() and
change P4Sync to use them.
The upshot is that git-p4 now reads only one file at a time and
immediately passes it on to fast-import. This massively reduces the
memory requirement of git-p4.
Note: git-p4 still reads each whole file into memory -- this can and
should be fixed in the future.
Signed-off-by: Tommy Thorn <tommy-git@thorn.ws>
---
contrib/fast-import/git-p4 | 102 ++++++++++++++++++++++++++++++-------------
1 files changed, 71 insertions(+), 31 deletions(-)
diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 2340876..78a5d02 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -144,7 +144,8 @@ def isModeExec(mode):
def isModeExecChanged(src_mode, dst_mode):
return isModeExec(src_mode) != isModeExec(dst_mode)
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+# p4CmdListPipe returns a pipe delivering the result of the p4 command
+def p4CmdListPipe(cmd, stdin=None, stdin_mode='w+b'):
cmd = "p4 -G %s" % cmd
if verbose:
sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -162,7 +163,11 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
p4 = subprocess.Popen(cmd, shell=True,
stdin=stdin_file,
stdout=subprocess.PIPE)
+ return p4
+# p4CmdList returns the stdout result of the p4 command
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+ p4 = p4CmdListPipe(cmd, stdin, stdin_mode)
result = []
try:
while True:
@@ -950,42 +955,62 @@ class P4Sync(Command):
return branches
## Should move this out, doesn't use SELF.
- def readP4Files(self, files):
+ def openP4Files(self, files):
files = [f for f in files
if f['action'] != 'delete']
if not files:
return
- filedata = p4CmdList('-x - print',
- stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
- for f in files]),
- stdin_mode='w+')
- if "p4ExitCode" in filedata[0]:
- die("Problems executing p4. Error: [%d]."
- % (filedata[0]['p4ExitCode']));
-
- j = 0;
- contents = {}
- while j < len(filedata):
- stat = filedata[j]
- j += 1
- text = ''
- while j < len(filedata) and filedata[j]['code'] in ('text',
- 'binary'):
- text += filedata[j]['data']
- j += 1
+ p4 = p4CmdListPipe('-x - print',
+ stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
+ for f in files]),
+ stdin_mode='w+')
+ self.curDepotFile = None
+ return p4
- if not stat.has_key('depotFile'):
- sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
- continue
+ # Using the pipe handle provided by openP4Files, read in a file and
+ # return the pair of (depot file name, contents)
+ def readP4File(self, p4):
+ text = ''
- contents[stat['depotFile']] = text
+ try:
+ while True:
+ entry = marshal.load(p4.stdout)
+
+ if entry['code'] in ('text', 'binary'):
+ text += entry['data']
+ elif entry['code'] == 'stat':
+ # We are done with the previous file
+ if self.curDepotFile is not None:
+ if not entry.has_key('depotFile'):
+ sys.stderr.write("p4 print fails with: %s\n" % repr(entry))
+ self.curDepotFile = None
+ continue
+
+ depotFile = self.curDepotFile
+ self.curDepotFile = entry['depotFile']
+ if verbose: sys.stderr.write('Read %s\n' % depotFile)
+ return (depotFile, text)
+
+ text = ''
+ self.curDepotFile = entry['depotFile']
+ else:
+ sys.stderr.write("p4 print returned unexpected code: %s\n" % entry['code'])
+
+ except EOFError:
+ pass
+
+ exitCode = p4.wait()
+ if exitCode != 0:
+ die("Problems executing p4. Error: [%d]." % exitCode);
+
+ depotFile = self.curDepotFile
+ self.curDepotFile = None
+ if verbose: sys.stderr.write('Read %s\n' % depotFile)
+ return (depotFile, text)
- for f in files:
- assert not f.has_key('data')
- f['data'] = contents[f['path']]
def commit(self, details, files, branch, branchPrefixes, parent = ""):
epoch = details["time"]
@@ -996,6 +1021,10 @@ class P4Sync(Command):
# start with reading files; if that fails, we should not
# create a commit.
+
+ # XXX No, reading all file contents into memory is too high a
+ # price to pay. Right now if an error is found, it will abort,
+ # leaving cruft for git gc to prune.
new_files = []
for f in files:
if [p for p in branchPrefixes if f['path'].startswith(p)]:
@@ -1003,7 +1032,6 @@ class P4Sync(Command):
else:
sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
files = new_files
- self.readP4Files(files)
@@ -1034,17 +1062,26 @@ class P4Sync(Command):
print "parent %s" % parent
self.gitStream.write("from %s\n" % parent)
- for file in files:
+ # Create a depotFileName -> file mapping
+ revFile = {}
+ for f in files:
+ revFile[f['path']] = f
+
+ p4 = self.openP4Files(files)
+ (depotFile, data) = self.readP4File(p4)
+
+ while depotFile is not None:
+
+ file = revFile[depotFile]
if file["type"] == "apple":
print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
+ (depotFile, data) = self.readP4File(p4)
continue
relPath = self.stripRepoPath(file['path'], branchPrefixes)
if file["action"] == "delete":
self.gitStream.write("D %s\n" % relPath)
else:
- data = file['data']
-
mode = "644"
if isP4Exec(file["type"]):
mode = "755"
@@ -1061,6 +1098,9 @@ class P4Sync(Command):
self.gitStream.write(data)
self.gitStream.write("\n")
+ (depotFile, data) = self.readP4File(p4)
+
+
self.gitStream.write("\n")
change = int(details["change"])
--
1.5.4.rc5.17.g22b645
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
| Christoph Lameter | Re: [PATCH 0/24] make atomic_read() behave consistently across all architectures |
| Greg Kroah-Hartman | [PATCH 005/196] Chinese: add translation of SubmittingDrivers |
| Pekka Enberg | Re: [bug] mm/slab.c boot crash in -git, "kernel BUG at mm/slab.c:2103!" |
| Glauber de Oliveira Costa | [PATCH 08/79] [PATCH] use identify_boot_cpu |
git: | |
| Gerrit Renker | [PATCH 27/37] dccp: Integration of dynamic feature activation - part 2 (server side) |
| Jarek Poplawski | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| Natalie Protasevich | [BUG] New Kernel Bugs |
| David Miller | [GIT]: Networking |
