#!/usr/bin/python
import filecmp
import os
#Compare two files
# Show the stat record of each file, then whether their contents are equal
# (shallow=False forces a content comparison instead of trusting os.stat).
first_path, second_path = 'server.py', 'server2.py'
for path in (first_path, second_path):
    print(os.stat(path))
print(filecmp.cmp(first_path, second_path, shallow=False))
#Compare the assignments submitted by students
#Compare all files from one student with all files from another student
#!/usr/bin/python
import filecmp
import os
import sys
def getFiles(d):
    '''Return the paths of all non-hidden files found under directory d.

    The tree rooted at d is walked recursively. Files whose name starts
    with '.' are skipped, and directories whose name starts with '.'
    (for example .git or .svn) are not descended into, so bookkeeping
    files that are identical in every submission are not collected.

    d -- path of the directory to scan

    Returns a list of paths (each one prefixed with d), ordered by
    directory and then by file name so repeated runs give the same order.
    '''
    files = []
    for dirpath, dirnames, filenames in os.walk(d):
        # Assigning to dirnames[:] edits the list os.walk itself uses, which
        # keeps it out of hidden directories; sorting fixes the visit order.
        dirnames[:] = sorted(n for n in dirnames if not n.startswith('.'))
        for f in sorted(filenames):
            if not f.startswith('.'):
                files.append(os.path.join(dirpath, f))
    return files
def getDir(d):
    '''Return the paths of the directories located directly inside d.

    d -- path of the directory to list

    Returns a list of paths (each one prefixed with d), sorted by name.
    os.listdir yields entries in arbitrary order, so the names are sorted
    to keep the order of the compared folder pairs stable between runs.
    '''
    candidates = (os.path.join(d, e) for e in sorted(os.listdir(d)))
    return [p for p in candidates if os.path.isdir(p)]
def compareFolders(dir1, dir2):
    '''Print every pair of files with equal content across two directories.

    Each file returned by getFiles(dir1) is checked against each file
    returned by getFiles(dir2) with filecmp.cmp(..., shallow=False); every
    matching pair is printed as the two paths separated by a space.
    '''
    left_files = getFiles(dir1)
    right_files = getFiles(dir2)
    # Lazy generator: pairs are compared and printed one at a time, in the
    # same order a nested loop over the two lists would produce.
    identical = ((a, b)
                 for a in left_files
                 for b in right_files
                 if filecmp.cmp(a, b, shallow=False))
    for a, b in identical:
        print('%s %s' % (a, b))
if __name__ == '__main__':
    # Exactly one argument is expected: the directory holding one
    # sub-folder per student.
    if len(sys.argv) != 2:
        print('Usage: python comparison.py dir')
        sys.exit(1)
    # Get all folders
    folders = getDir(sys.argv[1])
    # Pair each folder only with the folders that come after it, so every
    # unordered pair is compared exactly once.
    for position, first in enumerate(folders):
        for second in folders[position + 1:]:
            compareFolders(first, second)
#Compare the assignments submitted by students
#Compare all files from one student with all files from another student, and report file pairs that have high similarity
#!/usr/bin/python
import filecmp
import os
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
def getFiles(d):
    '''Return the paths of all non-hidden files found under directory d.

    The tree rooted at d is walked recursively. Names starting with '.'
    are treated as hidden: such files are left out and such directories
    (for example .git or .svn) are not entered, so bookkeeping files that
    are the same in every submission are not collected.

    d -- path of the directory to scan

    Returns a list of paths (each one prefixed with d), ordered by
    directory and then by file name so repeated runs give the same order.
    '''
    def visible(names):
        # Sorted names that do not start with a dot.
        return sorted(n for n in names if not n.startswith('.'))

    files = []
    for dirpath, dirnames, filenames in os.walk(d):
        # In-place assignment is what makes os.walk skip hidden directories.
        dirnames[:] = visible(dirnames)
        files.extend(os.path.join(dirpath, f) for f in visible(filenames))
    return files
def getDir(d):
    '''Return the paths of the directories located directly inside d.

    d -- path of the directory to list

    Returns a list of paths (each one prefixed with d), sorted by name.
    The entries of os.listdir come in arbitrary order; sorting them keeps
    the order of the compared folder pairs reproducible.
    '''
    folders = []
    for entry in sorted(os.listdir(d)):
        path = os.path.join(d, entry)
        if os.path.isdir(path):
            folders.append(path)
    return folders
def compareFiles(f1, f2):
    '''Return the TF-IDF similarity of the contents of two files.

    f1, f2 -- paths of the files to compare

    Both files are vectorized together with TfidfVectorizer and the entry
    [0, 1] of tfidf * tfidf.T is returned. With the vectorizer's default
    L2 row normalisation this is the cosine similarity of the two files.

    Returns 0.0 when the vectorizer cannot build a vocabulary (for
    example when both files are empty), so one uninformative pair does
    not abort the comparison of all the other files.
    '''
    documents = []
    for path in (f1, f2):
        # Read raw bytes and let the vectorizer decode them, so binary or
        # non-UTF-8 files do not raise a decoding error.
        with open(path, 'rb') as content_file:
            documents.append(content_file.read())
    vectorizer = TfidfVectorizer(decode_error='ignore')
    try:
        tfidf = vectorizer.fit_transform(documents)
    except ValueError:
        # fit_transform raises ValueError when the vocabulary is empty.
        return 0.0
    pairwise_similarity = tfidf * tfidf.T
    return pairwise_similarity[0, 1]
def compareFolders(dir1, dir2, threshold=0.8):
    '''Print the pairs of highly similar files across two directories.

    dir1, dir2 -- directories whose files (as returned by getFiles) are
                  compared pairwise with compareFiles
    threshold  -- a pair is reported only when its similarity is strictly
                  greater than this value (default 0.8)

    Each reported pair is printed as the two paths followed by the
    similarity, separated by spaces.
    '''
    files_1 = getFiles(dir1)
    files_2 = getFiles(dir2)
    for f1 in files_1:
        for f2 in files_2:
            similarity = compareFiles(f1, f2)
            if similarity > threshold:
                print('%s %s %s' % (f1, f2, similarity))
if __name__ == '__main__':
    # The script takes a single argument: the directory that contains the
    # folders to compare with each other.
    if len(sys.argv) != 2:
        print('Usage: python comparison.py dir')
        sys.exit(1)
    # Get all folders
    folders = getDir(sys.argv[1])
    # enumerate(..., 1) yields the index of the first folder *after* the
    # current one, so each unordered pair of folders is compared once.
    for next_index, current in enumerate(folders, 1):
        for later in folders[next_index:]:
            compareFolders(current, later)
#import filecmp
# NOTE(review): dir_left and dir_right are not defined in this snippet; they
# are presumably set to the two directories to compare -- confirm.
dc = filecmp.dircmp(dir_left, dir_right)
# dircmp attributes to display, in the order they are printed.
report_attributes = (
    'left_list',     # entries in the left directory
    'right_list',    # entries in the right directory
    'left_only',     # entries in the left directory only
    'right_only',    # entries in the right directory only
    'common_files',  # files in both directories
    'diff_files',    # same file name, judged different by dircmp's
                     # shallow, os.stat()-based comparison
    'same_files',    # same file name, judged identical by that comparison
)
for attribute in report_attributes:
    print(getattr(dc, attribute))