Transfer from github.

2012-03-05 20:50:26 +01:00 · 2012-03-05 20:50:26 +01:00 · 7d1b908875
commit 7d1b908875
14 changed files with 461 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,9 @@
+#python specific
+*.pyc
+
+## generic files to ignore
+*~
+*.lock
+*.DS_Store
+*.swp
+*.out
--- a/0
+++ b/0
--- a/0
+++ b/0
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,2 @@
+include *.txt
+recursive-include docs *.txt
--- a/32
+++ b/32
@ -0,0 +1,32 @@
+PigeonHole
+==========
+
+The main purpose of this application is to sort some specific types of files into a well-arranged directory. 
+
+I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
+
+How it works
+------------
+
+The project is split into several files:
+* pigeonhole/pigeonhole.py : the one that should be run :)
+* setup.py : not used yet, sorry.
+* pigeonhole/config.py : where you should put your configuration.
+
+### config.py ###
+
+The configuration file contains the declaration of three variables : 
+
+1. useless_files_extensions : used to clean up a folder when this directory (and its subdirectories) contains nothing but files of these kinds. Do not put `*` inside this filter; the behavior in that case is not known yet.
+2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types; recognition is based on file extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures).
+3. shows_dict : used for files that have a 'special name'
+(i.e. using 'tbbt' while the real name that can be found in the destination folder is much longer)
+
+Unit testing
+------------
+
+All tests are located inside the `pigeonhole/test` directory. To launch them, use the following command, based on the Python handbook:
+
+	python -m unittest discover 
+
+Temporary files and folders are created (and cleaned up) to verify that file handling behaves correctly.
--- a/README.md
+++ b/README.md
@ -0,0 +1,32 @@
+PigeonHole
+==========
+
+The main purpose of this application is to sort some specific types of files into a well-arranged directory. 
+
+I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
+
+How it works
+------------
+
+The project is split into several files:
+* pigeonhole/pigeonhole.py : the one that should be run :)
+* setup.py : not used yet, sorry.
+* pigeonhole/config.py : where you should put your configuration.
+
+### config.py ###
+
+The configuration file contains the declaration of three variables : 
+
+1. useless_files_extensions : used to clean up a folder when this directory (and its subdirectories) contains nothing but files of these kinds. Do not put `*` inside this filter; the behavior in that case is not known yet.
+2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types (sorry, recognition is based on extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures)).
+3. shows_dict : used for files that have a 'special name'
+(i.e. using 'tbbt' while the real name that can be found in the destination folder is much longer)
+
+Unit testing
+------------
+
+All tests are located inside the `pigeonhole/test` directory. To launch them, use the following command, based on the Python handbook:
+
+	python -m unittest discover 
+
+Temporary files and folders are created (and cleaned up) to verify that file handling behaves correctly.
--- a/pigeonhole/init.py
+++ b/pigeonhole/init.py
--- a/pigeonhole/config.py
+++ b/pigeonhole/config.py
@ -0,0 +1,15 @@
+# -*- coding: UTF8 -*- 
+# Configuration file
+
+### Extensions of files considered worthless: a folder holding nothing
+### but files of these kinds can be deleted.
+useless_files_extensions = (
+	'srr',
+	'nfo',
+	'sfv',
+)
+
+### Only files ending with one of these extensions are sorted.
+shows_extensions = (
+	'avi',
+	'mkv',
+)
+
+### Abbreviation found in a filename -> name of the show folder to use.
+shows_dict = {
+	'wc': 'white collar',
+	'tbbt': 'the big bang theory',
+	'beingerica': 'being erica',
+}
--- a/pigeonhole/model.py
+++ b/pigeonhole/model.py
@ -0,0 +1,87 @@
+from subQuery import *
+import os
+
+class Structure(object):
+	"""Whole library tree: one Show per folder found directly under a root path."""
+
+	def __init__(self, path):
+		# Every sub-directory of `path` is wrapped in a Show; plain files are ignored.
+		found = []
+		for entry in os.listdir(path):
+			candidate = os.path.join(path, entry)
+			if os.path.isdir(candidate):
+				found.append(Show(candidate))
+		self.shows = found
+
+	def writeUrls(self):
+		"""Asks every season of every show to write its url shortcut."""
+		for show in self.shows:
+			for season in show.seasons:
+				season.writeUrl()
+
+class Show(object):
+	""" A show folder: its path, its name, its looked-up url(s) and its seasons """
+
+	def __init__(self, path):
+		self.path = path
+		self.name = os.path.basename(path)
+
+		# Whatever queryShow returns for the folder name
+		self.url = queryShow(self.name)
+
+		# Each sub-directory is treated as one season of this show
+		seasons = []
+		for entry in os.listdir(path):
+			candidate = os.path.join(path, entry)
+			if os.path.isdir(candidate):
+				seasons.append(Season(self, candidate))
+		self.seasons = seasons
+
+	def __str__(self):
+		return self.name
+
+class Season(object):
+	""" Represents a season within a show.
+
+		parent       -- the owning Show
+		path         -- folder of the season on the disk
+		name         -- basename of that folder
+		seasonnumber -- first run of digits found in the folder name (a string),
+		                or None when the folder name holds no digit
+		episodes     -- one Episode per regular file found directly in the folder
+		url          -- list returned by querySeason (empty without season number)
+	"""
+
+	def __init__(self, parent, path):
+
+		self.parent = parent
+		self.path = path
+		self.name = os.path.basename(path)
+
+		# A folder such as 'Extras' holds no number: indexing [0] blindly raised
+		# an IndexError and aborted the scan of the whole structure.
+		# NOTE: `re` reaches this module through `from subQuery import *`.
+		numbers = re.findall('[0-9]+', self.name)
+		self.seasonnumber = numbers[0] if numbers else None
+
+		self.episodes = [Episode(self, os.path.join(path, x)) for x in os.listdir(path) if os.path.isfile(os.path.join(path, x))]
+
+		# Without a season number there is nothing meaningful to look up.
+		if self.seasonnumber is None:
+			self.url = []
+		else:
+			self.url = querySeason(parent.name, self.seasonnumber)
+
+	def writeUrl(self):
+		""" Writes '<show name>.url' into the season folder when exactly one url is known.
+
+			self.url already holds the result of querySeason for this season, so it
+			is used as is instead of sending the same query a second time.
+		"""
+		if len(self.url) == 1:
+			print 'Writing subtitles shortcut for ' + self.parent.name
+			writeUrlShortcut(self.path, self.parent.name + '.url', str(self.url[0]), 'InternetShortcut')
+		elif len(self.url) == 0:
+			print 'too few urls for ' + self.parent.name
+		else:
+			print 'too many urls for ' + self.parent.name
+
+class Episode(object):
+	""" A single episode file belonging to a season """
+
+	def __init__(self, parent, path):
+		# parent is the owning season; name is the file name without its folder
+		self.parent = parent
+		self.path = path
+		self.name = os.path.basename(self.path)
+
+	def __str__(self):
+		return self.name
+
+class Folder(object):
+	""" A show directory located at a given path on the disk, e.g.
+		Show name
+			- Season 1
+			- Season 2
+			- ...
+	"""
+
+	# Class-level defaults, replaced per instance by the constructor
+	directory = None
+	name = None
+
+	def __init__(self, path):
+		self.directory = path
+		self.name = os.path.basename(path)
+
+	def __str__(self):
+		# Rendered as: name [directory]
+		return ''.join((self.name, ' [', self.directory, ']'))
--- a/pigeonhole/pigeonhole.py
+++ b/pigeonhole/pigeonhole.py
@ -0,0 +1,113 @@
+#encoding: utf-8
+
+import os
+import re
+import shutil
+import filecmp
+import config
+
+from model import *
+
+class _DownloadedFile(object):
+	""" A media file found in the download folder.
+
+		path      -- full path of the file
+		name      -- file name only
+		directory -- folder that holds the file
+	"""
+
+	def __init__(self, path, name):
+		self.path = path
+		self.name = name
+		self.directory = os.path.dirname(path)
+
+class PigeonHole(object):
+	""" Takes all the media files in a (download) folder and sorts
+		them into the corresponding folder, based on the found file name
+	"""
+
+	matches = None
+
+	# Any run of non-word characters in a file name is replaced by one space
+	_separators = re.compile(r'\W+')
+
+	def __init__(self, root, downloaddir):
+		""" root        -- folder holding one sub-folder per show
+			downloaddir -- folder where the files to sort are located
+		"""
+
+		self.structure = Structure(root)
+
+		self.downloadDir = downloaddir
+		self.rootShows = root
+		self.directories = os.listdir(self.rootShows)
+		self.series = list()
+
+	def walk(self):
+		""" Walks through the download folder and yields one _DownloadedFile per
+			file whose name ends with one of config.shows_extensions.
+		"""
+		for root, dirs, files in os.walk(self.downloadDir):
+			for filename in files:
+				if filename.endswith(config.shows_extensions):
+					# model.Show takes a single folder path and cannot describe
+					# a downloaded file, hence the dedicated record.
+					yield _DownloadedFile(os.path.join(root, filename), filename)
+
+	def walk2(self, foldername, extensions):
+		""" Yields the path of every file under foldername whose name does NOT end with one of the extensions. """
+		for root, dirs, files in os.walk(foldername):
+			for filename in files:
+				if not filename.endswith(extensions):
+					yield os.path.join(root, filename)
+
+	def process(self):
+		""" Stores the entries of the 'rootShows' folder as Folder objects, then moves every file yielded by walk(). """
+		self.series = [Folder(os.path.join(self.rootShows, x)) for x in self.directories]
+
+		for path in self.walk():
+			self.moveToFolder(path)
+
+	def moveToFolder(self, show):
+		""" Moves a specific show to its right folder.
+
+			The destination is first searched among the existing show folders; when
+			none matches, the abbreviations of config.shows_dict are tried.
+			A file matching nothing is left where it is.
+		"""
+
+		destinationfile = self.findFolder(show)
+
+		if destinationfile is not None:
+			self.move(show.path, destinationfile)
+
+			if self.isDeletable(show.directory):
+				print '\tDeleting ' + show.directory
+				shutil.rmtree(show.directory)
+
+			return
+
+		lowered = show.name.lower()
+		for key in config.shows_dict:
+			if key.lower() in lowered:
+				if os.path.exists(os.path.join(self.rootShows, config.shows_dict[key])):
+					destinationfile = os.path.join(self.rootShows, config.shows_dict[key], show.name)
+					print destinationfile
+					self.move(show.path, destinationfile)
+					# The file has left the download folder: trying further keys
+					# would attempt to move a file that no longer exists.
+					break
+
+	def findFolder(self, show):
+		"""Finds and returns the complete destinationpath for a specific show, or None when no show folder matches."""
+
+		result = self._separators.sub(' ', show.name.lower()).strip()
+
+		for s in self.series:
+			if s.name.lower() in result:
+				return os.path.join(s.directory, show.name)
+
+		return None
+
+	def move(self, originalfile, destinationfile):
+		""" Moves the downloaded file to the found folder. """
+		print 'Moving ' + originalfile + ' to ' + destinationfile
+		shutil.move(originalfile, destinationfile)
+
+	def _isProtected(self, foldername):
+		""" True when foldername is the download folder, the shows folder or one of their parent folders. """
+		candidate = os.path.normpath(foldername)
+		# Compared on whole path components, so 'C:\te' is not mistaken for a parent of 'C:\test'
+		prefix = candidate.rstrip(os.sep) + os.sep
+
+		for protected in (self.downloadDir, self.rootShows):
+			protected = os.path.normpath(protected)
+			if protected == candidate or protected.startswith(prefix):
+				return True
+
+		return False
+
+	def isDeletable(self, foldername):
+		""" Tells whether a folder can be removed: True only when every file under it
+			ends with one of config.useless_files_extensions (ie. .nfo, .srr or .sfv files).
+			Nothing is deleted here.
+
+			The download folder, the shows folder and their parent folders are never deletable.
+		"""
+		if foldername is None:
+			return False
+
+		if self._isProtected(foldername):
+			return False
+
+		# Walk once and reuse the count for both the message and the decision
+		interesting = sum(1 for x in self.walk2(foldername, config.useless_files_extensions))
+		print 'I got ' + str(interesting) + ' int. files'
+
+		# '==' and not 'is': identity of integers is an implementation detail
+		return interesting == 0
+
+	def __str__(self):
+		return 'PigeonHole module'
+
+	def __name__(self):
+		return 'PigeonHole'
+
+if __name__ == "__main__":
+	# Sort the content of C:\temp into the show folders of C:\test,
+	# then write the url shortcuts of the scanned structure.
+	sorter = PigeonHole(r'C:\test', r'C:\temp')
+	sorter.process()
+	sorter.structure.writeUrls()
+
--- a/pigeonhole/subQuery.py
+++ b/pigeonhole/subQuery.py
@ -0,0 +1,81 @@
+import urllib2
+import re
+import os
+from BeautifulSoup import BeautifulSoup
+
+"""
+	Querying non web services interfaces 
+	through http interrogation and regex results retrieval.
+"""
+
+languages = ('en', 'es', 'fr', 'de')
+
+class CustomUrl(object):
+	"""
+		Represents a custom url object.
+		It refers to a simple web page and can be embedded anywhere.
+
+		The full url is the base immediately followed by the suffix.
+	"""
+	fullUrl = None
+	suffix = None
+	base = None
+
+	def __init__(self, base, suffix):
+		self.base = str(base)
+		self.suffix = str(suffix)
+		self.fullUrl = ''.join((self.base, self.suffix))
+
+	def __str__(self):
+		return str(self.fullUrl)
+
+	def __unicode__(self):
+		return str(self.fullUrl)
+
+	def replace(self, oldstr, newstr):
+		# Only the suffix is rewritten; the base is carried over untouched
+		changed = self.suffix.replace(oldstr, newstr)
+		return CustomUrl(self.base, changed)
+
+def queryUrl(baseurl, paramindicator, regex, querystring):
+	"""
+		Querying a base url with a specific regex and a query.
+
+		eg. baseurl = http://www.tvsubtitles.net
+			paramindicator = /search.php?q=
+			querystring = my_query
+
+		The page at baseurl + paramindicator + querystring is fetched and every
+		tag whose href attribute matches the regular expression is collected.
+
+		Returns a list of CustomUrl (possibly empty), each built from baseurl
+		and a matching href. Errors raised by urllib2.urlopen are not caught.
+	"""
+	socket = urllib2.urlopen(baseurl + paramindicator + querystring)
+	try:
+		soup = BeautifulSoup(socket.read())
+	finally:
+		# Release the connection even when reading or parsing fails
+		socket.close()
+
+	# findAll(href=...) only returns tags carrying an href attribute, so it is
+	# read from the tag itself; no need to parse each tag a second time.
+	return [CustomUrl(baseurl, tag['href']) for tag in soup.findAll(href=re.compile(regex))]
+
+def queryShow(showname):
+	""" Searches www.tvsubtitles.net for a show name.
+
+		Returns the list of CustomUrl built from the links of the result page
+		that end with '/tvshow-<id>.html'.
+	"""
+	# Local import: quote() escapes spaces as %20 like before, and also the
+	# characters ('&', '#', '+', '%', ...) that would otherwise corrupt the query.
+	import urllib
+
+	if isinstance(showname, unicode):
+		# quote() is only reliable on byte strings
+		showname = showname.encode('utf-8')
+
+	return queryUrl('http://www.tvsubtitles.net', '/search.php?q=', r'/tvshow-([A-Za-z0-9]*)\.html$', urllib.quote(showname, ''))
+
+def querySeason(showname, seasonnumber):
+	""" Derives the season urls from the show urls: '.html' becomes '-<seasonnumber>.html'. """
+	season_urls = []
+	for show_url in queryShow(showname):
+		season_urls.append(show_url.replace('.html', '-' + str(seasonnumber) + '.html'))
+	return season_urls
+
+def writeUrlShortcut(folderpath, filename, url, shortcutname):
+	""" Write a shortcut to a specific web page and fix the shortcutname within the written file.
+
+		eg. writeUrlShortcut('/opt/tmp', 'google.url', 'http://www.google.com', 'Google')
+		writes the two lines
+			[Google]
+			URL=http://www.google.com
+		inside a file named /opt/tmp/google.url
+
+		Raises Exception when folderpath does not exist.
+	"""
+	if not os.path.exists(folderpath):
+		raise Exception('Writing Url : Path does not exists')
+
+	# Section header first, then the URL entry, separated by a single newline
+	lines = ('[%s]' % (shortcutname,), 'URL=%s' % (url,))
+	target = os.path.join(folderpath, filename)
+
+	with open(target, 'w+') as shortcut:
+		shortcut.write('\n'.join(lines))
--- a/pigeonhole/test/init.py
+++ b/pigeonhole/test/init.py
--- a/pigeonhole/test/test_pigeonhole.py
+++ b/pigeonhole/test/test_pigeonhole.py
@ -0,0 +1,75 @@
+import unittest
+import tempfile
+import shutil
+import os
+from pigeonhole import *
+#import config
+
+class TestPigeonHoleFunctions(unittest.TestCase):
+	"""Test the methods defined inside the PigeonHole class"""
+
+	def setUp(self):
+		"""Set up the test environment: a root folder with three show folders, a download folder and two scratch folders"""
+		self.rootdir = tempfile.mkdtemp(prefix='pigeonHole_root_')
+		self.downloaddir = tempfile.mkdtemp(prefix='pigeonHole_dl_dir_')
+
+		# Create an environment with three folders
+		os.mkdir(os.path.join(self.rootdir, 'White Collar'))
+		os.mkdir(os.path.join(self.rootdir, 'The Big Bang Theory'))
+		os.mkdir(os.path.join(self.rootdir, 'Being Erica'))
+
+		self.pigeonHole = pigeonhole.PigeonHole(self.rootdir, self.downloaddir)
+
+		self.notDeletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')
+		self.deletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')
+
+	def tearDown(self):
+		"""Tear down the test environment"""
+		self.pigeonHole = None
+
+		shutil.rmtree(self.notDeletableTmpDir)
+		shutil.rmtree(self.deletableTmpDir)
+
+		shutil.rmtree(self.rootdir)
+		shutil.rmtree(self.downloaddir)
+
+
+	def test_init(self):
+		""" Testing the constructor """
+		self.assertEqual(self.pigeonHole.rootShows, self.rootdir)
+		self.assertEqual(self.pigeonHole.downloadDir, self.downloaddir)
+		self.assertEqual(str(self.pigeonHole), 'PigeonHole module', 'The module string is not correct.')
+		# __name__ is a method on PigeonHole: it has to be called, and the
+		# comparison must not be wrapped in str() (str(False) is truthy, so
+		# the former assertion could never fail).
+		self.assertEqual(self.pigeonHole.__name__(), 'PigeonHole', 'The module name is not correct.')
+
+	def test_clean(self):
+		"""Testing the cleaning method"""
+
+		self.generatedfiles_bad = list()
+		self.generatedfiles_good = list()
+
+		# Folder mixing useless files and show files: must be kept
+		for x in config.useless_files_extensions + config.shows_extensions:
+			fd, temppath = tempfile.mkstemp(x, 'tmp', self.notDeletableTmpDir)
+			self.generatedfiles_bad.append(temppath)
+			os.close(fd)
+
+		# Folder holding useless files only: can be deleted
+		for y in config.useless_files_extensions:
+			fd, temppath = tempfile.mkstemp(y, 'tmp', self.deletableTmpDir)
+			self.generatedfiles_good.append(temppath)
+			os.close(fd)
+
+		self.assertFalse(self.pigeonHole.isDeletable(self.notDeletableTmpDir))
+		self.assertTrue(self.pigeonHole.isDeletable(self.deletableTmpDir))
+
+		# The root and download folders themselves are never deletable
+		self.assertFalse(self.pigeonHole.isDeletable(self.rootdir))
+		self.assertFalse(self.pigeonHole.isDeletable(self.downloaddir))
+
+	def test_findFolder(self):
+		"""Try to move a file to a specific location (not implemented yet)"""
+
+		pass
+		
+
+		
+
+# Allows running this test module directly as a script
+if __name__ == '__main__':
+    unittest.main()
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,15 @@
+from distutils.core import setup
+
+# Packaging is not enabled yet; the call below is kept commented out.
+#setup(
+#	name='PigeonHole',
+#	version='0.1.0',
+#	author='Fred Pauchet',
+#	author_email='fpauchet@gmail.com',
+#	packages=['pigeonhole','pigeonhole.test'],
+#	scripts=[],
+#	url='',
+#	licence='LICENCE',
+#	description='',
+#	long_description=open('README.md').read(),
+#	install_require=[],
+#)