commit 7d1b9088751cddb527b493a27f988f926a6b9ffe Author: Fred Pauchet Date: Mon Mar 5 20:50:26 2012 +0100 Transfer from github. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c4d72aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +#python specific +*.pyc + +## generic files to ignore +*~ +*.lock +*.DS_Store +*.swp +*.out \ No newline at end of file diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..e69de29 diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..e69de29 diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9203697 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.txt +recursive-include docs *.txt \ No newline at end of file diff --git a/README b/README new file mode 100644 index 0000000..79ea1b9 --- /dev/null +++ b/README @@ -0,0 +1,32 @@ +PigeonHole +========== + +The main purpose of this application is to sort some specific types of files into a well-arranged directory. + +I used it for classifying TV shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting. + +How it works +------------ + +The project is split into several files : +* pigeonhole/pigeonhole.py : the one that should be run :) +* setup.py : not used yet, sorry. +* pigeonhole/config.py : where you should put your configuration. + +### config.py ### + +The configuration file contains the declaration of three variables : + +1. useless_files_extensions : used to clean a folder when the content of this directory (and its subdirectories) is composed only of these kinds of files. Do not try to put `*` inside this filter, I don't know the behavior yet... +2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types; recognition is based on file extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures). +3. 
shows_dict : used for files that have a 'special name' +(i.e. using 'tbbt' while the real name that can be found in the destination folder is much much longer) + +Unit testing +------------ + +All tests are located inside the `pigeonhole/test` directory. To launch them, use the following command, based on the Python handbook: + + python -m unittest discover + +Temporary files and folders are created (and cleaned) to verify that file handling behaves correctly. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b1ef63e --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +PigeonHole +========== + +The main purpose of this application is to sort some specific types of files into a well-arranged directory. + +I used it for classifying TV shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting. + +How it works +------------ + +The project is split into several files : +* pigeonhole/pigeonhole.py : the one that should be run :) +* setup.py : not used yet, sorry. +* pigeonhole/config.py : where you should put your configuration. + +### config.py ### + +The configuration file contains the declaration of three variables : + +1. useless_files_extensions : used to clean a folder when the content of this directory (and its subdirectories) is composed only of these kinds of files. Do not try to put `*` inside this filter, I don't know the behavior yet... +2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types (sorry, recognition is based on extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures)) +3. shows_dict : used for files that have a 'special name' +(i.e. 
using 'tbbt' while the real name that can be found in the destination folder is much much longer) + +Unit testing +------------ + +All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook: + + python -m unittest discover + +Temporary files and folders are created (and cleaned) to verify that the file behavior is going okay. \ No newline at end of file diff --git a/pigeonhole/__init__.py b/pigeonhole/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pigeonhole/config.py b/pigeonhole/config.py new file mode 100644 index 0000000..5e32b3f --- /dev/null +++ b/pigeonhole/config.py @@ -0,0 +1,15 @@ +# -*- coding: UTF8 -*- +# Configuration file + +### If a folder only contains these types of files, we can delete it. +useless_files_extensions = ('srr', 'nfo', 'sfv') + +### Consider only files with these extensions +shows_extensions = ('avi', 'mkv') + +### Dictionary for special filename contents +shows_dict = { + 'wc' : 'white collar', + 'tbbt' : 'the big bang theory', + 'beingerica' : 'being erica', +} \ No newline at end of file diff --git a/pigeonhole/model.py b/pigeonhole/model.py new file mode 100644 index 0000000..a110176 --- /dev/null +++ b/pigeonhole/model.py @@ -0,0 +1,87 @@ +from subQuery import * +import os + +class Structure(object): + """Represents the complete structure, with its shows, seasons and episodes""" + + def __init__(self, path): + self.shows = [Show(os.path.join(path, x)) for x in os.listdir(path) if os.path.isdir(os.path.join(path, x))] + + def writeUrls(self): + for s in self.shows: + for season in s.seasons: + season.writeUrl() + +class Show(object): + """ Represents a show file; ie. 
a file associated to its fullname """ + + def __init__(self, path): + + self.path = path + self.name = os.path.basename(path) + + self.url = queryShow(self.name) + + self.seasons = [Season(self, os.path.join(path, x)) for x in os.listdir(path) if os.path.isdir(os.path.join(path, x))] + + def __str__(self): + return self.name + +class Season(object): + """ Represents a season within a show """ + + def __init__(self, parent, path): + + self.parent = parent + self.path = path + self.name = os.path.basename(path) + self.seasonnumber = re.findall('[0-9]+', os.path.basename(path))[0] + self.episodes = [Episode(self, os.path.join(path, x)) for x in os.listdir(path) if os.path.isfile(os.path.join(path, x))] + + self.url = querySeason(parent.name, self.seasonnumber) + + def writeUrl(self): + if len(self.url) == 1: + results = querySeason(self.parent.name, self.seasonnumber) + + if len(results) == 1: + print 'Writing subtitles shortcut for ' + self.parent.name + writeUrlShortcut(self.path, self.parent.name + '.url', str(self.url[0]), 'InternetShortcut') + elif len(results) == 0: + print 'no results have been found for ' + self.parent.name + else: + print 'too much results have been found' + elif len(self.url) == 0: + print 'too few urls for ' + self.parent.name + else: + print 'too many urls for ' + self.parent.name + +class Episode(object): + """ Represents an episode within a season """ + + def __init__(self, parent, path): + + self.parent = parent + self.path = path + self.name = os.path.basename(path) + + def __str__(self): + return self.name + +class Folder(object): + """ Directory show instanciation, relative to a path on the disk + ie. Show name + - Season 1 + - Season 2 + - ... 
+ """ + + directory = None + name = None + + def __init__(self, path): + self.directory = path; + self.name = os.path.basename(self.directory) + + def __str__(self): + return self.name + ' [' + self.directory + ']' \ No newline at end of file diff --git a/pigeonhole/pigeonhole.py b/pigeonhole/pigeonhole.py new file mode 100644 index 0000000..f1a619f --- /dev/null +++ b/pigeonhole/pigeonhole.py @@ -0,0 +1,113 @@ +#encoding: utf-8 + +import os +import re +import shutil +import filecmp +import config + +from model import * + +class PigeonHole(object): + """ Takes all the media files in a (download) folder and sort + them into the corresponding folder, based on the found file name + """ + + matches = None + + def __init__(self, root, downloaddir): + + self.structure = Structure(root) + + self.downloadDir = downloaddir + self.rootShows = root + self.directories = os.listdir(self.rootShows) + self.series = list() + + def walk(self): + """ Walks through the downloaded folders and yields .avi and .mkv files """ + for root, dirs, files in os.walk(self.downloadDir): + for filename in files: + if filename.endswith(config.shows_extensions): + yield Show(os.path.join(root, filename), filename) + + def walk2(self, foldername, extensions): + for root, dirs, files in os.walk(foldername): + for filename in files: + if not filename.endswith(extensions): + yield os.path.join(root, filename) + + def process(self): + """ Parses the directories within the 'rootShows' folder and stores them as shows in a list. """ + self.series = [ Folder(os.path.join(self.rootShows, x)) for x in self.directories] + + for path in self.walk(): + self.moveToFolder(path) + + def moveToFolder(self, show): + """ Moves a specific show to its right folder. 
""" + + destinationfile = self.findFolder(show) + + if destinationfile is not None: + self.move(show.path, destinationfile) + + if self.isDeletable(show.directory): + print '\tDeleting ' + show.directory + shutil.rmtree(show.directory) + + else: + for key in config.shows_dict: + if key.lower() in show.name.lower(): + if os.path.exists(os.path.join(self.rootShows, config.shows_dict[key])): + destinationfile = os.path.join(self.rootShows, config.shows_dict[key], show.name) + print destinationfile + self.move(show.path, destinationfile) + + def findFolder(self, show): + """Finds and returns the complete destinationpath for a specific show.""" + + rx = re.compile('\W+') + result = rx.sub(' ', show.name.lower()).strip() + + for s in self.series: + if s.name.lower() in result: + return os.path.join(s.directory, show.name) + + + def move(self, originalfile, destinationfile): + """ Moves the downloaded file to the found folder. """ + print 'Moving ' + originalfile + ' to ' + destinationfile + shutil.move(originalfile, destinationfile) + + def isDeletable(self, foldername): + """ Walks through the current directory and deletes it if nothing's really important in it + ie. .nfo, .srr or .sfv files. + """ + if foldername is None: + return False + + if foldername == self.downloadDir or foldername == self.rootShows: + return False + + if foldername in self.downloadDir or foldername in self.rootShows: + return False + + print 'I got ' + str(sum(1 for x in self.walk2(foldername, config.useless_files_extensions))) + ' int. 
files' + + if sum(1 for x in self.walk2(foldername, config.useless_files_extensions)) is 0: + return True + + return False + + def __str__(self): + return 'PigeonHole module' + + def __name__(self): + return 'PigeonHole' + +if __name__ == "__main__": + pHole = PigeonHole(r'C:\test', r'C:\temp') + pHole.process() + pHole.structure.writeUrls() + diff --git a/pigeonhole/subQuery.py b/pigeonhole/subQuery.py new file mode 100644 index 0000000..e0d206e --- /dev/null +++ b/pigeonhole/subQuery.py @@ -0,0 +1,81 @@ +import urllib2 +import re +import os +from BeautifulSoup import BeautifulSoup + +""" + Querying non web services interfaces + through http interrogation and regex results retrieval. +""" + +languages = ('en', 'es', 'fr', 'de') + +""" + Represents a custom url object. + It refers to a simple web page and can be embedded anywhere. +""" +class CustomUrl(object): + fullUrl = None + suffix = None + base = None + + def __init__(self, base, suffix): + self.base = str(base) + self.suffix = str(suffix) + self.fullUrl = self.base + self.suffix + + def __str__(self): + return str(self.fullUrl) + + def __unicode__(self): + return str(self.fullUrl) + + def replace(self, oldstr, newstr): + return CustomUrl(self.base, self.suffix.replace(oldstr, newstr)) + +""" + Querying a base url with a specific regex and a query. + + eg. baseurl = http://duckduckgo.com/?q= + query = my_query + baseregex = ... :) + + It will query the url, adds the query string and will fetch every href link that match the regular expression. 
+""" +def queryUrl(baseurl, paramindicator, regex, querystring): + #print '\tProbing ' + baseurl + ' ' + paramindicator + ' ' + regex + ' ' + querystring + socket = urllib2.urlopen(baseurl + paramindicator + querystring) + soup = BeautifulSoup(socket.read()) + socket.close() + + tags = soup.findAll(href=re.compile(regex)) + + mylist = list() + + for tag in tags: + bsoup = BeautifulSoup(str(tag)) + mylist.append(CustomUrl(baseurl, bsoup.a['href'])) + + return mylist + +def queryShow(showname): + return queryUrl('http://www.tvsubtitles.net', '/search.php?q=', '/tvshow-([A-Za-z0-9]*)\.html$', showname.replace(' ', '%20')) + +def querySeason(showname, seasonnumber): + return [x.replace('.html', '-' + str(seasonnumber) + '.html') for x in queryShow(showname)] + +""" Write a shortcut to a specific web page and fix the shortcutname within the writtent file. + + eg. writeUrlShortcut('/opt/tmp', 'google.url', 'http://www.google.com', 'Google') + >>> [Google] + >>> URL=http://www.google.com + >>> inside a file named /opt/tmp/google.url +""" +def writeUrlShortcut(folderpath, filename, url, shortcutname): + if not os.path.exists(folderpath): + raise Exception('Writing Url : Path does not exists') + + filecontent = """[%s]\nURL=%s""" % (shortcutname, url) + + with open(os.path.join(folderpath, filename), 'w+') as f: + f.write(filecontent) diff --git a/pigeonhole/test/__init__.py b/pigeonhole/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pigeonhole/test/test_pigeonhole.py b/pigeonhole/test/test_pigeonhole.py new file mode 100644 index 0000000..41abbb4 --- /dev/null +++ b/pigeonhole/test/test_pigeonhole.py @@ -0,0 +1,75 @@ +import unittest +import tempfile +import shutil +import os +from pigeonhole import * +#import config + +class TestPigeonHoleFunctions(unittest.TestCase): + """Test the methods defined inside the PigeonHole class""" + + def setUp(self): + """Set up the test environment""" + self.rootdir = tempfile.mkdtemp(prefix='pigeonHole_root_') + 
self.downloaddir = tempfile.mkdtemp(prefix='pigeonHole_dl_dir_') + + # Create an environment with three folders + os.mkdir(os.path.join(self.rootdir, 'White Collar')) + os.mkdir(os.path.join(self.rootdir, 'The Big Bang Theory')) + os.mkdir(os.path.join(self.rootdir, 'Being Erica')) + + self.pigeonHole = pigeonhole.PigeonHole(self.rootdir, self.downloaddir) + + self.notDeletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_') + self.deletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_') + + def tearDown(self): + """Tear down the test environment""" + self.pigeonHole = None + + shutil.rmtree(self.notDeletableTmpDir) + shutil.rmtree(self.deletableTmpDir) + + shutil.rmtree(self.rootdir) + shutil.rmtree(self.downloaddir) + + + def test_init(self): + """ Testing the constructor """ + self.assertEqual(self.pigeonHole.rootShows, self.rootdir) + self.assertEqual(self.pigeonHole.downloadDir, self.downloaddir) + self.assertTrue(str(self.pigeonHole) == 'PigeonHole module', 'The module string is not correct.') + self.assertTrue(str(self.pigeonHole.__name__ == 'PigeonHole'), 'The module name is not correct.') + + def test_clean(self): + """Testing the cleaning method""" + + self.generatedfiles_bad = list() + self.generatedfiles_good = list() + + for x in config.useless_files_extensions + config.shows_extensions: + fd, temppath = tempfile.mkstemp(x, 'tmp', self.notDeletableTmpDir) + self.generatedfiles_bad.append(temppath) + os.close(fd) + + for y in config.useless_files_extensions: + fd, temppath = tempfile.mkstemp(y, 'tmp', self.deletableTmpDir) + self.generatedfiles_good.append(temppath) + os.close(fd) + + self.assertFalse(self.pigeonHole.isDeletable(self.notDeletableTmpDir)) + self.assertTrue(self.pigeonHole.isDeletable(self.deletableTmpDir)) + + self.assertFalse(self.pigeonHole.isDeletable(self.rootdir)) + self.assertFalse(self.pigeonHole.isDeletable(self.downloaddir)) + + def test_findFolder(self): + """Try to move a file to a specific location""" + + pass + + + + +if 
__name__ == '__main__': + unittest.main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b982237 --- /dev/null +++ b/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup + +#setup { +# name='PigeonHole', +# version='0.1.0', +# author='Fred Pauchet' +# author_email='fpauchet@gmail.com', +# packages=['pigeonhole','pigeonhole.test'], +# scripts=[], +# url='', +# licence='LICENCE', +# description='', +# long_description=long_description=open('README').read(), +# install_require=[], +#}