Transfer from github.

This commit is contained in:
Fred Pauchet 2012-03-05 20:50:26 +01:00
commit 7d1b908875
14 changed files with 461 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
#python specific
*.pyc
## generic files to ignore
*~
*.lock
*.DS_Store
*.swp
*.out

0
CHANGES Normal file
View File

0
LICENCE Normal file
View File

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
include *.txt
recursive-include docs *.txt

32
README Normal file
View File

@ -0,0 +1,32 @@
PigeonHole
==========
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
How it works
------------
The project is split into several files:
* pigeonhole/pigeonhole.py : the one that should be run :)
* setup.py : not used yet, sorry.
* pigeonhole/config.py : where you should put your configuration.
### config.py ###
The configuration file contains the declaration of three variables :
1. useless_files_extensions : used to clean up a folder when this directory (and its subdirectories) contains only files of these kinds. Do not try to put `*` inside this filter, I don't know the behavior yet...
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types; recognition is based on file extensions, not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures).
3. shows_dict : used for files that have a 'special name'
(i.e. using 'tbbt' while the real name that can be found in the destination folder is much, much longer)
Unit testing
------------
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
python -m unittest discover
Temporary files and folders are created (and cleaned up) to verify that file handling behaves correctly.

32
README.md Normal file
View File

@ -0,0 +1,32 @@
PigeonHole
==========
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
How it works
------------
The project is split into several files:
* pigeonhole/pigeonhole.py : the one that should be run :)
* setup.py : not used yet, sorry.
* pigeonhole/config.py : where you should put your configuration.
### config.py ###
The configuration file contains the declaration of three variables :
1. useless_files_extensions : used to clean up a folder when this directory (and its subdirectories) contains only files of these kinds. Do not try to put `*` inside this filter, I don't know the behavior yet...
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types (sorry, recognition is based on extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures))
3. shows_dict : used for files that have a 'special name'
(i.e. using 'tbbt' while the real name that can be found in the destination folder is much, much longer)
Unit testing
------------
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
python -m unittest discover
Temporary files and folders are created (and cleaned up) to verify that file handling behaves correctly.

0
pigeonhole/__init__.py Normal file
View File

15
pigeonhole/config.py Normal file
View File

@ -0,0 +1,15 @@
# -*- coding: UTF8 -*-
# Configuration file
### If a folder only contains these types of files, we can delete it.
### PigeonHole.isDeletable matches file names with str.endswith, so the
### entries are plain suffixes written without a leading dot.
useless_files_extensions = ('srr', 'nfo', 'sfv')
### Consider only files with these extensions
### (PigeonHole.walk also matches them with str.endswith).
shows_extensions = ('avi', 'mkv')
### Dictionary for special filename contents
### Each key is searched case-insensitively inside the downloaded file name;
### the value is the folder name looked up under the shows root directory.
shows_dict = {
    'wc' : 'white collar',
    'tbbt' : 'the big bang theory',
    'beingerica' : 'being erica',
}

87
pigeonhole/model.py Normal file
View File

@ -0,0 +1,87 @@
from subQuery import *
import os
class Structure(object):
    """Whole library tree: one Show per sub-folder of the given root path."""

    def __init__(self, path):
        """Build a Show for every directory found directly under `path`.

        Plain files located in `path` are ignored.
        """
        candidates = (os.path.join(path, entry) for entry in os.listdir(path))
        self.shows = [Show(folder) for folder in candidates if os.path.isdir(folder)]

    def writeUrls(self):
        """Ask every season of every show to write its url shortcut."""
        all_seasons = (season for show in self.shows for season in show.seasons)
        for season in all_seasons:
            season.writeUrl()
class Show(object):
    """A show folder on disk, with its search results and season folders.

    Attributes:
        path: the folder path given to the constructor.
        name: last component of `path`.
        url: whatever `queryShow(name)` returned.
        seasons: one Season per sub-directory of `path`.
    """

    def __init__(self, path):
        self.path = path
        self.name = os.path.basename(path)
        self.url = queryShow(self.name)
        found = []
        for entry in os.listdir(path):
            candidate = os.path.join(path, entry)
            if os.path.isdir(candidate):
                found.append(Season(self, candidate))
        self.seasons = found

    def __str__(self):
        return self.name
class Season(object):
    """A season folder within a show.

    Attributes:
        parent: the owning show object (its `name` is used for queries).
        path: the season folder path.
        name: last component of `path`.
        seasonnumber: first run of digits found in `name` (as a string),
            or None when the folder name contains no digit.
        episodes: one Episode per regular file directly inside `path`.
        url: list returned by `querySeason`, or an empty list when the
            folder has no season number.
    """

    def __init__(self, parent, path):
        self.parent = parent
        self.path = path
        self.name = os.path.basename(path)
        # A folder such as "Extras" has no number; do not crash the whole
        # scan with an IndexError, just record that nothing can be queried.
        numbers = re.findall('[0-9]+', self.name)
        self.seasonnumber = numbers[0] if numbers else None
        self.episodes = [
            Episode(self, os.path.join(path, x))
            for x in os.listdir(path)
            if os.path.isfile(os.path.join(path, x))
        ]
        if self.seasonnumber is None:
            self.url = []
        else:
            self.url = querySeason(parent.name, self.seasonnumber)

    def writeUrl(self):
        """Write a '<show name>.url' shortcut into the season folder.

        The shortcut is only written when exactly one url is known for the
        season; otherwise a message is printed. The urls fetched by the
        constructor are reused instead of querying the site a second time.
        """
        showname = self.parent.name
        if len(self.url) == 0:
            print('too few urls for ' + showname)
        elif len(self.url) > 1:
            print('too many urls for ' + showname)
        else:
            print('Writing subtitles shortcut for ' + showname)
            writeUrlShortcut(self.path, showname + '.url', str(self.url[0]), 'InternetShortcut')
class Episode(object):
    """A single episode file belonging to a season.

    Attributes:
        parent: the owning season object.
        path: the file path given to the constructor.
        name: last component of `path`.
    """

    def __init__(self, parent, path):
        self.parent, self.path = parent, path
        self.name = os.path.basename(self.path)

    def __str__(self):
        return self.name
class Folder(object):
    """A show directory, identified by its path on the disk.

    Expected layout, e.g.:
        Show name
          - Season 1
          - Season 2
          - ...
    """

    # Class-level defaults; every instance overrides both in __init__.
    directory = None
    name = None

    def __init__(self, path):
        self.directory = path
        self.name = os.path.basename(path)

    def __str__(self):
        return '%s [%s]' % (self.name, self.directory)

113
pigeonhole/pigeonhole.py Normal file
View File

@ -0,0 +1,113 @@
#encoding: utf-8
import os
import re
import shutil
import filecmp
import config
from model import *
class _DownloadedFile(object):
    """A media file found in the download folder.

    Attributes:
        path: full path of the file.
        name: bare file name.
        directory: folder that contains the file.
    """

    def __init__(self, path, name):
        self.path = path
        self.name = name
        self.directory = os.path.dirname(path)


class PigeonHole(object):
    """ Takes all the media files in a (download) folder and sort
    them into the corresponding folder, based on the found file name
    """
    matches = None

    def __init__(self, root, downloaddir):
        """Remember both folders and list the entries of the shows root.

        root: folder holding one sub-folder per show.
        downloaddir: folder that is scanned for new media files.
        """
        self.structure = Structure(root)
        self.downloadDir = downloaddir
        self.rootShows = root
        self.directories = os.listdir(self.rootShows)
        self.series = list()

    def walk(self):
        """ Walks through the download folder and yields one record per file
        whose name ends with one of config.shows_extensions.

        Each record exposes `path`, `name` and `directory`, which is what
        moveToFolder and findFolder read.
        """
        for root, dirs, files in os.walk(self.downloadDir):
            for filename in files:
                if filename.endswith(config.shows_extensions):
                    yield _DownloadedFile(os.path.join(root, filename), filename)

    def walk2(self, foldername, extensions):
        """ Yields the full path of every file under `foldername` whose name
        does NOT end with one of `extensions`.
        """
        for root, dirs, files in os.walk(foldername):
            for filename in files:
                if not filename.endswith(extensions):
                    yield os.path.join(root, filename)

    def process(self):
        """ Parses the directories within the 'rootShows' folder and stores them as shows in a list,
        then moves every downloaded media file to its folder.
        """
        self.series = [Folder(os.path.join(self.rootShows, x)) for x in self.directories]
        for mediafile in self.walk():
            self.moveToFolder(mediafile)

    def moveToFolder(self, show):
        """ Moves a specific show to its right folder.

        The destination is first looked up by folder name (findFolder); when
        that fails, the aliases of config.shows_dict are tried. A file for
        which nothing matches is left where it is.
        """
        destinationfile = self.findFolder(show)
        if destinationfile is not None:
            self.move(show.path, destinationfile)
            if self.isDeletable(show.directory):
                print('\tDeleting ' + show.directory)
                shutil.rmtree(show.directory)
            return
        lowered = show.name.lower()
        for key in config.shows_dict:
            if key.lower() not in lowered:
                continue
            target = os.path.join(self.rootShows, config.shows_dict[key])
            if os.path.exists(target):
                destinationfile = os.path.join(target, show.name)
                print(destinationfile)
                self.move(show.path, destinationfile)
                # The source is gone now; trying another alias would fail.
                return

    def findFolder(self, show):
        """Finds and returns the complete destinationpath for a specific show.

        The file name is lower-cased and every run of non-word characters is
        replaced by a space before the known folder names are searched in it.
        Returns None when no folder name is contained in the file name.
        """
        rx = re.compile(r'\W+')
        result = rx.sub(' ', show.name.lower()).strip()
        for s in self.series:
            if s.name.lower() in result:
                return os.path.join(s.directory, show.name)
        return None

    def move(self, originalfile, destinationfile):
        """ Moves the downloaded file to the found folder. """
        print('Moving ' + originalfile + ' to ' + destinationfile)
        shutil.move(originalfile, destinationfile)

    def isDeletable(self, foldername):
        """ Tells whether a folder can be removed because nothing important is left in it
        ie. only .nfo, .srr or .sfv files (config.useless_files_extensions).

        The download folder, the shows root and any of their ancestors are
        never deletable. Returns False for None or an empty name.
        """
        if not foldername:
            return False
        folder = os.path.normcase(os.path.abspath(foldername))
        # Trailing separator so that '/tmp/dl' is not taken for an ancestor
        # of '/tmp/dl2'; os.path.join keeps a bare root ('/') unchanged.
        prefix = os.path.join(folder, '')
        for protected in (self.downloadDir, self.rootShows):
            protected = os.path.normcase(os.path.abspath(protected))
            if protected == folder or protected.startswith(prefix):
                return False
        remaining = sum(1 for x in self.walk2(foldername, config.useless_files_extensions))
        print('I got ' + str(remaining) + ' int. files')
        return remaining == 0

    def __str__(self):
        return 'PigeonHole module'

    def __name__(self):
        return 'PigeonHole'
if __name__ == "__main__":
    import sys

    # Usage: pigeonhole.py [SHOWS_ROOT [DOWNLOAD_DIR]]
    # Without arguments the original hard-coded folders are used.
    cli_args = sys.argv[1:]
    shows_root = cli_args[0] if len(cli_args) > 0 else r'C:\test'
    download_dir = cli_args[1] if len(cli_args) > 1 else r'C:\temp'

    pHole = PigeonHole(shows_root, download_dir)
    pHole.process()
    pHole.structure.writeUrls()

81
pigeonhole/subQuery.py Normal file
View File

@ -0,0 +1,81 @@
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup
"""
Querying non web services interfaces
through http interrogation and regex results retrieval.
"""
# Presumably the subtitle language codes of interest; nothing in the visible
# code reads this tuple yet -- TODO confirm before removing.
languages = ('en', 'es', 'fr', 'de')
class CustomUrl(object):
    """
    Represents a custom url object.
    It refers to a simple web page and can be embedded anywhere.

    Attributes:
        base: first part of the address, converted with str().
        suffix: second part of the address, converted with str().
        fullUrl: `base` immediately followed by `suffix`.
    """

    # Class-level defaults; every instance overrides them in __init__.
    fullUrl = None
    suffix = None
    base = None

    def __init__(self, base, suffix):
        base_text = str(base)
        suffix_text = str(suffix)
        self.base = base_text
        self.suffix = suffix_text
        self.fullUrl = base_text + suffix_text

    def __str__(self):
        return str(self.fullUrl)

    def __unicode__(self):
        return str(self.fullUrl)

    def replace(self, oldstr, newstr):
        """Return a new CustomUrl whose suffix has `oldstr` replaced by `newstr`.

        The base is carried over untouched and this object is not modified.
        """
        new_suffix = self.suffix.replace(oldstr, newstr)
        return CustomUrl(self.base, new_suffix)
def queryUrl(baseurl, paramindicator, regex, querystring):
    """
    Querying a base url with a specific regex and a query.
    eg. baseurl = http://duckduckgo.com/?q=
    query = my_query
    baseregex = ... :)
    It will query the url, adds the query string and will fetch every href link that match the regular expression.

    The page at `baseurl + paramindicator + querystring` is downloaded and
    parsed; the result is a list with one CustomUrl(baseurl, href) per tag
    whose href attribute matches `regex`. The list is empty when nothing
    matches. Errors raised while opening or reading the page propagate.
    """
    socket = urllib2.urlopen(baseurl + paramindicator + querystring)
    try:
        soup = BeautifulSoup(socket.read())
    finally:
        # Release the connection even when reading or parsing fails.
        socket.close()
    # findAll(href=...) only returns tags that carry a matching href, so the
    # attribute can be read straight from the tag without re-parsing it.
    return [CustomUrl(baseurl, tag['href']) for tag in soup.findAll(href=re.compile(regex))]
def queryShow(showname):
    """Search www.tvsubtitles.net for `showname` and return the show links.

    Spaces in the name are sent as '%20'. The result is what `queryUrl`
    returns for links of the form '/tvshow-<id>.html'.
    """
    site = 'http://www.tvsubtitles.net'
    search_path = '/search.php?q='
    show_link = r'/tvshow-([A-Za-z0-9]*)\.html$'
    encoded_name = showname.replace(' ', '%20')
    return queryUrl(site, search_path, show_link, encoded_name)
def querySeason(showname, seasonnumber):
    """Return the show links of `queryShow` rewritten to point at one season.

    In every result '.html' is replaced by '-<seasonnumber>.html'.
    """
    season_urls = []
    for show_url in queryShow(showname):
        season_urls.append(show_url.replace('.html', '-' + str(seasonnumber) + '.html'))
    return season_urls
def writeUrlShortcut(folderpath, filename, url, shortcutname):
    """ Write a shortcut to a specific web page and set the shortcut name within the written file.

    eg. writeUrlShortcut('/opt/tmp', 'google.url', 'http://www.google.com', 'Google')
    writes, inside a file named /opt/tmp/google.url:
        [Google]
        URL=http://www.google.com

    Raises Exception when `folderpath` does not exist. An existing file with
    the same name is overwritten.
    """
    folder_is_missing = not os.path.exists(folderpath)
    if folder_is_missing:
        raise Exception('Writing Url : Path does not exists')
    target = os.path.join(folderpath, filename)
    with open(target, 'w+') as shortcut:
        shortcut.write('[%s]\nURL=%s' % (shortcutname, url))

View File

View File

@ -0,0 +1,75 @@
import unittest
import tempfile
import shutil
import os
from pigeonhole import *
#import config
class TestPigeonHoleFunctions(unittest.TestCase):
    """Test the methods defined inside the PigeonHole class"""

    def _makeTempDir(self, prefix):
        """Create a temporary folder that is removed once the test is over,
        even when setUp itself fails later on."""
        path = tempfile.mkdtemp(prefix=prefix)
        self.addCleanup(shutil.rmtree, path)
        return path

    def setUp(self):
        """Set up the test environment"""
        self.rootdir = self._makeTempDir('pigeonHole_root_')
        self.downloaddir = self._makeTempDir('pigeonHole_dl_dir_')

        # Create an environment with three folders
        for showname in ('White Collar', 'The Big Bang Theory', 'Being Erica'):
            os.mkdir(os.path.join(self.rootdir, showname))

        # NOTE(review): relies on the module-level star import exposing the
        # names `pigeonhole` and `config` -- confirm against the package layout.
        self.pigeonHole = pigeonhole.PigeonHole(self.rootdir, self.downloaddir)

        self.notDeletableTmpDir = self._makeTempDir('pigeonHole_')
        self.deletableTmpDir = self._makeTempDir('pigeonHole_')

    def tearDown(self):
        """Tear down the test environment"""
        # The temporary folders are removed by the cleanups registered in setUp.
        self.pigeonHole = None

    def test_init(self):
        """ Testing the constructor """
        self.assertEqual(self.pigeonHole.rootShows, self.rootdir)
        self.assertEqual(self.pigeonHole.downloadDir, self.downloaddir)
        self.assertEqual(str(self.pigeonHole), 'PigeonHole module', 'The module string is not correct.')
        # __name__ is a method on PigeonHole: call it, and compare its result
        # (wrapping the comparison in str() made the check always succeed).
        self.assertEqual(self.pigeonHole.__name__(), 'PigeonHole', 'The module name is not correct.')

    def test_clean(self):
        """Testing the cleaning method"""
        # One folder mixing disposable files and media files: must be kept.
        for suffix in config.useless_files_extensions + config.shows_extensions:
            fd, temppath = tempfile.mkstemp(suffix, 'tmp', self.notDeletableTmpDir)
            os.close(fd)

        # One folder holding disposable files only: may be deleted.
        for suffix in config.useless_files_extensions:
            fd, temppath = tempfile.mkstemp(suffix, 'tmp', self.deletableTmpDir)
            os.close(fd)

        self.assertFalse(self.pigeonHole.isDeletable(self.notDeletableTmpDir))
        self.assertTrue(self.pigeonHole.isDeletable(self.deletableTmpDir))
        self.assertFalse(self.pigeonHole.isDeletable(self.rootdir))
        self.assertFalse(self.pigeonHole.isDeletable(self.downloaddir))

    def test_findFolder(self):
        """Try to move a file to a specific location"""
        # TODO: not implemented yet.
        pass
# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

15
setup.py Normal file
View File

@ -0,0 +1,15 @@
from distutils.core import setup
#setup(
# name='PigeonHole',
# version='0.1.0',
# author='Fred Pauchet',
# author_email='fpauchet@gmail.com',
# packages=['pigeonhole','pigeonhole.test'],
# scripts=[],
# url='',
# licence='LICENCE',
# description='',
# long_description=open('README').read(),
# install_require=[],
#)