Transfer from github.
This commit is contained in:
commit
7d1b908875
|
@ -0,0 +1,9 @@
|
|||
#python specific
|
||||
*.pyc
|
||||
|
||||
## generic files to ignore
|
||||
*~
|
||||
*.lock
|
||||
*.DS_Store
|
||||
*.swp
|
||||
*.out
|
|
@ -0,0 +1,2 @@
|
|||
include *.txt
|
||||
recursive-include docs *.txt
|
|
@ -0,0 +1,32 @@
|
|||
PigeonHole
|
||||
==========
|
||||
|
||||
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
|
||||
|
||||
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
|
||||
|
||||
How it works
|
||||
------------
|
||||
|
||||
The project is split into several files:
|
||||
* pigeonhole/pigeonhole.py : the one that should be run :)
|
||||
* setup.py : not used yet, sorry.
|
||||
* pigeonhole/config.py : where you should put your configuration.
|
||||
|
||||
### config.py ###
|
||||
|
||||
The configuration file contains the declaration of three variables :
|
||||
|
||||
1. useless_files_extensions : used to clean a folder when the content of this directory (and its subdirectories) consists only of files of this kind. Do not try to put `*` inside this filter; I don't know the behavior yet...
|
||||
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types; recognition is based on file extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures).
|
||||
3. shows_dict : used for files that have a 'special name'
|
||||
(ie. using 'tbbt' while the real name that can be found in the destination folder is much much longer)
|
||||
|
||||
Unit testing
|
||||
------------
|
||||
|
||||
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
|
||||
|
||||
python -m unittest discover
|
||||
|
||||
Temporary files and folders are created (and cleaned) to verify that the file behavior is going okay.
|
|
@ -0,0 +1,32 @@
|
|||
PigeonHole
|
||||
==========
|
||||
|
||||
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
|
||||
|
||||
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
|
||||
|
||||
How it works
|
||||
------------
|
||||
|
||||
The project is split into several files:
|
||||
* pigeonhole/pigeonhole.py : the one that should be run :)
|
||||
* setup.py : not used yet, sorry.
|
||||
* pigeonhole/config.py : where you should put your configuration.
|
||||
|
||||
### config.py ###
|
||||
|
||||
The configuration file contains the declaration of three variables :
|
||||
|
||||
1. useless_files_extensions : used to clean a folder when the content of this directory (and its subdirectories) consists only of files of this kind. Do not try to put `*` inside this filter; I don't know the behavior yet...
|
||||
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types (sorry, the recognition is based on file extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures)).
|
||||
3. shows_dict : used for files that have a 'special name'
|
||||
(ie. using 'tbbt' while the real name that can be found in the destination folder is much much longer)
|
||||
|
||||
Unit testing
|
||||
------------
|
||||
|
||||
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
|
||||
|
||||
python -m unittest discover
|
||||
|
||||
Temporary files and folders are created (and cleaned) to verify that the file behavior is going okay.
|
|
@ -0,0 +1,15 @@
|
|||
# -*- coding: UTF8 -*-
|
||||
# Configuration file
|
||||
|
||||
# Extensions of files that carry no value on their own: a folder that
# contains nothing else may be removed.
useless_files_extensions = (
    'srr',
    'nfo',
    'sfv',
)

# Only files whose name ends with one of these extensions are sorted.
shows_extensions = (
    'avi',
    'mkv',
)

# Abbreviations that may appear in a file name, mapped to the name of the
# show folder they stand for.
shows_dict = dict(
    wc='white collar',
    tbbt='the big bang theory',
    beingerica='being erica',
)
|
|
@ -0,0 +1,87 @@
|
|||
from subQuery import *
|
||||
import os
|
||||
|
||||
class Structure(object):
    """Collection of the shows found directly under a root directory.

    Every sub-directory of ``path`` is wrapped in a ``Show``; entries that
    are not directories are ignored.
    """

    def __init__(self, path):
        found = []
        for entry in os.listdir(path):
            candidate = os.path.join(path, entry)
            if os.path.isdir(candidate):
                found.append(Show(candidate))
        self.shows = found

    def writeUrls(self):
        """Call ``writeUrl()`` on each season of each show, in order."""
        for show in self.shows:
            for season in show.seasons:
                season.writeUrl()
|
||||
|
||||
class Show(object):
    """A show directory together with the seasons stored beneath it.

    Attributes set by the constructor:
        path    -- the directory given to the constructor
        name    -- last component of ``path``
        url     -- whatever ``queryShow(name)`` returns
        seasons -- one ``Season`` per sub-directory of ``path``
    """

    def __init__(self, path):
        self.path = path
        self.name = os.path.basename(path)
        self.url = queryShow(self.name)

        seasons = []
        for entry in os.listdir(path):
            season_path = os.path.join(path, entry)
            if os.path.isdir(season_path):
                seasons.append(Season(self, season_path))
        self.seasons = seasons

    def __str__(self):
        return self.name
|
||||
|
||||
class Season(object):
    """A season directory belonging to a show.

    Attributes set by the constructor:
        parent       -- the owning show object (its ``name`` drives queries)
        path         -- path of the season directory
        name         -- last component of ``path``
        seasonnumber -- first run of digits in the directory name (a string)
        episodes     -- one ``Episode`` per regular file found in ``path``
        url          -- result of ``querySeason(parent.name, seasonnumber)``
    """

    def __init__(self, parent, path):
        self.parent = parent
        self.path = path
        self.name = os.path.basename(path)
        # Raises IndexError when the directory name contains no digit.
        self.seasonnumber = re.findall(r'[0-9]+', self.name)[0]
        self.episodes = [
            Episode(self, os.path.join(path, x))
            for x in os.listdir(path)
            if os.path.isfile(os.path.join(path, x))
        ]

        self.url = querySeason(parent.name, self.seasonnumber)

    def writeUrl(self):
        """Write a ``<show name>.url`` shortcut into the season directory.

        The shortcut is only written when exactly one url is known for the
        season; otherwise a short diagnostic is printed. The urls gathered
        by the constructor are reused: querying again with the very same
        arguments would only repeat the HTTP request.
        """
        found = len(self.url)

        if found == 1:
            print('Writing subtitles shortcut for ' + self.parent.name)
            writeUrlShortcut(self.path, self.parent.name + '.url', str(self.url[0]), 'InternetShortcut')
        elif found == 0:
            print('too few urls for ' + self.parent.name)
        else:
            print('too many urls for ' + self.parent.name)
|
||||
|
||||
class Episode(object):
    """A single file, identified by its path, attached to a parent object.

    ``name`` is the last component of ``path``; ``parent`` is stored as
    given.
    """

    def __init__(self, parent, path):
        self.parent, self.path = parent, path
        self.name = os.path.basename(self.path)

    def __str__(self):
        return self.name
|
||||
|
||||
class Folder(object):
    """A show directory on disk, described by its path and its name.

    ie. Show name
        - Season 1
        - Season 2
        - ...
    """

    # Class-level defaults, overwritten per instance by the constructor.
    directory = None
    name = None

    def __init__(self, path):
        self.directory = path
        self.name = os.path.basename(path)

    def __str__(self):
        # Rendered as "<name> [<directory>]".
        return ''.join([self.name, ' [', self.directory, ']'])
|
|
@ -0,0 +1,113 @@
|
|||
#encoding: utf-8
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import filecmp
|
||||
import config
|
||||
|
||||
from model import *
|
||||
|
||||
class PigeonHole(object):
    """Sort downloaded media files into the matching show folder.

    Files found below the download directory whose name ends with one of
    ``config.shows_extensions`` are moved into the show folder (an entry of
    the shows root) whose name appears in the cleaned-up file name.
    """

    matches = None

    def __init__(self, root, downloaddir):
        """Remember both locations and index the shows root.

        root        -- directory holding one folder per show
        downloaddir -- directory scanned for files to sort
        """
        self.structure = Structure(root)

        self.downloadDir = downloaddir
        self.rootShows = root
        self.directories = os.listdir(self.rootShows)
        self.series = list()

    def walk(self):
        """Walk through the download directory and yield the media files.

        Each yielded object exposes ``path`` (full path of the file) and
        ``name`` (its file name).
        """
        for root, dirs, files in os.walk(self.downloadDir):
            for filename in files:
                if filename.endswith(config.shows_extensions):
                    # Show() accepts a single directory path and scans it
                    # for seasons, so it cannot wrap a downloaded file;
                    # Episode carries exactly the path and name needed here.
                    yield Episode(None, os.path.join(root, filename))

    def walk2(self, foldername, extensions):
        """Yield the path of every file below ``foldername`` whose name does
        NOT end with one of ``extensions``.
        """
        for root, dirs, files in os.walk(foldername):
            for filename in files:
                if not filename.endswith(extensions):
                    yield os.path.join(root, filename)

    def process(self):
        """Register the entries of the shows root as folders, then move
        every downloaded media file to the folder it belongs to.
        """
        self.series = [Folder(os.path.join(self.rootShows, x)) for x in self.directories]

        for path in self.walk():
            self.moveToFolder(path)

    def moveToFolder(self, show):
        """Move a downloaded file (``show.path`` / ``show.name``) to its
        show folder.

        When the folder is found by name, the directory the file came from
        is removed afterwards if nothing of interest is left in it.
        Otherwise the abbreviations of ``config.shows_dict`` are tried.
        """
        destinationfile = self.findFolder(show)

        if destinationfile is not None:
            self.move(show.path, destinationfile)

            # The directory the file was downloaded into.
            directory = os.path.dirname(show.path)
            if self.isDeletable(directory):
                print('\tDeleting ' + directory)
                shutil.rmtree(directory)
        else:
            lowered = show.name.lower()
            for key in config.shows_dict:
                if key.lower() not in lowered:
                    continue

                target = os.path.join(self.rootShows, config.shows_dict[key])
                if os.path.exists(target):
                    destinationfile = os.path.join(target, show.name)
                    print(destinationfile)
                    self.move(show.path, destinationfile)
                    # The file is gone from its original place: trying
                    # another abbreviation would fail on a missing source.
                    break

    def findFolder(self, show):
        """Return the complete destination path for ``show``, or None when
        no registered folder name occurs in the cleaned-up file name.
        """
        # Collapse every run of non-word characters (dots, dashes, ...)
        # into a single space so "white.collar" matches "white collar".
        rx = re.compile(r'\W+')
        result = rx.sub(' ', show.name.lower()).strip()

        for s in self.series:
            if s.name.lower() in result:
                return os.path.join(s.directory, show.name)

        return None

    def move(self, originalfile, destinationfile):
        """Move the downloaded file to the found folder."""
        print('Moving ' + originalfile + ' to ' + destinationfile)
        shutil.move(originalfile, destinationfile)

    def isDeletable(self, foldername):
        """Tell whether ``foldername`` may be removed.

        A folder is deletable when every file below it has one of the
        ``config.useless_files_extensions`` (ie. .nfo, .srr or .sfv files).
        The download directory, the shows root and any path that is a
        substring of either are never deletable. Nothing is deleted here.
        """
        if foldername is None:
            return False

        if foldername == self.downloadDir or foldername == self.rootShows:
            return False

        # Substring test: protects the leading parts of both locations.
        if foldername in self.downloadDir or foldername in self.rootShows:
            return False

        # Count the interesting files once; the tree is only walked once.
        interesting = sum(1 for x in self.walk2(foldername, config.useless_files_extensions))
        print('I got ' + str(interesting) + ' int. files')

        return interesting == 0

    def __str__(self):
        return 'PigeonHole module'

    # NOTE: a regular method that merely happens to be called __name__.
    def __name__(self):
        return 'PigeonHole'
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Usage: pigeonhole.py [shows_root [download_dir]]
    # Without arguments the historical hard-coded locations are used.
    cli_args = sys.argv[1:]
    shows_root = cli_args[0] if len(cli_args) > 0 else r'C:\test'
    download_dir = cli_args[1] if len(cli_args) > 1 else r'C:\temp'

    pHole = PigeonHole(shows_root, download_dir)
    pHole.process()
    pHole.structure.writeUrls()
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
import urllib2
|
||||
import re
|
||||
import os
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
"""
|
||||
Querying non web services interfaces
|
||||
through http interrogation and regex results retrieval.
|
||||
"""
|
||||
|
||||
# Two-letter language codes; not referenced by the rest of this module
# (presumably the subtitle languages of interest -- TODO confirm).
languages = ('en', 'es', 'fr', 'de')
|
||||
|
||||
class CustomUrl(object):
    """Represents a custom url object.

    It refers to a simple web page and can be embedded anywhere. The
    address is kept as two strings, ``base`` and ``suffix``; ``fullUrl``
    is their concatenation.
    """

    # Class-level defaults, overwritten per instance by the constructor.
    fullUrl = None
    suffix = None
    base = None

    def __init__(self, base, suffix):
        self.base, self.suffix = str(base), str(suffix)
        self.fullUrl = self.base + self.suffix

    def __str__(self):
        return str(self.fullUrl)

    def __unicode__(self):
        return str(self.fullUrl)

    def replace(self, oldstr, newstr):
        """Return a new CustomUrl with ``oldstr`` replaced by ``newstr`` in
        the suffix; the base is carried over unchanged.
        """
        rewritten_suffix = self.suffix.replace(oldstr, newstr)
        return CustomUrl(self.base, rewritten_suffix)
|
||||
|
||||
def queryUrl(baseurl, paramindicator, regex, querystring):
    """Query a base url and collect the links matching a regular expression.

    The page at ``baseurl + paramindicator + querystring`` is fetched and
    parsed; every tag whose ``href`` attribute matches ``regex`` yields a
    ``CustomUrl(baseurl, href)``.

    eg. baseurl = http://duckduckgo.com
        paramindicator = /?q=
        querystring = my_query
        regex = ... :)

    Returns the list of CustomUrl objects, empty when nothing matches.
    Errors raised while opening or reading the page are not caught.
    """
    #print '\tProbing ' + baseurl + ' ' + paramindicator + ' ' + regex + ' ' + querystring
    socket = urllib2.urlopen(baseurl + paramindicator + querystring)
    try:
        soup = BeautifulSoup(socket.read())
    finally:
        # Release the connection even when reading or parsing fails.
        socket.close()

    tags = soup.findAll(href=re.compile(regex))

    # Every tag returned above carries an href attribute, so it can be read
    # directly instead of re-parsing the tag's markup to look for an <a>.
    return [CustomUrl(baseurl, tag['href']) for tag in tags]
|
||||
|
||||
def queryShow(showname):
    """Search tvsubtitles.net for ``showname`` and return the matching
    show links, as returned by ``queryUrl``.

    NOTE: only spaces are escaped in the query (as %20).
    """
    site = 'http://www.tvsubtitles.net'
    search_page = '/search.php?q='
    show_link = r'/tvshow-([A-Za-z0-9]*)\.html$'
    escaped_name = showname.replace(' ', '%20')
    return queryUrl(site, search_page, show_link, escaped_name)
|
||||
|
||||
def querySeason(showname, seasonnumber):
    """Return the links found by ``queryShow(showname)`` with '.html'
    replaced by '-<seasonnumber>.html'.
    """
    show_urls = queryShow(showname)
    season_urls = []
    for show_url in show_urls:
        season_urls.append(show_url.replace('.html', '-' + str(seasonnumber) + '.html'))
    return season_urls
|
||||
|
||||
def writeUrlShortcut(folderpath, filename, url, shortcutname):
    """Write a shortcut file pointing to a web page.

    eg. writeUrlShortcut('/opt/tmp', 'google.url', 'http://www.google.com', 'Google')
    creates /opt/tmp/google.url containing:

        [Google]
        URL=http://www.google.com

    Raises Exception when ``folderpath`` does not exist. An existing file
    with the same name is overwritten.
    """
    if os.path.exists(folderpath):
        target = os.path.join(folderpath, filename)
        body = """[%s]\nURL=%s""" % (shortcutname, url)
        with open(target, 'w+') as shortcut:
            shortcut.write(body)
        return

    raise Exception('Writing Url : Path does not exists')
|
|
@ -0,0 +1,75 @@
|
|||
import unittest
|
||||
import tempfile
|
||||
import shutil
|
||||
import os
|
||||
from pigeonhole import *
|
||||
#import config
|
||||
|
||||
class TestPigeonHoleFunctions(unittest.TestCase):
    """Test the methods defined inside the PigeonHole class"""

    def setUp(self):
        """Set up the test environment"""
        self.rootdir = tempfile.mkdtemp(prefix='pigeonHole_root_')
        self.downloaddir = tempfile.mkdtemp(prefix='pigeonHole_dl_dir_')

        # Create an environment with three folders
        os.mkdir(os.path.join(self.rootdir, 'White Collar'))
        os.mkdir(os.path.join(self.rootdir, 'The Big Bang Theory'))
        os.mkdir(os.path.join(self.rootdir, 'Being Erica'))

        # The star import at the top of this module binds the class itself,
        # not the name of the module it comes from.
        self.pigeonHole = PigeonHole(self.rootdir, self.downloaddir)

        self.notDeletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')
        self.deletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')

    def tearDown(self):
        """Tear down the test environment"""
        self.pigeonHole = None

        shutil.rmtree(self.notDeletableTmpDir)
        shutil.rmtree(self.deletableTmpDir)

        shutil.rmtree(self.rootdir)
        shutil.rmtree(self.downloaddir)

    def test_init(self):
        """ Testing the constructor """
        self.assertEqual(self.pigeonHole.rootShows, self.rootdir)
        self.assertEqual(self.pigeonHole.downloadDir, self.downloaddir)
        self.assertEqual(str(self.pigeonHole), 'PigeonHole module', 'The module string is not correct.')
        # __name__ is a method on PigeonHole: call it and compare the result
        # (wrapping the comparison in str() made the check always pass).
        self.assertEqual(self.pigeonHole.__name__(), 'PigeonHole', 'The module name is not correct.')

    def test_clean(self):
        """Testing the cleaning method"""
        self.generatedfiles_bad = list()
        self.generatedfiles_good = list()

        # A folder mixing useless files and media files must be kept.
        for x in config.useless_files_extensions + config.shows_extensions:
            fd, temppath = tempfile.mkstemp(x, 'tmp', self.notDeletableTmpDir)
            self.generatedfiles_bad.append(temppath)
            os.close(fd)

        # A folder holding only useless files may be removed.
        for y in config.useless_files_extensions:
            fd, temppath = tempfile.mkstemp(y, 'tmp', self.deletableTmpDir)
            self.generatedfiles_good.append(temppath)
            os.close(fd)

        self.assertFalse(self.pigeonHole.isDeletable(self.notDeletableTmpDir))
        self.assertTrue(self.pigeonHole.isDeletable(self.deletableTmpDir))

        # Neither of the two configured locations may ever be deleted.
        self.assertFalse(self.pigeonHole.isDeletable(self.rootdir))
        self.assertFalse(self.pigeonHole.isDeletable(self.downloaddir))

    def test_findFolder(self):
        """Try to move a file to a specific location"""
        pass
|
||||
|
||||
|
||||
|
||||
|
||||
# Run the whole suite when this module is executed as a script.
if "__main__" == __name__:
    unittest.main()
|
|
@ -0,0 +1,15 @@
|
|||
from distutils.core import setup
|
||||
|
||||
# NOTE: packaging is not wired up yet; the call below is kept as a template.
#setup(
#    name='PigeonHole',
#    version='0.1.0',
#    author='Fred Pauchet',
#    author_email='fpauchet@gmail.com',
#    packages=['pigeonhole', 'pigeonhole.test'],
#    scripts=[],
#    url='',
#    license='LICENCE',
#    description='',
#    long_description=open('README').read(),
#    install_requires=[],
#)
|
Loading…
Reference in New Issue