Transfer from github.
This commit is contained in:
commit
7d1b908875
|
@ -0,0 +1,9 @@
|
||||||
|
#python specific
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
## generic files to ignore
|
||||||
|
*~
|
||||||
|
*.lock
|
||||||
|
*.DS_Store
|
||||||
|
*.swp
|
||||||
|
*.out
|
|
@ -0,0 +1,2 @@
|
||||||
|
include *.txt
|
||||||
|
recursive-include docs *.txt
|
|
@ -0,0 +1,32 @@
|
||||||
|
PigeonHole
|
||||||
|
==========
|
||||||
|
|
||||||
|
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
|
||||||
|
|
||||||
|
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
|
||||||
|
|
||||||
|
How it works
|
||||||
|
------------
|
||||||
|
|
||||||
|
The project is split into several files:
|
||||||
|
* pigeonhole/pigeonhole.py : the one that should be run :)
|
||||||
|
* setup.py : not used yet, sorry.
|
||||||
|
* pigeonhole/config.py : where you should put your configuration.
|
||||||
|
|
||||||
|
### config.py ###
|
||||||
|
|
||||||
|
The configuration file contains the declaration of three variables :
|
||||||
|
|
||||||
|
1. useless_files_extensions : used to clean up a folder when its content (including its subdirectories) consists only of files of these types. Do not put `*` in this filter; its behavior has not been checked yet.
|
||||||
|
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types; recognition is based on file extensions, not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures).
|
||||||
|
3. shows_dict : used for files that have a 'special name'
|
||||||
|
(ie. using 'tbbt' while the real name that can be found in the destination folder is much much longer)
|
||||||
|
|
||||||
|
Unit testing
|
||||||
|
------------
|
||||||
|
|
||||||
|
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
|
||||||
|
|
||||||
|
python -m unittest discover
|
||||||
|
|
||||||
|
Temporary files and folders are created (and cleaned up) to verify that the file handling behaves correctly.
|
|
@ -0,0 +1,32 @@
|
||||||
|
PigeonHole
|
||||||
|
==========
|
||||||
|
|
||||||
|
The main purpose of this application is to sort some specific types of files into a well-arranged directory.
|
||||||
|
|
||||||
|
I used it for classifying tv shows from a garbage folder into the right one, based on the filename which will be cleaned to help sorting.
|
||||||
|
|
||||||
|
How it works
|
||||||
|
------------
|
||||||
|
|
||||||
|
The project is split into several files:
|
||||||
|
* pigeonhole/pigeonhole.py : the one that should be run :)
|
||||||
|
* setup.py : not used yet, sorry.
|
||||||
|
* pigeonhole/config.py : where you should put your configuration.
|
||||||
|
|
||||||
|
### config.py ###
|
||||||
|
|
||||||
|
The configuration file contains the declaration of three variables :
|
||||||
|
|
||||||
|
1. useless_files_extensions : used to clean up a folder when its content (including its subdirectories) consists only of files of these types. Do not put `*` in this filter; its behavior has not been checked yet.
|
||||||
|
2. shows_extensions : the files that need to be organized. The `process` method of the `PigeonHole` class won't look for anything other than these file types (sorry, the recognition is based on extensions and not on [magic numbers](http://en.wikipedia.org/wiki/List_of_file_signatures)).
|
||||||
|
3. shows_dict : used for files that have a 'special name'
|
||||||
|
(ie. using 'tbbt' while the real name that can be found in the destination folder is much much longer)
|
||||||
|
|
||||||
|
Unit testing
|
||||||
|
------------
|
||||||
|
|
||||||
|
All tests are located inside the `pigeonhole/tests` directory. To launch them, use the following command, based on the python handbook:
|
||||||
|
|
||||||
|
python -m unittest discover
|
||||||
|
|
||||||
|
Temporary files and folders are created (and cleaned up) to verify that the file handling behaves correctly.
|
|
@ -0,0 +1,15 @@
|
||||||
|
# -*- coding: UTF8 -*-
|
||||||
|
# Configuration file
|
||||||
|
|
||||||
|
### If a folder only contains these types of files, we can delete it.
|
||||||
|
useless_files_extensions = ('srr', 'nfo', 'sfv')
|
||||||
|
|
||||||
|
### Consider only files with these extensions
|
||||||
|
shows_extensions = ('avi', 'mkv')
|
||||||
|
|
||||||
|
### Dictionary for special filename contents
|
||||||
|
shows_dict = {
|
||||||
|
'wc' : 'white collar',
|
||||||
|
'tbbt' : 'the big bang theory',
|
||||||
|
'beingerica' : 'being erica',
|
||||||
|
}
|
|
@ -0,0 +1,87 @@
|
||||||
|
from subQuery import *
|
||||||
|
import os
|
||||||
|
|
||||||
|
class Structure(object):
    """Tree of the shows found under a root directory.

    Every sub-directory of the root becomes a ``Show``; plain files that
    sit directly in the root are ignored.
    """

    def __init__(self, path):
        """Create one ``Show`` per sub-directory of ``path``."""
        self.shows = []
        for entry in os.listdir(path):
            candidate = os.path.join(path, entry)
            if os.path.isdir(candidate):
                self.shows.append(Show(candidate))

    def writeUrls(self):
        """Call ``writeUrl`` on every season of every show, in order."""
        for show in self.shows:
            for season in show.seasons:
                season.writeUrl()
|
||||||
|
|
||||||
|
class Show(object):
    """A show directory on disk, together with its seasons.

    Attributes:
        path: path of the show directory, as given to the constructor.
        name: last component of ``path``.
        url: result of ``queryShow(name)``, fetched at construction time.
        seasons: one ``Season`` per sub-directory of ``path``.
    """

    def __init__(self, path):
        self.path = path
        self.name = os.path.basename(path)

        self.url = queryShow(self.name)

        # Only directories are seasons; files directly inside the show
        # directory are skipped.
        subdirs = [os.path.join(path, entry) for entry in os.listdir(path)]
        self.seasons = [Season(self, subdir) for subdir in subdirs if os.path.isdir(subdir)]

    def __str__(self):
        """Return the show name."""
        return self.name
|
||||||
|
|
||||||
|
class Season(object):
    """A season directory inside a show.

    Attributes:
        parent: the owning show object (its ``name`` is used for queries).
        path: path of the season directory.
        name: last component of ``path``.
        seasonnumber: first run of digits found in ``name`` (as a string),
            or ``None`` when the directory name contains no digit.
        episodes: one ``Episode`` per regular file directly inside ``path``.
        url: result of ``querySeason`` for this season, or an empty list
            when no season number could be extracted.
    """

    def __init__(self, parent, path):
        self.parent = parent
        self.path = path
        self.name = os.path.basename(path)
        self.seasonnumber = self._parseSeasonNumber(self.name)
        self.episodes = [
            Episode(self, os.path.join(path, x))
            for x in os.listdir(path)
            if os.path.isfile(os.path.join(path, x))
        ]

        # Without a season number there is nothing to look up; an empty
        # list makes writeUrl() report "too few urls" instead of crashing
        # the whole structure scan.
        if self.seasonnumber is None:
            self.url = []
        else:
            self.url = querySeason(parent.name, self.seasonnumber)

    @staticmethod
    def _parseSeasonNumber(name):
        """Return the first run of digits in ``name``, or None if there is none."""
        numbers = re.findall('[0-9]+', name)
        return numbers[0] if numbers else None

    def writeUrl(self):
        """Write a ``<show name>.url`` shortcut into the season directory.

        The shortcut is only written when exactly one url is known for the
        season; otherwise a message is printed and nothing is written.
        ``self.url`` already holds the ``querySeason`` result obtained by
        the constructor, so it is not queried a second time here.
        """
        count = len(self.url)

        if count == 1:
            print('Writing subtitles shortcut for ' + self.parent.name)
            writeUrlShortcut(self.path, self.parent.name + '.url', str(self.url[0]), 'InternetShortcut')
        elif count == 0:
            print('too few urls for ' + self.parent.name)
        else:
            print('too many urls for ' + self.parent.name)
|
||||||
|
|
||||||
|
class Episode(object):
    """A single episode file that belongs to a season.

    Attributes:
        parent: the owning season object.
        path: path of the episode file.
        name: last component of ``path``.
    """

    def __init__(self, parent, path):
        self.parent, self.path = parent, path
        self.name = os.path.split(path)[1]

    def __str__(self):
        """Return the episode file name."""
        return self.name
|
||||||
|
|
||||||
|
class Folder(object):
    """A show directory, identified by its path on the disk.

    Expected layout, e.g.::

        Show name
          - Season 1
          - Season 2
          - ...
    """

    # Class-level defaults; both are overwritten per instance in __init__.
    directory = None
    name = None

    def __init__(self, path):
        self.directory = path
        self.name = os.path.basename(self.directory)

    def __str__(self):
        """Return ``'<name> [<directory>]'``."""
        return ''.join([self.name, ' [', self.directory, ']'])
|
|
@ -0,0 +1,113 @@
|
||||||
|
#encoding: utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import filecmp
|
||||||
|
import config
|
||||||
|
|
||||||
|
from model import *
|
||||||
|
|
||||||
|
class PigeonHole(object):
    """Take the media files found in a (download) folder and sort them
    into the corresponding show folder, based on the file name.

    Attributes:
        structure: ``Structure`` built from the shows root folder.
        downloadDir: folder that is scanned for media files.
        rootShows: folder holding one sub-folder per show.
        directories: entries of ``rootShows`` listed at construction time.
        series: ``Folder`` objects for the show folders, filled by ``process``.
    """

    matches = None

    class _Download(object):
        """A media file found while walking the download folder.

        Carries exactly what ``moveToFolder`` and ``findFolder`` read:
        ``path`` (full file path), ``name`` (file name) and ``directory``
        (folder that contains the file).
        """

        def __init__(self, path, name):
            self.path = path
            self.name = name
            self.directory = os.path.dirname(path)

    def __init__(self, root, downloaddir):
        self.structure = Structure(root)

        self.downloadDir = downloaddir
        self.rootShows = root
        self.directories = os.listdir(self.rootShows)
        self.series = list()

    def walk(self):
        """Walk the download folder and yield one object per media file.

        A file is a media file when its name ends with one of
        ``config.shows_extensions``. Each yielded object exposes ``path``,
        ``name`` and ``directory``.
        """
        for root, dirs, files in os.walk(self.downloadDir):
            for filename in files:
                if filename.endswith(config.shows_extensions):
                    # model.Show takes a single directory path and has no
                    # ``directory`` attribute, so it cannot describe a
                    # downloaded file; use the lightweight record instead.
                    yield self._Download(os.path.join(root, filename), filename)

    def walk2(self, foldername, extensions):
        """Yield the path of every file under ``foldername`` whose name does
        NOT end with one of ``extensions``.
        """
        for root, dirs, files in os.walk(foldername):
            for filename in files:
                if not filename.endswith(extensions):
                    yield os.path.join(root, filename)

    def process(self):
        """List the show folders of ``rootShows`` into ``self.series``, then
        move every media file found by ``walk`` to its folder.
        """
        # Only directories can receive files; plain files sitting in the
        # root are not show folders.
        self.series = [
            Folder(os.path.join(self.rootShows, x))
            for x in self.directories
            if os.path.isdir(os.path.join(self.rootShows, x))
        ]

        for path in self.walk():
            self.moveToFolder(path)

    def moveToFolder(self, show):
        """Move a specific downloaded file to its show folder.

        The folder is first looked up by name with ``findFolder``; after
        such a move the source folder is removed when ``isDeletable`` says
        so. When no folder matches, the aliases of ``config.shows_dict``
        are tried and the file is moved to the first alias whose target
        folder exists. Nothing happens when neither lookup succeeds.
        """
        destinationfile = self.findFolder(show)

        if destinationfile is not None:
            self.move(show.path, destinationfile)

            if self.isDeletable(show.directory):
                print('\tDeleting ' + show.directory)
                shutil.rmtree(show.directory)
            return

        lowered = show.name.lower()
        for key in config.shows_dict:
            if key.lower() not in lowered:
                continue

            target = os.path.join(self.rootShows, config.shows_dict[key])
            if os.path.exists(target):
                destinationfile = os.path.join(target, show.name)
                print(destinationfile)
                self.move(show.path, destinationfile)
                # The file no longer exists at show.path; trying a second
                # matching alias would make shutil.move fail.
                return

    def findFolder(self, show):
        """Return the complete destination path for ``show``, or None.

        The file name is lower-cased and every run of non-word characters
        is replaced by a space; the first folder of ``self.series`` whose
        lower-cased name occurs in that string wins.
        """
        rx = re.compile(r'\W+')
        result = rx.sub(' ', show.name.lower()).strip()

        for s in self.series:
            if s.name.lower() in result:
                return os.path.join(s.directory, show.name)

        return None

    def move(self, originalfile, destinationfile):
        """Move the downloaded file to the found folder."""
        print('Moving ' + originalfile + ' to ' + destinationfile)
        shutil.move(originalfile, destinationfile)

    def isDeletable(self, foldername):
        """Tell whether ``foldername`` holds nothing worth keeping.

        Returns True only when every file under the folder ends with one of
        ``config.useless_files_extensions`` (ie. .nfo, .srr or .sfv files).
        This method deletes nothing itself; the caller does.

        Always returns False for ``None``, for the download folder, for the
        shows root, and for any folder whose path text occurs inside one of
        those two paths.
        """
        if foldername is None:
            return False

        if foldername == self.downloadDir or foldername == self.rootShows:
            return False

        # Substring test on the path text: this rejects the parents of the
        # two configured folders (and any other textual prefix/infix).
        if foldername in self.downloadDir or foldername in self.rootShows:
            return False

        # Walk the folder once and reuse the count for both the message
        # and the decision.
        important = sum(1 for x in self.walk2(foldername, config.useless_files_extensions))
        print('I got ' + str(important) + ' int. files')

        return important == 0

    def __str__(self):
        return 'PigeonHole module'

    def __name__(self):
        return 'PigeonHole'
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
pHole = PigeonHole(r'C:\test', r'C:\temp')
|
||||||
|
pHole.process()
|
||||||
|
pHole.structure.writeUrls()
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
import urllib2
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
"""
|
||||||
|
Querying non web services interfaces
|
||||||
|
through http interrogation and regex results retrieval.
|
||||||
|
"""
|
||||||
|
|
||||||
|
languages = ('en', 'es', 'fr', 'de')
|
||||||
|
|
||||||
|
class CustomUrl(object):
    """Represents a custom url object.

    It refers to a simple web page and can be embedded anywhere.

    Attributes:
        base: first part of the url, converted with ``str``.
        suffix: second part of the url, converted with ``str``.
        fullUrl: ``base`` immediately followed by ``suffix``.
    """

    # Class-level defaults; all three are set per instance in __init__.
    fullUrl = None
    suffix = None
    base = None

    def __init__(self, base, suffix):
        self.base, self.suffix = str(base), str(suffix)
        self.fullUrl = ''.join((self.base, self.suffix))

    def __str__(self):
        """Return the full url."""
        return str(self.fullUrl)

    def __unicode__(self):
        """Return the full url (same value as ``__str__``)."""
        return str(self.fullUrl)

    def replace(self, oldstr, newstr):
        """Return a new ``CustomUrl`` with the same base and with ``oldstr``
        replaced by ``newstr`` in the suffix; this object is left unchanged.
        """
        new_suffix = self.suffix.replace(oldstr, newstr)
        return CustomUrl(self.base, new_suffix)
|
||||||
|
|
||||||
|
def queryUrl(baseurl, paramindicator, regex, querystring):
    """Query a base url and collect the links that match a regex.

    eg. baseurl = http://duckduckgo.com
        paramindicator = /?q=
        querystring = my_query
        regex = pattern the wanted href values must match

    The page at ``baseurl + paramindicator + querystring`` is fetched and
    every ``<a>`` tag whose ``href`` matches ``regex`` is kept.

    Returns a list of ``CustomUrl`` objects built from ``baseurl`` and the
    matched ``href`` value; the list is empty when nothing matches.
    """
    response = urllib2.urlopen(baseurl + paramindicator + querystring)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()

    # Restricting the search to anchors lets the href be read straight from
    # the tag, instead of re-parsing each tag and failing on non-anchors.
    anchors = soup.findAll('a', href=re.compile(regex))

    return [CustomUrl(baseurl, anchor['href']) for anchor in anchors]
|
||||||
|
|
||||||
|
def queryShow(showname):
    """Search tvsubtitles.net for ``showname``.

    Spaces in the name are sent as ``%20``. Returns the list produced by
    ``queryUrl`` for links ending in ``/tvshow-<id>.html``.
    """
    return queryUrl(
        'http://www.tvsubtitles.net',
        '/search.php?q=',
        # Raw string: the pattern needs a literal backslash before the dot.
        r'/tvshow-([A-Za-z0-9]*)\.html$',
        showname.replace(' ', '%20'),
    )
|
||||||
|
|
||||||
|
def querySeason(showname, seasonnumber):
    """Return the urls of ``queryShow(showname)`` rewritten for one season.

    In each url, ``.html`` is replaced by ``-<seasonnumber>.html``.
    """
    season_suffix = '-' + str(seasonnumber) + '.html'

    season_urls = []
    for show_url in queryShow(showname):
        season_urls.append(show_url.replace('.html', season_suffix))
    return season_urls
|
||||||
|
|
||||||
|
def writeUrlShortcut(folderpath, filename, url, shortcutname):
    """Write a shortcut to a specific web page.

    The file ``filename`` is created (or overwritten) inside ``folderpath``
    with ``shortcutname`` as its section header.

    eg. writeUrlShortcut('/opt/tmp', 'google.url', 'http://www.google.com', 'Google')
    writes the two lines

        [Google]
        URL=http://www.google.com

    into a file named /opt/tmp/google.url

    Raises:
        Exception: when ``folderpath`` does not exist.
    """
    if os.path.exists(folderpath):
        target = os.path.join(folderpath, filename)
        content = '[%s]\nURL=%s' % (shortcutname, url)

        with open(target, 'w+') as handle:
            handle.write(content)
        return

    raise Exception('Writing Url : Path does not exists')
|
|
@ -0,0 +1,75 @@
|
||||||
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
from pigeonhole import *
|
||||||
|
#import config
|
||||||
|
|
||||||
|
class TestPigeonHoleFunctions(unittest.TestCase):
    """Test the methods defined inside the PigeonHole class"""

    def setUp(self):
        """Set up the test environment"""
        self.rootdir = tempfile.mkdtemp(prefix='pigeonHole_root_')
        self.downloaddir = tempfile.mkdtemp(prefix='pigeonHole_dl_dir_')

        # Create an environment with three folders
        os.mkdir(os.path.join(self.rootdir, 'White Collar'))
        os.mkdir(os.path.join(self.rootdir, 'The Big Bang Theory'))
        os.mkdir(os.path.join(self.rootdir, 'Being Erica'))

        # NOTE(review): this relies on the name ``pigeonhole`` being bound by
        # ``from pigeonhole import *`` -- confirm against the package layout.
        self.pigeonHole = pigeonhole.PigeonHole(self.rootdir, self.downloaddir)

        self.notDeletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')
        self.deletableTmpDir = tempfile.mkdtemp(prefix='pigeonHole_')

    def tearDown(self):
        """Tear down the test environment"""
        self.pigeonHole = None

        shutil.rmtree(self.notDeletableTmpDir)
        shutil.rmtree(self.deletableTmpDir)

        shutil.rmtree(self.rootdir)
        shutil.rmtree(self.downloaddir)

    def test_init(self):
        """ Testing the constructor """
        self.assertEqual(self.pigeonHole.rootShows, self.rootdir)
        self.assertEqual(self.pigeonHole.downloadDir, self.downloaddir)
        self.assertTrue(str(self.pigeonHole) == 'PigeonHole module', 'The module string is not correct.')
        # ``__name__`` is a method on PigeonHole: it has to be called, and the
        # comparison must not be wrapped in str() (a non-empty string is
        # always true, so the old assertion could never fail).
        self.assertEqual(self.pigeonHole.__name__(), 'PigeonHole', 'The module name is not correct.')

    def test_clean(self):
        """Testing the cleaning method"""

        self.generatedfiles_bad = list()
        self.generatedfiles_good = list()

        # A folder mixing useless files and show files must be kept.
        for x in config.useless_files_extensions + config.shows_extensions:
            fd, temppath = tempfile.mkstemp(x, 'tmp', self.notDeletableTmpDir)
            self.generatedfiles_bad.append(temppath)
            os.close(fd)

        # A folder holding only useless files may be deleted.
        for y in config.useless_files_extensions:
            fd, temppath = tempfile.mkstemp(y, 'tmp', self.deletableTmpDir)
            self.generatedfiles_good.append(temppath)
            os.close(fd)

        self.assertFalse(self.pigeonHole.isDeletable(self.notDeletableTmpDir))
        self.assertTrue(self.pigeonHole.isDeletable(self.deletableTmpDir))

        # The root and download folders themselves are never deletable.
        self.assertFalse(self.pigeonHole.isDeletable(self.rootdir))
        self.assertFalse(self.pigeonHole.isDeletable(self.downloaddir))

    def test_findFolder(self):
        """Try to move a file to a specific location"""

        pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -0,0 +1,15 @@
|
||||||
|
from distutils.core import setup
|
||||||
|
|
||||||
|
#setup {
|
||||||
|
# name='PigeonHole',
|
||||||
|
# version='0.1.0',
|
||||||
|
# author='Fred Pauchet'
|
||||||
|
# author_email='fpauchet@gmail.com',
|
||||||
|
# packages=['pigeonhole','pigeonhole.test'],
|
||||||
|
# scripts=[],
|
||||||
|
# url='',
|
||||||
|
# licence='LICENCE',
|
||||||
|
# description='',
|
||||||
|
# long_description=long_description=open('README').read(),
|
||||||
|
# install_require=[],
|
||||||
|
#}
|
Loading…
Reference in New Issue