Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cinemast provider #817

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Changelog
^^^^^
**release date:** 2016-09-03

* Fix subscenter
* Add Cinemast provider


2.0.3
Expand Down
5 changes: 5 additions & 0 deletions docs/api/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ Addic7ed
.. automodule:: subliminal.providers.addic7ed
:private-members:

Cinemast
----------
.. automodule:: subliminal.providers.cinemast
:private-members:

LegendasTv
----------
.. automodule:: subliminal.providers.legendastv
Expand Down
1 change: 1 addition & 0 deletions docs/user/how_it_works.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Subliminal uses multiple providers to give users a vast choice and have a better
subtitles. Current supported providers are:

* Addic7ed
* Cinemast
* LegendasTV
* NapiProjekt
* OpenSubtitles
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def find_version(*file_paths):
entry_points={
'subliminal.providers': [
'addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'cinemast = subliminal.providers.cinemast:CinemastProvider',
'legendastv = subliminal.providers.legendastv:LegendasTVProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
Expand Down
5 changes: 4 additions & 1 deletion subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def convert(self, value, param, ctx):
@click.group(context_settings={'max_content_width': 100}, epilog='Suggestions and bug reports are greatly appreciated: '
'https://github.com/Diaoul/subliminal/')
@click.option('--addic7ed', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='Addic7ed configuration.')
@click.option('--cinemast', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='Cinemast configuration.')
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
Expand All @@ -224,7 +225,7 @@ def convert(self, value, param, ctx):
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, cache_dir, debug):
def subliminal(ctx, addic7ed, cinemast, legendastv, opensubtitles, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
Expand All @@ -248,6 +249,8 @@ def subliminal(ctx, addic7ed, legendastv, opensubtitles, cache_dir, debug):
ctx.obj = {'provider_configs': {}}
if addic7ed:
ctx.obj['provider_configs']['addic7ed'] = {'username': addic7ed[0], 'password': addic7ed[1]}
if cinemast:
ctx.obj['provider_configs']['cinemast'] = {'username': cinemast[0], 'password': cinemast[1]}
if legendastv:
ctx.obj['provider_configs']['legendastv'] = {'username': legendastv[0], 'password': legendastv[1]}
if opensubtitles:
Expand Down
1 change: 1 addition & 0 deletions subliminal/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def unregister(self, entry_point):
#: Provider manager
provider_manager = RegistrableExtensionManager('subliminal.providers', [
'addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'cinemast = subliminal.providers.cinemast:CinemastProvider',
'legendastv = subliminal.providers.legendastv:LegendasTVProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
Expand Down
231 changes: 231 additions & 0 deletions subliminal/providers/cinemast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
# -*- coding: utf-8 -*-
import bisect
import io
import logging
import zipfile

from babelfish import Language
from guessit import guessit
from requests import Session

from . import Provider
from .. import __short_version__
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie

logger = logging.getLogger(__name__)


class CinemastSubtitle(Subtitle):
"""Cinemast Subtitle."""
provider_name = 'cinemast'

def __init__(self, language, page_link, series, season, episode, title, subtitle_id, subtitle_key,
             releases):
    """Store the metadata describing one Cinemast subtitle.

    :param language: language of the subtitle, forwarded to the base class.
    :param page_link: link to the subtitle page, forwarded to the base class.
    :param series: series value kept on the instance.
    :param season: season number, compared with ``video.season`` when matching.
    :param episode: episode number, compared with ``video.episode`` when matching.
    :param title: title, compared (sanitized) with the video's names when matching.
    :param subtitle_id: identifier of the subtitle; its string form is exposed as ``id``.
    :param subtitle_key: key kept on the instance next to the identifier.
    :param releases: release names attached to this subtitle.

    """
    super(CinemastSubtitle, self).__init__(language, page_link=page_link)

    # what the subtitle is for
    self.title = title
    self.series = series
    self.season = season
    self.episode = episode

    # how the subtitle is identified
    self.subtitle_id = subtitle_id
    self.subtitle_key = subtitle_key

    # release names and a counter that starts at zero
    self.releases = releases
    self.downloaded = 0

@property
def id(self):
    """Identifier of the subtitle: ``subtitle_id`` rendered as a string."""
    identifier = self.subtitle_id
    return str(identifier)

def get_matches(self, video):
matches = set()

# episode
if isinstance(video, Episode):
# series
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could give the subtitle an unrealistically good score, because it takes the union of the matches from all releases instead of picking the best one.

Copy link
Contributor Author

@ofir123 ofir123 Dec 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand what you mean..
If the subtitle matches multiple releases, I want to check all of them.
Should I change it so each subtitle will match only one release?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know in that case if we shouldn't do a single Subtitle instance for each release. Even if this is the same actual subtitle.
I think for now if it does the job leave it as is but that's a thought for the long term.
@pannal: what about harmonizing the Subtitle object so that it's not provider-specific but rather "subliminal"-specific? This would save you from patching all the subtitle classes to change the guessing rules. Not sure about the feasibility though.

# movie
elif isinstance(video, Movie):
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here


# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')

return matches


class CinemastProvider(Provider):
"""Cinemast Provider."""
languages = {Language.fromalpha2(l) for l in ['he']}
server_url = 'http://www.cinemast.org/he/cinemast/api/'
subtitle_class = CinemastSubtitle

default_username = '[email protected]'
default_password = 'subliminal'

def __init__(self, username=None, password=None):
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')

self.session = None
self.username = username or self.default_username
self.password = password or self.default_password
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is there a default user?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

API is accessible only to registered users.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, don't provide a default user, just use this one for testing and require USER and PASSWORD to be provided in the CLI.
I don't think we have private-only providers so this is all new for you to do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really don't think this is necessary for this provider, since it's not really private.
Only the API requires user and password, and the actual website doesn't.
They don't really care about it being a default user as far as I can tell..

Copy link
Owner

@Diaoul Diaoul Dec 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If/when they introduce a limited number of downloads per user, this is not going to work anymore. Moreover, this is not a widely used provider, given the single language it supports. I don't think people would mind entering a user/password in their command line.
Unless you have explicit agreement with Cinemast to use a default user I'd rather require subliminal users to register.

A website generates ad revenue, an API does not, so maybe the API restriction is intentional.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, removed.

self.user_id = None
self.token = None
self.session = None

def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

# login
if self.username and self.password:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will always evaluate to True with default values.

Copy link
Contributor Author

@ofir123 ofir123 Dec 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The verification is for user input (in case None or '' was passed).

Copy link
Collaborator

@fernandog fernandog Dec 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ofir123
do like legendastv:

if self.username is not None and self.password is not None:

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.username = username or self.default_username this already takes care of that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

logger.debug('Logging in')
url = self.server_url + 'login/'

# actual login
data = {'username': self.username, 'password': self.password}
r = self.session.post(url, data=data, allow_redirects=False, timeout=10)

if r.status_code != 200:
raise AuthenticationError(self.username)

try:
result = r.json()
if 'token' not in result:
raise AuthenticationError(self.username)

logger.info('Logged in')
self.user_id = r.json().get('user')
self.token = r.json().get('token')
except ValueError:
raise AuthenticationError(self.username)

def terminate(self):
    """Forget the login state, if any, and close the HTTP session.

    Logging out is purely local: the token and user id are reset and a
    message is logged; no request is sent for it.

    """
    # only report a logout when there is login state to drop
    if any((self.token, self.user_id)):
        logger.info('Logged out')
        self.token = self.user_id = None

    self.session.close()

def query(self, title, season=None, episode=None, year=None):
query = {
'q': title,
'user': self.user_id,
'token': self.token
}

# episode
if season and episode:
query['type'] = 'series'
query['season'] = season
query['episode'] = episode
else:
query['type'] = 'movies'
if year:
query['year_start'] = year - 1
query['year_end'] = year

# get the list of subtitles
logger.debug('Getting the list of subtitles')
url = self.server_url + 'search/'
r = self.session.post(url, data=query)
r.raise_for_status()

try:
results = r.json()
except ValueError:
return {}

# loop over results
subtitles = {}
for group_data in results.get('data', []):
# create page link
slug_name = group_data.get('name_en').lower().replace(' ', '-').replace('\'', '').replace('"', '')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about other characters? e.g. dot, semicolons
Is there any other way to get this information?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really..
It's not my site so I'm just guessing and checking how to replace each character.
I'll add dots and semicolons though! Good idea.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this a function so it doesn't look too long.

if query['type'] == 'series':
page_link = self.server_url + 'subtitle/series/{}/{}/{}/'.format(slug_name, season, episode)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

urlencode is required here due to above comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

Copy link
Collaborator

@fernandog fernandog Dec 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Diaoul urlencode is from urllib right? Requests already does the encoding and it's not needed. confirm?

no other provider uses urlencode

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For params or data you are correct, but does requests do that even for the URL itself? Anyway, I'm not sure this makes any sense: with special characters the URL will probably be wrong, no? If you can add tests that pass with accented characters and other unusual characters, that'll be fine for me.

else:
page_link = self.server_url + 'subtitle/movie/{}/'.format(slug_name)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!


# go over each language
for language_code, subtitles_data in group_data.get('subtitles', {}).items():
for subtitle_item in subtitles_data:
# read the item
language = Language.fromalpha2(language_code)
subtitle_id = subtitle_item['id']
subtitle_key = subtitle_item['key']
release = subtitle_item['version']

# add the release and increment downloaded count if we already have the subtitle
if subtitle_id in subtitles:
logger.debug('Found additional release %r for subtitle %r', release, subtitle_id)
bisect.insort_left(subtitles[subtitle_id].releases, release) # deterministic order
subtitles[subtitle_id].downloaded += 1
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you increase the downloaded count? Isn't that the same as len(subtitles[subtitle_id].releases)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

continue

# otherwise create it
subtitle = self.subtitle_class(language, page_link, title, season, episode, title, subtitle_id,
subtitle_key, [release])
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1 release = 1 instance

logger.debug('Found subtitle %r', subtitle)
subtitles[subtitle_id] = subtitle

return subtitles.values()

def list_subtitles(self, video, languages):
season = episode = None

if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles

for title in titles:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said before, this is going to flood the servers with many useless requests sometimes returning duplicated results. I'm really not a big fan of this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Diaoul It will stop in the first result found. And dogpile caches it so next time it won't hit server.

subtitles = [s for s in self.query(title, season, episode) if s.language in languages]
if subtitles:
return subtitles

return []

def download_subtitle(self, subtitle):
# download
url = self.server_url + 'subtitle/download/{}/'.format(subtitle.language.alpha2)
params = {
'v': subtitle.releases[0],
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the result change if you take another release?

'key': subtitle.subtitle_key,
'sub_id': subtitle.subtitle_id
}
data = {
'user': self.user_id,
'token': self.token
}
r = self.session.post(url, data=data, params=params, timeout=10)
r.raise_for_status()

# open the zip
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
# remove some filenames from the namelist
namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
if len(namelist) > 1:
raise ProviderError('More than one file to unzip')

subtitle.content = fix_line_ending(zf.read(namelist[0]))
Loading