Program Listing for File mdcommon.py
↰ Return to documentation for file (pymdtools/mdcommon.py)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
#
# Copyright (c) 2018 Florent TOURNOIS
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# -----------------------------------------------------------------------------
""" Core markdown functions."""
import logging
import json
import os
import re
import copy
from urllib.parse import urlparse
if (__package__ in [None, '']) and ('.' not in __name__):
import common
else:
from . import common
# -----------------------------------------------------------------------------
# test for an external internet link
#
# @param url
# @return True if it is an external link
# -----------------------------------------------------------------------------
def is_external_link(url):
    """Tell whether ``url`` carries a scheme and is therefore external.

    For ``http`` and ``https`` a network location is also required; for any
    other scheme (``mailto``, ``ftp``, ...) a non-empty scheme is enough.

    @param url the url to test
    @return True if it is an external link, False otherwise (including
            when ``urlparse`` rejects the value with a ValueError)
    """
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse refuses some malformed urls (e.g. a broken IPv6 host)
        return False

    if parts.scheme in ('http', 'https'):
        # the scheme is known to be non-empty here, only the host matters
        return bool(parts.netloc)
    return bool(parts.scheme)
# -----------------------------------------------------------------------------
# return the domain name
#
# @param url
# @return the domain name or the url if its not external
# -----------------------------------------------------------------------------
def get_domain_name(url):
    """Extract the domain part of an external url.

    For a ``mailto`` url the path component (the text after ``mailto:``)
    is returned; for every other external url the network location is
    returned.

    @param url the url to analyse
    @return the domain name, or ``url`` unchanged when it is not an
            external link or cannot be parsed
    """
    if not is_external_link(url):
        return url

    try:
        parts = urlparse(url)
    except ValueError:
        return url

    if parts.scheme == 'mailto':
        return parts.path
    return parts.netloc
# -----------------------------------------------------------------------------
# A markdown link stored as a dict (keys 'name', 'url' and 'title')
# -----------------------------------------------------------------------------
class Link(dict):
    """A markdown link stored as a plain dict with attribute-style access.

    The dict keys used are ``'name'``, ``'url'`` and ``'title'``. Reading a
    missing key through a property yields None, and assigning None through
    a property removes the key from the dict.
    """

    @property
    def name(self):
        """Return the name of the link, or None when it is not set."""
        return self.get('name')

    @name.setter
    def name(self, value):
        """Set the name of the link; None removes the 'name' key."""
        if value is None:
            self.pop('name', None)
        else:
            self['name'] = value

    @property
    def label(self):
        """Return the label of the link (stored under the 'name' key)."""
        return self.get('name')

    @label.setter
    def label(self, value):
        """Set the label of the link; None removes the 'name' key."""
        if value is None:
            self.pop('name', None)
        else:
            self['name'] = value

    @property
    def url(self):
        """Return the url of the link, or None when it is not set."""
        return self.get('url')

    @url.setter
    def url(self, value):
        """Set the url of the link; None removes the 'url' key."""
        if value is None:
            self.pop('url', None)
        else:
            self['url'] = value

    @property
    def title(self):
        """Return the title of the link, or None when it is not set."""
        return self.get('title')

    @title.setter
    def title(self, value):
        """Set the title of the link; None removes the 'title' key."""
        if value is None:
            self.pop('title', None)
        else:
            self['title'] = value

    def __str__(self):
        """Return the informal, human readable representation of the link."""
        header = "Link name='%s' title='%s'\n" % (self.name, self.title)
        footer = " url=%s\n" % (self.url)
        return header + footer
# -----------------------------------------------------------------------------
# Find the markdown links contained in the text
# This function returns a list of links (dicts with 'name', 'url', 'title').
#
# @param text the text to parse
# @param previous_links the previous links to add to the result
# @return a list with the links
# -----------------------------------------------------------------------------
def search_link_in_md_text(text, previous_links=None):
    """Find the markdown links contained in a text.

    Two syntaxes are recognised:

    - inline links: ``[name](url "title")``
    - reference links: ``[name][id]`` used together with a definition
      ``[id]: url "title"``; a usage whose id has no definition in the
      text is ignored.

    Every usage of a reference is reported, even when several usages share
    the same id (a single dict keyed by id used to keep only the last one).

    @param text the text to parse
    @param previous_links a list of links to extend; when given, the new
                          links are appended to this very list (in place)
    @return the list of links, each one a dict with the keys 'name', 'url'
            and 'title' ('title' is None when the link has no title)
    """
    md_link_re = re.compile(
        r"""(\[(?P<name>[^]]*)]\s*"""
        r"""\(\s*(?P<url>([^()]+?))\s*(?:\"(?P<title>[\s\S]*?)\")*\))""")
    md_link_ref_name_re = re.compile(
        r"""\[(?P<name>.*?)\]\s*?\[(?P<id_link>.*?)\]""")
    md_link_ref_url_re = re.compile(
        r"""\[(?P<id_link>\S*?)\]:\s*"""
        r"""(?P<url>\S+)\s*(?:\"(?P<title>[\s\S]*?)\")?""")

    # Keep the historical contract: the caller's list is extended in place.
    result = previous_links if previous_links is not None else []

    # Inline links.
    for match in md_link_re.finditer(text):
        result.append({'name': match.group('name'),
                       'url': match.group('url'),
                       'title': match.group('title')})

    # Reference usages, grouped by id so that no usage is lost when the
    # same definition is shared by several links.
    names_by_ref = {}
    for match in md_link_ref_name_re.finditer(text):
        names_by_ref.setdefault(match.group('id_link'), []).append(
            match.group('name'))

    # Reference definitions: resolve every usage of the defined id.
    for match in md_link_ref_url_re.finditer(text):
        for name in names_by_ref.get(match.group('id_link'), ()):
            result.append({'name': name,
                           'url': match.group('url'),
                           'title': match.group('title')})

    return result
# -----------------------------------------------------------------------------
# create a json
# -----------------------------------------------------------------------------
def search_link_in_md_text_json(text_md):
    """Return the links found in a markdown text as a JSON string.

    @param text_md the markdown text to parse
    @return the links serialised by ``json.dumps`` with sorted keys and an
            indentation of 2
    """
    return json.dumps(search_link_in_md_text(text_md),
                      sort_keys=True, indent=2)
# -----------------------------------------------------------------------------
# Find the markdown links contained in the file
# This function returns a list of links (dicts with 'name', 'url', 'title').
#
# @param filename the filename of the file to parse
# @param filename_ext the new extension with a dot (ext = '.md')
# @param encoding the encoding of the file
# @param previous_links all previous links
# @return a list with the links
# -----------------------------------------------------------------------------
def search_link_in_md_file(filename, filename_ext=".md",
                           encoding="utf-8", previous_links=None):
    """Find the markdown links contained in a file.

    The filename goes through ``common.check_is_file_and_correct_path``
    (presumably validating the path and its extension -- see the ``common``
    module), then the content is read and parsed.

    @param filename the filename of the file to parse
    @param filename_ext the expected extension with a dot (ext = '.md')
    @param encoding the encoding of the file
    @param previous_links previous links, forwarded to the text parser
    @return the links found, as returned by search_link_in_md_text
    """
    logging.debug('Search link in the file %s', filename)
    checked_path = common.check_is_file_and_correct_path(filename,
                                                         filename_ext)
    content = common.get_file_content(checked_path, encoding=encoding)
    return search_link_in_md_text(content, previous_links=previous_links)
# -----------------------------------------------------------------------------
# Replace the links in an MD text.
#
# A link is a dict characterized by the following entries:
# a_link_example = {'name' : 'my_name',
# 'name_to_replace' : 'my_old_name', <-- option
# 'url' : 'www.my_url.fr',
# 'title' : 'my title'} <-- option
#
# @param text_md the markdown text
# @param links the new links (or a single link)
# @return the string
# -----------------------------------------------------------------------------
def update_links_in_md_text(text_md, links):
    """Replace several links in a markdown text, using names as pivots.

    Each link is a dict with the keys 'name' and 'url', optionally 'title'
    and optionally 'name_to_replace' (the name currently in the text, when
    it differs from the new 'name').

    @param text_md the markdown text
    @param links the new links as a list, or a single link
    @return the updated markdown text
    """
    batch = links if isinstance(links, list) else [links]

    updated = text_md
    for link in batch:
        # 'name' is always read (and must exist), even when overridden
        pivot = link.get('name_to_replace', link['name'])
        updated = update_link_in_md_text(updated, pivot, link)
    return updated
# -----------------------------------------------------------------------------
# Change the base path for the relative path link
#
# @param text_md the markdown text
# @param mv_base_path
# @return the md text
# -----------------------------------------------------------------------------
def move_base_path_in_md_text(text_md, mv_base_path):
    """Prefix every non-external link of a markdown text with a base path.

    Each link that is_external_link() does not report as external gets its
    url joined to ``mv_base_path`` and normalised with ``os.path``.

    @param text_md the markdown text
    @param mv_base_path the path to prepend to the relative urls
    @return the updated markdown text
    """
    relocated = []
    for old_link in search_link_in_md_text(text_md):
        if is_external_link(old_link['url']):
            continue
        moved_link = copy.deepcopy(old_link)
        joined = os.path.join(mv_base_path, moved_link['url'])
        moved_link['url'] = joined
        moved_link['url'] = os.path.normpath(joined)
        relocated.append((old_link, moved_link))

    return update_links_from_old_link(text_md, relocated)
# -----------------------------------------------------------------------------
# Replace the link with the name as a pivot
#
# A link is a dict characterized by 3 entries:
# a_link_example = {'name' : 'my_name',
# 'url' : 'www.my_url.fr',
# 'title' : 'my title'}
#
# @param text_md the markdown text
# @param name the name of the link
# @param new_link the new link
# @return the string
# -----------------------------------------------------------------------------
def update_link_in_md_text(text_md, name, new_link):
    """Replace the links called ``name`` in a markdown text.

    Inline links ``[name](url "title")`` are rewritten from ``new_link``.
    When ``name`` is also used as a reference ``[name][id]``, the id of the
    first such usage is stored in ``new_link['id_link']`` (the dict is
    modified), the usages are rewritten and so is the ``[id]: url``
    definition.

    @param text_md the markdown text
    @param name the name of the link to replace
    @param new_link the new link (dict with 'name', 'url' and optionally
                    'title')
    @return the updated markdown text
    """
    escaped_name = re.escape(name)
    inline_pattern = (
        r"""(\[%s]\s*\(\s*(?P<url>([^()]+?))"""
        r"""\s*(?:\"(?P<title>[\s\S]*?)\")*\))""" % (escaped_name))
    usage_pattern = (
        r"""\[(%s)\]\s*?\[(?P<id_link>.*?)\]""" % (escaped_name))

    # replace the inline links
    result = re.sub(inline_pattern,
                    lambda m: sub_string_link_md(m.group(), new_link),
                    text_md)

    # look for a reference usage in the original text
    usage = re.search(usage_pattern, text_md)
    if usage is None:
        return result

    id_link = usage.group('id_link')
    new_link['id_link'] = id_link

    # substitute the name in the reference usages
    result = re.sub(usage_pattern,
                    lambda m: sub_string_name_by_ref_md(m.group(), new_link),
                    result)

    # substitute the reference definition
    definition_pattern = (
        r"""\[%s]:\s*(?P<url>\S+)\s*(?:\"(?P<title>[\s\S]*?)\")?""" %
        (re.escape(id_link)))
    return re.sub(definition_pattern,
                  lambda m: sub_string_link_by_ref_md(m.group(), new_link),
                  result)
# -----------------------------------------------------------------------------
# Replace the oldlink with the new one
#
# A link is a dict characterized by 3 entries:
# a_link_example = {'name' : 'my_name',
# 'url' : 'www.my_url.fr',
# 'title' : 'my title'}
#
# @param text_md the markdown text
# @param old_link the old link
# @param new_link the new link
# @return the string
# -----------------------------------------------------------------------------
def update_link_from_old_link(text_md, old_link, new_link):
    """Replace one link, identified by its old name and url, with a new one.

    Inline links ``[name](url...)`` whose url starts with the old url are
    rewritten first. Only when that leaves the text unchanged is the
    reference syntax tried: the id of the first ``[name][id]`` usage is
    stored in ``new_link['id_link']`` (the dict is modified), the usages
    are rewritten and so is the ``[id]: old_url`` definition.

    @param text_md the markdown text
    @param old_link the old link (dict with 'name' and 'url')
    @param new_link the new link (dict with 'name', 'url' and optionally
                    'title')
    @return the updated markdown text
    """
    name, url = old_link['name'], old_link['url']
    escaped_name, escaped_url = re.escape(name), re.escape(url)

    inline_pattern = (
        r"""(\[%s]\s*\(\s*(%s([^()]*?))\s*(?:\"(?P<title>[\s\S]*?)\")*\))""" %
        (escaped_name, escaped_url))
    new_text_md = re.sub(inline_pattern,
                         lambda m: sub_string_link_md(m.group(), new_link),
                         text_md)
    if new_text_md != text_md:
        # the inline syntax did the job, no need to look at references
        return new_text_md

    usage_pattern = (
        r"""\[(%s)\]\s*?\[(?P<id_link>.*?)\]""" % (escaped_name))
    usage = re.search(usage_pattern, text_md)
    if usage is None:
        return new_text_md

    id_link = usage.group('id_link')
    new_link['id_link'] = id_link

    # substitute the name in the reference usages
    new_text_md = re.sub(
        usage_pattern,
        lambda m: sub_string_name_by_ref_md(m.group(), new_link),
        new_text_md)

    # substitute the reference definition carrying the old url
    definition_pattern = (
        r"""\[%s]:\s*(%s)\s*(?:\"(?P<title>[\s\S]*?)\")?""" %
        (re.escape(id_link), escaped_url))
    return re.sub(
        definition_pattern,
        lambda m: sub_string_link_by_ref_md(m.group(), new_link),
        new_text_md)
# -----------------------------------------------------------------------------
# Replace the oldlink with the new one
#
# A link is a dict characterized by 3 entries:
# a_link_example = {'name' : 'my_name',
# 'url' : 'www.my_url.fr',
# 'title' : 'my title'}
#
# @param text_md the markdown text
# @param links_couple the old link, the new link couple
# @return the string
# -----------------------------------------------------------------------------
def update_links_from_old_link(text_md, links_couple):
    """Apply update_link_from_old_link for every (old, new) couple in turn.

    @param text_md the markdown text
    @param links_couple an iterable of couples; item 0 is the old link and
                        item 1 the new link
    @return the updated markdown text
    """
    updated = text_md
    for couple in links_couple:
        old_link = couple[0]
        new_link = couple[1]
        updated = update_link_from_old_link(updated, old_link, new_link)
    return updated
# -----------------------------------------------------------------------------
# Create a string link
#
# @param unused_dummy unused parameter
# @param link the link with key 'url', 'title' and 'name'
# @return the string
# -----------------------------------------------------------------------------
def sub_string_link_md(unused_dummy, link):
    """Build the inline markdown string ``[name](url "title")`` of a link.

    @param unused_dummy unused parameter
    @param link the link with the keys 'name' and 'url'; the title part is
                emitted only when 'title' is present and not None
    @return the string
    """
    name = link['name']
    new_url = link['url']
    title = link.get('title')
    new_title = "" if title is None else ' \"%s\"' % title
    return "[%s](%s%s)" % (name, new_url, new_title)
# -----------------------------------------------------------------------------
# Create a string with a reference link
#
# @param unused_dummy unused parameter
# @param link the link with key 'url', 'title' and 'id_link'
# @return the string
# -----------------------------------------------------------------------------
def sub_string_link_by_ref_md(unused_dummy, link):
    """Build the reference definition line ``[id]: url "title"`` of a link.

    @param unused_dummy unused parameter
    @param link the link with the key 'url'; 'id_link' defaults to an empty
                id and the title part is emitted only when 'title' is
                present and not None
    @return the string, ended by a newline
    """
    new_url = link['url']

    id_link = link.get('id_link')
    if id_link is None:
        id_link = ""

    title = link.get('title')
    new_title = "" if title is None else ' \"%s\"' % title

    return "[%s]: %s%s\n" % (id_link, new_url, new_title)
# -----------------------------------------------------------------------------
# Create a sub string with a reference
#
# @param unused_dummy unused parameter
# @param link the link with key 'name' and 'id_link'
# @return the string
# -----------------------------------------------------------------------------
def sub_string_name_by_ref_md(unused_dummy, link):
    """Build the reference usage string ``[name][id]`` of a link.

    @param unused_dummy unused parameter
    @param link the link with the keys 'name' and 'id_link'
    @return the string
    """
    name = link['name']
    id_link = link['id_link']
    return "[%s][%s]" % (name, id_link)