`
san_yun
  • 浏览: 2662982 次
  • 来自: 杭州
文章分类
社区版块
存档分类
最新评论

django 处理unicode编码

 
阅读更多

django/utils/encoding.py

import types
import urllib
import locale
import datetime
import codecs
from decimal import Decimal

from django.utils.functional import Promise

class DjangoUnicodeDecodeError(UnicodeDecodeError):
    """
    A UnicodeDecodeError that also carries the object that failed to decode.

    The first constructor argument is stored on ``obj``; the remaining
    arguments are handed unchanged to UnicodeDecodeError.
    """
    def __init__(self, obj, *args):
        # Remember the offending input so __str__ can report it.
        self.obj = obj
        super(DjangoUnicodeDecodeError, self).__init__(*args)

    def __str__(self):
        # Standard decode-error message, followed by the repr and type of
        # the object that was passed in.
        base_message = super(DjangoUnicodeDecodeError, self).__str__()
        offender = self.obj
        return '%s. You passed in %r (%s)' % (base_message, offender,
                type(offender))

class StrAndUnicode(object):
    """
    Mix-in that derives __str__ from __unicode__.

    Subclasses provide __unicode__; __str__ then returns that text encoded
    as a UTF-8 bytestring.
    """
    def __str__(self):
        text = self.__unicode__()
        return text.encode('utf-8')

def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Convert 's' to a unicode object, decoding bytestrings with the
    'encoding' codec.

    Promise instances are returned untouched; everything else is delegated
    to force_unicode() with the same arguments.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    is_lazy = isinstance(s, Promise)
    if not is_lazy:
        return force_unicode(s, encoding, strings_only, errors)
    # The input is the result of a gettext_lazy() call: keep it lazy.
    return s

def is_protected_type(obj):
    """Tell whether 'obj' is an instance of a protected type.

    Protected types are None, integers (int and long), floats, Decimals and
    the datetime, date and time classes of the datetime module. Such objects
    are preserved as-is when passed to force_unicode(strings_only=True).
    """
    protected_types = (
        types.NoneType,
        int, long,
        float, Decimal,
        datetime.datetime, datetime.date, datetime.time,
    )
    return isinstance(obj, protected_types)

def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Similar to smart_unicode, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects
    (those for which is_protected_type() is true are returned unchanged).

    Bytestrings are decoded with 'encoding' using the 'errors' policy.
    Objects that are not strings are converted through their __unicode__
    method when they have one, otherwise through str() followed by decoding.

    Raises DjangoUnicodeDecodeError if decoding fails and 's' is not an
    Exception instance. Exception instances that cannot be converted
    directly are rendered by joining their individually converted items
    with single spaces.
    """
    # Handle the common case first, saves 30-40% in performance when s
    # is an instance of unicode. This function gets called often in that
    # setting.
    if isinstance(s, unicode):
        return s
    # Protected types are only passed through when the caller asked for
    # strings_only; otherwise they are converted like any other object.
    if strings_only and is_protected_type(s):
        return s
    try:
        if not isinstance(s, basestring,):
            # Not a string at all: prefer the object's own unicode
            # conversion when it provides one.
            if hasattr(s, '__unicode__'):
                s = unicode(s)
            else:
                try:
                    # Go through str() and decode the resulting bytestring.
                    s = unicode(str(s), encoding, errors)
                except UnicodeEncodeError:
                    if not isinstance(s, Exception):
                        raise
                    # If we get to here, the caller has passed in an Exception
                    # subclass populated with non-ASCII data without special
                    # handling to display as a string. We need to handle this
                    # without raising a further exception. We do an
                    # approximation to what the Exception's standard str()
                    # output should be.
                    s = ' '.join([force_unicode(arg, encoding, strings_only,
                            errors) for arg in s])
        elif not isinstance(s, unicode):
            # Note: We use .decode() here, instead of unicode(s, encoding,
            # errors), so that if s is a SafeString, it ends up being a
            # SafeUnicode at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError, e:
        if not isinstance(s, Exception):
            # Re-raise with the offending object attached so the error
            # message shows what was passed in.
            raise DjangoUnicodeDecodeError(s, *e.args)
        else:
            # If we get to here, the caller has passed in an Exception
            # subclass populated with non-ASCII bytestring data without a
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
            s = ' '.join([force_unicode(arg, encoding, strings_only,
                    errors) for arg in s])
    return s

def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if strings_only and isinstance(s, (types.NoneType, int)):
        return s
    if isinstance(s, Promise):
        return unicode(s).encode(encoding, errors)
    elif not isinstance(s, basestring):
        try:
            return str(s)
        except UnicodeEncodeError:
            if isinstance(s, Exception):
                # An Exception subclass containing non-ASCII data that doesn't
                # know how to print itself properly. We shouldn't raise a
                # further exception.
                return ' '.join([smart_str(arg, encoding, strings_only,
                        errors) for arg in s])
            return unicode(s).encode(encoding, errors)
    elif isinstance(s, unicode):
        return s.encode(encoding, errors)
    elif s and encoding != 'utf-8':
        return s.decode('utf-8', errors).encode(encoding, errors)
    else:
        return s

def iri_to_uri(iri):
    """
    Turn an Internationalized Resource Identifier (IRI) portion into a URI
    portion that can safely be included in a URL.

    This follows the algorithm from section 3.1 of RFC 3987, simplified
    because the input is assumed to be either UTF-8 or unicode already.

    None is passed through unchanged; otherwise an ASCII string holding the
    encoded result is returned.
    """
    if iri is not None:
        # The safe characters are built from the "reserved" and "unreserved"
        # sets of RFC 3986, sections 2.2 and 2.3:
        #     reserved    = gen-delims / sub-delims
        #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
        #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
        #                   / "*" / "+" / "," / ";" / "="
        #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
        # urllib.quote already treats every unreserved character except ~ as
        # safe. % is listed as well because the end of section 3.1 of
        # RFC 3987 specifically says that % must not be converted.
        encoded = smart_str(iri)
        return urllib.quote(encoded, safe="/#%[]=:;$&()+,!?*@'~")
    return iri

def filepath_to_uri(path):
    """Convert a file system path to a URI portion that is suitable for
    inclusion in a URL.

    The input is assumed to be either UTF-8 or unicode already.

    Characters that would normally be recognized as special chars for URIs
    are encoded. The ' character is left alone because it is a valid
    character within URIs; see the encodeURIComponent() JavaScript function
    for more details.

    None is passed through unchanged; otherwise an ASCII string holding the
    encoded result is returned.
    """
    if path is not None:
        # Backslashes are replaced explicitly instead of relying on
        # `os.sep` and `os.altsep`, which leaves some flexibility for
        # hardcoding separators.
        normalized = smart_str(path).replace("\\", "/")
        return urllib.quote(normalized, safe="/~!*()'")
    return path

# The encoding of the default system locale. Falls back to 'ascii' if the
# locale encoding could not be determined or is not supported by Python
# (codecs.lookup() fails for it).  See tickets #10335 and #5846
try:
    DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii'
    codecs.lookup(DEFAULT_LOCALE_ENCODING)
except Exception:
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt and SystemExit raised while importing this module
    # are not silently swallowed.
    DEFAULT_LOCALE_ENCODING = 'ascii'

 

分享到:
评论

相关推荐

    django3.0+rest framework + Vue 生鲜超市项目-后端

    Django 3.0版本引入了多项改进和新特性,包括对Python 3.8的支持、更好的性能优化以及对Unicode路径的支持。在本项目中,Django将负责处理数据库交互、路由、身份验证和授权等核心功能。 2. Django Rest Framework ...

    django实现HttpResponse返回json数据为中文

    这是因为默认情况下,`json.dumps()` 方法会将所有非 ASCII 字符转义成 Unicode 编码,而 `HttpResponse` 默认使用的编码可能不支持这些转义后的字符。为了解决这个问题,我们需要做两件事: 1. **设置 `ensure_...

    Python django Extjs 项目开发中的错误小记

    - 在使用Django ORM时,如果使用FileInfo类的__unicode__方法,应该返回具体的属性名,如self.file_name,而不是self.name。这涉及到对象属性访问错误,通常因为类定义不正确或方法引用了不存在的属性。 - 当使用...

    django-queryset-csv, 用于 Django querysets的CSV导出程序.zip

    django-queryset-csv, 用于 Django querysets的CSV导出程序 用于 Django querysets的CSV导出程序。这里工具是在 Django 中重复执行以下操作而创建的:编写基于简单...自动将unicode字符编码为 UTF-8创建一个将qu

    django-docs-2.2-zh-hans.zip

    Django 中的 Unicode Django开源项目 了解 Django 项目本身的开发进程以及您如何为 Django 做贡献: 社区: 如何参与其中 | 发布进程 | 团队组织 | Django 源代码仓库 | 安全政策 | 邮件列表 设计哲学: 概览 文档...

    Python库 | django-bom-1.195.tar.gz

    在处理Unicode文本时,BOM是一个特殊的字符序列,用于标识文本编码的字节顺序。在某些情况下,BOM可能会引起解析问题,特别是当它不被预期或处理不当的时候。Django-BOM库提供了工具和方法来自动检测并处理这些情况...

    使用Celery的Django异步信号处理。_Python_下载.zip

    在Python的Web开发框架Django中,为了提高应用性能和用户体验,我们经常需要处理异步任务。Celery是一个强大的分布式任务队列,它允许我们在Django应用中轻松地实现异步任务,包括信号处理。本篇文章将深入探讨如何...

    Django切换MySQL数据库.rar

    MySQL通常使用`utf8mb4`编码来支持Unicode,因此需要确保在settings.py中设置正确的字符集: ```python DATABASES['default']['OPTIONS'] = {'charset': 'utf8mb4'} ``` 此外,如果你的Django模型中包含了日期或...

    开源会议室预约小程序+Django服务端后台.zip

    django-rest-framework + django-cool框架开发创建数据库CREATE SCHEMA `meeting` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci ;初始化数据将server/meeting/local_settings.py.default重命名为...

    python和django问题.docx

    Django 2.2 默认支持Unicode编码,因此建议所有文件,特别是处理文本内容的文件,如HTML模板,都采用UTF-8编码。在读取文件时,应明确指定`encoding='utf-8'`,例如: ```python with Path(CURRENT_DIR, '...

    Python库 | django_emoticons-1.0.1-py2.py3-none-any.whl

    Django强调DRY(Don't Repeat Yourself)原则,避免重复编码,提高开发效率。 **二、Emoticons和django_emoticons库** 表情符号在现代社交媒体和在线通信中扮演着重要角色,它们可以增强文本的情感表达。`django_...

    Python库 | django_xicon-0.14.31-py3-none-any.whl

    这可能包括使用图标名称、Unicode码点或者直接链接来获取图标,使得UI设计更加灵活和多样化。 **后端开发** 后端开发是指构建服务器端应用程序的过程,包括数据库设计、服务器端逻辑和API接口等。`django_xicon` ...

    Django接受前端数据的几种方法总结

    需要注意的是,在Python 2.7环境下,获取到的数据是Unicode编码的,有时需要转换为str类型才能进一步处理。 对于数组型数据,如果尝试用获取字符串的方式去获取,可能会得到None。正确的做法是使用`request.GET....

    基于python+django+mysql的超市管理系统源码+项目说明(计算机毕设).zip

    mysql> CREATE DATABASE demo_django_supermarket DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; ``` - 检查数据库默认编码 ```mysql mysql> USE demo_django_supermarket; mysql> SELECT @@...

    django queryset 去重 .distinct()说明

    此外,`urllib`中的`urlencode`函数用于编码URL参数,但不支持直接处理Unicode对象,需要先进行编码转换。 随着Python的发展,`requests`库已经成为处理HTTP请求的首选库,因为它具有更简洁的API和更好的错误处理。...

    django 2.2和mysql使用的常见问题

    在某些情况下,str类型并不直接拥有decode方法,因为它已经是一种Unicode编码的字符串。 - 如果代码中尝试对一个str类型使用decode方法,会导致错误。正确的做法是将str编码成bytes类型后再进行decode操作。 4. ...

    django项目运行因中文而乱码报错的几种情况解决

    总之,处理Django项目中的中文乱码问题通常涉及到文件编码、数据库配置、HTML模板、Django设置以及系统和邮件编码等方面。确保整个应用链路中的编码一致且支持UTF-8是解决这类问题的关键。在实际开发中,遇到此类...

    Django返回json数据用法示例

    需要注意的是,在Django 1.7及更高版本中,更推荐使用`JsonResponse`类来创建JSON响应,因为这个类提供了更多的便利性和安全性,例如自动设置正确的Content-Type头和处理Unicode编码: ```python from django....

    PyPI 官网下载 | django_fb_phoneauth-0.1.0-py3-none-any.whl

    3. **Python 3**:Python 3是Python语言的最新版本,具有更现代的语法和功能,相比Python 2有诸多改进,包括更好的字符串处理、新的内存管理以及对Unicode的全面支持。 4. **Python库的发布与安装**:在Python社区...

    详解如何用django实现redirect的几种方法总结

    总的来说,Django提供了多种方式来处理URL重定向,可以根据项目需求选择最适合的方法。在URL配置中设置重定向简洁明了,视图中的`HttpResponseRedirect`适合视图逻辑后的跳转,而`redirects`应用则适用于管理大量和...

Global site tag (gtag.js) - Google Analytics