"""
PDU 7-bit 编码
第一个字符,去掉最高位,将第二个字符的最低位移入第一个字符的最高位。
第二个字符,右移1位,去掉最高2位,将第三个字符的最低2位移入第二个字符的最高2位。
第三个字符,右移2位,去掉最高3位,将第四个字符的最低3位移入第三个字符的最高3位。
以此类推,到第八个字符,右移7位,因位移前最高位是0,也就相当于空出第八个字符。
"""
# GSM 03.38 default alphabet: 7-bit code (as a one-character string) ->
# Unicode character.
# There is no entry for '\x1B': in GSM 03.38 that septet is the escape that
# introduces a character from the extension table, so it is not a printable
# character of its own.
gsm7_regular_decode_dict = {
    '\x00': u'\u0040',  # COMMERCIAL AT
    '\x01': u'\u00A3',  # POUND SIGN
    '\x02': u'\u0024',  # DOLLAR SIGN
    '\x03': u'\u00A5',  # YEN SIGN
    '\x04': u'\u00E8',  # LATIN SMALL LETTER E WITH GRAVE
    '\x05': u'\u00E9',  # LATIN SMALL LETTER E WITH ACUTE
    '\x06': u'\u00F9',  # LATIN SMALL LETTER U WITH GRAVE
    '\x07': u'\u00EC',  # LATIN SMALL LETTER I WITH GRAVE
    '\x08': u'\u00F2',  # LATIN SMALL LETTER O WITH GRAVE
    '\x09': u'\u00C7',  # LATIN CAPITAL LETTER C WITH CEDILLA
    # The Unicode page suggests this is a mistake: but
    # it's still in the latest version of the spec and
    # our implementation has to be exact.
    '\x0A': u'\u000A',  # LINE FEED
    '\x0B': u'\u00D8',  # LATIN CAPITAL LETTER O WITH STROKE
    '\x0C': u'\u00F8',  # LATIN SMALL LETTER O WITH STROKE
    '\x0D': u'\u000D',  # CARRIAGE RETURN
    '\x0E': u'\u00C5',  # LATIN CAPITAL LETTER A WITH RING ABOVE
    '\x0F': u'\u00E5',  # LATIN SMALL LETTER A WITH RING ABOVE
    '\x10': u'\u0394',  # GREEK CAPITAL LETTER DELTA
    '\x11': u'\u005F',  # LOW LINE
    '\x12': u'\u03A6',  # GREEK CAPITAL LETTER PHI
    '\x13': u'\u0393',  # GREEK CAPITAL LETTER GAMMA
    '\x14': u'\u039B',  # GREEK CAPITAL LETTER LAMDA
    '\x15': u'\u03A9',  # GREEK CAPITAL LETTER OMEGA
    '\x16': u'\u03A0',  # GREEK CAPITAL LETTER PI
    '\x17': u'\u03A8',  # GREEK CAPITAL LETTER PSI
    '\x18': u'\u03A3',  # GREEK CAPITAL LETTER SIGMA
    '\x19': u'\u0398',  # GREEK CAPITAL LETTER THETA
    '\x1A': u'\u039E',  # GREEK CAPITAL LETTER XI
    '\x1C': u'\u00C6',  # LATIN CAPITAL LETTER AE
    '\x1D': u'\u00E6',  # LATIN SMALL LETTER AE
    '\x1E': u'\u00DF',  # LATIN SMALL LETTER SHARP S (German)
    '\x1F': u'\u00C9',  # LATIN CAPITAL LETTER E WITH ACUTE
    '\x20': u'\u0020',  # SPACE
    '\x21': u'\u0021',  # EXCLAMATION MARK
    '\x22': u'\u0022',  # QUOTATION MARK
    '\x23': u'\u0023',  # NUMBER SIGN
    '\x24': u'\u00A4',  # CURRENCY SIGN
    '\x25': u'\u0025',  # PERCENT SIGN
    '\x26': u'\u0026',  # AMPERSAND
    '\x27': u'\u0027',  # APOSTROPHE
    '\x28': u'\u0028',  # LEFT PARENTHESIS
    '\x29': u'\u0029',  # RIGHT PARENTHESIS
    '\x2A': u'\u002A',  # ASTERISK
    '\x2B': u'\u002B',  # PLUS SIGN
    '\x2C': u'\u002C',  # COMMA
    '\x2D': u'\u002D',  # HYPHEN-MINUS
    '\x2E': u'\u002E',  # FULL STOP
    '\x2F': u'\u002F',  # SOLIDUS
    '\x30': u'\u0030',  # DIGIT ZERO
    '\x31': u'\u0031',  # DIGIT ONE
    '\x32': u'\u0032',  # DIGIT TWO
    '\x33': u'\u0033',  # DIGIT THREE
    '\x34': u'\u0034',  # DIGIT FOUR
    '\x35': u'\u0035',  # DIGIT FIVE
    '\x36': u'\u0036',  # DIGIT SIX
    '\x37': u'\u0037',  # DIGIT SEVEN
    '\x38': u'\u0038',  # DIGIT EIGHT
    '\x39': u'\u0039',  # DIGIT NINE
    '\x3A': u'\u003A',  # COLON
    '\x3B': u'\u003B',  # SEMICOLON
    '\x3C': u'\u003C',  # LESS-THAN SIGN
    '\x3D': u'\u003D',  # EQUALS SIGN
    '\x3E': u'\u003E',  # GREATER-THAN SIGN
    '\x3F': u'\u003F',  # QUESTION MARK
    '\x40': u'\u00A1',  # INVERTED EXCLAMATION MARK
    '\x41': u'\u0041',  # LATIN CAPITAL LETTER A
    '\x42': u'\u0042',  # LATIN CAPITAL LETTER B
    '\x43': u'\u0043',  # LATIN CAPITAL LETTER C
    '\x44': u'\u0044',  # LATIN CAPITAL LETTER D
    '\x45': u'\u0045',  # LATIN CAPITAL LETTER E
    '\x46': u'\u0046',  # LATIN CAPITAL LETTER F
    '\x47': u'\u0047',  # LATIN CAPITAL LETTER G
    '\x48': u'\u0048',  # LATIN CAPITAL LETTER H
    '\x49': u'\u0049',  # LATIN CAPITAL LETTER I
    '\x4A': u'\u004A',  # LATIN CAPITAL LETTER J
    '\x4B': u'\u004B',  # LATIN CAPITAL LETTER K
    '\x4C': u'\u004C',  # LATIN CAPITAL LETTER L
    '\x4D': u'\u004D',  # LATIN CAPITAL LETTER M
    '\x4E': u'\u004E',  # LATIN CAPITAL LETTER N
    '\x4F': u'\u004F',  # LATIN CAPITAL LETTER O
    '\x50': u'\u0050',  # LATIN CAPITAL LETTER P
    '\x51': u'\u0051',  # LATIN CAPITAL LETTER Q
    '\x52': u'\u0052',  # LATIN CAPITAL LETTER R
    '\x53': u'\u0053',  # LATIN CAPITAL LETTER S
    '\x54': u'\u0054',  # LATIN CAPITAL LETTER T
    '\x55': u'\u0055',  # LATIN CAPITAL LETTER U
    '\x56': u'\u0056',  # LATIN CAPITAL LETTER V
    '\x57': u'\u0057',  # LATIN CAPITAL LETTER W
    '\x58': u'\u0058',  # LATIN CAPITAL LETTER X
    '\x59': u'\u0059',  # LATIN CAPITAL LETTER Y
    '\x5A': u'\u005A',  # LATIN CAPITAL LETTER Z
    '\x5B': u'\u00C4',  # LATIN CAPITAL LETTER A WITH DIAERESIS
    '\x5C': u'\u00D6',  # LATIN CAPITAL LETTER O WITH DIAERESIS
    '\x5D': u'\u00D1',  # LATIN CAPITAL LETTER N WITH TILDE
    '\x5E': u'\u00DC',  # LATIN CAPITAL LETTER U WITH DIAERESIS
    '\x5F': u'\u00A7',  # SECTION SIGN
    '\x60': u'\u00BF',  # INVERTED QUESTION MARK
    '\x61': u'\u0061',  # LATIN SMALL LETTER A
    '\x62': u'\u0062',  # LATIN SMALL LETTER B
    '\x63': u'\u0063',  # LATIN SMALL LETTER C
    '\x64': u'\u0064',  # LATIN SMALL LETTER D
    '\x65': u'\u0065',  # LATIN SMALL LETTER E
    '\x66': u'\u0066',  # LATIN SMALL LETTER F
    '\x67': u'\u0067',  # LATIN SMALL LETTER G
    '\x68': u'\u0068',  # LATIN SMALL LETTER H
    '\x69': u'\u0069',  # LATIN SMALL LETTER I
    '\x6A': u'\u006A',  # LATIN SMALL LETTER J
    '\x6B': u'\u006B',  # LATIN SMALL LETTER K
    '\x6C': u'\u006C',  # LATIN SMALL LETTER L
    '\x6D': u'\u006D',  # LATIN SMALL LETTER M
    '\x6E': u'\u006E',  # LATIN SMALL LETTER N
    '\x6F': u'\u006F',  # LATIN SMALL LETTER O
    '\x70': u'\u0070',  # LATIN SMALL LETTER P
    '\x71': u'\u0071',  # LATIN SMALL LETTER Q
    '\x72': u'\u0072',  # LATIN SMALL LETTER R
    '\x73': u'\u0073',  # LATIN SMALL LETTER S
    '\x74': u'\u0074',  # LATIN SMALL LETTER T
    '\x75': u'\u0075',  # LATIN SMALL LETTER U
    '\x76': u'\u0076',  # LATIN SMALL LETTER V
    '\x77': u'\u0077',  # LATIN SMALL LETTER W
    '\x78': u'\u0078',  # LATIN SMALL LETTER X
    '\x79': u'\u0079',  # LATIN SMALL LETTER Y
    '\x7A': u'\u007A',  # LATIN SMALL LETTER Z
    '\x7B': u'\u00E4',  # LATIN SMALL LETTER A WITH DIAERESIS
    '\x7C': u'\u00F6',  # LATIN SMALL LETTER O WITH DIAERESIS
    '\x7D': u'\u00F1',  # LATIN SMALL LETTER N WITH TILDE
    '\x7E': u'\u00FC',  # LATIN SMALL LETTER U WITH DIAERESIS
    '\x7F': u'\u00E0',  # LATIN SMALL LETTER A WITH GRAVE
}
# Default GSM 03.38 escaped characters -> Unicode.
# Each key is the septet that FOLLOWS the escape septet; the escape septet
# itself is not part of the key.
gsm7_escape_decode_dict = {
    '\x0A': u'\u000C',  # FORM FEED
    '\x14': u'\u005E',  # CIRCUMFLEX ACCENT
    '\x28': u'\u007B',  # LEFT CURLY BRACKET
    '\x29': u'\u007D',  # RIGHT CURLY BRACKET
    '\x2F': u'\u005C',  # REVERSE SOLIDUS
    '\x3C': u'\u005B',  # LEFT SQUARE BRACKET
    '\x3D': u'\u007E',  # TILDE
    '\x3E': u'\u005D',  # RIGHT SQUARE BRACKET
    '\x40': u'\u007C',  # VERTICAL LINE
    '\x65': u'\u20AC',  # EURO SIGN
}
# Replacement characters, default is question mark. Used when it is not too
# important to ensure exact UTF-8 -> GSM -> UTF-8 equivalence, such as when
# humans read and write SMS. But for USSD and other M2M applications it's
# important to ensure the conversion is exact.
# Maps a Unicode character that has no GSM code of its own to the septet of
# a look-alike character (the conversion is therefore lossy).
gsm7_replace_encode_dict = {
    u'\u00E7': '\x09',  # LATIN SMALL LETTER C WITH CEDILLA
    u'\u0391': '\x41',  # GREEK CAPITAL LETTER ALPHA
    u'\u0392': '\x42',  # GREEK CAPITAL LETTER BETA
    u'\u0395': '\x45',  # GREEK CAPITAL LETTER EPSILON
    u'\u0397': '\x48',  # GREEK CAPITAL LETTER ETA
    u'\u0399': '\x49',  # GREEK CAPITAL LETTER IOTA
    u'\u039A': '\x4B',  # GREEK CAPITAL LETTER KAPPA
    u'\u039C': '\x4D',  # GREEK CAPITAL LETTER MU
    u'\u039D': '\x4E',  # GREEK CAPITAL LETTER NU
    u'\u039F': '\x4F',  # GREEK CAPITAL LETTER OMICRON
    u'\u03A1': '\x50',  # GREEK CAPITAL LETTER RHO
    u'\u03A4': '\x54',  # GREEK CAPITAL LETTER TAU
    u'\u03A7': '\x58',  # GREEK CAPITAL LETTER CHI
    u'\u03A5': '\x59',  # GREEK CAPITAL LETTER UPSILON
    u'\u0396': '\x5A',  # GREEK CAPITAL LETTER ZETA
}
# Short alias for the septet -> Unicode table.
gsm7 = gsm7_regular_decode_dict
# Reverse lookup used when encoding: Unicode character -> septet.
gsm7t = {uni: septet for septet, uni in gsm7.items()}
def pdu7encode(strs):
    """Pack a string of 7-bit characters into PDU octets, as upper-case hex.

    The characters are laid end to end as 7-bit values, least-significant
    bit first, and the resulting bit stream is cut into octets. The last
    octet is padded with zero bits in its high positions when the total bit
    count is not a multiple of 8.

    Args:
        strs: text whose characters all have code points 0..0x7F. No
            alphabet translation is done here; each character's ordinal is
            packed as-is.

    Returns:
        The packed octets as an upper-case hexadecimal string, two digits
        per octet. An empty input gives an empty string.

    Raises:
        ValueError: if a character's code point does not fit in 7 bits.
            (Packing such a value would spill into the neighbouring septet
            and corrupt the whole stream.)
    """
    octets = []
    acc = 0    # pending bits; bit 0 is the next bit to be emitted
    nbits = 0  # number of valid bits currently held in acc
    for ch in strs:
        code = ord(ch)
        if code > 0x7F:
            raise ValueError(
                "pdu7encode: character {0!r} does not fit in 7 bits".format(ch))
        # Append the new septet above the bits that are still pending.
        acc |= code << nbits
        nbits += 7
        while nbits >= 8:
            octets.append(acc & 0xFF)
            acc >>= 8
            nbits -= 8
    if nbits:
        # Remaining bits form the final octet; its high bits stay zero.
        octets.append(acc)
    return ''.join("{0:02X}".format(octet) for octet in octets)
def pdu7decode(hexstr, length=None):
    """Unpack PDU octets (hex string) into a string of 7-bit characters.

    The octets are read as one bit stream, least-significant bit first, and
    cut into 7-bit values; each value becomes one character via ``chr``. No
    alphabet translation is done here.

    Trailing bits that do not fill a whole septet are treated as padding and
    dropped when they are all zero. If they are non-zero (the input was
    truncated in the middle of a septet) they are zero-extended and returned
    as a final character, so a PDU whose last all-padding octet was cut off
    still decodes completely.

    Args:
        hexstr: hexadecimal digits, two per octet. A dangling odd final
            digit is ignored.
        length: optional number of septets the message is known to hold.
            When given, the result is cut to at most this many characters.
            This resolves the case where the octet count is a multiple of 7
            and the last septet may be either padding or a real code 0.

    Returns:
        The unpacked characters, each with a code point in 0..0x7F. An empty
        input gives an empty string.

    Raises:
        ValueError: if ``hexstr`` contains a non-hexadecimal character or
            ``length`` is negative.
    """
    if length is not None and length < 0:
        raise ValueError("pdu7decode: length must not be negative")

    # Parse digit by digit so that every character is validated.
    nibbles = [int(c, 16) for c in hexstr]

    septets = []
    acc = 0    # pending bits; bit 0 is the next bit to be consumed
    nbits = 0  # number of valid bits currently held in acc
    # zip() pairs high/low digits and drops a dangling odd digit.
    for high, low in zip(nibbles[0::2], nibbles[1::2]):
        acc |= ((high << 4) | low) << nbits
        nbits += 8
        while nbits >= 7:
            septets.append(acc & 0x7F)
            acc >>= 7
            nbits -= 7
    if acc:
        # Fewer than 7 bits remain and they are not all zero: keep them as
        # a zero-extended septet instead of losing data.
        septets.append(acc)

    if length is not None:
        septets = septets[:length]
    return ''.join(chr(code) for code in septets)
def gsm7encode(strs):
    """Encode Unicode text as a packed GSM 7-bit PDU hex string.

    Each character is translated to its GSM septet through ``gsm7t``.
    Characters found only in ``gsm7_escape_decode_dict`` (for example
    ``{ } [ ] ~ ^ | \\`` and the euro sign) are emitted as two septets: the
    escape septet 0x1B followed by the character's extension code. The
    septets are then packed by ``pdu7encode``.

    Args:
        strs: the text to encode.

    Returns:
        The packed message as an upper-case hexadecimal string. If any
        character has no GSM representation, a ``keyerror:`` line is printed
        and the encoding of the empty message (``pdu7encode('')``) is
        returned; nothing of the input is encoded in that case.
    """
    # Inverse of the extension table: Unicode character -> extension code.
    escape_codes = dict(
        (uni, code) for code, uni in gsm7_escape_decode_dict.items())

    septets = []
    try:
        for ch in strs:
            code = gsm7t.get(ch)
            if code is None:
                # Not in the default alphabet: try ESC + extension code.
                # Raises KeyError when the character cannot be encoded.
                code = '\x1b' + escape_codes[ch]
            septets.append(code)
    except KeyError as e:
        print("keyerror:", e)
        # Best-effort contract kept from the original: encode nothing.
        septets = []
    return pdu7encode(''.join(septets))
def gsm7decode(hexstr):
    """Decode a packed GSM 7-bit PDU hex string into Unicode text.

    The octets are unpacked with ``pdu7decode`` and each septet is
    translated through ``gsm7``. The escape septet 0x1B is not translated
    itself; it selects ``gsm7_escape_decode_dict`` for the septet that
    follows it.

    Args:
        hexstr: the packed message as hexadecimal digits.

    Returns:
        The decoded text. An escape followed by a code that is not in the
        extension table yields that code's default-alphabet character; an
        escape with nothing usable after it yields a space.
    """
    chars = []
    escaped = False  # True when the previous septet was the escape 0x1B
    for septet in pdu7decode(hexstr):
        if escaped:
            escaped = False
            uni = gsm7_escape_decode_dict.get(septet)
            if uni is None:
                # Unknown extension code: fall back to the default table
                # (a second escape has no entry there, hence the space).
                uni = gsm7.get(septet, u' ')
            chars.append(uni)
        elif septet == '\x1b':
            escaped = True
        else:
            chars.append(gsm7[septet])
    if escaped:
        # Message ended right after an escape septet.
        chars.append(u' ')
    return ''.join(chars)
# Small demo: encode two sample strings, then decode a sample PDU. The
# trailing comments show the expected console output.
if __name__ == "__main__":
    s1 = "abcd1234"
    s2 = "!@#&*()"
    ret1 = gsm7encode(s1)
    ret2 = gsm7encode(s2)
    print(ret1, len(ret1))
    # 61F1981C93CD68 14
    print(ret2, len(ret2))
    # 21C0C8A442A500 14

    # Same octets as the expected ret2, minus its final "00" octet.
    h1 = "21C0C8A442A5"
    raw = pdu7decode(h1)
    print(raw, len(raw))
    # ! #&*() 7
    text = gsm7decode(h1)
    print(text, len(text))
    # !@#&*() 7
# Python GSM 7-bit encoding
# Source: https://www.cnblogs.com/rsrm/p/12184432.html