311 lines
8.5 KiB
Python
311 lines
8.5 KiB
Python
|
from __future__ import annotations
|
||
|
|
||
|
from typing import Final
|
||
|
|
||
|
# 32-byte signature assembled from four adjacent 8-byte literals; Python's
# implicit literal concatenation turns them into a single ``bytes`` object.
# NOTE(review): presumably compared against the leading bytes of a SAS data
# file to validate it -- confirm against the reader that uses it.
magic: Final = (
    b"\x00\x00\x00\x00\x00\x00\x00\x00"
    b"\x00\x00\x00\x00\xc2\xea\x81\x60"
    b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
    b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
)
|
||
|
|
||
|
# File-header layout constants. Each ``*_offset`` / ``*_length`` pair below
# describes one header field.
# NOTE(review): offsets and lengths are presumably expressed in bytes from the
# start of the file header -- confirm against the reader that consumes them.

# Alignment probes: checker bytes plus the offset/length/value used with them.
align_1_checker_value: Final = b"3"
align_1_offset: Final = 32
align_1_length: Final = 1
align_1_value: Final = 4
u64_byte_checker_value: Final = b"3"
align_2_offset: Final = 35
align_2_length: Final = 1
align_2_value: Final = 4

# Endianness, platform and encoding fields (one unit long each).
endianness_offset: Final = 37
endianness_length: Final = 1
platform_offset: Final = 39
platform_length: Final = 1
encoding_offset: Final = 70
encoding_length: Final = 1

# Dataset name, file type and creation/modification dates. These four fields
# are contiguous: each offset equals the previous offset plus its length.
dataset_offset: Final = 92
dataset_length: Final = 64
file_type_offset: Final = 156
file_type_length: Final = 8
date_created_offset: Final = 164
date_created_length: Final = 8
date_modified_offset: Final = 172
date_modified_length: Final = 8

# Header size, page size and page count (contiguous 4-unit fields).
header_size_offset: Final = 196
header_size_length: Final = 4
page_size_offset: Final = 200
page_size_length: Final = 4
page_count_offset: Final = 204
page_count_length: Final = 4

# SAS release / server type and operating-system descriptors (contiguous).
sas_release_offset: Final = 216
sas_release_length: Final = 8
sas_server_type_offset: Final = 224
sas_server_type_length: Final = 16
os_version_number_offset: Final = 240
os_version_number_length: Final = 16
os_maker_offset: Final = 256
os_maker_length: Final = 16
os_name_offset: Final = 272
os_name_length: Final = 16
|
||
|
# Page-level layout constants. The ``_x86`` / ``_x64`` suffixes give two
# variants of the same quantity.
# NOTE(review): presumably selected by the file's 32-/64-bit layout -- confirm
# against the reader.
page_bit_offset_x86: Final = 16
page_bit_offset_x64: Final = 32
subheader_pointer_length_x86: Final = 12
subheader_pointer_length_x64: Final = 24

# Offset/length pairs for the page type, block count and subheader count
# fields; the three 2-unit fields are contiguous starting at offset 0.
page_type_offset: Final = 0
page_type_length: Final = 2
block_count_offset: Final = 2
block_count_length: Final = 2
subheader_count_offset: Final = 4
subheader_count_length: Final = 2

# Bit masks applied to the page type value.
page_type_mask: Final = 0x0F00
# Keep "page_comp_type" bits
page_type_mask2: Final = 0xF000 | page_type_mask

# Page type codes.
page_meta_type: Final = 0x0000
page_data_type: Final = 0x0100
page_mix_type: Final = 0x0200
page_amd_type: Final = 0x0400
page_meta2_type: Final = 0x4000
page_comp_type: Final = 0x9000
# Both "meta" codes collected in one list for membership tests.
page_meta_types: Final = [page_meta_type, page_meta2_type]

subheader_pointers_offset: Final = 8

# Subheader identifier / type codes.
truncated_subheader_id: Final = 1
compressed_subheader_id: Final = 4
compressed_subheader_type: Final = 1
|
||
|
# Row/column subheader layout constants.
text_block_size_length: Final = 2

# NOTE(review): the ``*_multiplier`` values are presumably multiplied by a
# platform-dependent integer width to form an offset -- confirm in the reader.
row_length_offset_multiplier: Final = 5
row_count_offset_multiplier: Final = 6
col_count_p1_multiplier: Final = 9
col_count_p2_multiplier: Final = 10
row_count_on_mix_page_offset_multiplier: Final = 15

# Column-name pointer: total length, then offset/length of each sub-field.
column_name_pointer_length: Final = 8
column_name_text_subheader_offset: Final = 0
column_name_text_subheader_length: Final = 2
column_name_offset_offset: Final = 2
column_name_offset_length: Final = 2
column_name_length_offset: Final = 4
column_name_length_length: Final = 2

# Column attribute fields.
# NOTE(review): ``column_data_offset_offset`` and ``column_data_length_offset``
# deliberately share the value 8 here; presumably the reader adds a
# platform-dependent width to one of them -- confirm before changing either.
column_data_offset_offset: Final = 8
column_data_length_offset: Final = 8
column_data_length_length: Final = 4
column_type_offset: Final = 14
column_type_length: Final = 1

# Column format fields: text-subheader index, offset and length (contiguous
# 2-unit fields).
column_format_text_subheader_index_offset: Final = 22
column_format_text_subheader_index_length: Final = 2
column_format_offset_offset: Final = 24
column_format_offset_length: Final = 2
column_format_length_offset: Final = 26
column_format_length_length: Final = 2

# Column label fields: same three-part layout, directly after the format ones.
column_label_text_subheader_index_offset: Final = 28
column_label_text_subheader_index_length: Final = 2
column_label_offset_offset: Final = 30
column_label_offset_length: Final = 2
column_label_length_offset: Final = 32
column_label_length_length: Final = 2
|
||
|
# Byte-string markers for the two named compression schemes, followed by a
# list holding both so callers can iterate or test membership.
rle_compression: Final = b"SASYZCRL"
rdc_compression: Final = b"SASYZCR2"

compression_literals: Final = [rle_compression, rdc_compression]
|
||
|
|
||
|
# Incomplete list of encodings, using SAS nomenclature:
# https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
# corresponding to the Python documentation of standard encodings
# https://docs.python.org/3/library/codecs.html#standard-encodings
#
# Maps an integer encoding code to a Python codec name. Codes marked
# "not found" have no known codec and are intentionally left out of the dict.
encoding_names: Final = {
    20: "utf-8",
    29: "latin1",
    30: "latin2",
    31: "latin3",
    32: "latin4",
    33: "cyrillic",
    34: "arabic",
    35: "greek",
    36: "hebrew",
    37: "latin5",
    38: "latin6",
    39: "cp874",
    40: "latin9",
    41: "cp437",
    42: "cp850",
    43: "cp852",
    44: "cp857",
    45: "cp858",
    46: "cp862",
    47: "cp864",
    48: "cp865",
    49: "cp866",
    50: "cp869",
    51: "cp874",
    # 52: "",  # not found
    # 53: "",  # not found
    # 54: "",  # not found
    55: "cp720",
    56: "cp737",
    57: "cp775",
    58: "cp860",
    59: "cp863",
    60: "cp1250",
    61: "cp1251",
    62: "cp1252",
    63: "cp1253",
    64: "cp1254",
    65: "cp1255",
    66: "cp1256",
    67: "cp1257",
    68: "cp1258",
    118: "cp950",
    # 119: "",  # not found
    123: "big5",
    125: "gb2312",
    126: "cp936",
    134: "euc_jp",
    136: "cp932",
    138: "shift_jis",
    140: "euc-kr",
    141: "cp949",
    227: "latin8",
    # 228: "",  # not found
    # 229: ""  # not found
}
|
||
|
|
||
|
|
||
|
class SASIndex:
    """Integer codes identifying the kinds of subheader.

    Codes 0-7 are the values stored in ``subheader_signature_to_index``;
    ``data_subheader_index`` (8) has no signature entry in that mapping.
    """

    row_size_index: Final = 0
    column_size_index: Final = 1
    subheader_counts_index: Final = 2
    column_text_index: Final = 3
    column_name_index: Final = 4
    column_attributes_index: Final = 5
    format_and_label_index: Final = 6
    column_list_index: Final = 7
    data_subheader_index: Final = 8
|
||
|
|
||
|
|
||
|
# Maps a 4- or 8-byte subheader signature to its ``SASIndex`` code. Several
# signatures map to the same code.
# NOTE(review): the variants are presumably the byte-order and 32-/64-bit
# forms of one signature -- confirm against the format documentation.
subheader_signature_to_index: Final = {
    b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
    b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
    b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
    b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
    b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
    b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
    b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
    b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
    b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
    b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
    b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
    b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
    b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
    b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
    b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
    b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
    b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
    b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
    b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
    b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
    b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
    b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
    b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
    b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
    b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
}
|
||
|
|
||
|
|
||
|
# List of frequently used SAS date and datetime formats
# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
#
# Upper-case format names treated as dates; kept as a tuple so the collection
# cannot be mutated by callers.
sas_date_formats: Final = (
    "DATE",
    "DAY",
    "DDMMYY",
    "DOWNAME",
    "JULDAY",
    "JULIAN",
    "MMDDYY",
    "MMYY",
    "MMYYC",
    "MMYYD",
    "MMYYP",
    "MMYYS",
    "MMYYN",
    "MONNAME",
    "MONTH",
    "MONYY",
    "QTR",
    "QTRR",
    "NENGO",
    "WEEKDATE",
    "WEEKDATX",
    "WEEKDAY",
    "WEEKV",
    "WORDDATE",
    "WORDDATX",
    "YEAR",
    "YYMM",
    "YYMMC",
    "YYMMD",
    "YYMMP",
    "YYMMS",
    "YYMMN",
    "YYMON",
    "YYMMDD",
    "YYQ",
    "YYQC",
    "YYQD",
    "YYQP",
    "YYQS",
    "YYQN",
    "YYQR",
    "YYQRC",
    "YYQRD",
    "YYQRP",
    "YYQRS",
    "YYQRN",
    "YYMMDDP",
    "YYMMDDC",
    "E8601DA",
    "YYMMDDN",
    "MMDDYYC",
    "MMDDYYS",
    "MMDDYYD",
    "YYMMDDS",
    "B8601DA",
    "DDMMYYN",
    "YYMMDDD",
    "DDMMYYB",
    "DDMMYYP",
    "MMDDYYP",
    "YYMMDDB",
    "MMDDYYN",
    "DDMMYYC",
    "DDMMYYD",
    "DDMMYYS",
    "MINGUO",
)
|
||
|
|
||
|
# Upper-case format names treated as datetimes. Each name is listed exactly
# once (earlier revisions repeated "DTWKDATX" and "DTMONYY"); the order of
# first occurrence is kept, so membership tests behave as before.
sas_datetime_formats: Final = (
    "DATETIME",
    "DTWKDATX",
    "B8601DN",
    "B8601DT",
    "B8601DX",
    "B8601DZ",
    "B8601LX",
    "E8601DN",
    "E8601DT",
    "E8601DX",
    "E8601DZ",
    "E8601LX",
    "DATEAMPM",
    "DTDATE",
    "DTMONYY",
    "DTYEAR",
    "TOD",
    "MDYAMPM",
)
|