# CCL Home Page
# Up Directory CCL tex-koi8
# Jan Labanowski, jkl@ccl.net, Jan. 10, 1992
# File tex_koi8.rus

# This is a transliteration data file for converting from Latex to KOI8
# as used by RELCOM (GOST 19768-74). It assumes that the sister file:
# koi8-latex.rus was used to obtain the LaTeX file. 
# Since Latex is a program, frequently complicated, it is probably possible
# to convert LaTeX to KOI8 only with TeX. However, if you have a simple
# LaTeX document, without math, tables, different font sizes and shapes,
# you can easily convert it to KOI8 text, by listing symbols for Russian
# letters on the left side and appropriate KOI8 codes on the right.
# Since there are many possible assignments in LaTeX, you need to
# modify this file to the ones actually used. First of all, some other
# symbols may be used to represent Russian letters. I included some more
# popular sequences, but there might be more, which I am not aware of.
# The TeX transliteration sequences follow AMS cyrillic convention for
# WNCYR fonts with cyracc.def file
# To be used with translit.c program by Jan Labanowski. For a format of
# this file consult translit documentation

   1            file version number

# The three delimiter pairs below are the ones used throughout the rest of
# this file: quotes around plain strings, square brackets around lists and
# curly braces around regular expressions.
   "    "      # string delimiters
   [    ]      # list delimiters
   {    }      # regular expression delimiters


#starting sequence
# (empty string here)
""


#ending sequence
# (empty string here)
""


   4     # number of input SHIFT sequences
# The last two of the input "character sets" are used simply to delete all
# characters from LaTeX preamble and the \end{document} closing
# For set 2, note that \cyr may be followed by spaces or new lines
# and preceded with some LaTeX escape sequences

# One entry per input set, in the column order given on the next line.
#SO-match #SO-subs  #Nest-up  #Nest-down  #SI-match   #SI-subs
    ""       ""        ""         ""         ""         ""  #Latin letters

# Set 2: entered on an opening brace followed by \cyr and at least one
# space or control character in the 0x09-0x0D range; left on the closing brace.
{{\\cyr[ \0x09-\0x0D]+}
             ""        "{"        "}"        "}"        ""  #cyrillic

# Set 3: runs from \documentstyle up to and including \begin{document}
# and any whitespace that follows it.
# NOTE(review): the range below ends at \15, while every other whitespace
# range in this file ends at \0x0D - confirm that \15 is the intended code.
"\documentstyle" 
             ""        ""         ""  {\begin{document\0x7D[\0d09-\15]*} ""

# Set 4: entered at \end{document}; no closing match is given.
"\end{document}" ""    ""         ""         ""         ""


   0     # number of output SHIFT sequences, only one set of output characters

# conversion table
# Each row gives: input set number, input sequence, output set number,
# output sequence.
# Input set numbers follow the order of the SHIFT sequence rows above
# (1 Latin, 2 cyrillic, 3 LaTeX preamble, 4 end of document).
# NOTE(review): rows with input set 0 presumably apply in every input set -
# confirm against the translit documentation.
# inp_set     inp_seq           out_set    out_seq

     3   [\001-\255]                0     "\00"      # delete LaTeX preamble
     4   [\001-\255]                0     "\00"      # delete end{document}

     1      [A-Za-z]                0    [A-Za-z]    #Latin letters A-Z and a-z

# If double backslash is followed by new line, skip the double backslash
# (the replacement is the CR LF pair 0x0D 0x0A)
     0   {\\\\[\0x09-\0x0d]*}       0   "\0x0D\0x0A"    #restore new lines

     0       "\\040"                0         " "    # protected space


# Convert some special TeX characters
# Each row replaces a LaTeX spelling of a symbol (math-mode wrapper or
# backslash escape) with the plain character itself.

#  these do not require going out of {\cyr ....}
     0       "$[$"                    0         "["             
     0       "$]$"                    0         "]"             
     0       "$\wedge$"               0         "^"             
     0       "$\lbrace$"              0         "{"             
     0       "$\rbrace$"              0         "}"             
     0       "$\sim$"                 0         "~"             
     0       "$\backslash$"           0         "\"             
     0       "$\mid$"                 0         "|"             
     0       "$\star$"                0         "*"             
     0       "$<$"                    0         "<"             
     0       "$>$"                    0         ">"             
     0       "\$"                     0         "$"             
     0       "\%"                     0         "%"             

#  These were represented correctly only in Latin charset
#  (so they are listed for input set 1 only)
     1       "\_"                     0         "_"       
     1       "\&"                     0         "&"       
     1       "\#"                     0         "#"       
     1       "\@"                     0         "@"       
     

# Cyrillic letters  
# All rows below use input set 2 (text inside {\cyr ...}) and emit one
# KOI8 byte. Most letters have two spellings: a control-sequence form
# (backslash + name, swallowing any trailing space or 0x09-0x0D characters)
# and a bare multi-letter form.

# note that TS and ts sequences are checked before the \cydot is removed

 2  {\\T[Ss][ \0x09-\0x0d]*}    0         "\0xE3"           # \TS, \Ts
 2       {T[Ss]}                0         "\0xE3"           # Tse

# The control-sequence form accepts either case for the second letter,
# in parallel with the capital rule above and the bare rule below.
 2  {\\t[Ss][ \0x09-\0x0d]*}    0         "\0xC3"           # \ts, \tS
 2       {t[Ss]}                0         "\0xC3"           # tse

 2 {\\S[Hh][Cc][Hh][ \0x09-\0x0d]*} 0     "\0xFD"
 2 {S[Hh][Cc][Hh]}              0         "\0xFD"           # Shcha

 2 {\\s[Hh][Cc][Hh][ \0x09-\0x0d]*} 0     "\0xDD"
 2 {s[Hh][Cc][Hh]}              0         "\0xDD"           # shcha

 2  {\\Cdprime[ \0x09-\0x0d]*}  0         "\0xFF"           # Tverdyj znak 
 2  {\\T[Zz][ \0x09-\0x0d]*}    0         "\0xFF"

 2  {\\Cprime[ \0x09-\0x0d]*}   0         "\0xF8"           # Myagkij znak 
 2  {\\M[Zz][ \0x09-\0x0d]*}    0         "\0xF8"

 2  {\\cdprime[ \0x09-\0x0d]*}  0         "\0xDF"           # tverdyj znak
 2  {\\tz[ \0x09-\0x0d]*}       0         "\0xDF"
 
 2  {\\cprime[ \0x09-\0x0d]*}   0         "\0xD8"           # myagkij znak
 2  {\\mz[ \0x09-\0x0d]*}       0         "\0xD8"

# Accented letters are accepted in accent-then-letter form, in braced form,
# and (where listed) as a named control sequence.
 2  {\\u[ \0x09-\0x0d]*I}       0         "\0xEA"          # I kratkoje 
 2  "\u{I}"                     0         "\0xEA"
 2  {\\[Uu]I[ \0x09-\0x0d]*}    0         "\0xEA"

 2  {\\u[ \0x09-\0x0d]*i}       0         "\0xCA"          # i kratkoje
 2  {\\ui[ \0x09-\0x0d]*}       0         "\0xCA"
 2  "\u{i}"                     0         "\0xCA"

 2  {\\`[ \0x09-\0x0d]*E}       0         "\0xFC"          # E oborotnoye
 2  "\`{E}"                     0         "\0xFC"

 2  {\\`[ \0x09-\0x0d]*e}       0         "\0xDC"          # e oborotnoye
 2  "\`{e}"                     0         "\0xDC"

 2 {\\K[Hh][ \0x09-\0x0d]*}     0         "\0xE8"           
 2 {K[Hh]}                      0         "\0xE8"           # Kha

 2 {\\k[Hh][ \0x09-\0x0d]*}     0         "\0xC8"           
 2 {k[Hh]}                      0         "\0xC8"           # kha

 2 {\\T[Cc][Hh][ \0x09-\0x0d]*} 0         "\0xFE"
 2 {\\C[Hh][ \0x09-\0x0d]*}     0         "\0xFE"
 2 {C[Hh]}                      0         "\0xFE"           # Che


 2 {\\S[Hh][ \0x09-\0x0d]*}     0         "\0xFB"           
 2 {S[Hh]}                      0         "\0xFB"           # Sha

 2 {\\c[Hh][ \0x09-\0x0d]*}     0         "\0xDE"           
 2 {\\t[Cc][Hh][ \0x09-\0x0d]*} 0         "\0xDE"
 2 {c[Hh]}                      0         "\0xDE"           # che

 2 {\\s[Hh][ \0x09-\0x0d]*}     0         "\0xDB"           
 2 {s[Hh]}                      0         "\0xDB"           # sha

 2 {\\Z[Hh][ \0x09-\0x0d]*}     0         "\0xF6"           
 2 {Z[Hh]}                      0         "\0xF6"           # Zhe

 2 {\\z[Hh][ \0x09-\0x0d]*}     0         "\0xD6"           
 2 {z[Hh]}                      0         "\0xD6"           # zhe

 2 {\\Y[Uu][ \0x09-\0x0d]*}     0         "\0xE0"           
 2 {Y[Uu]}                      0         "\0xE0"           # Yu

 2 {\\Y[Aa][ \0x09-\0x0d]*}     0         "\0xF1"           
 2 {Y[Aa]}                      0         "\0xF1"           # Ya

 2 {\\y[Uu][ \0x09-\0x0d]*}     0         "\0xC0"           
 2 {y[Uu]}                      0         "\0xC0"           # yu

 2 {\\y[Aa][ \0x09-\0x0d]*}     0         "\0xD1"           
 2 {y[Aa]}                      0         "\0xD1"           # ya

 2 {\\"[ \0x09-\0x0D]*e}        0         "\0xA3"           # small \"e (yo)
 2 "\\0o42{e}"                  0         "\0xA3"
 2 {\\y[Oo][ \0x09-\0x0D]*}     0         "\0xA3"

 2 {\\"[ \0x09-\0x0D]*E}        0         "\0xB3"           # capital \"E (Yo)
 2 "\\0o42{E}"                  0         "\0xB3"
 2 {\\Y[Oo][ \0x09-\0x0D]*}     0         "\0xB3"

# \cydot, with any whitespace after it, produces no output. It is listed
# after the multi-letter sequences above.
 2  {\\cydot[ \0x09-\0x0d]*}    0         ""                #\cydot out

    2       "H"                 0         "\0xE8"           # Kha (same code as KH)
    2       "h"                 0         "\0xC8"           # kha
    2       "W"                 0         "\0xFD"           # Shcha (same code as SHCH)
    2       "w"                 0         "\0xDD"           # shcha
    2       "X"                 0         "\0xFB"           # Sha (same code as SH)
    2       "x"                 0         "\0xDB"           # sha

# Single Latin letters inside {\cyr ...}: one KOI8 byte each, capitals
# first, then small letters. These come after all multi-letter sequences.
    2       "A"                 0         "\0xE1"           
    2       "B"                 0         "\0xE2"           
    2       "V"                 0         "\0xF7"           
    2       "G"                 0         "\0xE7"           
    2       "D"                 0         "\0xE4"           
    2       "E"                 0         "\0xE5"           
    2       "Z"                 0         "\0xFA"           
    2       "I"                 0         "\0xE9"           
    2       "K"                 0         "\0xEB"           
    2       "L"                 0         "\0xEC"           
    2       "M"                 0         "\0xED"           
    2       "N"                 0         "\0xEE"           
    2       "O"                 0         "\0xEF"           
    2       "P"                 0         "\0xF0"           
    2       "R"                 0         "\0xF2"           
    2       "S"                 0         "\0xF3"           
    2       "T"                 0         "\0xF4"           
    2       "U"                 0         "\0xF5"           
    2       "F"                 0         "\0xE6"           
    2       "C"                 0         "\0xE3"           
    2       "Y"                 0         "\0xF9"           
    2       "a"                 0         "\0xC1"           
    2       "b"                 0         "\0xC2"           
    2       "v"                 0         "\0xD7"           
    2       "g"                 0         "\0xC7"           
    2       "d"                 0         "\0xC4"           
    2       "e"                 0         "\0xC5"           
    2       "z"                 0         "\0xDA"           
    2       "i"                 0         "\0xC9"           
    2       "k"                 0         "\0xCB"           
    2       "l"                 0         "\0xCC"           
    2       "m"                 0         "\0xCD"           
    2       "n"                 0         "\0xCE"           
    2       "o"                 0         "\0xCF"           
    2       "p"                 0         "\0xD0"           
    2       "r"                 0         "\0xD2"           
    2       "s"                 0         "\0xD3"           
    2       "t"                 0         "\0xD4"           
    2       "u"                 0         "\0xD5"           
    2       "f"                 0         "\0xC6"           
    2       "c"                 0         "\0xC3"           
    2       "y"                 0         "\0xD9"           

# Trash {}
# NOTE(review): the heading names both braces, but only the opening brace
# has an entry here - confirm whether an entry for the closing brace is
# intended or was lost.
    0       "{"                 0         ""
# Modified: Tue Feb 15 17:00:00 1994 GMT
# Page accessed 4779 times since Sat Apr 17 21:33:34 1999 GMT