jonlabelle / crlf.py
#!/usr/bin/env python
"""Replace line breaks, from one format to another."""
from __future__ import print_function

import argparse
import errno
import glob
import os
import sys
import tempfile
from stat import ST_ATIME, ST_MTIME
# Line-ending sequences for the three supported formats.
LF = '\n'      # unix
CRLF = '\r\n'  # windows
CR = '\r'      # mac
def _normalize_line_endings(lines, line_ending='unix'):
    r"""Normalize line endings to unix (\n), windows (\r\n) or mac (\r).

    Accepts either a byte string or a text string and returns a value of
    the same type.

    :param lines: The lines to normalize.
    :param line_ending: The line ending format.
        Acceptable values are 'unix' (default), 'windows' and 'mac'.
        Any other value leaves the result with unix line endings.
    :return: Line endings normalized.
    """
    # Pick separators of the same type as the input: file contents are read
    # in binary mode, and on Python 3 bytes.replace() rejects str arguments.
    if isinstance(lines, (bytes, bytearray)):
        lf, crlf, cr = b'\n', b'\r\n', b'\r'
    else:
        lf, crlf, cr = u'\n', u'\r\n', u'\r'

    # Collapse every variant to LF first (CRLF before lone CR, so a CRLF pair
    # does not turn into two line breaks), then expand to the target format.
    lines = lines.replace(crlf, lf).replace(cr, lf)
    if line_ending == 'windows':
        lines = lines.replace(lf, crlf)
    elif line_ending == 'mac':
        lines = lines.replace(lf, cr)
    return lines
def _copy_file_time(source, destination):
    """Copy one file's atime and mtime to another.

    On failure a message is written to stderr and the process exits:
    with status 1 when ``source`` cannot be stat'ed, with status 2 when the
    times of ``destination`` cannot be changed.

    :param source: Source file.
    :param destination: Destination file.
    """
    try:
        source_stat = os.stat(source)
    except OSError:
        sys.stderr.write(source + ' : cannot stat\n')
        sys.exit(1)

    try:
        # Use the float attributes rather than the integer tuple fields
        # (stat[ST_ATIME] / stat[ST_MTIME]) so that sub-second precision
        # is carried over to the destination.
        os.utime(destination, (source_stat.st_atime, source_stat.st_mtime))
    except OSError:
        sys.stderr.write(destination + ' : cannot change time\n')
        sys.exit(2)
def _create_temp_file(contents):
    """Create a temp file.

    The file is created with ``delete=False``, so it stays on disk after this
    function returns; the caller is responsible for removing it.

    :param contents: The temp file contents (written in binary mode).
    :return: The absolute path of the created temp file.
    """
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(mode='wb', suffix='txt', delete=False) as tf:
        tf.write(contents)
    return tf.name
def _delete_file_if_exists(filepath):
    """Delete the file if it exists.

    A missing file (or an empty path) is silently ignored; any other
    failure to remove the file is raised to the caller as ``OSError``.

    :param filepath: The file path.
    """
    if not filepath:
        return
    try:
        # Attempt the removal directly instead of checking for existence
        # first, so the file vanishing in between cannot cause an error.
        os.remove(filepath)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
def _read_file_data(filepath):
    """Read file data.

    :param filepath: The file path.
    :return: The file contents, read in binary mode.
    """
    # Context manager so the handle is closed promptly instead of leaking.
    with open(filepath, 'rb') as f:
        return f.read()
def _write_file_data(filepath, data):
    """Write file data.

    The file is opened in binary mode and truncated before writing.

    :param filepath: The file path.
    :param data: The data to write.
    """
    # Context manager so the handle is closed even if the write fails.
    with open(filepath, 'wb') as f:
        f.write(data)
def main():
    """Parse the command line and convert line endings of the given files.

    Each positional argument is expanded as a glob pattern. Directories and
    files containing a NUL byte (treated as binary) are skipped. Files are
    converted to LF unless ``--windows`` is given, in which case they are
    converted to CRLF.

    Exit status: 2 for usage problems (no arguments, both ``--unix`` and
    ``--windows``, or no file matched); 1 when a matched path is neither a
    file nor a directory, or when rewriting a file fails.
    """
    parser = argparse.ArgumentParser(
        prog='crlf',
        description='Replace CRLF (windows) line endings with LF (unix) '
                    'line endings in files, and vice-versa')

    parser.add_argument(
        '-q', '--quiet',
        help='suppress descriptive messages from output',
        action='store_true',
        default=False)

    parser.add_argument(
        '-n', '--dryrun',
        help='show changes, but do not modify files',
        action='store_true',
        default=False)

    parser.add_argument(
        '-w', '--windows',
        help='replace LF (unix) line endings with CRLF (windows) line endings',
        action='store_true',
        default=False)

    parser.add_argument(
        '-u', '--unix',
        help='replace CRLF (windows) line endings with LF (unix) '
             'line endings (default)',
        action='store_true',
        default=False)

    parser.add_argument(
        '-t', '--timestamps',
        help="maintains the modified file's time stamps (atime and mtime)",
        action='store_true',
        default=False)

    parser.add_argument(
        'files',
        nargs='+',
        help="a list of files or file glob patterns to process",
        default='.')

    # Invoked without any argument: show the full help rather than the terse
    # argparse usage error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.windows and args.unix:
        sys.stderr.write("Ambiguous options specified, 'unix' and 'windows'. "
                         "Please choose one option, or the other.\n")
        sys.exit(2)

    files_to_process = []
    for arg_file in args.files:
        files_to_process.extend(glob.glob(arg_file))

    if not files_to_process:
        if not args.quiet:
            sys.stderr.write('No files matched the specified pattern.\n')
        sys.exit(2)

    if args.dryrun and not args.quiet:
        print('Dry-run only, files will NOT be modified.')

    line_ending = 'windows' if args.windows else 'unix'

    for file_to_process in files_to_process:
        if os.path.isdir(file_to_process):
            if not args.quiet:
                print("- '{0}' : is a directory (skip)".format(file_to_process))
            continue

        if not os.path.isfile(file_to_process):
            sys.stderr.write("- '{0}' : file not found\n".format(file_to_process))
            sys.exit(1)

        data = _read_file_data(file_to_process)

        # The data is read in binary mode; a NUL byte marks the file as binary.
        if b'\0' in data:
            if not args.quiet:
                print("- '{0}' : is a binary file (skip)".format(file_to_process))
            continue

        new_data = _normalize_line_endings(data, line_ending=line_ending)

        if new_data == data:
            if not args.quiet:
                if args.windows:
                    print("- '{0}' : line endings already CRLF (windows)".format(file_to_process))
                else:
                    print("- '{0}' : line endings already LF (unix)".format(file_to_process))
            continue

        if not args.quiet:
            if args.windows:
                if args.dryrun:
                    print("+ '{0}' : LF would be replaced with CRLF".format(file_to_process))
                else:
                    print("+ '{0}' : replacing LF with CRLF".format(file_to_process))
            else:
                if args.dryrun:
                    print("+ '{0}' : CRLF would be replaced with LF".format(file_to_process))
                else:
                    print("+ '{0}' : replacing CRLF with LF".format(file_to_process))

        if args.dryrun:
            continue

        tmp_file_path = ""
        try:
            if args.timestamps:
                # create a temp file with the original file
                # contents and copy the old file's atime and mtime
                tmp_file_path = _create_temp_file(data)
                _copy_file_time(file_to_process, tmp_file_path)

            # overwrite the current file with the modified contents
            _write_file_data(file_to_process, new_data)

            if args.timestamps:
                # copy the original file's atime and mtime back to
                # the original file w/ the modified contents
                _copy_file_time(tmp_file_path, file_to_process)
        except Exception as ex:
            sys.stderr.write('error : {0}\n'.format(str(ex)))
            sys.exit(1)
        finally:
            # Runs on every exit path, including SystemExit raised by the
            # helpers, so the temp file never outlives the conversion.
            if tmp_file_path:
                _delete_file_if_exists(tmp_file_path)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.
Источник
Configuring Git to handle line endings
To avoid problems in your diffs, you can configure Git to properly handle line endings.
About line endings
Every time you press return on your keyboard you insert an invisible character called a line ending. Different operating systems handle line endings differently.
When you’re collaborating on projects with Git and GitHub, Git might produce unexpected results if, for example, you’re working on a Windows machine, and your collaborator has made a change in macOS.
You can configure Git to handle line endings automatically so you can collaborate effectively with people who use different operating systems.
Global settings for line endings
The git config core.autocrlf command is used to change how Git handles line endings. It takes a single argument.
On macOS, you simply pass `input` to the configuration. For example:

    $ git config --global core.autocrlf input
On Windows, you simply pass `true` to the configuration. For example:

    $ git config --global core.autocrlf true
On Linux, you simply pass `input` to the configuration. For example:

    $ git config --global core.autocrlf input
Optionally, you can configure a .gitattributes file to manage how Git reads line endings in a specific repository. When you commit this file to a repository, it overrides the core.autocrlf setting for all repository contributors. This ensures consistent behavior for all users, regardless of their Git settings and environment.
The .gitattributes file must be created in the root of the repository and committed like any other file.
A .gitattributes file looks like a table with two columns:
- On the left is the file name for Git to match.
- On the right is the line ending configuration that Git should use for those files.
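Here’s an example .gitattributes file. You can use it as a template for your repositories:

    # Set the default behavior, in case people don't have core.autocrlf set.
    * text=auto

    # Explicitly declare text files you want to always be normalized and converted
    # to native line endings on checkout.
    *.c text

    # Declare files that will always have CRLF line endings on checkout.
    *.sln text eol=crlf

    # Denote all files that are truly binary and should not be modified.
    *.png binary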
You’ll notice that files are matched— *.c , *.sln , *.png —, separated by a space, then given a setting— text , text eol=crlf , binary . We’ll go over some possible settings below.
text=auto Git will handle the files in whatever way it thinks is best. This is a good default option.
text eol=crlf Git will always convert line endings to CRLF on checkout. You should use this for files that must keep CRLF endings, even on OSX or Linux.
text eol=lf Git will always convert line endings to LF on checkout. You should use this for files that must keep LF endings, even on Windows.
binary Git will understand that the files specified are not text, and it should not try to change them. The binary setting is also an alias for -text -diff .
Refreshing a repository after changing line endings
When you set the core.autocrlf option or commit a .gitattributes file, you may find that Git reports changes to files that you have not modified. Git has changed line endings to match your new configuration.
To ensure that all the line endings in your repository match your new configuration, backup your files with Git, delete all files in your repository (except the .git directory), then restore the files all at once.
- Save your current files in Git, so that none of your work is lost.
- Add all your changed files back and normalize the line endings.
- Show the rewritten, normalized files.
- Commit the changes to your repository.
Источник
Why Windows and Linux line endings don’t line up (and how to fix it)
I recently wrote a few automated database-populating scripts. Specifically, I am running Microsoft SQL Server in a container in a Kubernetes cluster—okay, it’s Red Hat OpenShift, but it’s still Kubernetes. It was all fun and games until I started mixing Windows and Linux; I was developing on my Windows machine, but obviously the container is running Linux. That’s when I got the gem of an error shown in Figure 1. Well, not so much an error as errant output.
What in the world? Here’s the CSV data I used to populate the table:
Here’s the T-SQL code I used for the same purpose:
What is going on here?
TL;DR: Line endings
It’s the line endings. They are the issue.
Specifically, Windows and Linux handle line endings differently. To understand why, we need to go back a ways in history.
ASDFJKL
Ever use a manual typewriter? Okay, okay ... enough of the "That’s old!" jokes. Figure 2 illustrates.
The typewriter mechanism that holds the rubber cylinder is called the carriage because it carries the paper. (That rubber cylinder is technically known as a platen, but stay with me as I employ poetic license and use «carriage.»)
As you type, the carriage moves to the left. When you reach the edge of the paper, you use the big lever on the far left to return the carriage to the starting position; that is, you perform a carriage return. In addition, as the lever moves, it advances the paper up one line, which is known as a line feed.
When you do both movements, you get «carriage return plus line feed,» sometimes abbreviated to CRLF or CR/LF. You can move the carriage without feeding one line, and you can advance one line without moving the carriage. They are two distinct and separate actions, but anyone who has mastered the manual typewriter knows that they are typically done in one, swift, soulful, and athletic motion, akin to desktop gymnastics of the highest order. (Please excuse more poetic license as I romanticize about typing.)
Teletype
Meanwhile, over in the world of automation, the Teletype machine became very popular. This allowed the transmission of text around the world, across telephone lines. But long distance calls were expensive, so minimizing the time and data sent was paramount. So, it was decided that one and only one character would be used for a carriage return and line feed, the so-called new line character. You see it as "\n" in code. You paid for every byte, back then, so cutting costs was important.
We’re talking about 300 baud modems here, folks. Just think about that; 300 bits per second; three hundred. Now, we want gigabits everywhere.
Back to line endings
The reasons don’t matter: Windows chose the CR/LF model, while Linux uses the \n model. So, when you create a file on one system and use it on the other, hilarity ensues. Or, in this case, two hours of debugging ending in madness and me contemplating a new career in woodworking.
Quick fix for Linux and Windows line endings
The quick fix for those incompatible line endings was very simple: I altered my T-SQL to include the ROWTERMINATOR specification, like this:
That works when uploading my CSV from my Windows machine. When uploading from my Linux machine, I use the following, where the ROWTERMINATOR is the simple new line character:
Simple, but unless you know about it, you either get weird results or some seemingly unrelated error messages. So, be advised. For example, if I try to use the Windows-specific command (where ROWTERMINATOR is "\r\n") in my Linux environment, I get the following error:
What does it all mean?
The upshot is this: You might see some hiccups and weird behavior when you use a file in both Windows and Linux. Just be aware of it and you’ll be fine.
Visit my GitHub repository NetCandyStore for all of the code referenced in this article.
Источник