jonlabelle / crlf.py
#!/usr/bin/env python
"""Replace line breaks, from one format to another."""
from __future__ import print_function

import argparse
import errno
import glob
import os
import sys
import tempfile
from stat import ST_ATIME, ST_MTIME
LF = ‘ \n ‘ |
CRLF = ‘ \r \n ‘ |
CR = ‘ \r ‘ |
def _normalize_line_endings ( lines , line_ending = ‘unix’ ): |
r»»»Normalize line endings to unix (\n), windows (\r\n) or mac (\r). |
:param lines: The lines to normalize. |
:param line_ending: The line ending format. |
Acceptable values are ‘unix’ (default), ‘windows’ and ‘mac’. |
:return: Line endings normalized. |
«»» |
lines = lines . replace ( CRLF , LF ). replace ( CR , LF ) |
if line_ending == ‘windows’ : |
lines = lines . replace ( LF , CRLF ) |
elif line_ending == ‘mac’ : |
lines = lines . replace ( LF , CR ) |
return lines |
def _copy_file_time ( source , destination ): |
«»»Copy one file’s atime and mtime to another. |
:param source: Source file. |
:param destination: Destination file. |
«»» |
file1 , file2 = source , destination |
try : |
stat1 = os . stat ( file1 ) |
except os . error : |
sys . stderr . write ( file1 + ‘ : cannot stat \n ‘ ) |
sys . exit ( 1 ) |
try : |
os . utime ( file2 , ( stat1 [ ST_ATIME ], stat1 [ ST_MTIME ])) |
except os . error : |
sys . stderr . write ( file2 + ‘ : cannot change time \n ‘ ) |
sys . exit ( 2 ) |
def _create_temp_file ( contents ): |
«»»Create a temp file. |
:param contents: The temp file contents. |
:return: The absolute path of the created temp file. |
«»» |
tf = tempfile . NamedTemporaryFile ( mode = ‘wb’ , suffix = ‘txt’ , delete = False ) |
tf . write ( contents ) |
tf . close () |
return tf . name |
def _delete_file_if_exists ( filepath ): |
«»»Delete the file if it exists. |
:param filepath: The file path. |
«»» |
if os . path . exists ( filepath ): |
os . remove ( filepath ) |
def _read_file_data ( filepath ): |
«»»Read file data. |
:param filepath: The file path. |
:return: The file contents. |
«»» |
data = open ( filepath , ‘rb’ ). read () |
return data |
def _write_file_data ( filepath , data ): |
«»»Write file data. |
:param filepath: The file path. |
:param data: The data to write. |
«»» |
f = open ( filepath , ‘wb’ ) |
f . write ( data ) |
f . close () |
def main():
    """Parse the command line and convert line endings of the matched files.

    Each positional argument is expanded as a glob pattern. Directories and
    files containing a NUL byte are skipped. Exit statuses issued directly
    by this function: 2 for usage problems (no arguments, both --unix and
    --windows given, no files matched) and 1 when a file cannot be processed.
    """
    parser = argparse.ArgumentParser(
        prog='crlf',
        description='Replace CRLF (windows) line endings with LF (unix) '
                    'line endings in files, and vice-versa')
    parser.add_argument(
        '-q', '--quiet',
        help='suppress descriptive messages from output',
        action='store_true',
        default=False)
    parser.add_argument(
        '-n', '--dryrun',
        help='show changes, but do not modify files',
        action='store_true',
        default=False)
    parser.add_argument(
        '-w', '--windows',
        help='replace LF (unix) line endings with CRLF (windows) line endings',
        action='store_true',
        default=False)
    parser.add_argument(
        '-u', '--unix',
        help='replace CRLF (windows) line endings with LF (unix) '
             'line endings (default)',
        action='store_true',
        default=False)
    parser.add_argument(
        '-t', '--timestamps',
        help="maintains the modified file's time stamps (atime and mtime)",
        action='store_true',
        default=False)
    parser.add_argument(
        'files',
        nargs='+',
        help="a list of files or file glob patterns to process",
        default='.')

    # Show the full help (not just the usage error) when run bare.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.windows and args.unix:
        sys.stderr.write("Ambiguous options specified, 'unix' and 'windows'. "
                         "Please choose one option, or the other.\n")
        sys.exit(2)

    verbose = not args.quiet
    line_ending = 'windows' if args.windows else 'unix'

    files_to_process = []
    for arg_file in args.files:
        files_to_process.extend(glob.glob(arg_file))

    if not files_to_process:
        if verbose:
            sys.stderr.write('No files matched the specified pattern.\n')
        sys.exit(2)

    if args.dryrun and verbose:
        print('Dry-run only, files will NOT be modified.')

    for file_to_process in files_to_process:
        if os.path.isdir(file_to_process):
            if verbose:
                print("- '{0}' : is a directory (skip)".format(file_to_process))
            continue

        if not os.path.isfile(file_to_process):
            sys.stderr.write("- '{0}' : file not found\n".format(file_to_process))
            sys.exit(1)

        data = _read_file_data(file_to_process)

        # A NUL byte is taken as the marker of a binary file. The data was
        # read in binary mode, so the probe must be a bytes literal.
        if b'\0' in data:
            if verbose:
                print("- '{0}' : is a binary file (skip)".format(file_to_process))
            continue

        new_data = _normalize_line_endings(data, line_ending=line_ending)

        if new_data == data:
            if verbose:
                if args.windows:
                    print("- '{0}' : line endings already CRLF (windows)".format(file_to_process))
                else:
                    print("- '{0}' : line endings already LF (unix)".format(file_to_process))
            continue

        if verbose:
            if args.windows:
                if args.dryrun:
                    print("+ '{0}' : LF would be replaced with CRLF".format(file_to_process))
                else:
                    print("+ '{0}' : replacing LF with CRLF".format(file_to_process))
            else:
                if args.dryrun:
                    print("+ '{0}' : CRLF would be replaced with LF".format(file_to_process))
                else:
                    print("+ '{0}' : replacing CRLF with LF".format(file_to_process))

        if args.dryrun:
            continue

        tmp_file_path = ""
        try:
            if args.timestamps:
                # create a temp file with the original file
                # contents and copy the old file's atime and mtime
                tmp_file_path = _create_temp_file(data)
                _copy_file_time(file_to_process, tmp_file_path)

            # overwrite the current file with the modified contents
            _write_file_data(file_to_process, new_data)

            if args.timestamps:
                # copy the original file's atime and mtime back to
                # the original file w/ the modified contents
                _copy_file_time(tmp_file_path, file_to_process)
        except Exception as ex:
            sys.stderr.write('error : {0}\n'.format(str(ex)))
            sys.exit(1)
        finally:
            # Always remove the temp file, including on the error path.
            if tmp_file_path:
                _delete_file_if_exists(tmp_file_path)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.
Convert Unix line endings to Windows
I recently moved back to Windows from Linux. I have some files with CRLFs, some with LFs and some that are mixed. Is there a utility that will help me find all my Unix-touched files and convert them to proper CRLF terminated files?
The utility must run on Windows, not Linux. I have already moved. I’d rather not install Cygwin if I can avoid it.
14 Answers 14
You can convert them with the unix2dos utility on your Linux platform. There are unix2dos versions available for Windows as well.
If you have Perl installed you can also use this one liner:
Here is an easy and quick way.
Drag and drop the text file into Chrome (I don’t know about other browsers) and then cut and paste back into the original file 🙂
The one I found best for recursively going through folders, allowing file filters and allowing a simple search for «\r\n» and replacing it with just «\n» was Notepad++.
Notepad++ is one of the best free, open source notepad programs for Windows. It is very simple and powerful. It handled the line ending search/replace just fine. A contractor checked a bunch of .c and .h files into our repository with Linux \r\n line endings, but since most people have standardized on Windows/Eclipse build tools, the files won’t build until the line endings are converted.
For example: sfk addcr -dir . -file .txt -norec
changes LF endings into CR/LF for Windows, on all .txt files of the current directory, but NOT within subdirectories (no recursion).
But this program does a lot more than just that.
On Cygwin, you can convert between Unix and «DOS» AKA Windows files using two built-in utilities:
Convert to DOS CR/LF format:
Convert back to Unix LF format:
The file is left in place with the same name.
I’m going to throw this solution out there. Git will do this. See this post about it
So theoretically you could do this to convert an entire tree
Change crlf to lf if you want to go the other way. NOTE: you’re not done yet, keep reading
Type git status to see which files will be affected. You might have to add lines like
etc. to .gitattributes to avoid converting certain files. You can also explicitly mark certain files as text
Then just repeat these 2 lines after you’ve edited .gitattributes
Then use git status again to see which files will be changed. When you’re sure all the files you want affected are listed by git status then commit
now check all the files out again
They should now have whatever your desired line endings are
** NOTE: If you were already using git, skip the first 3 git commands. If you were not using git, you can now delete the .gitattributes file and the .git folder.
** Back up your files: git rm --cached -r deletes them all (although they are theoretically in your git repo (the .git folder), which is how they get restored by the last command, git reset --hard). It’s just that since files are getting deleted, it’s probably best to back them up.
Hackaday
In what is probably this century’s greatest advancement in technology, Windows Notepad now supports Unix line endings. This is it, people. Where were you when Kennedy was assassinated? Where were you when Neil Armstrong set foot on the moon? Where were you when Challenger blew up? Where are you now?
Previously, Windows Notepad only supported Windows End of Line Characters — a Carriage Return (CR) and Line Feed (LF). Unix text documents use LF for line endings, and Macs use CR for line endings. The end result of this toppling of the Tower of Babel for End of Line characters is a horrific mess; Windows users can’t read Unix text files in Notepad, and everything is just terrible. Opening a Unix text file in Windows produces a solid block of text without any whitespace. Opening a Windows text file in anything else puts little rectangles at the end of each line.
Starting with the current Windows 10 Insider build, Notepad now supports Unix line endings, Macintosh line endings, and Windows line endings. Rejoice, the greatest problem in technology has now been solved.
141 thoughts on “ Windows Notepad Now Supports Unix Line Endings ”
The future is now.
“We have seen the enemy, and they are us.”
Why do people even mess with this when Notepad++ exists?
Installing Notepad++ has been part of my “make a fresh Windows install usable” ritual for years.
That’s a bit heavy, but there is also notepad2 and notepad2-mod if you prefer, for the lighter use.
My personal favorite. Still a pain in a pinch on a computer that’s not yours though. About damn time
UNIX line endings are not LF. That’s just a default. The UNIX line ending is “nl” or newline (‘\n’ in printf): it’s whatever character was defined as “newline” by the stty command. ASCII contains no ‘newline’ character. LF was just convenient and intuitive for a default.
This dates back to text terminals and special character meanings, all of which can be mapped to other values. Thus demonstrating even more deeply how flawed Windows (but not its predecessor VMS!) have always been. At least VMS knew about special character mapping.
I found it interesting to see an article about windows notepad now supporting *nix line endings and then using the windows terminology [CR] [LF] instead of the *nix terminology \r \n
The association is like this –
[CR] Carriage Return => \r return
[LF] Line feed => \n new line
in web development this was solved long ago, to convert anything to *nix –
$string = str_replace("\r\n", "\r", $string); $string = str_replace("\r", "\n", $string);
which can be simplified to –
str_replace("\r", "\n", str_replace("\r\n", "\r", $string));
from there you can convert to the other standards easily if needed and without any double line spacing.
I use metapad instead of notepad. Another favorite is Crimson Editor (has a new name now) because you often need to first find the text to be edited and Crimson Editor has search within text, search within file and search within directory.
A line feed is impossible to add to the ASCII standard, yet hundreds of emojis are added every year. Totally makes sense!
Line feed was in the ASCII standard from the beginning. “New line” could not be added because ASCII was a 7 bit code, and all 128 codes were assigned by the time the concept of newline was brought up. Unicode gets crap added to it all the time because that’s a 32 bit code, so there are over two billion “code points” available. Going fast, I’m guessing.
“Rejoice, the greatest problem in technology has now been solved.”
Two spaces after period is better…maybe.
You are wrong about spaces: https://xkcd.com/1989/
Holy crap that sounds like pseudoscience! I can’t believe people have studied this.
Annals of Improbable Research
In what way is that pseudoscience? And why shouldn’t such things be studied given that they may improve the lives of people?
It may be a flawed experiment but that doesn’t make it fake science – just (potentially) incomplete.
Is this a wonderful time to be alive or what.
First, they came for my backslashes, and I did not complain….
“The biggest problem in technology has been solved!”
For a second, I thought that meant that we all agree on how many spaces a tab character represents.
Disagree. See, not solved.
But 42 is the ultimate answer to life, the universe, and everything; therefore, it must in some way be correct… from a certain perspective at least.
ASCII character 42 is *. So 42 really does mean everything.
That’s gold Conrad
Shouldn’t matter because they should be converted to spaces 🙂
Do that in a Makefile and see how it works for you.
Exactly! Tabs should represent levels of indentation. 1-tab per level of indent. That’s perfect! How much whitespace I want to see per level of indentation is my business and how much you want to see is yours. We can configure that in our editors’ preferences and each see the same source code in the way we want to see it.
I grew up when monitors and resolution were much smaller. Huge indents meant line wrapping and nobody wanted that. We didn’t have wide screens for coding, those were for watching movies! In college my Computer Science professors varied, some wanted 2 spaces per indent, some wanted 3. That’s it, no more than 3! I preferred 3 because that is also how far I was taught to indent the beginning of a new paragraph in English class. I like consistency.
Kids these days like huge indents with large numbers of spaces. That’s fine for you if you like it. I find that incredibly distracting when I try to read the code. All that left/right movement makes me tend to lose my place. I know some people have told me that they find my narrower indents harder to see. Well.. different people see things differently! That’s why we need to use tabs!
But… all the style guides these days push spaces instead of tabs for indents. WTF. With tabs we can ALL have our way! Just configure your editor to display tabs the way YOU like them and leave ME alone about it. That is a way better solution than trying to figuratively club everyone over the head until they agree to use YOUR favorite number of space characters.
Come on, quit being A@@H0l3s!
Ok… Sorry… End Rant
I used to think as you do, and for the same reasons. I’ve recently decided to throw in the towel and use spaces for indentation. If your editor can display tabs as your preferred amount of horizontal space, it should be able to scan the code, figure out the number of spaces used per level of indentation, and display it the same as if tabs had been used (as $DEITY intended). I suppose some unholy mixed-tabs-and-spaces files will defeat the indentation heuristic.
I’ve resolved to write plugins or modify editor code to accomplish this, if necessary, but so far I am able to just deal with it.
Don’t even get me started on where some of these heathens put their braces.
That might be a solution but it’s a difficult and brittle solution to a problem that was already solved by all our keyboards having a TAB key.
It requires you or someone to write plugins.
It only works for text editors for which such plugins have been written.
It will fail if people type the wrong number of spaces.
This solution describes displaying code. How does one write it? Do you still press the space bar x number of times at the beginning of each line? What does the plugin do while the user is entering those spaces?
How does it react to space characters elsewhere in the code. Maybe there are space characters inside string literals for example. How do those display? What is it like to edit them?
You obviously don’t get the problem. What if spaces and tabs are mixed? You may not do that but in a multi-user environment that is a real problem.
That is a really common and yet stupid excuse to use spaces instead of tabs.
Obviously you do the same thing that you do with code that has the incorrect number of spaces. Honestly it’s just another variation of the same problem!
What do you do when your standard is to use 25 spaces per indent (cause that seems to be the direction things are headed) and some dummy submits a million lines of code with only 15? If it’s a workplace the manager reminds them of the company policy and they have to fix it. If it’s an open source project then it depends on how desperate the maintainer is to attract coders. They either painstakingly fix it all or they deny the pull request with a message to go read the f’n style guide.
Switching away from using tabs does nothing to fix the problem of people not doing what they are supposed to.
Maybe I am being a little unfair. If your tab character width matches that of the dummy that mixed in spaces then the problem might go unnoticed. Well.. if someone else has a different tab width it can be dealt with once they notice it. If not… well.. do you enjoy worrying about problems when nobody even notices them? If a tree falls in the woods… or if a problem goes unnoticed is it really even a problem?
How about a text editor plugin that slightly shades tabs? Or, better yet, maybe it brightly flags space characters that occur somewhere within a region of whitespace that is on the beginning edge of a line? Then you know when somebody is a jerk right away and can deal with it.
Or… if you discover that mixed indentation has been piling up for the last 10 years unnoticed because everyone used the same tab width just run it through a code beautifier.
Or… can a script be added to git that either fixes, flags or bounces crappy pull requests that include space indented lines?
If only editors had a search and replace eh.
Ramen. And yeah, if only the placement of those curly braces had some consistency as well. I prefer them in the older style of putting the brace on the next line. I don’t really understand the argument of having it on the same line as the class/method/branch definition. Particularly because there is no consistency on where to break that line. Is everyone else still stuck on 80 columns?
I’ve always considered this a moot dispute. What improves readability for some people can have the opposite effect for others.
I’m dyslexic so I use “whitesmiths” format so that I can quickly scroll through indentation levels to find the return from one level.
https://en.wikipedia.org/wiki/Indentation_style
If someone doesn’t like it then I just say – well can you code, if you can then just change it to what you like, it’s just a string of text
So did windows 10 decide to go with emacs or vim?