2008/02/09

Delete duplicate rows without disturbing the original order

這是在ptt的linux版看到的討論串,還蠻有趣的,有許多不同的語言作法。

問題
$ cat file.txt
00120
00345
00345
00567
00789
00789

請問如何將重複的列刪除,變成下面這樣:
00120
00345
00567
00789


Linux command (by kenduest)
# Number every line, drop duplicates by line content, restore the original
# order by line number, then strip the numbers again.
#  - The de-duplicating sort compares the text byte-wise (no -n), so "00120"
#    and "120" stay distinct and non-numeric lines are not collapsed into one.
#  - -s keeps equal lines in input order, so -u retains the first occurrence.
#  - cut -f 2- keeps lines that themselves contain tab characters intact.
cat -n test.txt | LC_ALL=C sort -s -u -k 2 | sort -n -k 1,1 | cut -f 2-
Bash (by FourDollars)
#!/usr/bin/env bash
# Print file.txt with duplicate lines removed, keeping the first occurrence
# of every line and the original order.

declare -a db        # lines already printed, in order of first appearance
declare -i index=0   # next free slot in db

# IFS= and -r keep leading/trailing whitespace and backslashes untouched.
# "|| [[ -n $line ]]" also processes a last line that has no trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
    seen=0
    # Exact whole-line string comparison; no regex or substring matching.
    for prev in "${db[@]}"; do
        if [[ $prev == "$line" ]]; then
            seen=1
            break
        fi
    done
    if (( ! seen )); then
        db[index++]=$line
        # printf instead of echo so lines such as "-n" are printed verbatim.
        printf '%s\n' "$line"
    fi
done < file.txt
Perl (by OuTian)
# Print a line only the first time it is seen; %seen counts occurrences.
cat test.txt | perl -ne '$seen{$_}++ or print'
C (by HZYSoft)
/* File: remove_dup.c: Remove duplicated string in a file.
Author: PCMan (C) 2008.02.08
License: GNU GPL V2 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Read one complete line of any length from f.
   On success stores a malloc()ed, NUL-terminated string in *out and
   returns 1; the string always ends in '\n' (one is appended when the last
   line of the file lacks it, so it compares equal to its duplicates and can
   never be glued to another line on output).
   Returns 0 at end of file and -1 on a read error or when out of memory;
   *out is NULL in both cases. */
static int read_line( FILE *f, char **out ) {
    char chunk[1024];
    char *line = NULL, *grown;
    size_t len = 0, chunk_len;

    *out = NULL;
    while( fgets( chunk, sizeof(chunk), f ) ) {
        chunk_len = strlen( chunk );
        /* +2: room for a possibly appended '\n' plus the terminating NUL */
        grown = realloc( line, len + chunk_len + 2 );
        if( !grown ) {
            free( line );
            return -1;
        }
        line = grown;
        memcpy( line + len, chunk, chunk_len + 1 );
        len += chunk_len;
        if( len > 0 && line[len - 1] == '\n' ) {
            *out = line;
            return 1;
        }
    }
    if( ferror( f ) ) {
        free( line );
        return -1;
    }
    if( len == 0 ) {            /* plain end of file, nothing pending */
        free( line );
        return 0;
    }
    line[len] = '\n';           /* terminate the unterminated last line */
    line[len + 1] = '\0';
    *out = line;
    return 1;
}

/* Remove duplicated lines from the file named by argv[1], rewriting the
   file in place. The first occurrence of each line is kept and the
   original order is always preserved (no KEEP_ORDER build option needed).
   Returns 0 on success and 1 on a usage, I/O or memory error; the file is
   only reopened for writing after it has been read completely. */
int main( int argc, char** argv ) {
    char **strv = NULL, **grown, *line;
    size_t n = 0, cap = 0, kept = 0, i, j;
    FILE *f;
    int rc, status = 1;

    if( argc < 2 ) {
        fprintf( stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "remove_dup" );
        return 1;
    }
    if( ! (f = fopen( argv[1], "r" )) ) {
        perror( argv[1] );
        return 1;
    }

    while( (rc = read_line( f, &line )) > 0 ) {
        if( n == cap ) {
            cap += 1024;
            /* size is in bytes: element count times pointer size */
            grown = realloc( strv, cap * sizeof *strv );
            if( !grown ) {
                free( line );
                rc = -1;
                break;
            }
            strv = grown;
        }
        strv[n++] = line;
    }
    fclose( f );
    if( rc < 0 ) {
        goto cleanup;           /* leave the file untouched */
    }

    /* Compact in place: strv[0..kept) holds the unique lines found so far,
       in their original order. A line is appended only when it matches none
       of them, so no elements ever have to be shifted. */
    for( i = 0; i < n; ++i ) {
        for( j = 0; j < kept; ++j ) {
            if( 0 == strcmp( strv[j], strv[i] ) ) {
                break;
            }
        }
        if( j < kept ) {
            free( strv[i] );    /* duplicate of an earlier line */
        }
        else {
            strv[kept++] = strv[i];
        }
    }
    n = kept;                   /* slots past kept are stale from here on */

    if( ! (f = fopen( argv[1], "w" )) ) {
        perror( argv[1] );
        goto cleanup;
    }
    status = 0;
    for( i = 0; i < n; ++i ) {
        if( fputs( strv[i], f ) == EOF ) {
            status = 1;
            break;
        }
    }
    /* fclose flushes buffered output, so its result matters as well */
    if( fclose( f ) == EOF ) {
        status = 1;
    }

cleanup:
    for( i = 0; i < n; ++i ) {
        free( strv[i] );
    }
    free( strv );
    return status;
}

No comments: